Fix RuntimeWarning: invalid value encountered in test_calibration.py #19421

Merged: 20 commits, Feb 11, 2021

11 changes: 7 additions & 4 deletions sklearn/calibration.py
@@ -656,10 +656,13 @@ def predict_proba(self, X):
         if n_classes == 2:
             proba[:, 0] = 1. - proba[:, 1]
         else:
-            proba /= np.sum(proba, axis=1)[:, np.newaxis]
-
-        # XXX : for some reason all probas can be 0
-        proba[np.isnan(proba)] = 1. / n_classes
+            denominator = np.sum(proba, axis=1)[:, np.newaxis]
+            # In the edge case where for each class calibrator returns a null
+            # probability for a given sample, use the uniform distribution
+            # instead.
+            uniform_proba = np.full_like(proba, 1 / n_classes)
+            proba = np.divide(proba, denominator, out=uniform_proba,
+                              where=denominator != 0)
 
         # Deal with cases where the predicted probability minimally exceeds 1.0
         proba[(1.0 < proba) & (proba <= 1.0 + 1e-5)] = 1.0
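The replacement above removes the warning at its source: when every per-class calibrator returns a null probability for a sample, np.sum(proba, axis=1) is zero, so the old in-place division computed 0 / 0, emitted a "RuntimeWarning: invalid value encountered" message and left NaNs that had to be patched afterwards. With np.divide, the out argument pre-fills the result with the uniform distribution and the where mask restricts the division to rows whose denominator is non-zero. A minimal standalone sketch of that pattern with made-up numbers (plain NumPy, not part of the PR):

import numpy as np

n_classes = 3
proba = np.array([
    [0.2, 0.3, 0.5],   # ordinary sample: normalized as before
    [0.0, 0.0, 0.0],   # degenerate sample: every calibrator returned 0
])

# Old approach: proba /= proba.sum(axis=1)[:, np.newaxis] divides 0 by 0 on the
# second row, emits the RuntimeWarning and leaves NaNs behind.

# New approach: out pre-fills the result with the uniform distribution and
# where skips rows whose denominator is zero, so no 0 / 0 ever happens.
denominator = np.sum(proba, axis=1)[:, np.newaxis]
uniform_proba = np.full_like(proba, 1 / n_classes)
proba = np.divide(proba, denominator, out=uniform_proba,
                  where=denominator != 0)

print(proba)
# [[0.2        0.3        0.5       ]
#  [0.33333333 0.33333333 0.33333333]]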
26 changes: 25 additions & 1 deletion sklearn/tests/test_calibration.py
@@ -7,6 +7,7 @@
 from scipy import sparse
 
 from sklearn.base import BaseEstimator
+from sklearn.dummy import DummyClassifier
 from sklearn.model_selection import LeaveOneOut, train_test_split
 
 from sklearn.utils._testing import (assert_array_almost_equal,
@@ -26,7 +27,7 @@
 from sklearn.pipeline import Pipeline
 from sklearn.impute import SimpleImputer
 from sklearn.metrics import brier_score_loss
-from sklearn.calibration import CalibratedClassifierCV
+from sklearn.calibration import CalibratedClassifierCV, _CalibratedClassifier
 from sklearn.calibration import _sigmoid_calibration, _SigmoidCalibration
 from sklearn.calibration import calibration_curve
 
@@ -275,6 +276,29 @@ def multiclass_brier(y_true, proba_pred, n_classes):
     assert calibrated_brier < 1.1 * uncalibrated_brier
 
 
+def test_calibration_zero_probability():
+    # Test an edge case where _CalibratedClassifier avoids numerical errors
+    # in the multiclass normalization step if all the calibrators output
+    # are zero all at once for a given sample and instead fallback to uniform
+    # probabilities.
+    class ZeroCalibrator():
+        # This function is called from _CalibratedClassifier.predict_proba.
+        def predict(self, X):
+            return np.zeros(X.shape[0])
+
+    X, y = make_blobs(n_samples=50, n_features=10, random_state=7,
+                      centers=10, cluster_std=15.0)
+    clf = DummyClassifier().fit(X, y)
+    calibrator = ZeroCalibrator()
+    cal_clf = _CalibratedClassifier(
+        base_estimator=clf, calibrators=[calibrator], classes=clf.classes_)
+
+    probas = cal_clf.predict_proba(X)
+
+    # Check that all probabilities are uniformly 1. / clf.n_classes_
+    assert_allclose(probas, 1. / clf.n_classes_)
+
+
 def test_calibration_prefit():
     """Test calibration for prefitted classifiers"""
     n_samples = 50
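The new test drives the fallback directly through the private _CalibratedClassifier by stubbing the calibrator so that every calibrated probability is zero, which makes the normalization denominator vanish for every sample. The sketch below (not part of the PR) mirrors that test and additionally escalates RuntimeWarning to an error to show that predict_proba stays silent once the fix is in; it assumes a scikit-learn build containing this change, since _CalibratedClassifier is private and its constructor may differ in other releases.

import warnings

import numpy as np

from sklearn.calibration import _CalibratedClassifier
from sklearn.datasets import make_blobs
from sklearn.dummy import DummyClassifier


class ZeroCalibrator:
    def predict(self, X):
        # Forces the zero-denominator branch: every calibrated probability is 0.
        return np.zeros(X.shape[0])


X, y = make_blobs(n_samples=50, n_features=10, random_state=7,
                  centers=10, cluster_std=15.0)
clf = DummyClassifier().fit(X, y)
cal_clf = _CalibratedClassifier(
    base_estimator=clf, calibrators=[ZeroCalibrator()], classes=clf.classes_)

with warnings.catch_warnings():
    # Turn any RuntimeWarning into an error: before this PR the normalization
    # step would raise here; after it, the call is silent.
    warnings.simplefilter("error", RuntimeWarning)
    probas = cal_clf.predict_proba(X)

# Every sample falls back to the uniform distribution over the classes.
np.testing.assert_allclose(probas, 1.0 / clf.n_classes_)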