
Sample_weight isn't overwritten anymore in logistic_regression #18480

Closed · wants to merge 2 commits
doc/whats_new/v0.24.rst (3 additions, 0 deletions)

@@ -341,6 +341,9 @@ Changelog
  efficient leave-one-out cross-validation scheme ``cv=None``. :pr:`6624` by
  :user:`Marijn van Vliet <wmvanvliet>`.

- |Fix|: Fixed a bug in :class:`linear_model.LogisticRegression`: the
  ``sample_weight`` array passed to ``fit`` is no longer modified in place.
  :pr:`18480` by :user:`Bart Van Dosselaer <Ansur>`.

:mod:`sklearn.manifold`
.......................
sklearn/linear_model/_logistic.py (2 additions, 2 deletions)

@@ -665,7 +665,7 @@ def _logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
    if isinstance(class_weight, dict) or multi_class == 'multinomial':
        class_weight_ = compute_class_weight(class_weight,
                                             classes=classes, y=y)
-        sample_weight *= class_weight_[le.fit_transform(y)]
+        sample_weight = sample_weight * class_weight_[le.fit_transform(y)]
Review comment (maintainer):
@rth instead of doing this, would it be better to have copy=False/True within _check_sample_weight signature?
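
A sketch of what that alternative might look like. The copy flag is the reviewer's proposal, not the helper's actual signature at the time, and this is a simplified stand-in for sklearn.utils.validation._check_sample_weight, not scikit-learn's implementation:

    import numbers
    import numpy as np

    def _check_sample_weight(sample_weight, X, dtype=None, copy=False):
        # Simplified stand-in; `copy` is the reviewer's hypothetical flag.
        n_samples = X.shape[0]
        if dtype is None:
            dtype = np.float64
        if sample_weight is None:
            sample_weight = np.ones(n_samples, dtype=dtype)
        elif isinstance(sample_weight, numbers.Number):
            sample_weight = np.full(n_samples, sample_weight, dtype=dtype)
        else:
            sample_weight = np.asarray(sample_weight, dtype=dtype)
            if copy:
                # A fresh buffer: callers could then keep in-place ops like
                # `sample_weight *= ...` without touching the user's array.
                sample_weight = sample_weight.copy()
        return sample_weight

With copy=True the call sites could keep the in-place *= safely, at the cost of an extra allocation on every fit.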


    # For doing an ovr, we need to mask the labels first. For the
    # multinomial case this is not necessary.
@@ -681,7 +681,7 @@ def _logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
            class_weight_ = compute_class_weight(class_weight,
                                                 classes=mask_classes,
                                                 y=y_bin)
-            sample_weight *= class_weight_[le.fit_transform(y_bin)]
+            sample_weight = sample_weight * class_weight_[le.fit_transform(y_bin)]

    else:
        if solver not in ['sag', 'saga']:
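
For context on why this one-character change matters: *= writes into the existing buffer, so the array the user passed as sample_weight was silently scaled by the class weights, whereas a = a * b allocates a new array and leaves the caller's one untouched. A minimal plain-NumPy illustration (not scikit-learn code):

    import numpy as np

    w = np.ones(3)          # the user's array, as passed to fit()
    alias = w               # what _logistic_regression_path received

    alias *= 2.0            # in-place multiply: writes into the shared buffer
    print(w)                # [2. 2. 2.] -- the user's array was mutated

    w = np.ones(3)
    alias = w
    alias = alias * 2.0     # allocates a new array, rebinds the local name
    print(w)                # [1. 1. 1.] -- the user's array is untouched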
sklearn/linear_model/tests/test_logistic.py (19 additions, 0 deletions)

@@ -1865,3 +1865,22 @@ def test_multinomial_identifiability_on_iris(fit_intercept):
    assert_allclose(clf.coef_.sum(axis=0), 0, atol=1e-10)
    if fit_intercept:
        assert clf.intercept_.sum(axis=0) == pytest.approx(0, abs=1e-15)


@pytest.mark.parametrize("multi_class", {'ovr', 'multinomial', 'auto'})
def test_sample_weight_not_modified(multi_class):
    X, y = load_iris(return_X_y=True)
    np.random.seed(1234)
    W = np.random.random(len(X)) * 10.0

    for weight in [{0: 1.0, 1: 10.0, 2: 1.0}]:
        for class_weight in (weight, 'balanced'):
            expected = W.sum()

            clf = LogisticRegression(random_state=0,
                                     class_weight=class_weight,
                                     max_iter=200,
                                     multi_class=multi_class)
Review comment (maintainer) on lines +1870 to +1883:
We can take advantage of pytest.parametrize:

@pytest.mark.parametrize("multi_class", {'ovr', 'multinomial', 'auto'})
@pytest.mark.parametrize("class_weight", [
    {0: 1.0, 1: 10.0, 2: 1.0}, 'balanced'
])
def test_sample_weight_not_modified(multi_class, class_weight):
    X, y = load_iris(return_X_y=True)
    n_features = len(X)
    W = np.ones(n_features)
    W[:n_features // 2] = 2

    expected = W.sum()
	...

            clf.fit(X, y, sample_weight=W)
            actual = W.sum()
            assert expected == actual, 'Sum of weight before ({}) should be the same as sum of weight after ({})'.format(expected, actual)
Review comment (maintainer):

We need to keep lines shorter than 80 characters, and also to avoid exact float comparisons.

Suggested change
            assert expected == actual, 'Sum of weight before ({}) should be the same as sum of weight after ({})'.format(expected, actual)
            msg = (
                f'Sum of weight before ({expected}) should be the same as '
                f'sum of weight after ({actual})'
            )
            assert_allclose(expected, actual, err_msg=msg)
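
Putting the two suggestions together, the finished test could look something like the sketch below (not necessarily the version that was merged; n_samples replaces the suggestion's n_features, since len(X) counts samples, and the multi_class values are given as a list for deterministic test IDs):

    import numpy as np
    import pytest
    from numpy.testing import assert_allclose
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression


    @pytest.mark.parametrize("multi_class", ['ovr', 'multinomial', 'auto'])
    @pytest.mark.parametrize("class_weight",
                             [{0: 1.0, 1: 10.0, 2: 1.0}, 'balanced'])
    def test_sample_weight_not_modified(multi_class, class_weight):
        X, y = load_iris(return_X_y=True)
        n_samples = len(X)
        W = np.ones(n_samples)
        W[:n_samples // 2] = 2

        # Record the sum before fitting; it must be unchanged afterwards.
        expected = W.sum()

        clf = LogisticRegression(random_state=0,
                                 class_weight=class_weight,
                                 max_iter=200,
                                 multi_class=multi_class)
        clf.fit(X, y, sample_weight=W)
        actual = W.sum()
        msg = (f'Sum of weight before ({expected}) should be the same as '
               f'sum of weight after ({actual})')
        assert_allclose(expected, actual, err_msg=msg)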