FIX index sample_weight in least_absolute_deviation loss in HistGradientBoosting (#19407)

vadim-ushtanit · Ushtanit · NicolasHug · glemaitre · commit 94ce9b8439d8 · 2021-04-28T09:40:11.000+02:00
Co-authored-by: Vadim Ushtanit <vadim.ushtanit@gmail.com>
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
@@ -12,6 +12,13 @@ Version 0.24.2
 Changelog
 ---------
 
+:mod:`sklearn.ensemble`
+.......................
+
+- |Fix| Fixed a bug in :class:`ensemble.HistGradientBoostingRegressor` `fit`
+  when `sample_weight` is used with the `least_absolute_deviation` loss.
+  :pr:`19407` by :user:`Vadim Ushtanit <vadim-ushtanit>`.
+
 :mod:`sklearn.preprocessing`
 ............................
 
diff --git a/sklearn/ensemble/_hist_gradient_boosting/loss.py b/sklearn/ensemble/_hist_gradient_boosting/loss.py
@@ -261,10 +261,11 @@ def update_leaves_values(self, grower, y_true, raw_predictions,
                 median_res = np.median(y_true[indices]
                                        - raw_predictions[indices])
             else:
-                median_res = _weighted_percentile(y_true[indices]
-                                                  - raw_predictions[indices],
-                                                  sample_weight=sample_weight,
-                                                  percentile=50)
+                median_res = _weighted_percentile(
+                    y_true[indices] - raw_predictions[indices],
+                    sample_weight=sample_weight[indices],
+                    percentile=50
+                )
             leaf.value = grower.shrinkage * median_res
             # Note that the regularization is ignored here
 
diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py
@@ -203,6 +203,20 @@ def test_least_absolute_deviation():
     assert gbdt.score(X, y) > .9
 
 
+def test_least_absolute_deviation_sample_weight():
+    # Non-regression test for issue #19400: fitting
+    # HistGradientBoostingRegressor with the least_absolute_deviation loss
+    # and a sample_weight array must complete without raising. The test has
+    # no assertion: a successful fit() call is the check.
+    rng = np.random.RandomState(0)
+    n_samples = 100
+    X = rng.uniform(-1, 1, size=(n_samples, 2))
+    y = rng.uniform(-1, 1, size=n_samples)
+    sample_weight = rng.uniform(0, 1, size=n_samples)
+    gbdt = HistGradientBoostingRegressor(loss='least_absolute_deviation')
+    gbdt.fit(X, y, sample_weight=sample_weight)
+
+
 @pytest.mark.parametrize('y', [([1., -2., 0.]), ([0., 0., 0.])])
 def test_poisson_y_positive(y):
     # Test that ValueError is raised if either one y_i < 0 or sum(y_i) <= 0.