scikit-learn · thomasjpfan · Mar 12, 2021 · Mar 8, 2021 · Mar 8, 2021 · Mar 8, 2021
diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
@@ -165,6 +165,11 @@ Changelog
   class methods and will be removed in 1.2.
   :pr:`18543` by `Guillaume Lemaitre`_.
 
+- |Enhancement| :func:`metrics.hinge_loss` now raises an error when
+  ``pred_decision`` is 1d but the target is multiclass, or when the shape of
+  ``pred_decision`` is not consistent with the ``labels`` parameter.
+  :pr:`19643` by :user:`Pierre Attard <PierreAttard>`.
+
 - |Feature| :func:`metrics.mean_pinball_loss` exposes the pinball loss for
   quantile regression. :pr:`19415` by :user:`Xavier Dupré <sdpython>`
   and :user:`Oliver Grisel <ogrisel>`.

diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
@@ -2370,11 +2370,29 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None):
     pred_decision = check_array(pred_decision, ensure_2d=False)
     y_true = column_or_1d(y_true)
     y_true_unique = np.unique(labels if labels is not None else y_true)
+
     if y_true_unique.size > 2:
-        if (labels is None and pred_decision.ndim > 1 and
-                (np.size(y_true_unique) != pred_decision.shape[1])):
-            raise ValueError("Please include all labels in y_true "
-                             "or pass labels as third argument")
+
+        if pred_decision.ndim <= 1:
+            raise ValueError("The shape of pred_decision cannot be 1d array "
+                             "with a multiclass target. pred_decision shape "
+                             "must be (n_samples, n_classes), that is "
+                             f"({y_true.shape[0]}, {y_true_unique.size})."
+                             f" Got: {pred_decision.shape}")
+
+        # pred_decision.ndim > 1 is true
+        if y_true_unique.size != pred_decision.shape[1]:
+            if labels is None:
+                raise ValueError("Please include all labels in y_true "
+                                 "or pass labels as third argument")
+            else:
+                raise ValueError("The shape of pred_decision is not "
+                                 "consistent with the number of classes. "
+                                 "With a multiclass target, pred_decision "
+                                 "shape must be "
+                                 "(n_samples, n_classes), that is "
+                                 f"({y_true.shape[0]}, {y_true_unique.size}). "
+                                 f"Got: {pred_decision.shape}")
         if labels is None:
             labels = y_true_unique
         le = LabelEncoder()

diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
@@ -4,6 +4,7 @@
 from itertools import chain
 from itertools import permutations
 import warnings
+import re
 
 import numpy as np
 from scipy import linalg
@@ -2135,6 +2136,31 @@ def test_hinge_loss_multiclass_missing_labels_with_labels_none():
         hinge_loss(y_true, pred_decision)
 
 
+def test_hinge_loss_multiclass_no_consistent_pred_decision_shape():
+    # test for inconsistency between multiclass problem and pred_decision
+    # argument
+    y_true = np.array([2, 1, 0, 1, 0, 1, 1])
+    pred_decision = np.array([0, 1, 2, 1, 0, 2, 1])
+    error_message = ("The shape of pred_decision cannot be 1d array "
+                     "with a multiclass target. pred_decision shape "
+                     "must be (n_samples, n_classes), that is "
+                     "(7, 3). Got: (7,)")
+    with pytest.raises(ValueError, match=re.escape(error_message)):
+        hinge_loss(y_true=y_true, pred_decision=pred_decision)
+
+    # test for inconsistency between pred_decision shape and labels number
+    pred_decision = np.array([[0, 1], [0, 1], [0, 1], [0, 1],
+                              [2, 0], [0, 1], [1, 0]])
+    labels = [0, 1, 2]
+    error_message = ("The shape of pred_decision is not "
+                     "consistent with the number of classes. "
+                     "With a multiclass target, pred_decision "
+                     "shape must be (n_samples, n_classes), that is "
+                     "(7, 3). Got: (7, 2)")
+    with pytest.raises(ValueError, match=re.escape(error_message)):
+        hinge_loss(y_true=y_true, pred_decision=pred_decision, labels=labels)
+
+
 def test_hinge_loss_multiclass_with_missing_labels():
     pred_decision = np.array([
         [+0.36, -0.17, -0.58, -0.99],