sdpython · sdpython · Jan 29, 2024 · Jan 27, 2024 · Jan 27, 2024 · Jan 28, 2024
diff --git a/_doc/examples/plot_constraint_kmeans.py b/_doc/examples/plot_constraint_kmeans.py
@@ -10,6 +10,7 @@
 Data
 ====
 """
+
 from collections import Counter
 
 import matplotlib.pyplot as plt

diff --git a/_doc/examples/plot_decision_tree_logreg.py b/_doc/examples/plot_decision_tree_logreg.py
@@ -13,6 +13,7 @@
 models on the `Iris
 dataset <https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html>`_.
 """
+
 import numpy
 from scipy.spatial.distance import cdist
 import matplotlib.pyplot as plt

diff --git a/_doc/examples/plot_digitize.py b/_doc/examples/plot_digitize.py
@@ -13,6 +13,7 @@
 Simple example
 ==============
 """
+
 import numpy
 import matplotlib.pyplot as plt
 from onnxruntime import InferenceSession

diff --git a/_doc/examples/plot_kmeans_l1.py b/_doc/examples/plot_kmeans_l1.py
@@ -8,7 +8,6 @@
 algorithm.
 """
 
-
 import matplotlib.pyplot as plt
 import numpy
 import numpy.random as rnd

diff --git a/_doc/examples/plot_leave_neighbors.py b/_doc/examples/plot_leave_neighbors.py
@@ -7,7 +7,6 @@
 the predict the same value (or class). Do they share a border?
 """
 
-
 ##############################
 # A simple tree
 # +++++++++++++

diff --git a/_doc/examples/plot_logistic_regression_clustering.py b/_doc/examples/plot_logistic_regression_clustering.py
@@ -13,7 +13,6 @@
 -------------------------------
 """
 
-
 import numpy
 import pandas
 import matplotlib.pyplot as plt

diff --git a/_doc/examples/plot_piecewise_linear_regression_criterion.py b/_doc/examples/plot_piecewise_linear_regression_criterion.py
@@ -17,7 +17,6 @@
 Let's build a toy problem based on two linear models.
 """
 
-
 import matplotlib.pyplot as plt
 import numpy
 import numpy.random as npr
@@ -154,7 +153,7 @@
 #
 #    ctypedef double float64_t
 #
-#    cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
+#    cdef void _mean(self, intp_t start, intp_t end, float64_t *mean,
 #                    float64_t *weight) nogil:
 #        if start == end:
 #            mean[0] = 0.
@@ -168,7 +167,7 @@
 #        weight[0] = w
 #        mean[0] = 0. if w == 0. else m / w
 #
-#    cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
+#    cdef float64_t _mse(self, intp_t start, intp_t end, float64_t mean,
 #                     float64_t weight) nogil:
 #        if start == end:
 #            return 0.
@@ -193,7 +192,7 @@
 #
 #    ctypedef double float64_t
 #
-#    cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
+#    cdef void _mean(self, intp_t start, intp_t end, float64_t *mean,
 #                    float64_t *weight) nogil:
 #        if start == end:
 #            mean[0] = 0.
@@ -205,7 +204,7 @@
 #        weight[0] = w
 #        mean[0] = 0. if w == 0. else m / w
 #
-#    cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
+#    cdef float64_t _mse(self, intp_t start, intp_t end, float64_t mean,
 #                        float64_t weight) nogil:
 #        if start == end:
 #            return 0.

diff --git a/_doc/examples/plot_quantile_mlpregression.py b/_doc/examples/plot_quantile_mlpregression.py
@@ -11,7 +11,6 @@
 We first generate some dummy data.
 """
 
-
 import numpy
 from pandas import DataFrame
 import matplotlib.pyplot as plt

diff --git a/_doc/examples/plot_traceable_ngrams_tfidf.py b/_doc/examples/plot_traceable_ngrams_tfidf.py
@@ -15,7 +15,6 @@
 ~~~~~~~~~~~~~~~~~~~~
 """
 
-
 import numpy
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from mlinsights.mlmodel.sklearn_text import (

diff --git a/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_linear.py b/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_linear.py
@@ -140,9 +140,9 @@ def test_criterions_check_value(self):
         X = numpy.array([[10.0, 12.0, 13.0]]).T
         y = numpy.array([[20.0, 22.0, 23.0]]).T
         c2 = LinearRegressorCriterion.create(X, y)
-        coef = numpy.empty((3,))
+        coef = numpy.empty((3,), dtype=X.dtype)
         c2.node_beta(coef)
-        self.assertEqual(coef[:2], numpy.array([1, 10]))
+        self.assertEqualArray(coef[:2], numpy.array([1, 10], dtype=X.dtype), atol=1e-8)
 
     @unittest.skipIf(
         pv.Version(skl_ver) < pv.Version("1.3.3"),

diff --git a/_unittests/ut_plotting/test_dot.py b/_unittests/ut_plotting/test_dot.py
@@ -346,7 +346,7 @@ def test_pipeline_passthrough(self):
 
         # numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
         categorical_transformer = Pipeline(
-            [("onehot", OneHotEncoder(sparse=False, handle_unknown="ignore"))]
+            [("onehot", OneHotEncoder(sparse_output=False, handle_unknown="ignore"))]
         )
         preprocessor = ColumnTransformer(
             transformers=[("cat", categorical_transformer, cat_cols)],

diff --git a/mlinsights/helpers/pipeline.py b/mlinsights/helpers/pipeline.py
@@ -105,9 +105,9 @@ def __init__(self, model):
             )
         if hasattr(model, "decision_function") and callable(model.decision_function):
             model._debug_decision_function = model.decision_function
-            self.methods[
-                "decision_function"
-            ] = lambda model, X: model._debug_decision_function(X)
+            self.methods["decision_function"] = (
+                lambda model, X: model._debug_decision_function(X)
+            )
 
     def __repr__(self):
         """

diff --git a/mlinsights/mlmodel/_piecewise_tree_regression_common.pxd b/mlinsights/mlmodel/_piecewise_tree_regression_common.pxd
@@ -6,20 +6,22 @@ cimport numpy as cnp
 cnp.import_array()
 
 from sklearn.tree._criterion cimport Criterion
+from sklearn.utils._typedefs cimport intp_t, float64_t
 
-ctypedef double float64_t
-ctypedef cnp.npy_intp SIZE_t
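+# float64_t and intp_t are cimported from sklearn.utils._typedefs (see above)
+# instead of being declared locally (formerly ``ctypedef double float64_t`` and
+# ``ctypedef cnp.npy_intp SIZE_t``; a ``Py_sintp_t`` ctypedef was also considered).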
 
 
 cdef class CommonRegressorCriterion(Criterion):
 
-    cdef void _update_weights(self, SIZE_t start, SIZE_t end,
-                              SIZE_t old_pos, SIZE_t new_pos) noexcept nogil
+    cdef void _update_weights(self, intp_t start, intp_t end,
+                              intp_t old_pos, intp_t new_pos) noexcept nogil
 
-    cdef void _mean(self, SIZE_t start, SIZE_t end,
+    cdef void _mean(self, intp_t start, intp_t end,
                     float64_t *mean, float64_t *weight) noexcept nogil
 
-    cdef float64_t _mse(self, SIZE_t start, SIZE_t end,
+    cdef float64_t _mse(self, intp_t start, intp_t end,
                         float64_t mean, float64_t weight) noexcept nogil
 
     cdef void children_impurity_weights(self, float64_t* impurity_left,

diff --git a/mlinsights/mlmodel/_piecewise_tree_regression_common.pyx b/mlinsights/mlmodel/_piecewise_tree_regression_common.pyx
@@ -39,8 +39,8 @@ cdef class CommonRegressorCriterion(Criterion):
         inst = self.__class__(self.n_outputs, self.n_samples)
         return inst
 
-    cdef void _update_weights(self, SIZE_t start, SIZE_t end,
-                              SIZE_t old_pos, SIZE_t new_pos) noexcept nogil:
+    cdef void _update_weights(self, intp_t start, intp_t end,
+                              intp_t old_pos, intp_t new_pos) noexcept nogil:
         """
         Updates members `weighted_n_right` and `weighted_n_left`
         when `pos` changes. This method should be overloaded.
@@ -63,27 +63,27 @@ cdef class CommonRegressorCriterion(Criterion):
         self._update_weights(self.start, self.end, self.pos, self.end)
         self.pos = self.end
 
-    cdef int update(self, SIZE_t new_pos) except -1 nogil:
+    cdef int update(self, intp_t new_pos) except -1 nogil:
         """
         Updates statistics by moving ``samples[pos:new_pos]`` to the left child.
         This updates the collected statistics by moving ``samples[pos:new_pos]``
         from the right child to the left child. It must be implemented by
         the subclass.
 
-        :param new_pos: SIZE_t
+        :param new_pos: intp_t
             New starting index position of the samples in the right child
         """
         self._update_weights(self.start, self.end, self.pos, new_pos)
         self.pos = new_pos
 
-    cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
+    cdef void _mean(self, intp_t start, intp_t end, float64_t *mean,
                     float64_t *weight) noexcept nogil:
         """
         Computes the mean of *y* between *start* and *end*.
         """
         pass
 
-    cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
+    cdef float64_t _mse(self, intp_t start, intp_t end, float64_t mean,
                         float64_t weight) noexcept nogil:
         """
         Computes mean square error between *start* and *end*
@@ -219,8 +219,8 @@ cdef int _ctest_criterion_init(Criterion criterion,
                                const float64_t[:, ::1] y,
                                float64_t[:] sample_weight,
                                float64_t weighted_n_samples,
-                               SIZE_t[:] samples,
-                               SIZE_t start, SIZE_t end):
+                               const intp_t[:] samples,
+                               intp_t start, intp_t end):
     "Test purposes. Methods cannot be directly called from python."
     cdef const float64_t[:, ::1] y2 = y
     return criterion.init(y2, sample_weight, weighted_n_samples, samples, start, end)
@@ -230,8 +230,8 @@ def _test_criterion_init(Criterion criterion,
                          const float64_t[:, ::1] y,
                          float64_t[:] sample_weight,
                          float64_t weighted_n_samples,
-                         SIZE_t[:] samples,
-                         SIZE_t start, SIZE_t end):
+                         const intp_t[:] samples,
+                         intp_t start, intp_t end):
     "Test purposes. Methods cannot be directly called from python."
     if _ctest_criterion_init(criterion, y, sample_weight, weighted_n_samples,
                              samples, start, end) != 0:
@@ -300,7 +300,7 @@ def _test_criterion_node_value(Criterion criterion):
     return value
 
 
-def _test_criterion_update(Criterion criterion, SIZE_t new_pos):
+def _test_criterion_update(Criterion criterion, intp_t new_pos):
     "Test purposes. Methods cannot be directly called from python."
     return criterion.update(new_pos)
 

diff --git a/mlinsights/mlmodel/piecewise_tree_regression.py b/mlinsights/mlmodel/piecewise_tree_regression.py
@@ -159,7 +159,6 @@ def _fit_reglin(self, X, y, sample_weight):
             # Fatal Python error: __pyx_fatalerror: Acquisition count is 0 (line 26868)
             dec = LinearRegressorCriterion.create(xs, ys, ws)
             dec.node_beta(self.betas_[i, :])
-        print("end")
 
     def predict(self, X, check_input=True):
         """

diff --git a/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx b/mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx
@@ -6,8 +6,8 @@ cnp.import_array()
 
 from ._piecewise_tree_regression_common cimport (
     CommonRegressorCriterion,
-    SIZE_t,
     float64_t,
+    intp_t,
 )
 
 
@@ -32,7 +32,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
     """
     cdef float64_t* sample_w
     cdef float64_t* sample_wy
-    cdef SIZE_t* sample_i
+    cdef intp_t* sample_i
     cdef float64_t sample_sum_wy
     cdef float64_t sample_sum_w
 
@@ -51,7 +51,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
     def __setstate__(self, d):
         pass
 
-    def __cinit__(self, SIZE_t n_outputs, SIZE_t n_samples):
+    def __cinit__(self, intp_t n_outputs, intp_t n_samples):
         self.n_outputs = n_outputs
         self.n_samples = n_samples
 
@@ -70,7 +70,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
         if self.sample_wy == NULL:
             self.sample_wy = <float64_t*> calloc(n_samples, sizeof(float64_t))
         if self.sample_i == NULL:
-            self.sample_i = <SIZE_t*> calloc(n_samples, sizeof(SIZE_t))
+            self.sample_i = <intp_t*> calloc(n_samples, sizeof(intp_t))
 
     def __str__(self):
         "usual"
@@ -83,8 +83,8 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
     cdef int init(self, const float64_t[:, ::1] y,
                   const float64_t[:] sample_weight,
                   float64_t weighted_n_samples,
-                  const SIZE_t[:] sample_indices,
-                  SIZE_t start, SIZE_t end) except -1 nogil:
+                  const intp_t[:] sample_indices,
+                  intp_t start, intp_t end) except -1 nogil:
         """
         This function is overwritten to check *y* and *X* size are the same.
         """
@@ -115,8 +115,8 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
                          const float64_t[:, ::1] y,
                          const float64_t[:] sample_weight,
                          float64_t weighted_n_samples,
-                         const SIZE_t[:] sample_indices,
-                         SIZE_t start, SIZE_t end) except -1 nogil:
+                         const intp_t[:] sample_indices,
+                         intp_t start, intp_t end) except -1 nogil:
         """
         Initializes the criterion.
 
@@ -129,13 +129,13 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
         :param samples: array-like, dtype=float64_t
             Indices of the samples in X and y, where samples[start:end]
             correspond to the samples in this node
-        :param start: SIZE_t
+        :param start: intp_t
             The first sample to be used on this node
-        :param end: SIZE_t
+        :param end: intp_t
             The last sample used on this node
         :return: 0 if everything is fine
         """
-        cdef SIZE_t ki, ks
+        cdef intp_t ki, ks
         self.start = start
         self.pos = start
         self.end = end
@@ -165,8 +165,8 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
         self.weighted_n_node_samples = self.sample_sum_w
         return self.reset()
 
-    cdef void _update_weights(self, SIZE_t start, SIZE_t end, SIZE_t old_pos,
-                              SIZE_t new_pos) noexcept nogil:
+    cdef void _update_weights(self, intp_t start, intp_t end, intp_t old_pos,
+                              intp_t new_pos) noexcept nogil:
         """
         Updates members `weighted_n_right` and `weighted_n_left`
         when `pos` changes.
@@ -178,7 +178,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
         for k in range(new_pos, end):
             self.weighted_n_right += self.sample_w[k]
 
-    cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
+    cdef void _mean(self, intp_t start, intp_t end, float64_t *mean,
                     float64_t *weight) noexcept nogil:
         """
         Computes the mean of *y* between *start* and *end*.
@@ -196,7 +196,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
         mean[0] = 0. if w == 0. else m / w
 
     @cython.boundscheck(False)
-    cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
+    cdef float64_t _mse(self, intp_t start, intp_t end, float64_t mean,
                         float64_t weight) noexcept nogil:
         """
         Computes mean square error between *start* and *end*
-Original file line number
+Diff line change
@@ Expand Up / @@ -10,6 +10,7 @@ @@
     Data
     ====
     """
     from collections import Counter
     import matplotlib.pyplot as plt
@@ Expand Down @@