Skip to content

Update style and fix compilation issues with the latest numpy/scikit-learn #128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions _doc/examples/plot_constraint_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Data
====
"""

from collections import Counter

import matplotlib.pyplot as plt
Expand Down
1 change: 1 addition & 0 deletions _doc/examples/plot_decision_tree_logreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
models on the `Iris
dataset <https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html>`_.
"""

import numpy
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
Expand Down
1 change: 1 addition & 0 deletions _doc/examples/plot_digitize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
Simple example
==============
"""

import numpy
import matplotlib.pyplot as plt
from onnxruntime import InferenceSession
Expand Down
1 change: 0 additions & 1 deletion _doc/examples/plot_kmeans_l1.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
algorithm.
"""


import matplotlib.pyplot as plt
import numpy
import numpy.random as rnd
Expand Down
1 change: 0 additions & 1 deletion _doc/examples/plot_leave_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
they predict the same value (or class). Do they share a border?
"""


##############################
# A simple tree
# +++++++++++++
Expand Down
1 change: 0 additions & 1 deletion _doc/examples/plot_logistic_regression_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
-------------------------------
"""


import numpy
import pandas
import matplotlib.pyplot as plt
Expand Down
9 changes: 4 additions & 5 deletions _doc/examples/plot_piecewise_linear_regression_criterion.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
Let's build a toy problem based on two linear models.
"""


import matplotlib.pyplot as plt
import numpy
import numpy.random as npr
Expand Down Expand Up @@ -154,7 +153,7 @@
#
# ctypedef double float64_t
#
# cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
# cdef void _mean(self, intp_t start, intp_t end, float64_t *mean,
# float64_t *weight) nogil:
# if start == end:
# mean[0] = 0.
Expand All @@ -168,7 +167,7 @@
# weight[0] = w
# mean[0] = 0. if w == 0. else m / w
#
# cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
# cdef float64_t _mse(self, intp_t start, intp_t end, float64_t mean,
# float64_t weight) nogil:
# if start == end:
# return 0.
Expand All @@ -193,7 +192,7 @@
#
# ctypedef double float64_t
#
# cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
# cdef void _mean(self, intp_t start, intp_t end, float64_t *mean,
# float64_t *weight) nogil:
# if start == end:
# mean[0] = 0.
Expand All @@ -205,7 +204,7 @@
# weight[0] = w
# mean[0] = 0. if w == 0. else m / w
#
# cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
# cdef float64_t _mse(self, intp_t start, intp_t end, float64_t mean,
# float64_t weight) nogil:
# if start == end:
# return 0.
Expand Down
1 change: 0 additions & 1 deletion _doc/examples/plot_quantile_mlpregression.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
We first generate some dummy data.
"""


import numpy
from pandas import DataFrame
import matplotlib.pyplot as plt
Expand Down
1 change: 0 additions & 1 deletion _doc/examples/plot_traceable_ngrams_tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
~~~~~~~~~~~~~~~~~~~~
"""


import numpy
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from mlinsights.mlmodel.sklearn_text import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,9 @@ def test_criterions_check_value(self):
X = numpy.array([[10.0, 12.0, 13.0]]).T
y = numpy.array([[20.0, 22.0, 23.0]]).T
c2 = LinearRegressorCriterion.create(X, y)
coef = numpy.empty((3,))
coef = numpy.empty((3,), dtype=X.dtype)
c2.node_beta(coef)
self.assertEqual(coef[:2], numpy.array([1, 10]))
self.assertEqualArray(coef[:2], numpy.array([1, 10], dtype=X.dtype), atol=1e-8)

@unittest.skipIf(
pv.Version(skl_ver) < pv.Version("1.3.3"),
Expand Down
2 changes: 1 addition & 1 deletion _unittests/ut_plotting/test_dot.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def test_pipeline_passthrough(self):

# numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(
[("onehot", OneHotEncoder(sparse=False, handle_unknown="ignore"))]
[("onehot", OneHotEncoder(sparse_output=False, handle_unknown="ignore"))]
)
preprocessor = ColumnTransformer(
transformers=[("cat", categorical_transformer, cat_cols)],
Expand Down
6 changes: 3 additions & 3 deletions mlinsights/helpers/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ def __init__(self, model):
)
if hasattr(model, "decision_function") and callable(model.decision_function):
model._debug_decision_function = model.decision_function
self.methods[
"decision_function"
] = lambda model, X: model._debug_decision_function(X)
self.methods["decision_function"] = (
lambda model, X: model._debug_decision_function(X)
)

def __repr__(self):
"""
Expand Down
14 changes: 8 additions & 6 deletions mlinsights/mlmodel/_piecewise_tree_regression_common.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,22 @@ cimport numpy as cnp
cnp.import_array()

from sklearn.tree._criterion cimport Criterion
from sklearn.utils._typedefs cimport intp_t, float64_t

ctypedef double float64_t
ctypedef cnp.npy_intp SIZE_t
# ctypedef double float64_t
# ctypedef cnp.npy_intp intp_t
# ctypedef Py_sintp_t intp_t


cdef class CommonRegressorCriterion(Criterion):

cdef void _update_weights(self, SIZE_t start, SIZE_t end,
SIZE_t old_pos, SIZE_t new_pos) noexcept nogil
cdef void _update_weights(self, intp_t start, intp_t end,
intp_t old_pos, intp_t new_pos) noexcept nogil

cdef void _mean(self, SIZE_t start, SIZE_t end,
cdef void _mean(self, intp_t start, intp_t end,
float64_t *mean, float64_t *weight) noexcept nogil

cdef float64_t _mse(self, SIZE_t start, SIZE_t end,
cdef float64_t _mse(self, intp_t start, intp_t end,
float64_t mean, float64_t weight) noexcept nogil

cdef void children_impurity_weights(self, float64_t* impurity_left,
Expand Down
22 changes: 11 additions & 11 deletions mlinsights/mlmodel/_piecewise_tree_regression_common.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ cdef class CommonRegressorCriterion(Criterion):
inst = self.__class__(self.n_outputs, self.n_samples)
return inst

cdef void _update_weights(self, SIZE_t start, SIZE_t end,
SIZE_t old_pos, SIZE_t new_pos) noexcept nogil:
cdef void _update_weights(self, intp_t start, intp_t end,
intp_t old_pos, intp_t new_pos) noexcept nogil:
"""
Updates members `weighted_n_right` and `weighted_n_left`
when `pos` changes. This method should be overridden by subclasses.
Expand All @@ -63,27 +63,27 @@ cdef class CommonRegressorCriterion(Criterion):
self._update_weights(self.start, self.end, self.pos, self.end)
self.pos = self.end

cdef int update(self, SIZE_t new_pos) except -1 nogil:
cdef int update(self, intp_t new_pos) except -1 nogil:
"""
Updates statistics by moving ``samples[pos:new_pos]`` to the left child.
This updates the collected statistics by moving ``samples[pos:new_pos]``
from the right child to the left child. It must be implemented by
the subclass.

:param new_pos: SIZE_t
:param new_pos: intp_t
New starting index position of the samples in the right child
"""
self._update_weights(self.start, self.end, self.pos, new_pos)
self.pos = new_pos

cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
cdef void _mean(self, intp_t start, intp_t end, float64_t *mean,
float64_t *weight) noexcept nogil:
"""
Computes the mean of *y* between *start* and *end*.
"""
pass

cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
cdef float64_t _mse(self, intp_t start, intp_t end, float64_t mean,
float64_t weight) noexcept nogil:
"""
Computes mean square error between *start* and *end*
Expand Down Expand Up @@ -219,8 +219,8 @@ cdef int _ctest_criterion_init(Criterion criterion,
const float64_t[:, ::1] y,
float64_t[:] sample_weight,
float64_t weighted_n_samples,
SIZE_t[:] samples,
SIZE_t start, SIZE_t end):
const intp_t[:] samples,
intp_t start, intp_t end):
"Test purposes. Methods cannot be directly called from python."
cdef const float64_t[:, ::1] y2 = y
return criterion.init(y2, sample_weight, weighted_n_samples, samples, start, end)
Expand All @@ -230,8 +230,8 @@ def _test_criterion_init(Criterion criterion,
const float64_t[:, ::1] y,
float64_t[:] sample_weight,
float64_t weighted_n_samples,
SIZE_t[:] samples,
SIZE_t start, SIZE_t end):
const intp_t[:] samples,
intp_t start, intp_t end):
"Test purposes. Methods cannot be directly called from python."
if _ctest_criterion_init(criterion, y, sample_weight, weighted_n_samples,
samples, start, end) != 0:
Expand Down Expand Up @@ -300,7 +300,7 @@ def _test_criterion_node_value(Criterion criterion):
return value


def _test_criterion_update(Criterion criterion, SIZE_t new_pos):
def _test_criterion_update(Criterion criterion, intp_t new_pos):
"Test purposes. Methods cannot be directly called from python."
return criterion.update(new_pos)

Expand Down
1 change: 0 additions & 1 deletion mlinsights/mlmodel/piecewise_tree_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,6 @@ def _fit_reglin(self, X, y, sample_weight):
# Fatal Python error: __pyx_fatalerror: Acquisition count is 0 (line 26868)
dec = LinearRegressorCriterion.create(xs, ys, ws)
dec.node_beta(self.betas_[i, :])
print("end")

def predict(self, X, check_input=True):
"""
Expand Down
30 changes: 15 additions & 15 deletions mlinsights/mlmodel/piecewise_tree_regression_criterion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ cnp.import_array()

from ._piecewise_tree_regression_common cimport (
CommonRegressorCriterion,
SIZE_t,
float64_t,
intp_t,
)


Expand All @@ -32,7 +32,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
"""
cdef float64_t* sample_w
cdef float64_t* sample_wy
cdef SIZE_t* sample_i
cdef intp_t* sample_i
cdef float64_t sample_sum_wy
cdef float64_t sample_sum_w

Expand All @@ -51,7 +51,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
def __setstate__(self, d):
pass

def __cinit__(self, SIZE_t n_outputs, SIZE_t n_samples):
def __cinit__(self, intp_t n_outputs, intp_t n_samples):
self.n_outputs = n_outputs
self.n_samples = n_samples

Expand All @@ -70,7 +70,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
if self.sample_wy == NULL:
self.sample_wy = <float64_t*> calloc(n_samples, sizeof(float64_t))
if self.sample_i == NULL:
self.sample_i = <SIZE_t*> calloc(n_samples, sizeof(SIZE_t))
self.sample_i = <intp_t*> calloc(n_samples, sizeof(intp_t))

def __str__(self):
"usual"
Expand All @@ -83,8 +83,8 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
cdef int init(self, const float64_t[:, ::1] y,
const float64_t[:] sample_weight,
float64_t weighted_n_samples,
const SIZE_t[:] sample_indices,
SIZE_t start, SIZE_t end) except -1 nogil:
const intp_t[:] sample_indices,
intp_t start, intp_t end) except -1 nogil:
"""
This function is overridden to check that *y* and *X* have the same size.
"""
Expand Down Expand Up @@ -115,8 +115,8 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
const float64_t[:, ::1] y,
const float64_t[:] sample_weight,
float64_t weighted_n_samples,
const SIZE_t[:] sample_indices,
SIZE_t start, SIZE_t end) except -1 nogil:
const intp_t[:] sample_indices,
intp_t start, intp_t end) except -1 nogil:
"""
Initializes the criterion.

Expand All @@ -129,13 +129,13 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
:param sample_indices: array-like, dtype=intp_t
Indices of the samples in X and y, where sample_indices[start:end]
correspond to the samples in this node
:param start: SIZE_t
:param start: intp_t
The first sample to be used on this node
:param end: SIZE_t
:param end: intp_t
The last sample used on this node
:return: 0 if everything is fine
"""
cdef SIZE_t ki, ks
cdef intp_t ki, ks
self.start = start
self.pos = start
self.end = end
Expand Down Expand Up @@ -165,8 +165,8 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
self.weighted_n_node_samples = self.sample_sum_w
return self.reset()

cdef void _update_weights(self, SIZE_t start, SIZE_t end, SIZE_t old_pos,
SIZE_t new_pos) noexcept nogil:
cdef void _update_weights(self, intp_t start, intp_t end, intp_t old_pos,
intp_t new_pos) noexcept nogil:
"""
Updates members `weighted_n_right` and `weighted_n_left`
when `pos` changes.
Expand All @@ -178,7 +178,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
for k in range(new_pos, end):
self.weighted_n_right += self.sample_w[k]

cdef void _mean(self, SIZE_t start, SIZE_t end, float64_t *mean,
cdef void _mean(self, intp_t start, intp_t end, float64_t *mean,
float64_t *weight) noexcept nogil:
"""
Computes the mean of *y* between *start* and *end*.
Expand All @@ -196,7 +196,7 @@ cdef class SimpleRegressorCriterion(CommonRegressorCriterion):
mean[0] = 0. if w == 0. else m / w

@cython.boundscheck(False)
cdef float64_t _mse(self, SIZE_t start, SIZE_t end, float64_t mean,
cdef float64_t _mse(self, intp_t start, intp_t end, float64_t mean,
float64_t weight) noexcept nogil:
"""
Computes mean square error between *start* and *end*
Expand Down
Loading