Skip to content

Updates tree decision criterion for scikit-learn 1.2 #115

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits on Jul 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
- run:
name: Install standard libraries
command: |
python -m pip install scipy matplotlib numpy cython pandas
python -m pip install scipy matplotlib numpy cython pandas pyquicksetup

- run:
name: install dependencies
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ build/
*.pidb
*.log
*.scc
*.so
*.pyd

# Visual C++ cache files
Expand Down
1 change: 0 additions & 1 deletion .local.jenkins.lin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ install:
- $PYINT -m pip install --upgrade pip
- $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ jyquickhelper pyquickhelper cpyquickhelper pandas_streaming --extra-index-url=https://pypi.python.org/simple/
- $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ scikit-learn>=0.22 --extra-index-url=https://pypi.python.org/simple/
- $PYINT -m pip install -r requirements-win.txt
- $PYINT -m pip install -r requirements.txt
- $PYINT --version
- $PYINT -m pip freeze
Expand Down
4 changes: 2 additions & 2 deletions _unittests/ut_helpers/test_debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_union_features_reg(self):
self.assertNotIn(" object at 0x", text)
self.assertIn(") -> (", text)
else:
raise Exception("should not be the case")
raise AssertionError("should not be the case")

def test_union_features_cl(self):
data = numpy.random.randn(4, 5)
Expand All @@ -60,7 +60,7 @@ def test_union_features_cl(self):
self.assertNotIn(" object at 0x", text)
self.assertIn(") -> (", text)
else:
raise Exception("should not be the case")
raise AssertionError("should not be the case")


if __name__ == "__main__":
Expand Down
16 changes: 8 additions & 8 deletions _unittests/ut_mlmodel/test_anmf_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def test_anmf_predictor_sparse(self):
exp = mod.estimator_nmf_.inverse_transform(
mod.estimator_nmf_.transform(mat))
got = mod.predict(mat)
sc1 = mean_squared_error(mat.todense(), exp)
sc2 = mean_squared_error(mat.todense(), got)
sc1 = mean_squared_error(numpy.asarray(mat.todense()), exp)
sc2 = mean_squared_error(numpy.asarray(mat.todense()), got)
self.assertGreater(sc1, sc2)

mat2 = numpy.array([[1, 1, 1, 1]], dtype=numpy.float64)
Expand All @@ -68,17 +68,17 @@ def test_anmf_predictor_sparse_sparse(self):
exp = mod.estimator_nmf_.inverse_transform(
mod.estimator_nmf_.transform(mat))
got = mod.predict(mat)
sc1 = mean_squared_error(mat.todense(), exp)
sc2 = mean_squared_error(mat.todense(), got)
sc1 = mean_squared_error(numpy.asarray(mat.todense()), exp)
sc2 = mean_squared_error(numpy.asarray(mat.todense()), got)
self.assertGreater(sc1, sc2)

mat2 = numpy.array([[1, 1, 1, 1]], dtype=numpy.float64)
mat2 = csr_matrix(mat2)
exp2 = mod.estimator_nmf_.inverse_transform(
mod.estimator_nmf_.transform(mat2))
got2 = mod.predict(mat2)
sc1 = mean_squared_error(mat2.todense(), exp2)
sc2 = mean_squared_error(mat2.todense(), got2)
sc1 = mean_squared_error(numpy.asarray(mat2.todense()), exp2)
sc2 = mean_squared_error(numpy.asarray(mat2.todense()), got2)
self.assertGreater(sc1, sc2)

def test_anmf_predictor_positive(self):
Expand Down Expand Up @@ -118,8 +118,8 @@ def test_anmf_predictor_positive_sparse(self):
exp = mod.estimator_nmf_.inverse_transform(
mod.estimator_nmf_.transform(mat))
got = mod.predict(mat)
sc1 = mean_squared_error(mat.todense(), exp)
sc2 = mean_squared_error(mat.todense(), got)
sc1 = mean_squared_error(numpy.asarray(mat.todense()), exp)
sc2 = mean_squared_error(numpy.asarray(mat.todense()), got)
self.assertGreater(sc1, sc2)
mx = numpy.min(got)
self.assertGreater(mx, 0)
Expand Down
14 changes: 8 additions & 6 deletions _unittests/ut_mlmodel/test_categories_to_integers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
from pyquickhelper.texthelper import compare_module_version
from mlinsights.mlmodel import CategoriesToIntegers
from mlinsights.mlmodel import (
test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv)
run_test_sklearn_pickle,
run_test_sklearn_clone,
run_test_sklearn_grid_search_cv)

skipped_warnings = (ConvergenceWarning, UserWarning, FitFailedWarning)

Expand Down Expand Up @@ -103,12 +105,12 @@ def test_categories_to_integers_pickle(self):
data = os.path.join(os.path.abspath(
os.path.dirname(__file__)), "data", "adult_set.txt")
df = pandas.read_csv(data, sep="\t")
test_sklearn_pickle(lambda: CategoriesToIntegers(skip_errors=True), df)
run_test_sklearn_pickle(lambda: CategoriesToIntegers(skip_errors=True), df)

@ignore_warnings(skipped_warnings)
def test_categories_to_integers_clone(self):
self.maxDiff = None
test_sklearn_clone(lambda: CategoriesToIntegers())
run_test_sklearn_clone(lambda: CategoriesToIntegers())

@ignore_warnings(skipped_warnings)
def test_categories_to_integers_grid_search(self):
Expand All @@ -119,19 +121,19 @@ def test_categories_to_integers_grid_search(self):
y = df['income'] # pylint: disable=E1136
pipe = make_pipeline(CategoriesToIntegers(),
LogisticRegression())
self.assertRaise(lambda: test_sklearn_grid_search_cv(
self.assertRaise(lambda: run_test_sklearn_grid_search_cv(
lambda: pipe, df), ValueError)
if (compare_module_version(sklver, "0.24") >= 0 and # pylint: disable=R1716
compare_module_version(pandas.__version__, "1.3") < 0):
self.assertRaise(
lambda: test_sklearn_grid_search_cv(
lambda: run_test_sklearn_grid_search_cv(
lambda: pipe, X, y, categoriestointegers__single=[True, False]),
ValueError, "Unable to find category value")
pipe = make_pipeline(CategoriesToIntegers(),
Imputer(strategy='most_frequent'),
LogisticRegression(n_jobs=1))
try:
res = test_sklearn_grid_search_cv(
res = run_test_sklearn_grid_search_cv(
lambda: pipe, X, y, categoriestointegers__single=[True, False],
categoriestointegers__skip_errors=[True])
except AttributeError as e:
Expand Down
12 changes: 6 additions & 6 deletions _unittests/ut_mlmodel/test_classification_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
from pyquickhelper.pycode import ExtTestCase
from pyquickhelper.texthelper import compare_module_version
from mlinsights.mlmodel import (
ClassifierAfterKMeans, test_sklearn_pickle, test_sklearn_clone,
test_sklearn_grid_search_cv)
ClassifierAfterKMeans, run_test_sklearn_pickle,
run_test_sklearn_clone, run_test_sklearn_grid_search_cv)


class TestClassifierAfterKMeans(ExtTestCase):
Expand Down Expand Up @@ -58,24 +58,24 @@ def test_classification_kmeans_pickle(self):
iris = datasets.load_iris()
X, y = iris.data, iris.target
try:
test_sklearn_pickle(lambda: ClassifierAfterKMeans(), X, y)
run_test_sklearn_pickle(lambda: ClassifierAfterKMeans(), X, y)
except AttributeError as e:
if compare_module_version(sklver, "0.24") < 0:
return
raise e

def test_classification_kmeans_clone(self):
self.maxDiff = None
test_sklearn_clone(lambda: ClassifierAfterKMeans())
run_test_sklearn_clone(lambda: ClassifierAfterKMeans())

@ignore_warnings(category=ConvergenceWarning)
def test_classification_kmeans_grid_search(self):
iris = datasets.load_iris()
X, y = iris.data, iris.target
self.assertRaise(lambda: test_sklearn_grid_search_cv(
self.assertRaise(lambda: run_test_sklearn_grid_search_cv(
lambda: ClassifierAfterKMeans(), X, y), ValueError)
try:
res = test_sklearn_grid_search_cv(
res = run_test_sklearn_grid_search_cv(
lambda: ClassifierAfterKMeans(),
X, y, c_n_clusters=[2, 3])
except AttributeError as e:
Expand Down
18 changes: 9 additions & 9 deletions _unittests/ut_mlmodel/test_decision_tree_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
from sklearn.tree import DecisionTreeClassifier
from pyquickhelper.pycode import ExtTestCase
from mlinsights.mlmodel import (
test_sklearn_pickle, test_sklearn_clone, test_sklearn_grid_search_cv,
DecisionTreeLogisticRegression
)
run_test_sklearn_pickle, run_test_sklearn_clone,
run_test_sklearn_grid_search_cv, DecisionTreeLogisticRegression)
from mlinsights.mltree import predict_leaves


Expand Down Expand Up @@ -63,22 +62,23 @@ def test_classifier_pickle(self):
X = random(100)
Y = X > 0.5 # pylint: disable=W0143
X = X.reshape((100, 1)) # pylint: disable=E1101
test_sklearn_pickle(lambda: LogisticRegression(), X, Y)
test_sklearn_pickle(lambda: DecisionTreeLogisticRegression(
run_test_sklearn_pickle(lambda: LogisticRegression(), X, Y)
run_test_sklearn_pickle(lambda: DecisionTreeLogisticRegression(
fit_improve_algo=None), X, Y)

def test_classifier_clone(self):
test_sklearn_clone(
run_test_sklearn_clone(
lambda: DecisionTreeLogisticRegression(fit_improve_algo=None))

def test_classifier_grid_search(self):
X = random(100)
Y = X > 0.5 # pylint: disable=W0143
X = X.reshape((100, 1)) # pylint: disable=E1101
self.assertRaise(lambda: test_sklearn_grid_search_cv(
self.assertRaise(lambda: run_test_sklearn_grid_search_cv(
lambda: DecisionTreeLogisticRegression(fit_improve_algo=None), X, Y), ValueError)
res = test_sklearn_grid_search_cv(lambda: DecisionTreeLogisticRegression(fit_improve_algo=None),
X, Y, max_depth=[2, 3])
res = run_test_sklearn_grid_search_cv(
lambda: DecisionTreeLogisticRegression(fit_improve_algo=None),
X, Y, max_depth=[2, 3])
self.assertIn('model', res)
self.assertIn('score', res)
self.assertGreater(res['score'], 0)
Expand Down
Loading