Skip to content

Fix min_pos when all negative + speed up #19328

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 20 additions & 29 deletions sklearn/utils/arrayfuncs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,35 +19,26 @@ np.import_array()


def min_pos(np.ndarray X):
    """Return the smallest strictly positive entry of ``X``.

    The work is delegated to the C helper matching the dtype of ``X``
    (float32 or float64). When ``X`` holds no positive entry, the helper's
    large initial sentinel value is returned unchanged.

    Raises
    ------
    ValueError
        If the dtype of ``X`` is neither float32 nor float64.
    """
    # NOTE(review): X.data is handed over as a flat buffer of X.size items;
    # presumably callers always pass contiguous arrays -- confirm.
    dtype_name = X.dtype.name
    if dtype_name == 'float64':
        return _double_min_pos(<double *> X.data, X.size)
    if dtype_name == 'float32':
        return _float_min_pos(<float *> X.data, X.size)
    raise ValueError('Unsupported dtype for array X')


cdef float _float_min_pos(float *X, Py_ssize_t size):
    # Smallest strictly positive value among the first ``size`` floats of X.
    # Returns FLT_MAX when none of the values is positive.
    cdef Py_ssize_t i
    # Start from the largest finite single-precision value. DBL_MAX does not
    # fit in a C float, so using it here is an out-of-range conversion.
    cdef float min_val = FLT_MAX
    for i in range(size):
        if 0. < X[i] < min_val:
            min_val = X[i]
    return min_val


cdef double _double_min_pos(double *X, Py_ssize_t size):
    # Smallest strictly positive value among the first ``size`` doubles of X.
    # Returns DBL_MAX when none of the values is positive.
    cdef Py_ssize_t i
    # Start from the largest finite double. Seeding with FLT_MAX would make
    # the scan ignore every positive entry above FLT_MAX and return the
    # wrong sentinel for float64 input.
    cdef double min_val = DBL_MAX
    for i in range(size):
        if 0. < X[i] < min_val:
            min_val = X[i]
    return min_val
"""Find the minimum value of an array over positive values

Returns the maximum representable value of the input dtype if none of the
values are positive.
"""
if X.dtype == np.float32:
return _min_pos[float](<float *> X.data, X.size)
elif X.dtype == np.float64:
return _min_pos[double](<double *> X.data, X.size)
else:
raise ValueError('Unsupported dtype for array X')


cdef floating _min_pos(floating* X, Py_ssize_t size):
    # Scan the first ``size`` entries of X and return the smallest strictly
    # positive one. If there is none, the initial sentinel is returned.
    cdef Py_ssize_t idx
    cdef floating smallest

    # The sentinel is the largest finite value of the specialised C type.
    if floating is float:
        smallest = FLT_MAX
    else:
        smallest = DBL_MAX

    for idx in range(size):
        if X[idx] > 0. and X[idx] < smallest:
            smallest = X[idx]
    return smallest


# General Cholesky Delete.
Expand Down
26 changes: 26 additions & 0 deletions sklearn/utils/tests/test_arrayfuncs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest
import numpy as np

from sklearn.utils._testing import assert_allclose
from sklearn.utils.arrayfuncs import min_pos


def test_min_pos():
    # Check that min_pos returns the smallest strictly positive value and
    # that the result is consistent between float and double
    X = np.random.RandomState(0).randn(100)
    X_float = X.astype(np.float32)

    min_double = min_pos(X)
    min_float = min_pos(X_float)

    assert_allclose(min_double, min_float)
    # The seeded sample contains positive entries, so the result has to be
    # strictly positive, not merely non-negative.
    assert min_double > 0
    # Compare against a plain NumPy reference for each dtype.
    assert min_double == X[X > 0].min()
    assert min_float == X_float[X_float > 0].min()


@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_min_pos_no_positive(dtype):
    # With no positive entry in the input, min_pos is expected to return the
    # largest finite value of the input dtype (#19328)
    all_negative = np.full(100, -1., dtype=dtype)
    expected = np.finfo(dtype).max

    assert min_pos(all_negative) == expected