Skip to content

DOC Fix doc of defaults in sklearn.utils.sparsefuncs.py #18025

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Dec 15, 2020
91 changes: 49 additions & 42 deletions sklearn/utils/sparsefuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ def inplace_csr_column_scale(X, scale):

Parameters
----------
X : CSR matrix with shape (n_samples, n_features)
X : sparse matrix of shape (n_samples, n_features)
Matrix to normalize using the variance of the features.
It should be of CSR format.

scale : float array with shape (n_features,)
scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
Array of precomputed feature-wise values to use for scaling.
"""
assert scale.shape[0] == X.shape[1]
Expand All @@ -53,25 +54,25 @@ def inplace_csr_row_scale(X, scale):

Parameters
----------
X : CSR sparse matrix, shape (n_samples, n_features)
Matrix to be scaled.
X : sparse matrix of shape (n_samples, n_features)
Matrix to be scaled. It should be of CSR format.

scale : float array with shape (n_samples,)
scale : ndarray of float of shape (n_samples,)
Array of precomputed sample-wise values to use for scaling.
"""
assert scale.shape[0] == X.shape[0]
X.data *= np.repeat(scale, np.diff(X.indptr))


def mean_variance_axis(X, axis, weights=None, return_sum_weights=False):
"""Compute mean and variance along an axix on a CSR or CSC matrix
"""Compute mean and variance along an axis on a CSR or CSC matrix.

Parameters
----------
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
Input data.
X : sparse matrix of shape (n_samples, n_features)
Input data. It can be of CSR or CSC format.

axis : int (either 0 or 1)
axis : {0, 1}
Axis along which the mean and variance should be computed.

weights : ndarray of shape (n_samples,) or (n_features,), default=None
Expand All @@ -91,10 +92,10 @@ def mean_variance_axis(X, axis, weights=None, return_sum_weights=False):
-------

means : ndarray of shape (n_features,), dtype=floating
Feature-wise means
Feature-wise means.

variances : ndarray of shape (n_features,), dtype=floating
Feature-wise variances
Feature-wise variances.

sum_weights : ndarray of shape (n_features,), dtype=floating
Returned if `return_sum_weights` is `True`.
Expand Down Expand Up @@ -122,7 +123,7 @@ def mean_variance_axis(X, axis, weights=None, return_sum_weights=False):
@_deprecate_positional_args
def incr_mean_variance_axis(X, *, axis, last_mean, last_var, last_n,
weights=None):
"""Compute incremental mean and variance along an axix on a CSR or
"""Compute incremental mean and variance along an axis on a CSR or
CSC matrix.

last_mean, last_var are the statistics computed at the last step by this
Expand All @@ -132,10 +133,10 @@ def incr_mean_variance_axis(X, *, axis, last_mean, last_var, last_n,

Parameters
----------
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
X : CSR or CSC sparse matrix of shape (n_samples, n_features)
Input data.

axis : int (either 0 or 1)
axis : {0, 1}
Axis along which the mean and variance should be computed.

last_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating
Expand Down Expand Up @@ -226,10 +227,11 @@ def inplace_column_scale(X, scale):

Parameters
----------
X : CSC or CSR matrix with shape (n_samples, n_features)
Matrix to normalize using the variance of the features.
X : sparse matrix of shape (n_samples, n_features)
Matrix to normalize using the variance of the features. It should be
of CSC or CSR format.

scale : float array with shape (n_features,)
scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
Array of precomputed feature-wise values to use for scaling.
"""
if isinstance(X, sp.csc_matrix):
Expand All @@ -248,10 +250,10 @@ def inplace_row_scale(X, scale):

Parameters
----------
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
Matrix to be scaled.
X : sparse matrix of shape (n_samples, n_features)
Matrix to be scaled. It should be of CSR or CSC format.

scale : float array with shape (n_features,)
scale : ndarray of shape (n_samples,), dtype={np.float32, np.float64}
Array of precomputed sample-wise values to use for scaling.
"""
if isinstance(X, sp.csc_matrix):
Expand All @@ -268,8 +270,9 @@ def inplace_swap_row_csc(X, m, n):

Parameters
----------
X : scipy.sparse.csc_matrix, shape=(n_samples, n_features)
Matrix whose two rows are to be swapped.
X : sparse matrix of shape (n_samples, n_features)
Matrix whose two rows are to be swapped. It should be of
CSC format.

m : int
Index of the row of X to be swapped.
Expand Down Expand Up @@ -297,8 +300,9 @@ def inplace_swap_row_csr(X, m, n):

Parameters
----------
X : scipy.sparse.csr_matrix, shape=(n_samples, n_features)
Matrix whose two rows are to be swapped.
X : sparse matrix of shape (n_samples, n_features)
Matrix whose two rows are to be swapped. It should be of
CSR format.

m : int
Index of the row of X to be swapped.
Expand Down Expand Up @@ -352,8 +356,9 @@ def inplace_swap_row(X, m, n):

Parameters
----------
X : CSR or CSC sparse matrix, shape=(n_samples, n_features)
Matrix whose two rows are to be swapped.
X : sparse matrix of shape (n_samples, n_features)
Matrix whose two rows are to be swapped. It should be of CSR or
CSC format.

m : int
Index of the row of X to be swapped.
Expand All @@ -375,8 +380,9 @@ def inplace_swap_column(X, m, n):

Parameters
----------
X : CSR or CSC sparse matrix, shape=(n_samples, n_features)
Matrix whose two columns are to be swapped.
X : sparse matrix of shape (n_samples, n_features)
Matrix whose two columns are to be swapped. It should be of
CSR or CSC format.

m : int
Index of the column of X to be swapped.
Expand Down Expand Up @@ -465,10 +471,10 @@ def min_max_axis(X, axis, ignore_nan=False):

Parameters
----------
X : CSR or CSC sparse matrix, shape (n_samples, n_features)
Input data.
X : sparse matrix of shape (n_samples, n_features)
Input data. It should be of CSR or CSC format.

axis : int (either 0 or 1)
axis : {0, 1}
Axis along which the minima and maxima should be computed.

ignore_nan : bool, default=False
Expand All @@ -479,11 +485,11 @@ def min_max_axis(X, axis, ignore_nan=False):
Returns
-------

mins : float array with shape (n_features,)
Feature-wise minima
mins : ndarray of shape (n_features,), dtype={np.float32, np.float64}
Feature-wise minima.

maxs : float array with shape (n_features,)
Feature-wise maxima
maxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}
Feature-wise maxima.
"""
if isinstance(X, sp.csr_matrix) or isinstance(X, sp.csc_matrix):
if ignore_nan:
Expand All @@ -501,10 +507,10 @@ def count_nonzero(X, axis=None, sample_weight=None):

Parameters
----------
X : CSR sparse matrix of shape (n_samples, n_labels)
Input data.
X : sparse matrix of shape (n_samples, n_labels)
Input data. It should be of CSR format.

axis : None, 0 or 1
axis : {0, 1}, default=None
The axis on which the data is aggregated.

sample_weight : array-like of shape (n_samples,), default=None
Expand Down Expand Up @@ -546,7 +552,8 @@ def count_nonzero(X, axis=None, sample_weight=None):
def _get_median(data, n_zeros):
"""Compute the median of data with n_zeros additional zeros.

This function is used to support sparse matrices; it modifies data in-place
This function is used to support sparse matrices; it modifies data
in-place.
"""
n_elems = len(data) + n_zeros
if not n_elems:
Expand Down Expand Up @@ -577,12 +584,12 @@ def csc_median_axis_0(X):

Parameters
----------
X : CSC sparse matrix, shape (n_samples, n_features)
Input data.
X : sparse matrix of shape (n_samples, n_features)
Input data. It should be of CSC format.

Returns
-------
median : ndarray, shape (n_features,)
median : ndarray of shape (n_features,)
Median.

"""
Expand Down