python-graphblas · eriknw · May 3, 2023 · Apr 21, 2023 · Apr 25, 2023 · Apr 25, 2023
diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml
@@ -210,18 +210,12 @@ jobs:
           else
             psgver=""
           fi
-          # TODO: drop 0.57.0rc1 and use 0.57 once numba 0.57 is properly released
           if [[ ${npver} == "=1.24" || ${{ startsWith(steps.pyver.outputs.selected, '3.11') }} == true ]] ; then
-            numbaver=$(python -c 'import random ; print(random.choice(["=0.57.0rc1", ""]))')
+            numbaver=$(python -c 'import random ; print(random.choice(["=0.57", ""]))')
           elif [[ ${npver} == "=1.21" ]] ; then
-            numbaver=$(python -c 'import random ; print(random.choice(["=0.55", "=0.56", "=0.57.0rc1", ""]))')
+            numbaver=$(python -c 'import random ; print(random.choice(["=0.55", "=0.56", "=0.57", ""]))')
           else
-            numbaver=$(python -c 'import random ; print(random.choice(["=0.56", "=0.57.0rc1", ""]))')
-          fi
-          if [[ ${{ matrix.os == 'windows-latest' }} == true && ( ${npver} == "=1.24" || ${numbaver} == "=0.57.0rc1" ) ]] ; then
-            # TODO: numba 0.57.0rc1 currently crashes sometimes on windows, so skip it for now
-            npver=""
-            numbaver=""
+            numbaver=$(python -c 'import random ; print(random.choice(["=0.56", "=0.57", ""]))')
           fi
           fmm=fast_matrix_market${fmmver}
           awkward=awkward${akver}
@@ -254,7 +248,7 @@ jobs:
           fi
           echo "versions: np${npver} sp${spver} pd${pdver} ak${akver} nx${nxver} numba${numbaver} yaml${yamlver} sparse${sparsever} psgver${psgver}"
 
-          # TODO: remove `-c numba` when numba 0.57 is properly released
+          # TODO: remove `-c numba` when numba 0.57 is properly released on conda-forge
           $(command -v mamba || command -v conda) install -c numba packaging pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig tomli \
             pyyaml${yamlver} ${sparse} pandas${pdver} scipy${spver} numpy${npver} ${awkward} \
             networkx${nxver} ${numba} ${fmm} ${psg} \

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -30,8 +30,8 @@ repos:
       - id: validate-pyproject
         name: Validate pyproject.toml
   # I don't yet trust ruff to do what autoflake does
-  - repo: https://github.com/myint/autoflake
-    rev: v2.0.2
+  - repo: https://github.com/PyCQA/autoflake
+    rev: v2.1.1
     hooks:
       - id: autoflake
         args: [--in-place]
@@ -43,7 +43,7 @@ repos:
       - id: isort
   # Let's keep `pyupgrade` even though `ruff --fix` probably does most of it
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
+    rev: v3.3.2
     hooks:
       - id: pyupgrade
         args: [--py38-plus]
@@ -58,7 +58,7 @@ repos:
       - id: black
       - id: black-jupyter
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.261
+    rev: v0.0.264
     hooks:
       - id: ruff
         args: [--fix-only, --show-fixes]
@@ -86,7 +86,7 @@ repos:
         additional_dependencies: [tomli]
         files: ^(graphblas|docs)/
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.261
+    rev: v0.0.264
     hooks:
       - id: ruff
   - repo: https://github.com/sphinx-contrib/sphinx-lint

diff --git a/docs/conf.py b/docs/conf.py
@@ -25,7 +25,7 @@
 # The full version, including alpha/beta/rc tags
 # release = "1.3.2"
 # See: https://github.com/pypa/setuptools_scm/#usage-from-sphinx
-from importlib.metadata import version  # noqa: E402 isort: skip
+from importlib.metadata import version  # noqa: E402 isort:skip
 
 release = version("python-graphblas")
 del version

diff --git a/graphblas/binary/__init__.py b/graphblas/binary/__init__.py
@@ -1,6 +1,6 @@
 # All items are dynamically added by classes in operator.py
 # This module acts as a container of BinaryOp instances
-from ..core import _supports_udfs  # isort:skip
+from ..core import _supports_udfs
 
 _delayed = {}
 _delayed_commutes_to = {

diff --git a/graphblas/core/agg.py b/graphblas/core/agg.py
@@ -8,7 +8,7 @@
 """
 import warnings
 
-from .operator.agg import *
+from .operator.agg import *  # pylint: disable=wildcard-import,unused-wildcard-import
 
 warnings.warn(
     "graphblas.core.agg namespace is deprecated; please use graphblas.core.operator.agg instead.",

diff --git a/graphblas/core/operator/binary.py b/graphblas/core/operator/binary.py
@@ -630,6 +630,34 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals
         """Register a BinaryOp without registering it in the ``graphblas.binary`` namespace.
 
         Because it is not registered in the namespace, the name is optional.
+
+        Parameters
+        ----------
+        func : FunctionType
+            The function to compile. For all current backends, this must be able
+            to be compiled with ``numba.njit``.
+            ``func`` takes two input parameters of any dtype and returns any dtype.
+        name : str, optional
+            The name of the operator. This *does not* show up as ``gb.binary.{name}``.
+        parameterized : bool, default False
+            When True, create a parameterized user-defined operator, which means
+            additional parameters can be "baked into" the operator when used.
+            For example, ``gb.binary.isclose`` is a parameterized function that
+            optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it
+            can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``.
+            When creating a parameterized user-defined operator, the ``func``
+            parameter must be a callable that *returns* a function that will
+            then get compiled.
+        is_udt : bool, default False
+            Whether the operator is intended to operate on user-defined types.
+            If True, then the function will not be automatically compiled for
+            builtin types, and it will be compiled "just in time" when used.
+            Setting ``is_udt=True`` is also helpful when the left and right
+            dtypes need to be different.
+
+        Returns
+        -------
+        BinaryOp or ParameterizedBinaryOp
         """
         cls._check_supports_udf("register_anonymous")
         if parameterized:
@@ -638,19 +666,60 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals
 
     @classmethod
     def register_new(cls, name, func, *, parameterized=False, is_udt=False, lazy=False):
-        """Register a BinaryOp. The name will be used to identify the BinaryOp in the
-        ``graphblas.binary`` namespace.
-
-            >>> def max_zero(x, y):
-                    r = 0
-                    if x > r:
-                        r = x
-                    if y > r:
-                        r = y
-                    return r
-            >>> gb.core.operator.BinaryOp.register_new("max_zero", max_zero)
-            >>> dir(gb.binary)
-            [..., 'max_zero', ...]
+        """Register a new BinaryOp and save it to ``graphblas.binary`` namespace.
+
+        Parameters
+        ----------
+        name : str
+            The name of the operator. This will show up as ``gb.binary.{name}``.
+            The name may contain periods, ".", which will result in nested objects
+            such as ``gb.binary.x.y.z`` for name ``"x.y.z"``.
+        func : FunctionType
+            The function to compile. For all current backends, this must be able
+            to be compiled with ``numba.njit``.
+            ``func`` takes two input parameters of any dtype and returns any dtype.
+        parameterized : bool, default False
+            When True, create a parameterized user-defined operator, which means
+            additional parameters can be "baked into" the operator when used.
+            For example, ``gb.binary.isclose`` is a parameterized function that
+            optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it
+            can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``.
+            When creating a parameterized user-defined operator, the ``func``
+            parameter must be a callable that *returns* a function that will
+            then get compiled. See the ``user_isclose`` example below.
+        is_udt : bool, default False
+            Whether the operator is intended to operate on user-defined types.
+            If True, then the function will not be automatically compiled for
+            builtin types, and it will be compiled "just in time" when used.
+            Setting ``is_udt=True`` is also helpful when the left and right
+            dtypes need to be different.
+        lazy : bool, default False
+            If False (the default), then the function will be automatically
+            compiled for builtin types (unless ``is_udt`` is True).
+            Compiling functions can be slow, however, so you may want to
+            delay compilation and only compile when the operator is used,
+            which is done by setting ``lazy=True``.
+
+        Examples
+        --------
+        >>> def max_zero(x, y):
+        ...     r = 0
+        ...     if x > r:
+        ...         r = x
+        ...     if y > r:
+        ...         r = y
+        ...     return r
+        >>> gb.core.operator.BinaryOp.register_new("max_zero", max_zero)
+        >>> dir(gb.binary)
+        [..., 'max_zero', ...]
+
+        This is how ``gb.binary.isclose`` is defined:
+
+        >>> def user_isclose(rel_tol=1e-7, abs_tol=0.0):
+        ...     def inner(x, y):
+        ...         return x == y or abs(x - y) <= max(rel_tol * max(abs(x), abs(y)), abs_tol)
+        ...     return inner
+        >>> gb.binary.register_new("user_isclose", user_isclose, parameterized=True)
         """
         cls._check_supports_udf("register_new")
         module, funcname = cls._remove_nesting(name)

diff --git a/graphblas/core/operator/indexunary.py b/graphblas/core/operator/indexunary.py
@@ -241,10 +241,42 @@ def _compile_udt(self, dtype, dtype2):
 
     @classmethod
     def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=False):
-        """Register an IndexUnaryOp without registering it in the
-        ``graphblas.indexunary`` namespace.
+        """Register an IndexUnaryOp without adding it to the ``graphblas.indexunary`` namespace.
 
         Because it is not registered in the namespace, the name is optional.
+
+        Parameters
+        ----------
+        func : FunctionType
+            The function to compile. For all current backends, this must be able
+            to be compiled with ``numba.njit``.
+            ``func`` takes four input parameters--any dtype, int64, int64,
+            any dtype and returns any dtype. The first argument (any dtype) is
+            the value of the input Matrix or Vector, the second argument (int64)
+            is the row index of the Matrix or the index of the Vector, the third
+            argument (int64) is the column index of the Matrix or 0 for a Vector,
+            and the fourth argument (any dtype) is the value of the input Scalar.
+        name : str, optional
+            The name of the operator. This *does not* show up as ``gb.indexunary.{name}``.
+        parameterized : bool, default False
+            When True, create a parameterized user-defined operator, which means
+            additional parameters can be "baked into" the operator when used.
+            For example, ``gb.binary.isclose`` is a parameterized BinaryOp that
+            optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it
+            can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``.
+            When creating a parameterized user-defined operator, the ``func``
+            parameter must be a callable that *returns* a function that will
+            then get compiled.
+        is_udt : bool, default False
+            Whether the operator is intended to operate on user-defined types.
+            If True, then the function will not be automatically compiled for
+            builtin types, and it will be compiled "just in time" when used.
+            Setting ``is_udt=True`` is also helpful when the left and right
+            dtypes need to be different.
+
+        Returns
+        -------
+        IndexUnaryOp or ParameterizedIndexUnaryOp
         """
         cls._check_supports_udf("register_anonymous")
         if parameterized:
@@ -253,15 +285,53 @@ def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=Fals
 
     @classmethod
     def register_new(cls, name, func, *, parameterized=False, is_udt=False, lazy=False):
-        """Register an IndexUnaryOp. The name will be used to identify the IndexUnaryOp in the
-        ``graphblas.indexunary`` namespace.
+        """Register a new IndexUnaryOp and save it to ``graphblas.indexunary`` namespace.
 
         If the return type is Boolean, the function will also be registered as a SelectOp
-        with the same name.
-
-            >>> gb.indexunary.register_new("row_mod", lambda x, i, j, thunk: i % max(thunk, 2))
-            >>> dir(gb.indexunary)
-            [..., 'row_mod', ...]
+        (and saved to ``graphblas.select`` namespace) with the same name.
+
+        Parameters
+        ----------
+        name : str
+            The name of the operator. This will show up as ``gb.indexunary.{name}``.
+            The name may contain periods, ".", which will result in nested objects
+            such as ``gb.indexunary.x.y.z`` for name ``"x.y.z"``.
+        func : FunctionType
+            The function to compile. For all current backends, this must be able
+            to be compiled with ``numba.njit``.
+            ``func`` takes four input parameters--any dtype, int64, int64,
+            any dtype and returns any dtype. The first argument (any dtype) is
+            the value of the input Matrix or Vector, the second argument (int64)
+            is the row index of the Matrix or the index of the Vector, the third
+            argument (int64) is the column index of the Matrix or 0 for a Vector,
+            and the fourth argument (any dtype) is the value of the input Scalar.
+        parameterized : bool, default False
+            When True, create a parameterized user-defined operator, which means
+            additional parameters can be "baked into" the operator when used.
+            For example, ``gb.binary.isclose`` is a parameterized BinaryOp that
+            optionally accepts ``rel_tol`` and ``abs_tol`` parameters, and it
+            can be used as: ``A.ewise_mult(B, gb.binary.isclose(rel_tol=1e-5))``.
+            When creating a parameterized user-defined operator, the ``func``
+            parameter must be a callable that *returns* a function that will
+            then get compiled.
+        is_udt : bool, default False
+            Whether the operator is intended to operate on user-defined types.
+            If True, then the function will not be automatically compiled for
+            builtin types, and it will be compiled "just in time" when used.
+            Setting ``is_udt=True`` is also helpful when the left and right
+            dtypes need to be different.
+        lazy : bool, default False
+            If False (the default), then the function will be automatically
+            compiled for builtin types (unless ``is_udt`` is True).
+            Compiling functions can be slow, however, so you may want to
+            delay compilation and only compile when the operator is used,
+            which is done by setting ``lazy=True``.
+
+        Examples
+        --------
+        >>> gb.indexunary.register_new("row_mod", lambda x, i, j, thunk: i % max(thunk, 2))
+        >>> dir(gb.indexunary)
+        [..., 'row_mod', ...]
         """
         cls._check_supports_udf("register_new")
         module, funcname = cls._remove_nesting(name)

diff --git a/graphblas/core/operator/monoid.py b/graphblas/core/operator/monoid.py
@@ -269,22 +269,25 @@ def _compile_udt(self, dtype, dtype2):
     def register_anonymous(cls, binaryop, identity, name=None, *, is_idempotent=False):
         """Register a Monoid without registering it in the ``graphblas.monoid`` namespace.
 
+        A monoid is a binary operator whose inputs and output are the same dtype.
         Because it is not registered in the namespace, the name is optional.
 
         Parameters
         ----------
-        binaryop : BinaryOp
-            Builtin or registered binary operator
-        identity :
-            Identity value of the monoid
+        binaryop : BinaryOp or ParameterizedBinaryOp
+            The binary operator of the monoid, which should be able to use the same
+            dtype for both inputs and the output.
+        identity : scalar or Mapping
+            The identity of the monoid such that ``op(x, identity) == x`` for any x.
+            ``identity`` may also be a mapping from dtype to scalar.
         name : str, optional
-            Name associated with the monoid
+            The name of the operator. This *does not* show up as ``gb.monoid.{name}``.
         is_idempotent : bool, default False
             Does ``op(x, x) == x`` for any x?
 
         Returns
         -------
-        Function handle
+        Monoid or ParameterizedMonoid
         """
         if type(binaryop) is ParameterizedBinaryOp:
             return ParameterizedMonoid(
@@ -294,12 +297,36 @@ def register_anonymous(cls, binaryop, identity, name=None, *, is_idempotent=Fals
 
     @classmethod
     def register_new(cls, name, binaryop, identity, *, is_idempotent=False, lazy=False):
-        """Register a Monoid. The name will be used to identify the Monoid in the
-        ``graphblas.monoid`` namespace.
+        """Register a new Monoid and save it to ``graphblas.monoid`` namespace.
 
-            >>> gb.core.operator.Monoid.register_new("max_zero", gb.binary.max_zero, 0)
-            >>> dir(gb.monoid)
-            [..., 'max_zero', ...]
+        A monoid is a binary operator whose inputs and output are the same dtype.
+
+        Parameters
+        ----------
+        name : str
+            The name of the operator. This will show up as ``gb.monoid.{name}``.
+            The name may contain periods, ".", which will result in nested objects
+            such as ``gb.monoid.x.y.z`` for name ``"x.y.z"``.
+        binaryop : BinaryOp or ParameterizedBinaryOp
+            The binary operator of the monoid, which should be able to use the same
+            dtype for both inputs and the output.
+        identity : scalar or Mapping
+            The identity of the monoid such that ``op(x, identity) == x`` for any x.
+            ``identity`` may also be a mapping from dtype to scalar.
+        is_idempotent : bool, default False
+            Does ``op(x, x) == x`` for any x?
+        lazy : bool, default False
+            If False (the default), then the function will be automatically
+            compiled for builtin types (unless ``is_udt`` was True for the binaryop).
+            Compiling functions can be slow, however, so you may want to
+            delay compilation and only compile when the operator is used,
+            which is done by setting ``lazy=True``.
+
+        Examples
+        --------
+        >>> gb.core.operator.Monoid.register_new("max_zero", gb.binary.max_zero, 0)
+        >>> dir(gb.monoid)
+        [..., 'max_zero', ...]
         """
         module, funcname = cls._remove_nesting(name)
         if lazy: