Skip to content

Split io.py into multiple files #448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
697 changes: 0 additions & 697 deletions graphblas/io.py

This file was deleted.

7 changes: 7 additions & 0 deletions graphblas/io/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from ._awkward import from_awkward, to_awkward
from ._matrixmarket import mmread, mmwrite
from ._networkx import from_networkx, to_networkx
from ._numpy import from_numpy, to_numpy # deprecated
from ._scipy import from_scipy_sparse, to_scipy_sparse
from ._sparse import from_pydata_sparse, to_pydata_sparse
from ._viz import draw # deprecated
181 changes: 181 additions & 0 deletions graphblas/io/_awkward.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
import numpy as np

from ..core.matrix import Matrix
from ..core.utils import output_type
from ..core.vector import Vector

_AwkwardDoublyCompressedMatrix = None


def from_awkward(A, *, name=None):
    """Build a GraphBLAS Vector or Matrix out of an Awkward Array.

    The array's top-level parameters must include ``format`` and ``shape``.

    Depending on ``format``, these top-level attributes are read:
        - vec/csr/csc: values, indices
        - hypercsr/hypercsc: values, indices, offset_labels

    Parameters
    ----------
    A : awkward.Array
        Awkward Array with values and indices
    name : str, optional
        Name of resulting Matrix or Vector

    Returns
    -------
    Vector or Matrix

    Raises
    ------
    ValueError
        If ``format`` or ``shape`` is missing from the parameters, or if
        ``format`` is not valid for the dimensionality given by ``shape``.
    """
    params = A.layout.parameters
    missing = {"format", "shape"} - params.keys()
    if missing:
        raise ValueError(f"Missing parameters: {missing}")
    fmt = params["format"]
    shape = params["shape"]

    # A one-dimensional shape means a Vector; "vec" is its only valid format.
    if len(shape) == 1:
        if format_is_vec := (fmt == "vec"):
            return Vector.from_coo(
                A.indices.layout.data,
                A.values.layout.data,
                size=shape[0],
                name=name,
            )
        raise ValueError(f"Invalid format for Vector: {fmt}")

    nrows, ncols = shape
    # Values and offsets are read up front for every Matrix format.
    values = A.values.layout.content.data
    indptr = A.values.layout.offsets.data

    # Tuple membership (not a set) so an unhashable ``fmt`` still compares via ==.
    if fmt in ("csr", "csc"):
        minor = A.indices.layout.content.data
        if fmt == "csr":
            return Matrix.from_csr(indptr, minor, values, ncols=ncols, name=name)
        return Matrix.from_csc(indptr, minor, values, nrows=nrows, name=name)

    if fmt in ("hypercsr", "hypercsc"):
        # offset_labels holds the ids of the compressed (major) dimension.
        major = A.offset_labels.layout.data
        minor = A.indices.layout.content.data
        if fmt == "hypercsr":
            return Matrix.from_dcsr(
                major, indptr, minor, values, nrows=nrows, ncols=ncols, name=name
            )
        return Matrix.from_dcsc(
            major, indptr, minor, values, nrows=nrows, ncols=ncols, name=name
        )

    raise ValueError(f"Invalid format for Matrix: {fmt}")


def to_awkward(A, format=None):
    """Create an Awkward Array from a GraphBLAS Matrix or Vector.

    Parameters
    ----------
    A : Matrix or Vector
        GraphBLAS object to be converted
    format : str {'csr', 'csc', 'hypercsr', 'hypercsc', 'vec'}
        Default format is csr for Matrix; vec for Vector.
        The value is lower-cased before it is checked.

    The Awkward Array will have top-level attributes based on format:
        - vec/csr/csc: values, indices
        - hypercsr/hypercsc: values, indices, offset_labels

    Top-level parameters will also be set: format, shape

    Returns
    -------
    awkward.Array

    Raises
    ------
    ValueError
        If ``format`` is not valid for the type of ``A``.
    TypeError
        If ``A`` is neither a Matrix nor a Vector.

    """
    try:
        # awkward version 1
        # MAINT: we can probably drop awkward v1 at the end of 2024 or 2025
        import awkward._v2 as ak
        from awkward._v2.forms.listoffsetform import ListOffsetForm
        from awkward._v2.forms.numpyform import NumpyForm
        from awkward._v2.forms.recordform import RecordForm
    except ImportError:
        # awkward version 2
        import awkward as ak
        from awkward.forms.listoffsetform import ListOffsetForm
        from awkward.forms.numpyform import NumpyForm
        from awkward.forms.recordform import RecordForm

    out_type = output_type(A)
    if format is None:
        format = "vec" if out_type is Vector else "csr"
    format = format.lower()
    # Set only for the "hyper" formats, where the result is given a record name.
    classname = None

    if out_type is Vector:
        if format != "vec":
            raise ValueError(f"Invalid format for Vector: {format}")
        # `size` is the length handed to ak.from_buffers below.
        size = A.nvals
        indices, values = A.to_coo()
        form = RecordForm(
            contents=[
                NumpyForm(A.dtype.np_type.name, form_key="node1"),
                NumpyForm("int64", form_key="node0"),
            ],
            fields=["values", "indices"],
        )
        # Buffer keys pair each form_key above with its "data" buffer.
        d = {"node0-data": indices, "node1-data": values}

    elif out_type is Matrix:
        # Each branch exports the matrix and seeds the buffer dict `d` with the
        # index buffer(s) that are specific to the chosen format.
        if format == "csr":
            indptr, cols, values = A.to_csr()
            d = {"node3-data": cols}
            size = A.nrows
        elif format == "csc":
            indptr, rows, values = A.to_csc()
            d = {"node3-data": rows}
            size = A.ncols
        elif format == "hypercsr":
            rows, indptr, cols, values = A.to_dcsr()
            d = {"node3-data": cols, "node5-data": rows}
            size = len(rows)
        elif format == "hypercsc":
            cols, indptr, rows, values = A.to_dcsc()
            d = {"node3-data": rows, "node5-data": cols}
            size = len(cols)
        else:
            raise ValueError(f"Invalid format for Matrix: {format}")
        # Buffers shared by every Matrix format: the offsets and the values.
        d["node1-offsets"] = indptr
        d["node4-data"] = np.ascontiguousarray(values)

        # A list (one per compressed row/column) of {indices, values} records.
        form = ListOffsetForm(
            "i64",
            RecordForm(
                contents=[
                    NumpyForm("int64", form_key="node3"),
                    NumpyForm(A.dtype.np_type.name, form_key="node4"),
                ],
                fields=["indices", "values"],
            ),
            form_key="node1",
        )
        if format.startswith("hyper"):
            # The mixin class is created at most once and kept in the module-level
            # global so that later calls skip the definition.
            global _AwkwardDoublyCompressedMatrix
            if _AwkwardDoublyCompressedMatrix is None:  # pylint: disable=used-before-assignment
                # Define behaviors to make all fields function at the top-level
                @ak.behaviors.mixins.mixin_class(ak.behavior)
                class _AwkwardDoublyCompressedMatrix:
                    @property
                    def values(self):  # pragma: no branch (???)
                        return self.data.values

                    @property
                    def indices(self):  # pragma: no branch (???)
                        return self.data.indices

            # Wrap the list form in an outer record: the original form becomes
            # the "data" field and "offset_labels" carries the node5 buffer.
            form = RecordForm(
                contents=[
                    form,
                    NumpyForm("int64", form_key="node5"),
                ],
                fields=["data", "offset_labels"],
            )
            classname = "_AwkwardDoublyCompressedMatrix"

    else:
        raise TypeError(f"A must be a Matrix or Vector, found {type(A)}")

    ret = ak.from_buffers(form, size, d)
    # Record the metadata that from_awkward requires to rebuild the object.
    ret = ak.with_parameter(ret, "format", format)
    ret = ak.with_parameter(ret, "shape", list(A.shape))
    if classname:
        ret = ak.with_name(ret, classname)
    return ret
131 changes: 131 additions & 0 deletions graphblas/io/_matrixmarket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from .. import backend
from ..core.matrix import Matrix
from ._scipy import to_scipy_sparse


def mmread(source, engine="auto", *, dup_op=None, name=None, **kwargs):
    """Create a GraphBLAS Matrix from the contents of a Matrix Market file.

    This uses `scipy.io.mmread
    <https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.mmread.html>`_
    or `fast_matrix_market.mmread
    <https://github.com/alugowski/fast_matrix_market/tree/main/python>`_.

    By default, ``fast_matrix_market`` will be used if available, because it
    is faster.  Additional keyword arguments in ``**kwargs`` will be passed
    to the engine's ``mmread``.  For example, ``parallelism=8`` will set the
    number of threads to use to 8 when using ``fast_matrix_market``.

    Parameters
    ----------
    source : str or file
        Filename (.mtx or .mtx.gz) or file-like object
    engine : {"auto", "scipy", "fmm", "fast_matrix_market"}, default "auto"
        How to read the matrix market file.  "scipy" uses ``scipy.io.mmread``,
        "fmm" and "fast_matrix_market" uses ``fast_matrix_market.mmread``,
        and "auto" will use "fast_matrix_market" if available.
        The value is matched case-insensitively.
    dup_op : BinaryOp, optional
        Aggregation function for duplicate coordinates (if found)
    name : str, optional
        Name of resulting Matrix

    Returns
    -------
    :class:`~graphblas.Matrix`

    Raises
    ------
    ImportError
        If scipy is not installed, or if the "fmm"/"fast_matrix_market" engine
        was requested explicitly and ``fast_matrix_market`` is not installed.
    ValueError
        If ``engine`` is not one of the supported values.
    """
    try:
        # scipy is currently needed for *all* engines
        from scipy.io import mmread
        from scipy.sparse import issparse
    except ImportError:  # pragma: no cover (import)
        raise ImportError("scipy is required to read Matrix Market files") from None
    engine = engine.lower()
    if engine in {"auto", "fmm", "fast_matrix_market"}:
        try:
            from fast_matrix_market import mmread  # noqa: F811
        except ImportError:  # pragma: no cover (import)
            # "auto" silently falls back to scipy's reader imported above.
            if engine != "auto":
                raise ImportError(
                    "fast_matrix_market is required to read Matrix Market files "
                    f'using the "{engine}" engine'
                ) from None
    elif engine != "scipy":
        raise ValueError(
            f'Bad engine value: {engine!r}. Must be "auto", "scipy", "fmm", or "fast_matrix_market"'
        )
    array = mmread(source, **kwargs)
    # `issparse` accepts both sparse matrices and sparse arrays, whereas
    # `isspmatrix_coo` only recognizes `coo_matrix`; a sparse-array result
    # (e.g. requested through **kwargs) would otherwise be treated as dense.
    if issparse(array):
        if array.format != "coo":
            array = array.tocoo()
        nrows, ncols = array.shape
        return Matrix.from_coo(
            array.row, array.col, array.data, nrows=nrows, ncols=ncols, dup_op=dup_op, name=name
        )
    return Matrix.from_dense(array, name=name)


def mmwrite(
    target,
    matrix,
    engine="auto",
    *,
    comment="",
    field=None,
    precision=None,
    symmetry=None,
    **kwargs,
):
    """Write the contents of a GraphBLAS Matrix to a Matrix Market file.

    The file is written with `scipy.io.mmwrite
    <https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.mmwrite.html>`_
    or with ``fast_matrix_market.mmwrite``, depending on ``engine``.  Extra
    keyword arguments in ``**kwargs`` are forwarded to the chosen writer.

    Parameters
    ----------
    target : str or file target
        Filename (.mtx) or file-like object opened for writing
    matrix : Matrix
        Matrix to be written
    engine : {"auto", "scipy", "fmm", "fast_matrix_market"}, default "auto"
        How to write the matrix market file.  "scipy" uses ``scipy.io.mmwrite``,
        "fmm" and "fast_matrix_market" use ``fast_matrix_market.mmwrite``,
        and "auto" will use "fast_matrix_market" if available.
        The value is matched case-insensitively.
    comment : str, optional
        Comments to be prepended to the Matrix Market file
    field : str, optional
        {"real", "complex", "pattern", "integer"}
    precision : int, optional
        Number of digits to write for real or complex values
    symmetry : str, optional
        {"general", "symmetric", "skew-symmetric", "hermitian"}

    Raises
    ------
    ImportError
        If scipy is not installed, or if the "fmm"/"fast_matrix_market" engine
        was requested explicitly and ``fast_matrix_market`` is not installed.
    ValueError
        If ``engine`` is not one of the supported values.
    """
    try:
        # scipy is currently needed for *all* engines
        from scipy.io import mmwrite as write_matrix_market
    except ImportError:  # pragma: no cover (import)
        raise ImportError("scipy is required to write Matrix Market files") from None

    engine = engine.lower()
    if engine not in {"auto", "scipy", "fmm", "fast_matrix_market"}:
        raise ValueError(
            f'Bad engine value: {engine!r}. Must be "auto", "scipy", "fmm", or "fast_matrix_market"'
        )
    if engine != "scipy":
        try:
            from fast_matrix_market import mmwrite as write_matrix_market  # noqa: F811
        except ImportError:  # pragma: no cover (import)
            # Only "auto" may quietly keep using the scipy writer.
            if engine != "auto":
                raise ImportError(
                    "fast_matrix_market is required to write Matrix Market files "
                    f'using the "{engine}" engine'
                ) from None

    # Full (dense) SuiteSparse matrices are written from their exported values;
    # everything else goes through a scipy COO matrix.
    is_full = backend == "suitesparse" and matrix.ss.format in {"fullr", "fullc"}
    if is_full:
        payload = matrix.ss.export()["values"]
    else:
        payload = to_scipy_sparse(matrix, format="coo")

    write_matrix_market(
        target,
        payload,
        comment=comment,
        field=field,
        precision=precision,
        symmetry=symmetry,
        **kwargs,
    )
59 changes: 59 additions & 0 deletions graphblas/io/_networkx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from ..dtypes import lookup_dtype
from ._scipy import from_scipy_sparse


def from_networkx(G, nodelist=None, dtype=None, weight="weight", name=None):
    """Convert a networkx Graph into a square adjacency Matrix.

    The graph is first turned into a scipy sparse array by networkx and that
    array is then converted to a GraphBLAS Matrix.

    Parameters
    ----------
    G : nx.Graph
        Graph to convert
    nodelist : list, optional
        List of nodes in the nx.Graph. If not provided, all nodes will be used.
    dtype :
        Data type.  When given, it is resolved with ``lookup_dtype`` and its
        NumPy type is passed to networkx.
    weight : str, default="weight"
        Weight attribute
    name : str, optional
        Name of resulting Matrix

    Returns
    -------
    :class:`~graphblas.Matrix`
    """
    import networkx as nx

    np_dtype = None if dtype is None else lookup_dtype(dtype).np_type
    adjacency = nx.to_scipy_sparse_array(
        G,
        nodelist=nodelist,
        dtype=np_dtype,
        weight=weight,
    )
    return from_scipy_sparse(adjacency, name=name)


# TODO: add parameters to allow different networkx classes and attribute names
def to_networkx(m, edge_attribute="weight"):
    """Convert a square adjacency Matrix into a networkx DiGraph.

    One directed edge is added for every stored (row, column) entry of ``m``.

    Parameters
    ----------
    m : Matrix
        Square adjacency Matrix
    edge_attribute : str, optional
        Name of the edge attribute that receives the Matrix values.
        If None, values will be skipped.  Default is "weight".

    Returns
    -------
    nx.DiGraph
    """
    import networkx as nx

    row_ids, col_ids, weights = m.to_coo()
    # Plain Python lists are used so the node ids are not NumPy scalars.
    sources = row_ids.tolist()
    targets = col_ids.tolist()
    graph = nx.DiGraph()
    if edge_attribute is not None:
        weighted_edges = zip(sources, targets, weights.tolist())
        graph.add_weighted_edges_from(weighted_edges, weight=edge_attribute)
        return graph
    graph.add_edges_from(zip(sources, targets))
    return graph
Loading