diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8bf0101..38b1dfa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,11 @@ # To run: `pre-commit run --all-files` # To update: `pre-commit autoupdate` # - &flake8_dependencies below needs updated manually +ci: + # See: https://pre-commit.ci/#configuration + autofix_prs: false + autoupdate_schedule: monthly + skip: [no-commit-to-branch] fail_fast: true default_language_version: python: python3 @@ -20,12 +25,13 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.12.1 + rev: v0.12.2 hooks: - id: validate-pyproject name: Validate pyproject.toml + # I don't yet trust ruff to do what autoflake does - repo: https://github.com/myint/autoflake - rev: v2.0.1 + rev: v2.0.2 hooks: - id: autoflake args: [--in-place] @@ -44,10 +50,15 @@ repos: - id: auto-walrus args: [--line-length, "100"] - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.3.0 hooks: - id: black # - id: black-jupyter + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.261 + hooks: + - id: ruff + args: [--fix-only, --show-fixes] - repo: https://github.com/PyCQA/flake8 rev: 6.0.0 hooks: @@ -55,25 +66,31 @@ repos: additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==6.0.0 - - flake8-comprehensions==3.10.1 - - flake8-bugbear==23.2.13 - - flake8-simplify==0.19.3 + - flake8-bugbear==23.3.23 + - flake8-simplify==0.20.0 - repo: https://github.com/asottile/yesqa rev: v1.4.0 hooks: - id: yesqa additional_dependencies: *flake8_dependencies - repo: https://github.com/codespell-project/codespell - rev: v2.2.2 + rev: v2.2.4 hooks: - id: codespell types_or: [python, rst, markdown] additional_dependencies: [tomli] files: ^(graphblas_algorithms|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.253 + rev: v0.0.261 hooks: - id: ruff + # `pyroma` may help keep our package standards up to date if best practices change. + # This is probably a "low value" check though and safe to remove if we want faster pre-commit. + - repo: https://github.com/regebro/pyroma + rev: "4.2" + hooks: + - id: pyroma + args: [-n, "10", .] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: diff --git a/README.md b/README.md index a4dfd50..0136abe 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,10 @@ dispatch pattern shown above. - Community - inter_community_edges - intra_community_edges +- Components + - is_connected + - is_weakly_connected + - node_connected_component - Core - k_truss - Cuts @@ -147,11 +151,15 @@ dispatch pattern shown above. - is_k_regular - is_regular - Shortest Paths + - all_pairs_bellman_ford_path_length + - all_pairs_shortest_path_length - floyd_warshall - floyd_warshall_predecessor_and_distance - - single_source_bellman_ford_path_length - - all_pairs_bellman_ford_path_length - has_path + - negative_edge_cycle + - single_source_bellman_ford_path_length + - single_source_shortest_path_length + - single_target_shortest_path_length - Simple Paths - is_simple_path - S Metric @@ -162,5 +170,8 @@ dispatch pattern shown above. - is_tournament - score_sequence - tournament_matrix +- Traversal + - bfs_layers + - descendants_at_distance - Triads - is_triad diff --git a/graphblas_algorithms/algorithms/__init__.py b/graphblas_algorithms/algorithms/__init__.py index 0e4c9ee..303535a 100644 --- a/graphblas_algorithms/algorithms/__init__.py +++ b/graphblas_algorithms/algorithms/__init__.py @@ -3,6 +3,7 @@ from .centrality import * from .cluster import * from .community import * +from .components import * from .core import * from .cuts import * from .dag import * @@ -16,4 +17,5 @@ from .smetric import * from .structuralholes import * from .tournament import * +from .traversal import * from .triads import * diff --git a/graphblas_algorithms/algorithms/centrality/eigenvector.py b/graphblas_algorithms/algorithms/centrality/eigenvector.py index 5a2ee78..5172f61 100644 --- a/graphblas_algorithms/algorithms/centrality/eigenvector.py +++ b/graphblas_algorithms/algorithms/centrality/eigenvector.py @@ -27,7 +27,7 @@ def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, name="eigen # Power iteration: make up to max_iter iterations A = G._A xprev = Vector(float, N, name="x_prev") - for _ in range(max_iter): + for _i in range(max_iter): xprev << x x += x @ A normalize(x, "L2") diff --git a/graphblas_algorithms/algorithms/centrality/katz.py b/graphblas_algorithms/algorithms/centrality/katz.py index 3d21331..78de982 100644 --- a/graphblas_algorithms/algorithms/centrality/katz.py +++ b/graphblas_algorithms/algorithms/centrality/katz.py @@ -44,7 +44,7 @@ def katz_centrality( # Power iteration: make up to max_iter iterations xprev = Vector(float, N, name="x_prev") - for _ in range(max_iter): + for _i in range(max_iter): xprev, x = x, xprev # x << alpha * semiring(xprev @ A) + beta x << semiring(xprev @ A) diff --git a/graphblas_algorithms/algorithms/components/__init__.py b/graphblas_algorithms/algorithms/components/__init__.py new file mode 100644 index 0000000..bb0aea6 --- /dev/null +++ b/graphblas_algorithms/algorithms/components/__init__.py @@ -0,0 +1,2 @@ +from .connected import * +from .weakly_connected import * diff --git a/graphblas_algorithms/algorithms/components/connected.py b/graphblas_algorithms/algorithms/components/connected.py new file mode 100644 index 0000000..37c0fc9 --- /dev/null +++ b/graphblas_algorithms/algorithms/components/connected.py @@ -0,0 +1,31 @@ +from graphblas import Vector, replace +from graphblas.semiring import any_pair + +from graphblas_algorithms.algorithms.exceptions import PointlessConcept + + +def is_connected(G): + if len(G) == 0: + raise PointlessConcept("Connectivity is undefined for the null graph.") + return _plain_bfs(G, next(iter(G))).nvals == len(G) + + +def node_connected_component(G, n): + return _plain_bfs(G, n) + + +def _plain_bfs(G, source): + index = G._key_to_id[source] + A = G.get_property("offdiag") + n = A.nrows + v = Vector(bool, n, name="bfs_plain") + q = Vector(bool, n, name="q") + v[index] = True + q[index] = True + any_pair_bool = any_pair[bool] + for _i in range(1, n): + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + v(q.S) << True + return v diff --git a/graphblas_algorithms/algorithms/components/weakly_connected.py b/graphblas_algorithms/algorithms/components/weakly_connected.py new file mode 100644 index 0000000..eb3dc75 --- /dev/null +++ b/graphblas_algorithms/algorithms/components/weakly_connected.py @@ -0,0 +1,77 @@ +from graphblas import Vector, binary, replace +from graphblas.semiring import any_pair + +from graphblas_algorithms.algorithms.exceptions import PointlessConcept + + +def is_weakly_connected(G): + if len(G) == 0: + raise PointlessConcept("Connectivity is undefined for the null graph.") + return _plain_bfs(G, next(iter(G))).nvals == len(G) + + +# TODO: benchmark this and the version commented out below +def _plain_bfs(G, source): + # Bi-directional BFS w/o symmetrizing the adjacency matrix + index = G._key_to_id[source] + A = G.get_property("offdiag") + # XXX: should we use `AT` if available? + n = A.nrows + v = Vector(bool, n, name="bfs_plain") + q_out = Vector(bool, n, name="q_out") + q_in = Vector(bool, n, name="q_in") + v[index] = True + q_in[index] = True + any_pair_bool = any_pair[bool] + is_out_empty = True + is_in_empty = False + for _i in range(1, n): + # Traverse out-edges from the most recent `q_in` and `q_out` + if is_out_empty: + q_out(~v.S) << any_pair_bool(q_in @ A) + else: + q_out << binary.any(q_out | q_in) + q_out(~v.S, replace) << any_pair_bool(q_out @ A) + is_out_empty = q_out.nvals == 0 + if not is_out_empty: + v(q_out.S) << True + elif is_in_empty: + break + # Traverse in-edges from the most recent `q_in` and `q_out` + if is_in_empty: + q_in(~v.S) << any_pair_bool(A @ q_out) + else: + q_in << binary.any(q_out | q_in) + q_in(~v.S, replace) << any_pair_bool(A @ q_in) + is_in_empty = q_in.nvals == 0 + if not is_in_empty: + v(q_in.S) << True + elif is_out_empty: + break + return v + + +""" +def _plain_bfs(G, source): + # Bi-directional BFS w/o symmetrizing the adjacency matrix + index = G._key_to_id[source] + A = G.get_property("offdiag") + n = A.nrows + v = Vector(bool, n, name="bfs_plain") + q = Vector(bool, n, name="q") + q2 = Vector(bool, n, name="q_2") + v[index] = True + q[index] = True + any_pair_bool = any_pair[bool] + for _i in range(1, n): + q2(~v.S, replace) << any_pair_bool(q @ A) + v(q2.S) << True + q(~v.S, replace) << any_pair_bool(A @ q) + if q.nvals == 0: + if q2.nvals == 0: + break + q, q2 = q2, q + elif q2.nvals != 0: + q << binary.any(q | q2) + return v +""" diff --git a/graphblas_algorithms/algorithms/dag.py b/graphblas_algorithms/algorithms/dag.py index 3cceeef..c75dae1 100644 --- a/graphblas_algorithms/algorithms/dag.py +++ b/graphblas_algorithms/algorithms/dag.py @@ -1,5 +1,5 @@ from graphblas import Vector, replace -from graphblas.semiring import lor_pair +from graphblas.semiring import any_pair __all__ = ["descendants", "ancestors"] @@ -10,10 +10,12 @@ def descendants(G, source): raise KeyError(f"The node {source} is not in the graph") index = G._key_to_id[source] A = G.get_property("offdiag") - q = Vector.from_coo(index, True, size=A.nrows, name="q") + q = Vector(bool, size=A.nrows, name="q") + q[index] = True rv = q.dup(name="descendants") - for _ in range(A.nrows): - q(~rv.S, replace) << lor_pair(q @ A) + any_pair_bool = any_pair[bool] + for _i in range(A.nrows): + q(~rv.S, replace) << any_pair_bool(q @ A) if q.nvals == 0: break rv(q.S) << True @@ -26,10 +28,12 @@ def ancestors(G, source): raise KeyError(f"The node {source} is not in the graph") index = G._key_to_id[source] A = G.get_property("offdiag") - q = Vector.from_coo(index, True, size=A.nrows, name="q") + q = Vector(bool, size=A.nrows, name="q") + q[index] = True rv = q.dup(name="descendants") - for _ in range(A.nrows): - q(~rv.S, replace) << lor_pair(A @ q) + any_pair_bool = any_pair[bool] + for _i in range(A.nrows): + q(~rv.S, replace) << any_pair_bool(A @ q) if q.nvals == 0: break rv(q.S) << True diff --git a/graphblas_algorithms/algorithms/dominating.py b/graphblas_algorithms/algorithms/dominating.py index 60c3426..2894bd8 100644 --- a/graphblas_algorithms/algorithms/dominating.py +++ b/graphblas_algorithms/algorithms/dominating.py @@ -1,8 +1,8 @@ -from graphblas.semiring import lor_pair +from graphblas.semiring import any_pair __all__ = ["is_dominating_set"] def is_dominating_set(G, nbunch): - nbrs = lor_pair(nbunch @ G._A).new(mask=~nbunch.S) # A or A.T? + nbrs = any_pair[bool](nbunch @ G._A).new(mask=~nbunch.S) # A or A.T? return nbrs.size - nbunch.nvals - nbrs.nvals == 0 diff --git a/graphblas_algorithms/algorithms/exceptions.py b/graphblas_algorithms/algorithms/exceptions.py index f4ef352..7c911c9 100644 --- a/graphblas_algorithms/algorithms/exceptions.py +++ b/graphblas_algorithms/algorithms/exceptions.py @@ -14,5 +14,9 @@ class PointlessConcept(GraphBlasAlgorithmException): pass +class NoPath(GraphBlasAlgorithmException): + pass + + class Unbounded(GraphBlasAlgorithmException): pass diff --git a/graphblas_algorithms/algorithms/link_analysis/hits_alg.py b/graphblas_algorithms/algorithms/link_analysis/hits_alg.py index aadd77e..515806e 100644 --- a/graphblas_algorithms/algorithms/link_analysis/hits_alg.py +++ b/graphblas_algorithms/algorithms/link_analysis/hits_alg.py @@ -30,7 +30,7 @@ def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True, *, with_auth a, h = h, a ATA = (A.T @ A).new(name="ATA") # Authority matrix aprev = Vector(float, N, name="a_prev") - for _ in range(max_iter): + for _i in range(max_iter): aprev, a = a, aprev a << ATA @ aprev normalize(a, "Linf") @@ -41,7 +41,7 @@ def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True, *, with_auth raise ConvergenceFailure(max_iter) else: hprev = Vector(float, N, name="h_prev") - for _ in range(max_iter): + for _i in range(max_iter): hprev, h = h, hprev a << hprev @ A h << A @ a diff --git a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py index 1623819..9b28fa3 100644 --- a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py +++ b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py @@ -79,7 +79,7 @@ def pagerank( # Power iteration: make up to max_iter iterations xprev = Vector(float, N, name="x_prev") w = Vector(float, N, name="w") - for _ in range(max_iter): + for _i in range(max_iter): xprev, x = x, xprev # x << alpha * ((xprev * S) @ A + "dangling_weights") + (1 - alpha) * p diff --git a/graphblas_algorithms/algorithms/shortest_paths/__init__.py b/graphblas_algorithms/algorithms/shortest_paths/__init__.py index 9fc57fb..781db9d 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/__init__.py +++ b/graphblas_algorithms/algorithms/shortest_paths/__init__.py @@ -1,3 +1,4 @@ from .dense import * from .generic import * +from .unweighted import * from .weighted import * diff --git a/graphblas_algorithms/algorithms/shortest_paths/generic.py b/graphblas_algorithms/algorithms/shortest_paths/generic.py index f91c9cf..b92f7d6 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/generic.py +++ b/graphblas_algorithms/algorithms/shortest_paths/generic.py @@ -1,5 +1,5 @@ from graphblas import Vector, replace -from graphblas.semiring import lor_pair +from graphblas.semiring import any_pair __all__ = ["has_path"] @@ -11,23 +11,26 @@ def has_path(G, source, target): if src == dst: return True A = G.get_property("offdiag") - q_src = Vector.from_coo(src, True, size=A.nrows, name="q_src") + q_src = Vector(bool, size=A.nrows, name="q_src") + q_src[src] = True seen_src = q_src.dup(name="seen_src") - q_dst = Vector.from_coo(dst, True, size=A.nrows, name="q_dst") - seen_dst = q_dst.dup(name="seen_dst") - for _ in range(A.nrows // 2): - q_src(~seen_src.S, replace) << lor_pair(q_src @ A) + q_dst = Vector(bool, size=A.nrows, name="q_dst") + q_dst[dst] = True + seen_dst = q_dst.dup(name="seen_dst", clear=True) + any_pair_bool = any_pair[bool] + for _i in range(A.nrows // 2): + q_src(~seen_src.S, replace) << any_pair_bool(q_src @ A) if q_src.nvals == 0: return False - if lor_pair(q_src @ q_dst): + if any_pair_bool(q_src @ q_dst): return True - q_dst(~seen_dst.S, replace) << lor_pair(A @ q_dst) + seen_dst(q_dst.S) << True + q_dst(~seen_dst.S, replace) << any_pair_bool(A @ q_dst) if q_dst.nvals == 0: return False - if lor_pair(q_src @ q_dst): + if any_pair_bool(q_src @ q_dst): return True seen_src(q_src.S) << True - seen_dst(q_dst.S) << True return False diff --git a/graphblas_algorithms/algorithms/shortest_paths/unweighted.py b/graphblas_algorithms/algorithms/shortest_paths/unweighted.py new file mode 100644 index 0000000..3c8243f --- /dev/null +++ b/graphblas_algorithms/algorithms/shortest_paths/unweighted.py @@ -0,0 +1,83 @@ +import numpy as np +from graphblas import Matrix, Vector, replace, unary +from graphblas.semiring import any_pair + +__all__ = [ + "single_source_shortest_path_length", + "single_target_shortest_path_length", + "all_pairs_shortest_path_length", +] + + +def single_source_shortest_path_length(G, source, cutoff=None): + return _bfs_level(G, source, cutoff) + + +def single_target_shortest_path_length(G, target, cutoff=None): + return _bfs_level(G, target, cutoff, transpose=True) + + +def all_pairs_shortest_path_length(G, cutoff=None, *, nodes=None, expand_output=False): + D = _bfs_levels(G, nodes, cutoff) + if nodes is not None and expand_output and D.ncols != D.nrows: + ids = G.list_to_ids(nodes) + rv = Matrix(D.dtype, D.ncols, D.ncols, name=D.name) + rv[ids, :] = D + return rv + return D + + +def _bfs_level(G, source, cutoff, *, transpose=False): + index = G._key_to_id[source] + A = G.get_property("offdiag") + if transpose and G.is_directed(): + A = A.T # TODO: should we use "AT" instead? + n = A.nrows + v = Vector(int, n, name="bfs_unweighted") + q = Vector(bool, n, name="q") + v[index] = 0 + q[index] = True + any_pair_bool = any_pair[bool] + if cutoff is None or cutoff >= n: + cutoff = n # Everything + else: + cutoff += 1 # Inclusive + for i in range(1, cutoff): + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + v(q.S) << i + return v + + +def _bfs_levels(G, nodes, cutoff): + A = G.get_property("offdiag") + n = A.nrows + if nodes is None: + # TODO: `D = Vector.from_scalar(0, n, dtype).diag()` + D = Vector(int, n, name="bfs_unweighted_vector") + D << 0 + D = D.diag(name="bfs_unweighted") + else: + ids = G.list_to_ids(nodes) + D = Matrix.from_coo( + np.arange(len(ids), dtype=np.uint64), + ids, + 0, + int, + nrows=len(ids), + ncols=n, + name="bfs_unweighted", + ) + Q = unary.one[bool](D).new(name="Q") + any_pair_bool = any_pair[bool] + if cutoff is None or cutoff >= n: + cutoff = n # Everything + else: + cutoff += 1 # Inclusive + for i in range(1, cutoff): + Q(~D.S, replace) << any_pair_bool(Q @ A) + if Q.nvals == 0: + break + D(Q.S) << i + return D diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index a5cec41..8e6efef 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -7,6 +7,7 @@ __all__ = [ "single_source_bellman_ford_path_length", "bellman_ford_path_lengths", + "negative_edge_cycle", ] @@ -196,8 +197,7 @@ def _bfs_levels(G, nodes=None, *, dtype=int): ncols=n, name="bfs_levels", ) - Q = Matrix(bool, D.nrows, D.ncols, name="Q") - Q << unary.one[bool](D) + Q = unary.one[bool](D).new(name="Q") any_pair_bool = any_pair[bool] for i in range(1, n): Q(~D.S, replace) << any_pair_bool(Q @ A) @@ -205,3 +205,43 @@ def _bfs_levels(G, nodes=None, *, dtype=int): break D(Q.S) << i return D + + +def negative_edge_cycle(G): + # TODO: use a heuristic to try to stop early + if G.is_directed(): + deg = "total_degrees-" + else: + deg = "degrees-" + A, degrees, has_negative_diagonal, has_negative_edges = G.get_properties( + f"offdiag {deg} has_negative_diagonal has_negative_edges-" + ) + if has_negative_diagonal: + return True + if not has_negative_edges: + return False + if A.dtype == bool: + # Should we upcast e.g. INT8 to INT64 as well? + dtype = int + else: + dtype = A.dtype + n = A.nrows + # Begin from every node that has edges + d = Vector(dtype, n, name="negative_edge_cycle") + d(degrees.S) << 0 + cur = d.dup(name="cur") + mask = Vector(bool, n, name="mask") + one = unary.one[bool] + for _i in range(n - 1): + cur << min_plus(cur @ A) + mask << one(cur) + mask(binary.second) << binary.lt(cur & d) + cur(mask.V, replace) << cur + if cur.nvals == 0: + return False + d(cur.S) << cur + cur << min_plus(cur @ A) + mask << binary.lt(cur & d) + if mask.reduce(monoid.lor): + return True + return False diff --git a/graphblas_algorithms/algorithms/traversal/__init__.py b/graphblas_algorithms/algorithms/traversal/__init__.py new file mode 100644 index 0000000..7811162 --- /dev/null +++ b/graphblas_algorithms/algorithms/traversal/__init__.py @@ -0,0 +1 @@ +from .breadth_first_search import * diff --git a/graphblas_algorithms/algorithms/traversal/breadth_first_search.py b/graphblas_algorithms/algorithms/traversal/breadth_first_search.py new file mode 100644 index 0000000..e9be539 --- /dev/null +++ b/graphblas_algorithms/algorithms/traversal/breadth_first_search.py @@ -0,0 +1,45 @@ +from graphblas import Vector, replace +from graphblas.semiring import any_pair + +__all__ = [ + "bfs_layers", + "descendants_at_distance", +] + + +def bfs_layers(G, sources): + if sources in G: + sources = [sources] + ids = G.list_to_ids(sources) + if not ids: + return + A = G.get_property("offdiag") + n = A.nrows + v = Vector(bool, size=n, name="bfs_layers") + q = Vector.from_coo(ids, True, size=n, name="q") + any_pair_bool = any_pair[bool] + yield q.dup(name="bfs_layer_0") + for i in range(1, n): + v(q.S) << True + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + return + yield q.dup(name=f"bfs_layer_{i}") + + +def descendants_at_distance(G, source, distance): + index = G._key_to_id[source] + A = G.get_property("offdiag") + n = A.nrows + q = Vector(bool, size=n, name=f"descendants_at_distance_{distance}") + q[index] = True + if distance == 0: + return q + v = Vector(bool, size=n, name="bfs_seen") + any_pair_bool = any_pair[bool] + for _i in range(1, distance + 1): + v(q.S) << True + q(~v.S, replace) << any_pair_bool(q @ A) + if q.nvals == 0: + break + return q diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py index c52b2be..92febc5 100644 --- a/graphblas_algorithms/classes/_utils.py +++ b/graphblas_algorithms/classes/_utils.py @@ -119,6 +119,16 @@ def vector_to_dict(self, v, *, mask=None, fill_value=None): return {id_to_key[index]: value for index, value in zip(*v.to_coo(sort=False))} +def vector_to_list(self, v, *, values_are_keys=False): + id_to_key = self.id_to_key + return [ + id_to_key[idx] + for idx in v.to_coo(indices=not values_are_keys, values=values_are_keys, sort=True)[ + bool(values_are_keys) + ].tolist() + ] + + def vector_to_nodemap(self, v, *, mask=None, fill_value=None, values_are_keys=False): from .nodemap import NodeMap diff --git a/graphblas_algorithms/classes/digraph.py b/graphblas_algorithms/classes/digraph.py index 0bc1ec7..83e7356 100644 --- a/graphblas_algorithms/classes/digraph.py +++ b/graphblas_algorithms/classes/digraph.py @@ -548,6 +548,7 @@ def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr): set_to_vector = _utils.set_to_vector to_networkx = _utils.to_networkx vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list vector_to_nodemap = _utils.vector_to_nodemap vector_to_nodeset = _utils.vector_to_nodeset vector_to_set = _utils.vector_to_set diff --git a/graphblas_algorithms/classes/graph.py b/graphblas_algorithms/classes/graph.py index 718264f..03a2893 100644 --- a/graphblas_algorithms/classes/graph.py +++ b/graphblas_algorithms/classes/graph.py @@ -396,6 +396,7 @@ def __init__(self, incoming_graph_data=None, *, key_to_id=None, **attr): set_to_vector = _utils.set_to_vector to_networkx = _utils.to_networkx vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list vector_to_nodemap = _utils.vector_to_nodemap vector_to_nodeset = _utils.vector_to_nodeset vector_to_set = _utils.vector_to_set diff --git a/graphblas_algorithms/classes/nodemap.py b/graphblas_algorithms/classes/nodemap.py index 63b7a5e..2a32502 100644 --- a/graphblas_algorithms/classes/nodemap.py +++ b/graphblas_algorithms/classes/nodemap.py @@ -28,6 +28,7 @@ def __init__(self, v, *, fill_value=None, values_are_keys=False, key_to_id=None) set_to_vector = _utils.set_to_vector # to_networkx = _utils.to_networkx vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list vector_to_nodemap = _utils.vector_to_nodemap vector_to_nodeset = _utils.vector_to_nodeset vector_to_set = _utils.vector_to_set @@ -95,6 +96,7 @@ def get(self, key, default=None): return default if self._values_are_keys: return self.id_to_key[rv] + return rv # items # keys @@ -220,6 +222,7 @@ def _get_rows(self): set_to_vector = _utils.set_to_vector # to_networkx = _utils.to_networkx vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list vector_to_nodemap = _utils.vector_to_nodemap vector_to_nodeset = _utils.vector_to_nodeset vector_to_set = _utils.vector_to_set @@ -335,6 +338,7 @@ def _get_rows(self): set_to_vector = _utils.set_to_vector # to_networkx = _utils.to_networkx vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list vector_to_nodemap = _utils.vector_to_nodemap vector_to_nodeset = _utils.vector_to_nodeset vector_to_set = _utils.vector_to_set diff --git a/graphblas_algorithms/classes/nodeset.py b/graphblas_algorithms/classes/nodeset.py index 1713a7d..b79895e 100644 --- a/graphblas_algorithms/classes/nodeset.py +++ b/graphblas_algorithms/classes/nodeset.py @@ -1,6 +1,6 @@ from collections.abc import MutableSet -from graphblas.semiring import lor_pair, plus_pair +from graphblas.semiring import any_pair, plus_pair from . import _utils @@ -26,6 +26,7 @@ def __init__(self, v, *, key_to_id=None): set_to_vector = _utils.set_to_vector # to_networkx = _utils.to_networkx vector_to_dict = _utils.vector_to_dict + vector_to_list = _utils.vector_to_list vector_to_nodemap = _utils.vector_to_nodemap vector_to_nodeset = _utils.vector_to_nodeset vector_to_set = _utils.vector_to_set @@ -76,7 +77,7 @@ def clear(self): def isdisjoin(self, other): if isinstance(other, NodeSet): - return not lor_pair(self.vector @ other.vector) + return not any_pair[bool](self.vector @ other.vector) return super().isdisjoint(other) def pop(self): @@ -104,3 +105,8 @@ def _from_iterable(self, it): # Add more set methods (as needed) def union(self, *args): return set(self).union(*args) # TODO: can we make this better? + + def copy(self): + rv = type(self)(self.vector.dup(), key_to_id=self._key_to_id) + rv._id_to_key = self._id_to_key + return rv diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py index 1a142c3..d8430e5 100644 --- a/graphblas_algorithms/interface.py +++ b/graphblas_algorithms/interface.py @@ -25,6 +25,10 @@ class Dispatcher: # Community inter_community_edges = nxapi.community.quality.inter_community_edges intra_community_edges = nxapi.community.quality.intra_community_edges + # Components + is_connected = nxapi.components.connected.is_connected + node_connected_component = nxapi.components.connected.node_connected_component + is_weakly_connected = nxapi.components.weakly_connected.is_weakly_connected # Core k_truss = nxapi.core.k_truss # Cuts @@ -60,9 +64,17 @@ class Dispatcher: nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance ) has_path = nxapi.shortest_paths.generic.has_path + single_source_shortest_path_length = ( + nxapi.shortest_paths.unweighted.single_source_shortest_path_length + ) + single_target_shortest_path_length = ( + nxapi.shortest_paths.unweighted.single_target_shortest_path_length + ) + all_pairs_shortest_path_length = nxapi.shortest_paths.unweighted.all_pairs_shortest_path_length all_pairs_bellman_ford_path_length = ( nxapi.shortest_paths.weighted.all_pairs_bellman_ford_path_length ) + negative_edge_cycle = nxapi.shortest_paths.weighted.negative_edge_cycle single_source_bellman_ford_path_length = ( nxapi.shortest_paths.weighted.single_source_bellman_ford_path_length ) @@ -76,6 +88,9 @@ class Dispatcher: is_tournament = nxapi.tournament.is_tournament score_sequence = nxapi.tournament.score_sequence tournament_matrix = nxapi.tournament.tournament_matrix + # Traversal + bfs_layers = nxapi.traversal.breadth_first_search.bfs_layers + descendants_at_distance = nxapi.traversal.breadth_first_search.descendants_at_distance # Triads is_triad = nxapi.triads.is_triad diff --git a/graphblas_algorithms/nxapi/__init__.py b/graphblas_algorithms/nxapi/__init__.py index 75c7aa7..fe5ba87 100644 --- a/graphblas_algorithms/nxapi/__init__.py +++ b/graphblas_algorithms/nxapi/__init__.py @@ -2,6 +2,7 @@ from .centrality import * from .cluster import * from .community import * +from .components import * from .core import * from .cuts import * from .dag import * @@ -14,11 +15,14 @@ from .simple_paths import * from .smetric import * from .structuralholes import * +from .traversal import * from .triads import * from . import centrality from . import cluster from . import community +from . import components from . import link_analysis from . import shortest_paths from . import tournament +from . import traversal diff --git a/graphblas_algorithms/nxapi/_utils.py b/graphblas_algorithms/nxapi/_utils.py index db309a4..0bb9617 100644 --- a/graphblas_algorithms/nxapi/_utils.py +++ b/graphblas_algorithms/nxapi/_utils.py @@ -100,7 +100,7 @@ def partition(chunksize, L, *, evenly=True): yield from L return if evenly: - k = ceil(L / chunksize) + k = ceil(len(L) / chunksize) if k * chunksize != N: yield from split_evenly(k, L) return diff --git a/graphblas_algorithms/nxapi/cluster.py b/graphblas_algorithms/nxapi/cluster.py index 425fd09..8e61f9b 100644 --- a/graphblas_algorithms/nxapi/cluster.py +++ b/graphblas_algorithms/nxapi/cluster.py @@ -78,19 +78,6 @@ def average_clustering(G, nodes=None, weight=None, count_zeros=True): return func(G, weighted=weighted, count_zeros=count_zeros, mask=mask) -def _split(L, k): - """Split a list into approximately-equal parts""" - N = len(L) - start = 0 - for i in range(1, k): - stop = (N * i + k - 1) // k - if stop != start: - yield L[start:stop] - start = stop - if stop != N: - yield L[stop:] - - # TODO: should this move into algorithms? def _square_clustering_split(G, node_ids=None, *, chunksize): if node_ids is None: diff --git a/graphblas_algorithms/nxapi/components/__init__.py b/graphblas_algorithms/nxapi/components/__init__.py new file mode 100644 index 0000000..bb0aea6 --- /dev/null +++ b/graphblas_algorithms/nxapi/components/__init__.py @@ -0,0 +1,2 @@ +from .connected import * +from .weakly_connected import * diff --git a/graphblas_algorithms/nxapi/components/connected.py b/graphblas_algorithms/nxapi/components/connected.py new file mode 100644 index 0000000..d55a430 --- /dev/null +++ b/graphblas_algorithms/nxapi/components/connected.py @@ -0,0 +1,27 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.algorithms.exceptions import PointlessConcept +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +from ..exception import NetworkXPointlessConcept + +__all__ = [ + "is_connected", + "node_connected_component", +] + + +@not_implemented_for("directed") +def is_connected(G): + G = to_undirected_graph(G) + try: + return algorithms.is_connected(G) + except PointlessConcept as e: + raise NetworkXPointlessConcept(*e.args) from e + + +@not_implemented_for("directed") +def node_connected_component(G, n): + G = to_undirected_graph(G) + rv = algorithms.node_connected_component(G, n) + return G.vector_to_nodeset(rv) diff --git a/graphblas_algorithms/nxapi/components/weakly_connected.py b/graphblas_algorithms/nxapi/components/weakly_connected.py new file mode 100644 index 0000000..c72b532 --- /dev/null +++ b/graphblas_algorithms/nxapi/components/weakly_connected.py @@ -0,0 +1,19 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.algorithms.exceptions import PointlessConcept +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.utils import not_implemented_for + +from ..exception import NetworkXPointlessConcept + +__all__ = [ + "is_weakly_connected", +] + + +@not_implemented_for("undirected") +def is_weakly_connected(G): + G = to_directed_graph(G) + try: + return algorithms.is_weakly_connected(G) + except PointlessConcept as e: + raise NetworkXPointlessConcept(*e.args) from e diff --git a/graphblas_algorithms/nxapi/shortest_paths/__init__.py b/graphblas_algorithms/nxapi/shortest_paths/__init__.py index 9fc57fb..781db9d 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/__init__.py +++ b/graphblas_algorithms/nxapi/shortest_paths/__init__.py @@ -1,3 +1,4 @@ from .dense import * from .generic import * +from .unweighted import * from .weighted import * diff --git a/graphblas_algorithms/nxapi/shortest_paths/unweighted.py b/graphblas_algorithms/nxapi/shortest_paths/unweighted.py new file mode 100644 index 0000000..f1700f3 --- /dev/null +++ b/graphblas_algorithms/nxapi/shortest_paths/unweighted.py @@ -0,0 +1,45 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from .._utils import normalize_chunksize, partition +from ..exception import NodeNotFound + +__all__ = [ + "single_source_shortest_path_length", + "single_target_shortest_path_length", + "all_pairs_shortest_path_length", +] + + +def single_source_shortest_path_length(G, source, cutoff=None): + G = to_graph(G) + if source not in G: + raise NodeNotFound(f"Source {source} is not in G") + v = algorithms.single_source_shortest_path_length(G, source, cutoff) + return G.vector_to_nodemap(v) + + +def single_target_shortest_path_length(G, target, cutoff=None): + G = to_graph(G) + if target not in G: + raise NodeNotFound(f"Target {target} is not in G") + v = algorithms.single_target_shortest_path_length(G, target, cutoff) + return G.vector_to_nodemap(v) + + +def all_pairs_shortest_path_length(G, cutoff=None, *, chunksize="10 MiB"): + G = to_graph(G) + chunksize = normalize_chunksize(chunksize, len(G) * G._A.dtype.np_type.itemsize, len(G)) + if chunksize is None: + D = algorithms.all_pairs_shortest_path_length(G, cutoff) + yield from G.matrix_to_nodenodemap(D).items() + elif chunksize < 2: + for source in G: + d = algorithms.single_source_shortest_path_length(G, source, cutoff) + yield (source, G.vector_to_nodemap(d)) + else: + for cur_nodes in partition(chunksize, list(G)): + D = algorithms.all_pairs_shortest_path_length(G, cutoff, nodes=cur_nodes) + for i, source in enumerate(cur_nodes): + d = D[i, :].new(name=f"all_pairs_shortest_path_length_{i}") + yield (source, G.vector_to_nodemap(d)) diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py index d6bf1d2..a44a18e 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/weighted.py +++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py @@ -6,6 +6,7 @@ __all__ = [ "all_pairs_bellman_ford_path_length", + "negative_edge_cycle", "single_source_bellman_ford_path_length", ] @@ -52,3 +53,10 @@ def single_source_bellman_ford_path_length(G, source, weight="weight"): except KeyError as e: raise NodeNotFound(*e.args) from e return G.vector_to_nodemap(d) + + +def negative_edge_cycle(G, weight="weight", heuristic=True): + # TODO: what if weight is a function? + # TODO: use a heuristic to try to stop early + G = to_graph(G, weight=weight) + return algorithms.negative_edge_cycle(G) diff --git a/graphblas_algorithms/nxapi/traversal/__init__.py b/graphblas_algorithms/nxapi/traversal/__init__.py new file mode 100644 index 0000000..7811162 --- /dev/null +++ b/graphblas_algorithms/nxapi/traversal/__init__.py @@ -0,0 +1 @@ +from .breadth_first_search import * diff --git a/graphblas_algorithms/nxapi/traversal/breadth_first_search.py b/graphblas_algorithms/nxapi/traversal/breadth_first_search.py new file mode 100644 index 0000000..0b2c6a7 --- /dev/null +++ b/graphblas_algorithms/nxapi/traversal/breadth_first_search.py @@ -0,0 +1,27 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = [ + "bfs_layers", + "descendants_at_distance", +] + + +def bfs_layers(G, sources): + G = to_graph(G) + try: + for layer in algorithms.bfs_layers(G, sources): + yield G.vector_to_list(layer) + except KeyError as e: + raise NetworkXError(*e.args) from e + + +def descendants_at_distance(G, source, distance): + G = to_graph(G) + try: + v = algorithms.descendants_at_distance(G, source, distance) + except KeyError as e: + raise NetworkXError(*e.args) from e + return G.vector_to_nodeset(v) diff --git a/graphblas_algorithms/tests/test_core.py b/graphblas_algorithms/tests/test_core.py index 7718ef6..5acd529 100644 --- a/graphblas_algorithms/tests/test_core.py +++ b/graphblas_algorithms/tests/test_core.py @@ -33,4 +33,6 @@ def test_packages(): pytest.skip("Did not find pyproject.toml") with pyproject.open("rb") as f: pkgs2 = sorted(tomli.load(f)["tool"]["setuptools"]["packages"]) - assert pkgs == pkgs2 + assert ( + pkgs == pkgs2 + ), "If there are extra items on the left, add them to pyproject.toml:tool.setuptools.packages" diff --git a/graphblas_algorithms/tests/test_match_nx.py b/graphblas_algorithms/tests/test_match_nx.py index 6c42d54..c50896f 100644 --- a/graphblas_algorithms/tests/test_match_nx.py +++ b/graphblas_algorithms/tests/test_match_nx.py @@ -130,12 +130,20 @@ def nx_to_gb_info(info): ) +def module_exists(info): + return info[2].rsplit(".", 1)[0] in sys.modules + + @pytest.mark.checkstructure def test_dispatched_funcs_in_nxapi(nx_names_to_info, gb_names_to_info): """Are graphblas_algorithms functions in the correct locations in nxapi?""" failing = False for name in nx_names_to_info.keys() & gb_names_to_info.keys(): - nx_paths = {nx_to_gb_info(info) for info in nx_names_to_info[name]} + nx_paths = { + gbinfo + for info in nx_names_to_info[name] + if module_exists(gbinfo := nx_to_gb_info(info)) + } gb_paths = gb_names_to_info[name] if nx_paths != gb_paths: # pragma: no cover failing = True diff --git a/pyproject.toml b/pyproject.toml index f1e4472..7811266 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,8 +13,9 @@ readme = "README.md" requires-python = ">=3.8" license = {file = "LICENSE"} authors = [ - {name = "Erik Welch"}, + {name = "Erik Welch", email = "erik.n.welch@gmail.com"}, {name = "Jim Kitchen"}, + {name = "Graphblas-algorithms contributors"}, ] maintainers = [ {name = "Erik Welch", email = "erik.n.welch@gmail.com"}, @@ -87,6 +88,7 @@ complete = [ [tool.setuptools] # Let's be explicit (we test this too) +# TODO: it would be nice if setuptools (or our build backend) could handle this automatically and reliably. # $ python -c 'from setuptools import find_packages ; [print(x) for x in sorted(find_packages())]' # $ find graphblas_algorithms/ -name __init__.py -print | sort | sed -e 's/\/__init__.py//g' -e 's/\//./g' # $ python -c 'import tomli ; [print(x) for x in sorted(tomli.load(open("pyproject.toml", "rb"))["tool"]["setuptools"]["packages"])]' @@ -95,16 +97,20 @@ packages = [ "graphblas_algorithms.algorithms", "graphblas_algorithms.algorithms.centrality", "graphblas_algorithms.algorithms.community", + "graphblas_algorithms.algorithms.components", "graphblas_algorithms.algorithms.link_analysis", "graphblas_algorithms.algorithms.shortest_paths", "graphblas_algorithms.algorithms.tests", + "graphblas_algorithms.algorithms.traversal", "graphblas_algorithms.classes", "graphblas_algorithms.nxapi", "graphblas_algorithms.nxapi.centrality", "graphblas_algorithms.nxapi.community", + "graphblas_algorithms.nxapi.components", "graphblas_algorithms.nxapi.link_analysis", "graphblas_algorithms.nxapi.shortest_paths", "graphblas_algorithms.nxapi.tests", + "graphblas_algorithms.nxapi.traversal", "graphblas_algorithms.tests", "graphblas_algorithms.utils", ] diff --git a/scripts/scipy_impl.py b/scripts/scipy_impl.py index 277cece..06244ea 100644 --- a/scripts/scipy_impl.py +++ b/scripts/scipy_impl.py @@ -43,7 +43,7 @@ def pagerank( is_dangling = np.where(S == 0)[0] # power iteration: make up to max_iter iterations - for _ in range(max_iter): + for _i in range(max_iter): xlast = x x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p # check convergence, l1 norm