| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354 |
- """Functions for splitting a network into two communities (finding a bipartition)."""
- import random
- from copy import deepcopy
- from itertools import count
- import networkx as nx
- __all__ = [
- "kernighan_lin_bisection",
- "spectral_modularity_bipartition",
- "greedy_node_swap_bipartition",
- ]
- def _kernighan_lin_sweep(edge_info, side):
- """
- This is a modified form of Kernighan-Lin, which moves single nodes at a
- time, alternating between sides to keep the bisection balanced. We keep
- two min-heaps of swap costs to make optimal-next-move selection fast.
- """
- heap0, heap1 = cost_heaps = nx.utils.BinaryHeap(), nx.utils.BinaryHeap()
- # we use heap methods insert, pop, and get
- for u, nbrs in edge_info.items():
- cost_u = sum(wt if side[v] else -wt for v, wt in nbrs.items())
- if side[u]:
- heap1.insert(u, cost_u)
- else:
- heap0.insert(u, -cost_u)
- def _update_heap_values(node):
- side_node = side[node]
- for nbr, wt in edge_info[node].items():
- side_nbr = side[nbr]
- if side_nbr == side_node:
- wt = -wt
- heap_nbr = cost_heaps[side_nbr]
- if nbr in heap_nbr:
- cost_nbr = heap_nbr.get(nbr) + 2 * wt
- # allow_increase lets us update a value already on the heap
- heap_nbr.insert(nbr, cost_nbr, allow_increase=True)
- i = 0
- totcost = 0
- while heap0 and heap1:
- u, cost_u = heap0.pop()
- _update_heap_values(u)
- v, cost_v = heap1.pop()
- _update_heap_values(v)
- totcost += cost_u + cost_v
- i += 1
- yield totcost, i, (u, v)
- @nx.utils.not_implemented_for("directed")
- @nx.utils.py_random_state(4)
- @nx._dispatchable(edge_attrs="weight")
- def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None):
- """Partition a graph into two blocks using the Kernighan–Lin algorithm.
- This algorithm partitions a network into two sets by iteratively
- swapping pairs of nodes to reduce the edge cut between the two sets. The
- pairs are chosen according to a modified form of Kernighan-Lin [1]_, which
- moves nodes individually, alternating between sides to keep the bisection
- balanced.
- Kernighan-Lin is an approximate algorithm for maximal modularity bisection.
- In [2]_ they suggest that fine-tuned improvements can be made using
- greedy node swapping, (see `greedy_node_swap_bipartition`).
- The improvements are typically only a few percent of the modularity value.
- But they claim that can make a difference between a good and excellent method.
- This function does not perform any improvements. But you can do that yourself.
- Parameters
- ----------
- G : NetworkX graph
- Graph must be undirected.
- partition : tuple
- Pair of iterables containing an initial partition. If not
- specified, a random balanced partition is used.
- max_iter : int
- Maximum number of times to attempt swaps to find an
- improvement before giving up.
- weight : string or function (default: "weight")
- If this is a string, then edge weights will be accessed via the
- edge attribute with this key (that is, the weight of the edge
- joining `u` to `v` will be ``G.edges[u, v][weight]``). If no
- such edge attribute exists, the weight of the edge is assumed to
- be one.
- If this is a function, the weight of an edge is the value
- returned by the function. The function must accept exactly three
- positional arguments: the two endpoints of an edge and the
- dictionary of edge attributes for that edge. The function must
- return a number or None to indicate a hidden edge.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- Only used if partition is None
- Returns
- -------
- partition : tuple
- A pair of sets of nodes representing the bipartition.
- Raises
- ------
- NetworkXError
- If `partition` is not a valid partition of the nodes of the graph.
- References
- ----------
- .. [1] Kernighan, B. W.; Lin, Shen (1970).
- "An efficient heuristic procedure for partitioning graphs."
- *Bell Systems Technical Journal* 49: 291--307.
- Oxford University Press 2011.
- .. [2] M. E. J. Newman,
- "Modularity and community structure in networks",
- PNAS, 103 (23), p. 8577-8582,
- https://doi.org/10.1073/pnas.0601602103
- """
- nodes = list(G)
- if partition is None:
- seed.shuffle(nodes)
- mid = len(nodes) // 2
- A, B = nodes[:mid], nodes[mid:]
- else:
- try:
- A, B = partition
- except (TypeError, ValueError) as err:
- raise nx.NetworkXError("partition must be two sets") from err
- if not nx.community.is_partition(G, [A, B]):
- raise nx.NetworkXError("partition invalid")
- side = {node: (node in A) for node in nodes}
- # ruff: noqa: E731 skips check for no lambda functions
- # Using shortest_paths _weight_function with sum instead of min on multiedges
- if callable(weight):
- sum_weight = weight
- elif G.is_multigraph():
- sum_weight = lambda u, v, d: sum(dd.get(weight, 1) for dd in d.values())
- else:
- sum_weight = lambda u, v, d: d.get(weight, 1)
- edge_info = {
- u: {v: wt for v, d in nbrs.items() if (wt := sum_weight(u, v, d)) is not None}
- for u, nbrs in G._adj.items()
- }
- for i in range(max_iter):
- costs = list(_kernighan_lin_sweep(edge_info, side))
- # find out how many edges to update: min_i
- min_cost, min_i, _ = min(costs)
- if min_cost >= 0:
- break
- for _, _, (u, v) in costs[:min_i]:
- side[u] = 1
- side[v] = 0
- part1 = {u for u, s in side.items() if s == 0}
- part2 = {u for u, s in side.items() if s == 1}
- return part1, part2
- @nx.utils.not_implemented_for("directed")
- @nx.utils.not_implemented_for("multigraph")
- def spectral_modularity_bipartition(G):
- r"""Return a bipartition of the nodes based on the spectrum of the
- modularity matrix of the graph.
- This method calculates the eigenvector associated with the second
- largest eigenvalue of the modularity matrix, where the modularity
- matrix *B* is defined by
- ..math::
- B_{i j} = A_{i j} - \frac{k_i k_j}{2 m},
- where *A* is the adjacency matrix, `k_i` is the degree of node *i*,
- and *m* is the number of edges in the graph. Nodes whose
- corresponding values in the eigenvector are negative are placed in
- one block, nodes whose values are nonnegative are placed in another
- block.
- Parameters
- ----------
- G : NetworkX graph
- Returns
- -------
- C : tuple
- Pair of communities as two sets of nodes of ``G``, partitioned
- according to second largest eigenvalue of the modularity matrix.
- Examples
- --------
- >>> G = nx.karate_club_graph()
- >>> MrHi, Officer = nx.community.spectral_modularity_bipartition(G)
- >>> MrHi, Officer = sorted([sorted(MrHi), sorted(Officer)])
- >>> MrHi
- [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 19, 21]
- >>> Officer
- [8, 9, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
- References
- ----------
- .. [1] M. E. J. Newman *Networks: An Introduction*, pages 373--378
- Oxford University Press 2011.
- .. [2] M. E. J. Newman,
- "Modularity and community structure in networks",
- PNAS, 103 (23), p. 8577-8582,
- https://doi.org/10.1073/pnas.0601602103
- """
- import numpy as np
- B = nx.linalg.modularity_matrix(G)
- eigenvalues, eigenvectors = np.linalg.eig(B)
- index = np.argsort(eigenvalues)[-1]
- v2 = zip(np.real(eigenvectors[:, index]), G)
- left, right = set(), set()
- for u, n in v2:
- if u < 0:
- left.add(n)
- else:
- right.add(n)
- return left, right
- @nx.utils.not_implemented_for("multigraph")
- def greedy_node_swap_bipartition(G, *, init_split=None, max_iter=10):
- """Split the nodes into two communities based on greedy
- modularity maximization.
- The algorithm works by selecting a node to change communities which
- will maximize the modularity. The swap is made and the community
- structure with the highest modularity is kept.
- Parameters
- ----------
- G : NetworkX graph
- init_split : 2-tuple of sets of nodes
- Pair of sets of nodes in ``G`` providing an initial bipartition
- for the algorithm. If not specified, a random balanced partition
- is used. If this pair of sets is not a partition of the nodes of `G`,
- :exc:`NetworkXException` is raised.
- max_iter : int
- Maximum number of iterations of attempting swaps to find an improvement.
- Returns
- -------
- max_split : 2-tuple of sets of nodes
- Pair of sets of nodes of ``G``, partitioned according to a
- node swap greedy modularity maximization algorithm.
- Raises
- ------
- NetworkXError
- if init_split is not a valid partition of the
- graph into two communities or if G is a MultiGraph
- Examples
- --------
- >>> G = nx.barbell_graph(3, 0)
- >>> left, right = nx.community.greedy_node_swap_bipartition(G)
- >>> # Sort the communities so the nodes appear in increasing order.
- >>> left, right = sorted([sorted(left), sorted(right)])
- >>> sorted(left)
- [0, 1, 2]
- >>> sorted(right)
- [3, 4, 5]
- Notes
- -----
- This function is not implemented for multigraphs.
- References
- ----------
- .. [1] M. E. J. Newman "Networks: An Introduction", pages 373--375.
- Oxford University Press 2011.
- """
- if init_split is None:
- m1 = len(G) // 2
- m2 = len(G) - m1
- some_nodes = set(random.sample(list(G), m1))
- other_nodes = {n for n in G if n not in some_nodes}
- best_split_so_far = (some_nodes, other_nodes)
- else:
- if not nx.community.is_partition(G, init_split):
- raise nx.NetworkXError("init_split is not a partition of G")
- if not len(init_split) == 2:
- raise nx.NetworkXError("init_split must be a bipartition")
- best_split_so_far = deepcopy(init_split)
- best_mod = nx.community.modularity(G, best_split_so_far)
- max_split, max_mod = best_split_so_far, best_mod
- its = 0
- m = G.number_of_edges()
- G_degree = dict(G.degree)
- while max_mod >= best_mod and its < max_iter:
- best_split_so_far = max_split
- best_mod = max_mod
- next_split = deepcopy(max_split)
- next_mod = max_mod
- nodes = set(G)
- while nodes:
- max_swap = -1
- max_node = None
- max_node_comm = None
- left, right = next_split
- leftd = sum(G_degree[n] for n in left)
- rightd = sum(G_degree[n] for n in right)
- for n in nodes:
- if n in left:
- in_comm, out_comm = left, right
- in_deg, out_deg = leftd, rightd
- else:
- in_comm, out_comm = right, left
- in_deg, out_deg = rightd, leftd
- d_eii = -len(G[n].keys() & in_comm) / m
- d_ejj = len(G[n].keys() & out_comm) / m
- deg = G_degree[n]
- d_sum_ai = (deg / (2 * m**2)) * (in_deg - out_deg - deg)
- swap_change = d_eii + d_ejj + d_sum_ai
- if swap_change > max_swap:
- max_swap = swap_change
- max_node = n
- max_node_comm = in_comm
- non_max_node_comm = out_comm
- # swap the node from one comm to the other
- max_node_comm.remove(max_node)
- non_max_node_comm.add(max_node)
- next_mod += max_swap
- # deepcopy next_split each time it reaches a high (might go lower later)
- if next_mod > max_mod:
- max_split, max_mod = deepcopy(next_split), next_mod
- nodes.remove(max_node)
- its += 1
- return best_split_so_far
|