leiden.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. """Functions for detecting communities based on Leiden Community Detection
  2. algorithm.
  3. These functions do not have NetworkX implementations.
  4. They may only be run with an installable :doc:`backend </backends>`
  5. that supports them.
  6. """
  7. import itertools
  8. from collections import deque
  9. import networkx as nx
  10. from networkx.utils import not_implemented_for, py_random_state
  11. __all__ = ["leiden_communities", "leiden_partitions"]
  12. @not_implemented_for("directed")
  13. @py_random_state("seed")
  14. @nx._dispatchable(edge_attrs="weight", implemented_by_nx=False)
  15. def leiden_communities(G, weight="weight", resolution=1, max_level=None, seed=None):
  16. r"""Find a best partition of `G` using Leiden Community Detection (backend required)
  17. Leiden Community Detection is an algorithm to extract the community structure
  18. of a network based on modularity optimization. It is an improvement upon the
  19. Louvain Community Detection algorithm. See :any:`louvain_communities`.
  20. Unlike the Louvain algorithm, it guarantees that communities are well connected in addition
  21. to being faster and uncovering better partitions. [1]_
  22. The algorithm works in 3 phases. On the first phase, it adds the nodes to a queue randomly
  23. and assigns every node to be in its own community. For each node it tries to find the
  24. maximum positive modularity gain by moving each node to all of its neighbor communities.
  25. If a node is moved from its community, it adds to the rear of the queue all neighbors of
  26. the node that do not belong to the node’s new community and that are not in the queue.
  27. The first phase continues until the queue is empty.
  28. The second phase consists in refining the partition $P$ obtained from the first phase. It starts
  29. with a singleton partition $P_{refined}$. Then it merges nodes locally in $P_{refined}$ within
  30. each community of the partition $P$. Nodes are merged with a community in $P_{refined}$ only if
  31. both are sufficiently well connected to their community in $P$. This means that after the
  32. refinement phase is concluded, communities in $P$ sometimes will have been split into multiple
  33. communities.
  34. The third phase consists of aggregating the network by building a new network whose nodes are
  35. now the communities found in the second phase. However, the non-refined partition is used to create
  36. an initial partition for the aggregate network.
  37. Once this phase is complete it is possible to reapply the first and second phases creating bigger
  38. communities with increased modularity.
  39. The above three phases are executed until no modularity gain is achieved or `max_level` number
  40. of iterations have been performed.
  41. Parameters
  42. ----------
  43. G : NetworkX graph
  44. weight : string or None, optional (default="weight")
  45. The name of an edge attribute that holds the numerical value
  46. used as a weight. If None then each edge has weight 1.
  47. resolution : float, optional (default=1)
  48. If resolution is less than 1, the algorithm favors larger communities.
  49. Greater than 1 favors smaller communities.
  50. max_level : int or None, optional (default=None)
  51. The maximum number of levels (steps of the algorithm) to compute.
  52. Must be a positive integer or None. If None, then there is no max
  53. level and the algorithm will run until converged.
  54. seed : integer, random_state, or None (default)
  55. Indicator of random number generation state.
  56. See :ref:`Randomness<randomness>`.
  57. Returns
  58. -------
  59. list
  60. A list of disjoint sets (partition of `G`). Each set represents one community.
  61. All communities together contain all the nodes in `G`.
  62. Examples
  63. --------
  64. >>> import networkx as nx
  65. >>> G = nx.petersen_graph()
  66. >>> nx.community.leiden_communities(G, backend="example_backend") # doctest: +SKIP
  67. [{2, 3, 5, 7, 8}, {0, 1, 4, 6, 9}]
  68. Notes
  69. -----
  70. The order in which the nodes are considered can affect the final output. In the algorithm
  71. the ordering happens using a random shuffle.
  72. References
  73. ----------
  74. .. [1] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
  75. well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
  76. See Also
  77. --------
  78. leiden_partitions
  79. :any:`louvain_communities`
  80. """
  81. partitions = leiden_partitions(G, weight, resolution, seed)
  82. if max_level is not None:
  83. if max_level <= 0:
  84. raise ValueError("max_level argument must be a positive integer or None")
  85. partitions = itertools.islice(partitions, max_level)
  86. final_partition = deque(partitions, maxlen=1)
  87. return final_partition.pop()
  88. @not_implemented_for("directed")
  89. @py_random_state("seed")
  90. @nx._dispatchable(edge_attrs="weight", implemented_by_nx=False)
  91. def leiden_partitions(G, weight="weight", resolution=1, seed=None):
  92. """Yield partitions for each level of Leiden Community Detection (backend required)
  93. Leiden Community Detection is an algorithm to extract the community
  94. structure of a network based on modularity optimization.
  95. The partitions across levels (steps of the algorithm) form a dendrogram
  96. of communities. A dendrogram is a diagram representing a tree and each
  97. level represents a partition of the G graph. The top level contains the
  98. smallest communities and as you traverse to the bottom of the tree the
  99. communities get bigger and the overall modularity increases making
  100. the partition better.
  101. Each level is generated by executing the three phases of the Leiden Community
  102. Detection algorithm. See :any:`leiden_communities`.
  103. Parameters
  104. ----------
  105. G : NetworkX graph
  106. weight : string or None, optional (default="weight")
  107. The name of an edge attribute that holds the numerical value
  108. used as a weight. If None then each edge has weight 1.
  109. resolution : float, optional (default=1)
  110. If resolution is less than 1, the algorithm favors larger communities.
  111. Greater than 1 favors smaller communities.
  112. seed : integer, random_state, or None (default)
  113. Indicator of random number generation state.
  114. See :ref:`Randomness<randomness>`.
  115. Yields
  116. ------
  117. list
  118. A list of disjoint sets (partition of `G`). Each set represents one community.
  119. All communities together contain all the nodes in `G`. The yielded partitions
  120. increase modularity with each iteration.
  121. References
  122. ----------
  123. .. [1] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
  124. well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
  125. See Also
  126. --------
  127. leiden_communities
  128. :any:`louvain_partitions`
  129. """
  130. raise NotImplementedError(
  131. "'leiden_partitions' is not implemented by networkx. "
  132. "Please try a different backend."
  133. )