community.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070
  1. """Generators for classes of graphs used in studying social networks."""
  2. import itertools
  3. import math
  4. import networkx as nx
  5. from networkx.utils import py_random_state
  6. __all__ = [
  7. "caveman_graph",
  8. "connected_caveman_graph",
  9. "relaxed_caveman_graph",
  10. "random_partition_graph",
  11. "planted_partition_graph",
  12. "gaussian_random_partition_graph",
  13. "ring_of_cliques",
  14. "windmill_graph",
  15. "stochastic_block_model",
  16. "LFR_benchmark_graph",
  17. ]
  18. @nx._dispatchable(graphs=None, returns_graph=True)
  19. def caveman_graph(l, k):
  20. """Returns a caveman graph of `l` cliques of size `k`.
  21. Parameters
  22. ----------
  23. l : int
  24. Number of cliques
  25. k : int
  26. Size of cliques
  27. Returns
  28. -------
  29. G : NetworkX Graph
  30. caveman graph
  31. Notes
  32. -----
  33. This returns an undirected graph, it can be converted to a directed
  34. graph using :func:`nx.to_directed`, or a multigraph using
  35. ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
  36. described in [1]_ and it is unclear which of the directed
  37. generalizations is most useful.
  38. Examples
  39. --------
  40. >>> G = nx.caveman_graph(3, 3)
  41. See also
  42. --------
  43. connected_caveman_graph
  44. References
  45. ----------
  46. .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
  47. Amer. J. Soc. 105, 493-527, 1999.
  48. """
  49. # l disjoint cliques of size k
  50. G = nx.empty_graph(l * k)
  51. if k > 1:
  52. for start in range(0, l * k, k):
  53. edges = itertools.combinations(range(start, start + k), 2)
  54. G.add_edges_from(edges)
  55. return G
  56. @nx._dispatchable(graphs=None, returns_graph=True)
  57. def connected_caveman_graph(l, k):
  58. """Returns a connected caveman graph of `l` cliques of size `k`.
  59. The connected caveman graph is formed by creating `n` cliques of size
  60. `k`, then a single edge in each clique is rewired to a node in an
  61. adjacent clique.
  62. Parameters
  63. ----------
  64. l : int
  65. number of cliques
  66. k : int
  67. size of cliques (k at least 2 or NetworkXError is raised)
  68. Returns
  69. -------
  70. G : NetworkX Graph
  71. connected caveman graph
  72. Raises
  73. ------
  74. NetworkXError
  75. If the size of cliques `k` is smaller than 2.
  76. Notes
  77. -----
  78. This returns an undirected graph, it can be converted to a directed
  79. graph using :func:`nx.to_directed`, or a multigraph using
  80. ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
  81. described in [1]_ and it is unclear which of the directed
  82. generalizations is most useful.
  83. Examples
  84. --------
  85. >>> G = nx.connected_caveman_graph(3, 3)
  86. References
  87. ----------
  88. .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
  89. Amer. J. Soc. 105, 493-527, 1999.
  90. """
  91. if k < 2:
  92. raise nx.NetworkXError(
  93. "The size of cliques in a connected caveman graph must be at least 2."
  94. )
  95. G = nx.caveman_graph(l, k)
  96. for start in range(0, l * k, k):
  97. G.remove_edge(start, start + 1)
  98. G.add_edge(start, (start - 1) % (l * k))
  99. return G
  100. @py_random_state(3)
  101. @nx._dispatchable(graphs=None, returns_graph=True)
  102. def relaxed_caveman_graph(l, k, p, seed=None):
  103. """Returns a relaxed caveman graph.
  104. A relaxed caveman graph starts with `l` cliques of size `k`. Edges are
  105. then randomly rewired with probability `p` to link different cliques.
  106. Parameters
  107. ----------
  108. l : int
  109. Number of groups
  110. k : int
  111. Size of cliques
  112. p : float
  113. Probability of rewiring each edge.
  114. seed : integer, random_state, or None (default)
  115. Indicator of random number generation state.
  116. See :ref:`Randomness<randomness>`.
  117. Returns
  118. -------
  119. G : NetworkX Graph
  120. Relaxed Caveman Graph
  121. Raises
  122. ------
  123. NetworkXError
  124. If p is not in [0,1]
  125. Examples
  126. --------
  127. >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42)
  128. References
  129. ----------
  130. .. [1] Santo Fortunato, Community Detection in Graphs,
  131. Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174.
  132. https://arxiv.org/abs/0906.0612
  133. """
  134. G = nx.caveman_graph(l, k)
  135. nodes = list(G)
  136. for u, v in G.edges():
  137. if seed.random() < p: # rewire the edge
  138. x = seed.choice(nodes)
  139. if G.has_edge(u, x):
  140. continue
  141. G.remove_edge(u, v)
  142. G.add_edge(u, x)
  143. return G
  144. @py_random_state(3)
  145. @nx._dispatchable(graphs=None, returns_graph=True)
  146. def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False):
  147. """Returns the random partition graph with a partition of sizes.
  148. A partition graph is a graph of communities with sizes defined by
  149. s in sizes. Nodes in the same group are connected with probability
  150. p_in and nodes of different groups are connected with probability
  151. p_out.
  152. Parameters
  153. ----------
  154. sizes : list of ints
  155. Sizes of groups
  156. p_in : float
  157. probability of edges with in groups
  158. p_out : float
  159. probability of edges between groups
  160. directed : boolean optional, default=False
  161. Whether to create a directed graph
  162. seed : integer, random_state, or None (default)
  163. Indicator of random number generation state.
  164. See :ref:`Randomness<randomness>`.
  165. Returns
  166. -------
  167. G : NetworkX Graph or DiGraph
  168. random partition graph of size sum(gs)
  169. Raises
  170. ------
  171. NetworkXError
  172. If p_in or p_out is not in [0,1]
  173. Examples
  174. --------
  175. >>> G = nx.random_partition_graph([10, 10, 10], 0.25, 0.01)
  176. >>> len(G)
  177. 30
  178. >>> partition = G.graph["partition"]
  179. >>> len(partition)
  180. 3
  181. Notes
  182. -----
  183. This is a generalization of the planted-l-partition described in
  184. [1]_. It allows for the creation of groups of any size.
  185. The partition is store as a graph attribute 'partition'.
  186. References
  187. ----------
  188. .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports
  189. Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
  190. """
  191. # Use geometric method for O(n+m) complexity algorithm
  192. # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation'))
  193. if not 0.0 <= p_in <= 1.0:
  194. raise nx.NetworkXError("p_in must be in [0,1]")
  195. if not 0.0 <= p_out <= 1.0:
  196. raise nx.NetworkXError("p_out must be in [0,1]")
  197. # create connection matrix
  198. num_blocks = len(sizes)
  199. p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)]
  200. for r in range(num_blocks):
  201. p[r][r] = p_in
  202. return stochastic_block_model(
  203. sizes,
  204. p,
  205. nodelist=None,
  206. seed=seed,
  207. directed=directed,
  208. selfloops=False,
  209. sparse=True,
  210. )
  211. @py_random_state(4)
  212. @nx._dispatchable(graphs=None, returns_graph=True)
  213. def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False):
  214. """Returns the planted l-partition graph.
  215. This model partitions a graph with n=l*k vertices in
  216. l groups with k vertices each. Vertices of the same
  217. group are linked with a probability p_in, and vertices
  218. of different groups are linked with probability p_out.
  219. Parameters
  220. ----------
  221. l : int
  222. Number of groups
  223. k : int
  224. Number of vertices in each group
  225. p_in : float
  226. probability of connecting vertices within a group
  227. p_out : float
  228. probability of connected vertices between groups
  229. seed : integer, random_state, or None (default)
  230. Indicator of random number generation state.
  231. See :ref:`Randomness<randomness>`.
  232. directed : bool,optional (default=False)
  233. If True return a directed graph
  234. Returns
  235. -------
  236. G : NetworkX Graph or DiGraph
  237. planted l-partition graph
  238. Raises
  239. ------
  240. NetworkXError
  241. If `p_in`, `p_out` are not in `[0, 1]`
  242. Examples
  243. --------
  244. >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42)
  245. See Also
  246. --------
  247. random_partition_model
  248. References
  249. ----------
  250. .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning
  251. on the planted partition model,
  252. Random Struct. Algor. 18 (2001) 116-140.
  253. .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports
  254. Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
  255. """
  256. return random_partition_graph([k] * l, p_in, p_out, seed=seed, directed=directed)
  257. @py_random_state(6)
  258. @nx._dispatchable(graphs=None, returns_graph=True)
  259. def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, seed=None):
  260. """Generate a Gaussian random partition graph.
  261. A Gaussian random partition graph is created by creating k partitions
  262. each with a size drawn from a normal distribution with mean s and variance
  263. s/v. Nodes are connected within clusters with probability p_in and
  264. between clusters with probability p_out[1]
  265. Parameters
  266. ----------
  267. n : int
  268. Number of nodes in the graph
  269. s : float
  270. Mean cluster size
  271. v : float
  272. Shape parameter. The variance of cluster size distribution is s/v.
  273. p_in : float
  274. Probability of intra cluster connection.
  275. p_out : float
  276. Probability of inter cluster connection.
  277. directed : boolean, optional default=False
  278. Whether to create a directed graph or not
  279. seed : integer, random_state, or None (default)
  280. Indicator of random number generation state.
  281. See :ref:`Randomness<randomness>`.
  282. Returns
  283. -------
  284. G : NetworkX Graph or DiGraph
  285. gaussian random partition graph
  286. Raises
  287. ------
  288. NetworkXError
  289. If s is > n
  290. If p_in or p_out is not in [0,1]
  291. Notes
  292. -----
  293. Note the number of partitions is dependent on s,v and n, and that the
  294. last partition may be considerably smaller, as it is sized to simply
  295. fill out the nodes [1]
  296. See Also
  297. --------
  298. random_partition_graph
  299. Examples
  300. --------
  301. >>> G = nx.gaussian_random_partition_graph(100, 10, 10, 0.25, 0.1)
  302. >>> len(G)
  303. 100
  304. References
  305. ----------
  306. .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner,
  307. Experiments on Graph Clustering Algorithms,
  308. In the proceedings of the 11th Europ. Symp. Algorithms, 2003.
  309. """
  310. if s > n:
  311. raise nx.NetworkXError("s must be <= n")
  312. assigned = 0
  313. sizes = []
  314. while True:
  315. size = int(seed.gauss(s, s / v + 0.5))
  316. if size < 1: # how to handle 0 or negative sizes?
  317. continue
  318. if assigned + size >= n:
  319. sizes.append(n - assigned)
  320. break
  321. assigned += size
  322. sizes.append(size)
  323. return random_partition_graph(sizes, p_in, p_out, seed=seed, directed=directed)
  324. @nx._dispatchable(graphs=None, returns_graph=True)
  325. def ring_of_cliques(num_cliques, clique_size):
  326. """Defines a "ring of cliques" graph.
  327. A ring of cliques graph is consisting of cliques, connected through single
  328. links. Each clique is a complete graph.
  329. Parameters
  330. ----------
  331. num_cliques : int
  332. Number of cliques
  333. clique_size : int
  334. Size of cliques
  335. Returns
  336. -------
  337. G : NetworkX Graph
  338. ring of cliques graph
  339. Raises
  340. ------
  341. NetworkXError
  342. If the number of cliques is lower than 2 or
  343. if the size of cliques is smaller than 2.
  344. Examples
  345. --------
  346. >>> G = nx.ring_of_cliques(8, 4)
  347. See Also
  348. --------
  349. connected_caveman_graph
  350. Notes
  351. -----
  352. The `connected_caveman_graph` graph removes a link from each clique to
  353. connect it with the next clique. Instead, the `ring_of_cliques` graph
  354. simply adds the link without removing any link from the cliques.
  355. """
  356. if num_cliques < 2:
  357. raise nx.NetworkXError("A ring of cliques must have at least two cliques")
  358. if clique_size < 2:
  359. raise nx.NetworkXError("The cliques must have at least two nodes")
  360. G = nx.Graph()
  361. for i in range(num_cliques):
  362. edges = itertools.combinations(
  363. range(i * clique_size, i * clique_size + clique_size), 2
  364. )
  365. G.add_edges_from(edges)
  366. G.add_edge(
  367. i * clique_size + 1, (i + 1) * clique_size % (num_cliques * clique_size)
  368. )
  369. return G
  370. @nx._dispatchable(graphs=None, returns_graph=True)
  371. def windmill_graph(n, k):
  372. """Generate a windmill graph.
  373. A windmill graph is a graph of `n` cliques each of size `k` that are all
  374. joined at one node.
  375. It can be thought of as taking a disjoint union of `n` cliques of size `k`,
  376. selecting one point from each, and contracting all of the selected points.
  377. Alternatively, one could generate `n` cliques of size `k-1` and one node
  378. that is connected to all other nodes in the graph.
  379. Parameters
  380. ----------
  381. n : int
  382. Number of cliques
  383. k : int
  384. Size of cliques
  385. Returns
  386. -------
  387. G : NetworkX Graph
  388. windmill graph with n cliques of size k
  389. Raises
  390. ------
  391. NetworkXError
  392. If the number of cliques is less than two
  393. If the size of the cliques are less than two
  394. Examples
  395. --------
  396. >>> G = nx.windmill_graph(4, 5)
  397. Notes
  398. -----
  399. The node labeled `0` will be the node connected to all other nodes.
  400. Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters
  401. are in the opposite order as the parameters of this method.
  402. """
  403. if n < 2:
  404. msg = "A windmill graph must have at least two cliques"
  405. raise nx.NetworkXError(msg)
  406. if k < 2:
  407. raise nx.NetworkXError("The cliques must have at least two nodes")
  408. G = nx.disjoint_union_all(
  409. itertools.chain(
  410. [nx.complete_graph(k)], (nx.complete_graph(k - 1) for _ in range(n - 1))
  411. )
  412. )
  413. G.add_edges_from((0, i) for i in range(k, G.number_of_nodes()))
  414. return G
  415. @py_random_state(3)
  416. @nx._dispatchable(graphs=None, returns_graph=True)
  417. def stochastic_block_model(
  418. sizes, p, nodelist=None, seed=None, directed=False, selfloops=False, sparse=True
  419. ):
  420. """Returns a stochastic block model graph.
  421. This model partitions the nodes in blocks of arbitrary sizes, and places
  422. edges between pairs of nodes independently, with a probability that depends
  423. on the blocks.
  424. Parameters
  425. ----------
  426. sizes : list of ints
  427. Sizes of blocks
  428. p : list of list of floats
  429. Element (r,s) gives the density of edges going from the nodes
  430. of group r to nodes of group s.
  431. p must match the number of groups (len(sizes) == len(p)),
  432. and it must be symmetric if the graph is undirected.
  433. nodelist : list, optional
  434. The block tags are assigned according to the node identifiers
  435. in nodelist. If nodelist is None, then the ordering is the
  436. range [0,sum(sizes)-1].
  437. seed : integer, random_state, or None (default)
  438. Indicator of random number generation state.
  439. See :ref:`Randomness<randomness>`.
  440. directed : boolean optional, default=False
  441. Whether to create a directed graph or not.
  442. selfloops : boolean optional, default=False
  443. Whether to include self-loops or not.
  444. sparse: boolean optional, default=True
  445. Use the sparse heuristic to speed up the generator.
  446. Returns
  447. -------
  448. g : NetworkX Graph or DiGraph
  449. Stochastic block model graph of size sum(sizes)
  450. Raises
  451. ------
  452. NetworkXError
  453. If probabilities are not in [0,1].
  454. If the probability matrix is not square (directed case).
  455. If the probability matrix is not symmetric (undirected case).
  456. If the sizes list does not match nodelist or the probability matrix.
  457. If nodelist contains duplicate.
  458. Examples
  459. --------
  460. >>> sizes = [75, 75, 300]
  461. >>> probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]]
  462. >>> g = nx.stochastic_block_model(sizes, probs, seed=0)
  463. >>> len(g)
  464. 450
  465. >>> H = nx.quotient_graph(g, g.graph["partition"], relabel=True)
  466. >>> for v in H.nodes(data=True):
  467. ... print(round(v[1]["density"], 3))
  468. 0.245
  469. 0.348
  470. 0.405
  471. >>> for v in H.edges(data=True):
  472. ... print(round(1.0 * v[2]["weight"] / (sizes[v[0]] * sizes[v[1]]), 3))
  473. 0.051
  474. 0.022
  475. 0.07
  476. See Also
  477. --------
  478. random_partition_graph
  479. planted_partition_graph
  480. gaussian_random_partition_graph
  481. gnp_random_graph
  482. References
  483. ----------
  484. .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S.,
  485. "Stochastic blockmodels: First steps",
  486. Social networks, 5(2), 109-137, 1983.
  487. """
  488. # Check if dimensions match
  489. if len(sizes) != len(p):
  490. raise nx.NetworkXException("'sizes' and 'p' do not match.")
  491. # Check for probability symmetry (undirected) and shape (directed)
  492. for row in p:
  493. if len(p) != len(row):
  494. raise nx.NetworkXException("'p' must be a square matrix.")
  495. if not directed:
  496. p_transpose = [list(i) for i in zip(*p)]
  497. for i in zip(p, p_transpose):
  498. for j in zip(i[0], i[1]):
  499. if abs(j[0] - j[1]) > 1e-08:
  500. raise nx.NetworkXException("'p' must be symmetric.")
  501. # Check for probability range
  502. for row in p:
  503. for prob in row:
  504. if prob < 0 or prob > 1:
  505. raise nx.NetworkXException("Entries of 'p' not in [0,1].")
  506. # Check for nodelist consistency
  507. if nodelist is not None:
  508. if len(nodelist) != sum(sizes):
  509. raise nx.NetworkXException("'nodelist' and 'sizes' do not match.")
  510. if len(nodelist) != len(set(nodelist)):
  511. raise nx.NetworkXException("nodelist contains duplicate.")
  512. else:
  513. nodelist = range(sum(sizes))
  514. # Setup the graph conditionally to the directed switch.
  515. block_range = range(len(sizes))
  516. if directed:
  517. g = nx.DiGraph()
  518. block_iter = itertools.product(block_range, block_range)
  519. else:
  520. g = nx.Graph()
  521. block_iter = itertools.combinations_with_replacement(block_range, 2)
  522. # Split nodelist in a partition (list of sets).
  523. size_cumsum = [sum(sizes[0:x]) for x in range(len(sizes) + 1)]
  524. g.graph["partition"] = [
  525. set(nodelist[size_cumsum[x] : size_cumsum[x + 1]])
  526. for x in range(len(size_cumsum) - 1)
  527. ]
  528. # Setup nodes and graph name
  529. for block_id, nodes in enumerate(g.graph["partition"]):
  530. for node in nodes:
  531. g.add_node(node, block=block_id)
  532. g.name = "stochastic_block_model"
  533. # Test for edge existence
  534. parts = g.graph["partition"]
  535. for i, j in block_iter:
  536. if i == j:
  537. if directed:
  538. if selfloops:
  539. edges = itertools.product(parts[i], parts[i])
  540. else:
  541. edges = itertools.permutations(parts[i], 2)
  542. else:
  543. edges = itertools.combinations(parts[i], 2)
  544. if selfloops:
  545. edges = itertools.chain(edges, zip(parts[i], parts[i]))
  546. for e in edges:
  547. if seed.random() < p[i][j]:
  548. g.add_edge(*e)
  549. else:
  550. edges = itertools.product(parts[i], parts[j])
  551. if sparse:
  552. if p[i][j] == 1: # Test edges cases p_ij = 0 or 1
  553. for e in edges:
  554. g.add_edge(*e)
  555. elif p[i][j] > 0:
  556. while True:
  557. try:
  558. logrand = math.log(seed.random())
  559. skip = math.floor(logrand / math.log(1 - p[i][j]))
  560. # consume "skip" edges
  561. next(itertools.islice(edges, skip, skip), None)
  562. e = next(edges)
  563. g.add_edge(*e) # __safe
  564. except StopIteration:
  565. break
  566. else:
  567. for e in edges:
  568. if seed.random() < p[i][j]:
  569. g.add_edge(*e) # __safe
  570. return g
  571. def _zipf_rv_below(gamma, xmin, threshold, seed):
  572. """Returns a random value chosen from the bounded Zipf distribution.
  573. Repeatedly draws values from the Zipf distribution until the
  574. threshold is met, then returns that value.
  575. """
  576. result = nx.utils.zipf_rv(gamma, xmin, seed)
  577. while result > threshold:
  578. result = nx.utils.zipf_rv(gamma, xmin, seed)
  579. return result
  580. def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed):
  581. """Returns a list of numbers obeying a constrained power law distribution.
  582. ``gamma`` and ``low`` are the parameters for the Zipf distribution.
  583. ``high`` is the maximum allowed value for values draw from the Zipf
  584. distribution. For more information, see :func:`_zipf_rv_below`.
  585. ``condition`` and ``length`` are Boolean-valued functions on
  586. lists. While generating the list, random values are drawn and
  587. appended to the list until ``length`` is satisfied by the created
  588. list. Once ``condition`` is satisfied, the sequence generated in
  589. this way is returned.
  590. ``max_iters`` indicates the number of times to generate a list
  591. satisfying ``length``. If the number of iterations exceeds this
  592. value, :exc:`~networkx.exception.ExceededMaxIterations` is raised.
  593. seed : integer, random_state, or None (default)
  594. Indicator of random number generation state.
  595. See :ref:`Randomness<randomness>`.
  596. """
  597. for i in range(max_iters):
  598. seq = []
  599. while not length(seq):
  600. seq.append(_zipf_rv_below(gamma, low, high, seed))
  601. if condition(seq):
  602. return seq
  603. raise nx.ExceededMaxIterations("Could not create power law sequence")
  604. def _hurwitz_zeta(x, q, tolerance):
  605. """The Hurwitz zeta function, or the Riemann zeta function of two arguments.
  606. ``x`` must be greater than one and ``q`` must be positive.
  607. This function repeatedly computes subsequent partial sums until
  608. convergence, as decided by ``tolerance``.
  609. """
  610. z = 0
  611. z_prev = -float("inf")
  612. k = 0
  613. while abs(z - z_prev) > tolerance:
  614. z_prev = z
  615. z += 1 / ((k + q) ** x)
  616. k += 1
  617. return z
  618. def _generate_min_degree(gamma, average_degree, max_degree, tolerance, max_iters):
  619. """Returns a minimum degree from the given average degree."""
  620. # Defines zeta function whether or not Scipy is available
  621. try:
  622. from scipy.special import zeta
  623. except ImportError:
  624. def zeta(x, q):
  625. return _hurwitz_zeta(x, q, tolerance)
  626. min_deg_top = max_degree
  627. min_deg_bot = 1
  628. min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
  629. itrs = 0
  630. mid_avg_deg = 0
  631. while abs(mid_avg_deg - average_degree) > tolerance:
  632. if itrs > max_iters:
  633. raise nx.ExceededMaxIterations("Could not match average_degree")
  634. mid_avg_deg = 0
  635. for x in range(int(min_deg_mid), max_degree + 1):
  636. mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid)
  637. if mid_avg_deg > average_degree:
  638. min_deg_top = min_deg_mid
  639. min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
  640. else:
  641. min_deg_bot = min_deg_mid
  642. min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
  643. itrs += 1
  644. # return int(min_deg_mid + 0.5)
  645. return round(min_deg_mid)
  646. def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed):
  647. """Returns a list of sets, each of which represents a community.
  648. ``degree_seq`` is the degree sequence that must be met by the
  649. graph.
  650. ``community_sizes`` is the community size distribution that must be
  651. met by the generated list of sets.
  652. ``mu`` is a float in the interval [0, 1] indicating the fraction of
  653. intra-community edges incident to each node.
  654. ``max_iters`` is the number of times to try to add a node to a
  655. community. This must be greater than the length of
  656. ``degree_seq``, otherwise this function will always fail. If
  657. the number of iterations exceeds this value,
  658. :exc:`~networkx.exception.ExceededMaxIterations` is raised.
  659. seed : integer, random_state, or None (default)
  660. Indicator of random number generation state.
  661. See :ref:`Randomness<randomness>`.
  662. The communities returned by this are sets of integers in the set {0,
  663. ..., *n* - 1}, where *n* is the length of ``degree_seq``.
  664. """
  665. # This assumes the nodes in the graph will be natural numbers.
  666. result = [set() for _ in community_sizes]
  667. n = len(degree_seq)
  668. free = list(range(n))
  669. for i in range(max_iters):
  670. v = free.pop()
  671. c = seed.choice(range(len(community_sizes)))
  672. # s = int(degree_seq[v] * (1 - mu) + 0.5)
  673. s = round(degree_seq[v] * (1 - mu))
  674. # If the community is large enough, add the node to the chosen
  675. # community. Otherwise, return it to the list of unaffiliated
  676. # nodes.
  677. if s < community_sizes[c]:
  678. result[c].add(v)
  679. else:
  680. free.append(v)
  681. # If the community is too big, remove a node from it.
  682. if len(result[c]) > community_sizes[c]:
  683. free.append(result[c].pop())
  684. if not free:
  685. return result
  686. msg = "Could not assign communities; try increasing min_community"
  687. raise nx.ExceededMaxIterations(msg)
  688. @py_random_state(11)
  689. @nx._dispatchable(graphs=None, returns_graph=True)
  690. def LFR_benchmark_graph(
  691. n,
  692. tau1,
  693. tau2,
  694. mu,
  695. average_degree=None,
  696. min_degree=None,
  697. max_degree=None,
  698. min_community=None,
  699. max_community=None,
  700. tol=1.0e-7,
  701. max_iters=500,
  702. seed=None,
  703. ):
  704. r"""Returns the LFR benchmark graph.
  705. This algorithm proceeds as follows:
  706. 1) Find a degree sequence with a power law distribution, and minimum
  707. value ``min_degree``, which has approximate average degree
  708. ``average_degree``. This is accomplished by either
  709. a) specifying ``min_degree`` and not ``average_degree``,
  710. b) specifying ``average_degree`` and not ``min_degree``, in which
  711. case a suitable minimum degree will be found.
  712. ``max_degree`` can also be specified, otherwise it will be set to
  713. ``n``. Each node *u* will have $\mu \mathrm{deg}(u)$ edges
  714. joining it to nodes in communities other than its own and $(1 -
  715. \mu) \mathrm{deg}(u)$ edges joining it to nodes in its own
  716. community.
  717. 2) Generate community sizes according to a power law distribution
  718. with exponent ``tau2``. If ``min_community`` and
  719. ``max_community`` are not specified they will be selected to be
  720. ``min_degree`` and ``max_degree``, respectively. Community sizes
  721. are generated until the sum of their sizes equals ``n``.
  722. 3) Each node will be randomly assigned a community with the
  723. condition that the community is large enough for the node's
  724. intra-community degree, $(1 - \mu) \mathrm{deg}(u)$ as
  725. described in step 2. If a community grows too large, a random node
  726. will be selected for reassignment to a new community, until all
  727. nodes have been assigned a community.
  728. 4) Each node *u* then adds $(1 - \mu) \mathrm{deg}(u)$
  729. intra-community edges and $\mu \mathrm{deg}(u)$ inter-community
  730. edges.
  731. Parameters
  732. ----------
  733. n : int
  734. Number of nodes in the created graph.
  735. tau1 : float
  736. Power law exponent for the degree distribution of the created
  737. graph. This value must be strictly greater than one.
  738. tau2 : float
  739. Power law exponent for the community size distribution in the
  740. created graph. This value must be strictly greater than one.
  741. mu : float
  742. Fraction of inter-community edges incident to each node. This
  743. value must be in the interval [0, 1].
  744. average_degree : float
  745. Desired average degree of nodes in the created graph. This value
  746. must be in the interval [0, *n*]. Exactly one of this and
  747. ``min_degree`` must be specified, otherwise a
  748. :exc:`NetworkXError` is raised.
  749. min_degree : int
  750. Minimum degree of nodes in the created graph. This value must be
  751. in the interval [0, *n*]. Exactly one of this and
  752. ``average_degree`` must be specified, otherwise a
  753. :exc:`NetworkXError` is raised.
  754. max_degree : int
  755. Maximum degree of nodes in the created graph. If not specified,
  756. this is set to ``n``, the total number of nodes in the graph.
  757. min_community : int
  758. Minimum size of communities in the graph. If not specified, this
  759. is set to ``min_degree``.
  760. max_community : int
  761. Maximum size of communities in the graph. If not specified, this
  762. is set to ``n``, the total number of nodes in the graph.
  763. tol : float
  764. Tolerance when comparing floats, specifically when comparing
  765. average degree values.
  766. max_iters : int
  767. Maximum number of iterations to try to create the community sizes,
  768. degree distribution, and community affiliations.
  769. seed : integer, random_state, or None (default)
  770. Indicator of random number generation state.
  771. See :ref:`Randomness<randomness>`.
  772. Returns
  773. -------
  774. G : NetworkX graph
  775. The LFR benchmark graph generated according to the specified
  776. parameters.
  777. Each node in the graph has a node attribute ``'community'`` that
  778. stores the community (that is, the set of nodes) that includes
  779. it.
  780. Raises
  781. ------
  782. NetworkXError
  783. If any of the parameters do not meet their upper and lower bounds:
  784. - ``tau1`` and ``tau2`` must be strictly greater than 1.
  785. - ``mu`` must be in [0, 1].
  786. - ``max_degree`` must be in {1, ..., *n*}.
  787. - ``min_community`` and ``max_community`` must be in {0, ...,
  788. *n*}.
  789. If not exactly one of ``average_degree`` and ``min_degree`` is
  790. specified.
  791. If ``min_degree`` is not specified and a suitable ``min_degree``
  792. cannot be found.
  793. ExceededMaxIterations
  794. If a valid degree sequence cannot be created within
  795. ``max_iters`` number of iterations.
  796. If a valid set of community sizes cannot be created within
  797. ``max_iters`` number of iterations.
  798. If a valid community assignment cannot be created within ``10 *
  799. n * max_iters`` number of iterations.
  800. Examples
  801. --------
  802. Basic usage::
  803. >>> from networkx.generators.community import LFR_benchmark_graph
  804. >>> n = 250
  805. >>> tau1 = 3
  806. >>> tau2 = 1.5
  807. >>> mu = 0.1
  808. >>> G = LFR_benchmark_graph(
  809. ... n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10
  810. ... )
  811. Continuing the example above, you can get the communities from the
  812. node attributes of the graph::
  813. >>> communities = {frozenset(G.nodes[v]["community"]) for v in G}
  814. Notes
  815. -----
  816. This algorithm differs slightly from the original way it was
  817. presented in [1].
  818. 1) Rather than connecting the graph via a configuration model then
  819. rewiring to match the intra-community and inter-community
  820. degrees, we do this wiring explicitly at the end, which should be
  821. equivalent.
  822. 2) The code posted on the author's website [2] calculates the random
  823. power law distributed variables and their average using
  824. continuous approximations, whereas we use the discrete
  825. distributions here as both degree and community size are
  826. discrete.
  827. Though the authors describe the algorithm as quite robust, testing
  828. during development indicates that a somewhat narrower parameter set
  829. is likely to successfully produce a graph. Some suggestions have
  830. been provided in the event of exceptions.
  831. References
  832. ----------
  833. .. [1] "Benchmark graphs for testing community detection algorithms",
  834. Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi,
  835. Phys. Rev. E 78, 046110 2008
  836. .. [2] https://www.santofortunato.net/resources
  837. """
  838. # Perform some basic parameter validation.
  839. if not tau1 > 1:
  840. raise nx.NetworkXError("tau1 must be greater than one")
  841. if not tau2 > 1:
  842. raise nx.NetworkXError("tau2 must be greater than one")
  843. if not 0 <= mu <= 1:
  844. raise nx.NetworkXError("mu must be in the interval [0, 1]")
  845. # Validate parameters for generating the degree sequence.
  846. if max_degree is None:
  847. max_degree = n
  848. elif not 0 < max_degree <= n:
  849. raise nx.NetworkXError("max_degree must be in the interval (0, n]")
  850. if not ((min_degree is None) ^ (average_degree is None)):
  851. raise nx.NetworkXError(
  852. "Must assign exactly one of min_degree and average_degree"
  853. )
  854. if min_degree is None:
  855. min_degree = _generate_min_degree(
  856. tau1, average_degree, max_degree, tol, max_iters
  857. )
  858. # Generate a degree sequence with a power law distribution.
  859. low, high = min_degree, max_degree
  860. def condition(seq):
  861. return sum(seq) % 2 == 0
  862. def length(seq):
  863. return len(seq) >= n
  864. deg_seq = _powerlaw_sequence(tau1, low, high, condition, length, max_iters, seed)
  865. # Validate parameters for generating the community size sequence.
  866. if min_community is None:
  867. min_community = min(deg_seq)
  868. if max_community is None:
  869. max_community = max(deg_seq)
  870. # Generate a community size sequence with a power law distribution.
  871. #
  872. # TODO The original code incremented the number of iterations each
  873. # time a new Zipf random value was drawn from the distribution. This
  874. # differed from the way the number of iterations was incremented in
  875. # `_powerlaw_degree_sequence`, so this code was changed to match
  876. # that one. As a result, this code is allowed many more chances to
  877. # generate a valid community size sequence.
  878. low, high = min_community, max_community
  879. def condition(seq):
  880. return sum(seq) == n
  881. def length(seq):
  882. return sum(seq) >= n
  883. comms = _powerlaw_sequence(tau2, low, high, condition, length, max_iters, seed)
  884. # Generate the communities based on the given degree sequence and
  885. # community sizes.
  886. max_iters *= 10 * n
  887. communities = _generate_communities(deg_seq, comms, mu, max_iters, seed)
  888. # Finally, generate the benchmark graph based on the given
  889. # communities, joining nodes according to the intra- and
  890. # inter-community degrees.
  891. G = nx.Graph()
  892. G.add_nodes_from(range(n))
  893. for c in communities:
  894. for u in c:
  895. while G.degree(u) < round(deg_seq[u] * (1 - mu)):
  896. v = seed.choice(list(c))
  897. G.add_edge(u, v)
  898. while G.degree(u) < deg_seq[u]:
  899. v = seed.choice(range(n))
  900. if v not in c:
  901. G.add_edge(u, v)
  902. G.nodes[u]["community"] = c
  903. return G