duplication.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. """Functions for generating graphs based on the "duplication" method.
  2. These graph generators start with a small initial graph then duplicate
  3. nodes and (partially) duplicate their edges. These functions are
  4. generally inspired by biological networks.
  5. """
  6. import networkx as nx
  7. from networkx.exception import NetworkXError
  8. from networkx.utils import py_random_state
  9. from networkx.utils.misc import check_create_using
  10. __all__ = ["partial_duplication_graph", "duplication_divergence_graph"]
  11. @py_random_state(4)
  12. @nx._dispatchable(graphs=None, returns_graph=True)
  13. def partial_duplication_graph(N, n, p, q, seed=None, *, create_using=None):
  14. """Returns a random graph using the partial duplication model.
  15. Parameters
  16. ----------
  17. N : int
  18. The total number of nodes in the final graph.
  19. n : int
  20. The number of nodes in the initial clique.
  21. p : float
  22. The probability of joining each neighbor of a node to the
  23. duplicate node. Must be a number in the between zero and one,
  24. inclusive.
  25. q : float
  26. The probability of joining the source node to the duplicate
  27. node. Must be a number in the between zero and one, inclusive.
  28. seed : integer, random_state, or None (default)
  29. Indicator of random number generation state.
  30. See :ref:`Randomness<randomness>`.
  31. create_using : Graph constructor, optional (default=nx.Graph)
  32. Graph type to create. If graph instance, then cleared before populated.
  33. Multigraph and directed types are not supported and raise a ``NetworkXError``.
  34. Notes
  35. -----
  36. A graph of nodes is grown by creating a fully connected graph
  37. of size `n`. The following procedure is then repeated until
  38. a total of `N` nodes have been reached.
  39. 1. A random node, *u*, is picked and a new node, *v*, is created.
  40. 2. For each neighbor of *u* an edge from the neighbor to *v* is created
  41. with probability `p`.
  42. 3. An edge from *u* to *v* is created with probability `q`.
  43. This algorithm appears in [1].
  44. This implementation allows the possibility of generating
  45. disconnected graphs.
  46. References
  47. ----------
  48. .. [1] Knudsen Michael, and Carsten Wiuf. "A Markov chain approach to
  49. randomly grown graphs." Journal of Applied Mathematics 2008.
  50. <https://doi.org/10.1155/2008/190836>
  51. """
  52. create_using = check_create_using(create_using, directed=False, multigraph=False)
  53. if p < 0 or p > 1 or q < 0 or q > 1:
  54. msg = "partial duplication graph must have 0 <= p, q <= 1."
  55. raise NetworkXError(msg)
  56. if n > N:
  57. raise NetworkXError("partial duplication graph must have n <= N.")
  58. G = nx.complete_graph(n, create_using)
  59. for new_node in range(n, N):
  60. # Pick a random vertex, u, already in the graph.
  61. src_node = seed.randint(0, new_node - 1)
  62. # Add a new vertex, v, to the graph.
  63. G.add_node(new_node)
  64. # For each neighbor of u...
  65. for nbr_node in list(nx.all_neighbors(G, src_node)):
  66. # Add the neighbor to v with probability p.
  67. if seed.random() < p:
  68. G.add_edge(new_node, nbr_node)
  69. # Join v and u with probability q.
  70. if seed.random() < q:
  71. G.add_edge(new_node, src_node)
  72. return G
  73. @py_random_state(2)
  74. @nx._dispatchable(graphs=None, returns_graph=True)
  75. def duplication_divergence_graph(n, p, seed=None, *, create_using=None):
  76. """Returns an undirected graph using the duplication-divergence model.
  77. A graph of `n` nodes is created by duplicating the initial nodes
  78. and retaining edges incident to the original nodes with a retention
  79. probability `p`.
  80. Parameters
  81. ----------
  82. n : int
  83. The desired number of nodes in the graph.
  84. p : float
  85. The probability for retaining the edge of the replicated node.
  86. seed : integer, random_state, or None (default)
  87. Indicator of random number generation state.
  88. See :ref:`Randomness<randomness>`.
  89. create_using : Graph constructor, optional (default=nx.Graph)
  90. Graph type to create. If graph instance, then cleared before populated.
  91. Multigraph and directed types are not supported and raise a ``NetworkXError``.
  92. Returns
  93. -------
  94. G : Graph
  95. Raises
  96. ------
  97. NetworkXError
  98. If `p` is not a valid probability.
  99. If `n` is less than 2.
  100. Notes
  101. -----
  102. This algorithm appears in [1].
  103. This implementation disallows the possibility of generating
  104. disconnected graphs.
  105. References
  106. ----------
  107. .. [1] I. Ispolatov, P. L. Krapivsky, A. Yuryev,
  108. "Duplication-divergence model of protein interaction network",
  109. Phys. Rev. E, 71, 061911, 2005.
  110. """
  111. if p > 1 or p < 0:
  112. msg = f"NetworkXError p={p} is not in [0,1]."
  113. raise nx.NetworkXError(msg)
  114. if n < 2:
  115. msg = "n must be greater than or equal to 2"
  116. raise nx.NetworkXError(msg)
  117. create_using = check_create_using(create_using, directed=False, multigraph=False)
  118. G = nx.empty_graph(create_using=create_using)
  119. # Initialize the graph with two connected nodes.
  120. G.add_edge(0, 1)
  121. i = 2
  122. while i < n:
  123. # Choose a random node from current graph to duplicate.
  124. random_node = seed.choice(list(G))
  125. # Make the replica.
  126. G.add_node(i)
  127. # flag indicates whether at least one edge is connected on the replica.
  128. flag = False
  129. for nbr in G.neighbors(random_node):
  130. if seed.random() < p:
  131. # Link retention step.
  132. G.add_edge(i, nbr)
  133. flag = True
  134. if not flag:
  135. # Delete replica if no edges retained.
  136. G.remove_node(i)
  137. else:
  138. # Successful duplication.
  139. i += 1
  140. return G