random_sequence.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. """
  2. Utilities for generating random numbers, random sequences, and
  3. random selections.
  4. """
  5. import networkx as nx
  6. from networkx.utils import py_random_state
  7. __all__ = [
  8. "powerlaw_sequence",
  9. "is_valid_tree_degree_sequence",
  10. "zipf_rv",
  11. "cumulative_distribution",
  12. "discrete_sequence",
  13. "random_weighted_sample",
  14. "weighted_choice",
  15. ]
  16. # The same helpers for choosing random sequences from distributions
  17. # uses Python's random module
  18. # https://docs.python.org/3/library/random.html
  19. @py_random_state(2)
  20. def powerlaw_sequence(n, exponent=2.0, seed=None):
  21. """
  22. Return sample sequence of length n from a power law distribution.
  23. """
  24. return [seed.paretovariate(exponent - 1) for i in range(n)]
  25. def is_valid_tree_degree_sequence(degree_sequence):
  26. """Check if a degree sequence is valid for a tree.
  27. Two conditions must be met for a degree sequence to be valid for a tree:
  28. 1. The number of nodes must be one more than the number of edges.
  29. 2. The degree sequence must be trivial or have only strictly positive
  30. node degrees.
  31. Parameters
  32. ----------
  33. degree_sequence : iterable
  34. Iterable of node degrees.
  35. Returns
  36. -------
  37. bool
  38. Whether the degree sequence is valid for a tree.
  39. str
  40. Reason for invalidity, or dummy string if valid.
  41. """
  42. seq = list(degree_sequence)
  43. number_of_nodes = len(seq)
  44. twice_number_of_edges = sum(seq)
  45. if 2 * number_of_nodes - twice_number_of_edges != 2:
  46. return False, "tree must have one more node than number of edges"
  47. elif seq != [0] and any(d <= 0 for d in seq):
  48. return False, "nontrivial tree must have strictly positive node degrees"
  49. return True, ""
  50. @py_random_state(2)
  51. def zipf_rv(alpha, xmin=1, seed=None):
  52. r"""Returns a random value chosen from the Zipf distribution.
  53. The return value is an integer drawn from the probability distribution
  54. .. math::
  55. p(x)=\frac{x^{-\alpha}}{\zeta(\alpha, x_{\min})},
  56. where $\zeta(\alpha, x_{\min})$ is the Hurwitz zeta function.
  57. Parameters
  58. ----------
  59. alpha : float
  60. Exponent value of the distribution
  61. xmin : int
  62. Minimum value
  63. seed : integer, random_state, or None (default)
  64. Indicator of random number generation state.
  65. See :ref:`Randomness<randomness>`.
  66. Returns
  67. -------
  68. x : int
  69. Random value from Zipf distribution
  70. Raises
  71. ------
  72. ValueError:
  73. If xmin < 1 or
  74. If alpha <= 1
  75. Notes
  76. -----
  77. The rejection algorithm generates random values for a the power-law
  78. distribution in uniformly bounded expected time dependent on
  79. parameters. See [1]_ for details on its operation.
  80. Examples
  81. --------
  82. >>> nx.utils.zipf_rv(alpha=2, xmin=3, seed=42)
  83. 8
  84. References
  85. ----------
  86. .. [1] Luc Devroye, Non-Uniform Random Variate Generation,
  87. Springer-Verlag, New York, 1986.
  88. """
  89. if xmin < 1:
  90. raise ValueError("xmin < 1")
  91. if alpha <= 1:
  92. raise ValueError("a <= 1.0")
  93. a1 = alpha - 1.0
  94. b = 2**a1
  95. while True:
  96. u = 1.0 - seed.random() # u in (0,1]
  97. v = seed.random() # v in [0,1)
  98. x = int(xmin * u ** -(1.0 / a1))
  99. t = (1.0 + (1.0 / x)) ** a1
  100. if v * x * (t - 1.0) / (b - 1.0) <= t / b:
  101. break
  102. return x
  103. def cumulative_distribution(distribution):
  104. """Returns normalized cumulative distribution from discrete distribution."""
  105. cdf = [0.0]
  106. cumulative = 0.0
  107. for element in distribution:
  108. cumulative += element
  109. cdf.append(cumulative)
  110. return [element / cumulative for element in cdf]
  111. @py_random_state(3)
  112. def discrete_sequence(n, distribution=None, cdistribution=None, seed=None):
  113. """
  114. Return sample sequence of length n from a given discrete distribution
  115. or discrete cumulative distribution.
  116. One of the following must be specified.
  117. distribution = histogram of values, will be normalized
  118. cdistribution = normalized discrete cumulative distribution
  119. """
  120. import bisect
  121. if cdistribution is not None:
  122. cdf = cdistribution
  123. elif distribution is not None:
  124. cdf = cumulative_distribution(distribution)
  125. else:
  126. raise nx.NetworkXError(
  127. "discrete_sequence: distribution or cdistribution missing"
  128. )
  129. # get a uniform random number
  130. inputseq = [seed.random() for i in range(n)]
  131. # choose from CDF
  132. seq = [bisect.bisect_left(cdf, s) - 1 for s in inputseq]
  133. return seq
  134. @py_random_state(2)
  135. def random_weighted_sample(mapping, k, seed=None):
  136. """Returns k items without replacement from a weighted sample.
  137. The input is a dictionary of items with weights as values.
  138. """
  139. if k > len(mapping):
  140. raise ValueError("sample larger than population")
  141. sample = set()
  142. while len(sample) < k:
  143. sample.add(weighted_choice(mapping, seed))
  144. return list(sample)
  145. @py_random_state(1)
  146. def weighted_choice(mapping, seed=None):
  147. """Returns a single element from a weighted sample.
  148. The input is a dictionary of items with weights as values.
  149. """
  150. # use roulette method
  151. rnd = seed.random() * sum(mapping.values())
  152. for k, w in mapping.items():
  153. rnd -= w
  154. if rnd < 0:
  155. return k