test_enumerative.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. import string
  2. from itertools import zip_longest
  3. from sympy.utilities.enumerative import (
  4. list_visitor,
  5. MultisetPartitionTraverser,
  6. multiset_partitions_taocp
  7. )
  8. from sympy.utilities.iterables import _set_partitions
# First, some functions that are only useful as test scaffolding.  These
# provide straightforward, but slow, reference implementations against
# which to compare the real versions, and also a comparison helper to
# verify that different versions are giving identical results.
  13. def part_range_filter(partition_iterator, lb, ub):
  14. """
  15. Filters (on the number of parts) a multiset partition enumeration
  16. Arguments
  17. =========
  18. lb, and ub are a range (in the Python slice sense) on the lpart
  19. variable returned from a multiset partition enumeration. Recall
  20. that lpart is 0-based (it points to the topmost part on the part
  21. stack), so if you want to return parts of sizes 2,3,4,5 you would
  22. use lb=1 and ub=5.
  23. """
  24. for state in partition_iterator:
  25. f, lpart, pstack = state
  26. if lpart >= lb and lpart < ub:
  27. yield state
  28. def multiset_partitions_baseline(multiplicities, components):
  29. """Enumerates partitions of a multiset
  30. Parameters
  31. ==========
  32. multiplicities
  33. list of integer multiplicities of the components of the multiset.
  34. components
  35. the components (elements) themselves
  36. Returns
  37. =======
  38. Set of partitions. Each partition is tuple of parts, and each
  39. part is a tuple of components (with repeats to indicate
  40. multiplicity)
  41. Notes
  42. =====
  43. Multiset partitions can be created as equivalence classes of set
  44. partitions, and this function does just that. This approach is
  45. slow and memory intensive compared to the more advanced algorithms
  46. available, but the code is simple and easy to understand. Hence
  47. this routine is strictly for testing -- to provide a
  48. straightforward baseline against which to regress the production
  49. versions. (This code is a simplified version of an earlier
  50. production implementation.)
  51. """
  52. canon = [] # list of components with repeats
  53. for ct, elem in zip(multiplicities, components):
  54. canon.extend([elem]*ct)
  55. # accumulate the multiset partitions in a set to eliminate dups
  56. cache = set()
  57. n = len(canon)
  58. for nc, q in _set_partitions(n):
  59. rv = [[] for i in range(nc)]
  60. for i in range(n):
  61. rv[q[i]].append(canon[i])
  62. canonical = tuple(
  63. sorted([tuple(p) for p in rv]))
  64. cache.add(canonical)
  65. return cache
  66. def compare_multiset_w_baseline(multiplicities):
  67. """
  68. Enumerates the partitions of multiset with AOCP algorithm and
  69. baseline implementation, and compare the results.
  70. """
  71. letters = string.ascii_lowercase
  72. bl_partitions = multiset_partitions_baseline(multiplicities, letters)
  73. # The partitions returned by the different algorithms may have
  74. # their parts in different orders. Also, they generate partitions
  75. # in different orders. Hence the sorting, and set comparison.
  76. aocp_partitions = set()
  77. for state in multiset_partitions_taocp(multiplicities):
  78. p1 = tuple(sorted(
  79. [tuple(p) for p in list_visitor(state, letters)]))
  80. aocp_partitions.add(p1)
  81. assert bl_partitions == aocp_partitions
  82. def compare_multiset_states(s1, s2):
  83. """compare for equality two instances of multiset partition states
  84. This is useful for comparing different versions of the algorithm
  85. to verify correctness."""
  86. # Comparison is physical, the only use of semantics is to ignore
  87. # trash off the top of the stack.
  88. f1, lpart1, pstack1 = s1
  89. f2, lpart2, pstack2 = s2
  90. if (lpart1 == lpart2) and (f1[0:lpart1+1] == f2[0:lpart2+1]):
  91. if pstack1[0:f1[lpart1+1]] == pstack2[0:f2[lpart2+1]]:
  92. return True
  93. return False
  94. def test_multiset_partitions_taocp():
  95. """Compares the output of multiset_partitions_taocp with a baseline
  96. (set partition based) implementation."""
  97. # Test cases should not be too large, since the baseline
  98. # implementation is fairly slow.
  99. multiplicities = [2,2]
  100. compare_multiset_w_baseline(multiplicities)
  101. multiplicities = [4,3,1]
  102. compare_multiset_w_baseline(multiplicities)
  103. def test_multiset_partitions_versions():
  104. """Compares Knuth-based versions of multiset_partitions"""
  105. multiplicities = [5,2,2,1]
  106. m = MultisetPartitionTraverser()
  107. for s1, s2 in zip_longest(m.enum_all(multiplicities),
  108. multiset_partitions_taocp(multiplicities)):
  109. assert compare_multiset_states(s1, s2)
  110. def subrange_exercise(mult, lb, ub):
  111. """Compare filter-based and more optimized subrange implementations
  112. Helper for tests, called with both small and larger multisets.
  113. """
  114. m = MultisetPartitionTraverser()
  115. assert m.count_partitions(mult) == \
  116. m.count_partitions_slow(mult)
  117. # Note - multiple traversals from the same
  118. # MultisetPartitionTraverser object cannot execute at the same
  119. # time, hence make several instances here.
  120. ma = MultisetPartitionTraverser()
  121. mc = MultisetPartitionTraverser()
  122. md = MultisetPartitionTraverser()
  123. # Several paths to compute just the size two partitions
  124. a_it = ma.enum_range(mult, lb, ub)
  125. b_it = part_range_filter(multiset_partitions_taocp(mult), lb, ub)
  126. c_it = part_range_filter(mc.enum_small(mult, ub), lb, sum(mult))
  127. d_it = part_range_filter(md.enum_large(mult, lb), 0, ub)
  128. for sa, sb, sc, sd in zip_longest(a_it, b_it, c_it, d_it):
  129. assert compare_multiset_states(sa, sb)
  130. assert compare_multiset_states(sa, sc)
  131. assert compare_multiset_states(sa, sd)
  132. def test_subrange():
  133. # Quick, but doesn't hit some of the corner cases
  134. mult = [4,4,2,1] # mississippi
  135. lb = 1
  136. ub = 2
  137. subrange_exercise(mult, lb, ub)
  138. def test_subrange_large():
  139. # takes a second or so, depending on cpu, Python version, etc.
  140. mult = [6,3,2,1]
  141. lb = 4
  142. ub = 7
  143. subrange_exercise(mult, lb, ub)