test_hierarchy.py 49 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237
  1. #
  2. # Author: Damian Eads
  3. # Date: April 17, 2008
  4. #
  5. # Copyright (C) 2008 Damian Eads
  6. #
  7. # Redistribution and use in source and binary forms, with or without
  8. # modification, are permitted provided that the following conditions
  9. # are met:
  10. #
  11. # 1. Redistributions of source code must retain the above copyright
  12. # notice, this list of conditions and the following disclaimer.
  13. #
  14. # 2. Redistributions in binary form must reproduce the above
  15. # copyright notice, this list of conditions and the following
  16. # disclaimer in the documentation and/or other materials provided
  17. # with the distribution.
  18. #
  19. # 3. The name of the author may not be used to endorse or promote
  20. # products derived from this software without specific prior
  21. # written permission.
  22. #
  23. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  24. # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  25. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  27. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  29. # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  31. # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  32. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  33. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  34. import numpy as np
  35. from numpy.testing import assert_allclose, assert_equal, assert_array_equal, assert_
  36. import pytest
  37. from pytest import raises as assert_raises
  38. from scipy.cluster.hierarchy import (
  39. ClusterWarning, linkage, from_mlab_linkage, to_mlab_linkage,
  40. num_obs_linkage, inconsistent, cophenet, fclusterdata, fcluster,
  41. is_isomorphic, single, ward, leaders,
  42. correspond, is_monotonic, maxdists, maxinconsts, maxRstat,
  43. is_valid_linkage, is_valid_im, to_tree, leaves_list, dendrogram,
  44. set_link_color_palette, cut_tree, optimal_leaf_ordering,
  45. _order_cluster_tree, _hierarchy, _EUCLIDEAN_METHODS, _LINKAGE_METHODS)
  46. from scipy.cluster._hierarchy import Heap
  47. from scipy.spatial.distance import pdist
  48. from scipy._lib._array_api import (eager_warns, make_xp_test_case,
  49. xp_assert_close, xp_assert_equal)
  50. import scipy._lib.array_api_extra as xpx
  51. from threading import Lock
  52. from . import hierarchy_test_data
  53. class eager:
  54. # Bypass xpx.testing.lazy_xp_function when calling
  55. # these functions from this namespace
  56. is_valid_im = is_valid_im
  57. is_valid_linkage = is_valid_linkage
  58. # Matplotlib is not a scipy dependency but is optionally used in dendrogram, so
  59. # check if it's available
  60. try:
  61. import matplotlib
  62. # and set the backend to be Agg (no gui)
  63. matplotlib.use('Agg')
  64. # before importing pyplot
  65. import matplotlib.pyplot as plt
  66. have_matplotlib = True
  67. except Exception:
  68. have_matplotlib = False
  69. skip_xp_backends = pytest.mark.skip_xp_backends
  70. @make_xp_test_case(linkage)
  71. class TestLinkage:
  72. @skip_xp_backends("jax.numpy", reason="Can't raise inside jax.pure_callback")
  73. def test_linkage_non_finite_elements_in_distance_matrix(self, xp):
  74. # Tests linkage(Y) where Y contains a non-finite element (e.g. NaN or Inf).
  75. # Exception expected.
  76. y = xp.asarray([xp.nan] + [0.0]*5)
  77. assert_raises(ValueError, linkage, y)
  78. def test_linkage_empty_distance_matrix(self, xp):
  79. # Tests linkage(Y) where Y is a 0x4 linkage matrix. Exception expected.
  80. y = xp.zeros((0,))
  81. assert_raises(ValueError, linkage, y)
  82. def test_linkage_tdist(self, xp):
  83. for method in ['single', 'complete', 'average', 'weighted']:
  84. self.check_linkage_tdist(method, xp)
  85. def check_linkage_tdist(self, method, xp):
  86. # Tests linkage(Y, method) on the tdist data set.
  87. Z = linkage(xp.asarray(hierarchy_test_data.ytdist), method)
  88. expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_' + method)
  89. xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-10)
  90. def test_linkage_X(self, xp):
  91. for method in ['centroid', 'median', 'ward']:
  92. self.check_linkage_q(method, xp)
  93. def check_linkage_q(self, method, xp):
  94. # Tests linkage(Y, method) on the Q data set.
  95. Z = linkage(xp.asarray(hierarchy_test_data.X), method)
  96. expectedZ = getattr(hierarchy_test_data, 'linkage_X_' + method)
  97. xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-06)
  98. X = xp.asarray(hierarchy_test_data.X)
  99. y = pdist(X, metric="euclidean")
  100. Z = linkage(y, method)
  101. xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-06)
  102. def test_compare_with_trivial(self, xp):
  103. rng = np.random.RandomState(0)
  104. n = 20
  105. X = rng.rand(n, 2)
  106. d = pdist(X)
  107. for method, code in _LINKAGE_METHODS.items():
  108. Z_trivial = _hierarchy.linkage(d, n, code)
  109. Z = linkage(xp.asarray(d), method)
  110. xp_assert_close(Z, xp.asarray(Z_trivial), rtol=1e-14, atol=1e-15)
  111. def test_optimal_leaf_ordering(self, xp):
  112. Z = linkage(xp.asarray(hierarchy_test_data.ytdist), optimal_ordering=True)
  113. expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_single_olo')
  114. xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-10)
  115. @pytest.mark.parametrize("method,expect", [
  116. ('single', [[0, 1, 1.41421356, 2],
  117. [2, 3, 1.41421356, 3]]),
  118. ('complete', [[0, 1, 1.41421356, 2],
  119. [2, 3, 2.82842712, 3]]),
  120. ('average', [[0, 1, 1.41421356, 2],
  121. [2, 3, 2.12132034, 3]]),
  122. ('weighted', [[0, 1, 1.41421356, 2],
  123. [2, 3, 2.12132034, 3]]),
  124. ('centroid', [[0, 1, 1.41421356, 2],
  125. [2, 3, 2.12132034, 3]]),
  126. ('median', [[0, 1, 1.41421356, 2],
  127. [2, 3, 2.12132034, 3]]),
  128. ('ward', [[0, 1, 1.41421356, 2],
  129. [2, 3, 2.44948974, 3]]),
  130. ])
  131. def test_linkage_ties(self, method, expect, xp):
  132. X = xp.asarray([[-1, -1], [0, 0], [1, 1]])
  133. Z = linkage(X, method=method)
  134. expect = xp.asarray(expect, dtype=xp.float64)
  135. xp_assert_close(Z, expect, atol=1e-06)
  136. def test_unsupported_uncondensed_distance_matrix_linkage_warning(self, xp):
  137. X = xp.asarray([[0, 1], [1, 0]])
  138. with eager_warns(ClusterWarning, xp=xp):
  139. linkage(X)
  140. @pytest.mark.parametrize("method", _EUCLIDEAN_METHODS)
  141. def test_euclidean_linkage_value_error(self, method, xp):
  142. X = xp.asarray([[1, 1], [1, 1]])
  143. with pytest.raises(ValueError):
  144. linkage(X, method=method, metric='cityblock')
  145. def test_2x2_linkage(self, xp):
  146. Z1 = linkage(xp.asarray([1]), method='single', metric='euclidean')
  147. Z2 = linkage(xp.asarray([[0, 1], [0, 0]]), method='single', metric='euclidean')
  148. xp_assert_close(Z1, Z2, rtol=1e-15)
  149. @skip_xp_backends("jax.numpy", reason="Can't raise inside jax.pure_callback")
  150. def test_centroid_neg_distance(self, xp):
  151. # gh-21011
  152. values = xp.asarray([0, 0, -1])
  153. with pytest.raises(ValueError):
  154. # This is just checking that this doesn't crash
  155. linkage(values, method='centroid')
  156. @make_xp_test_case(inconsistent)
  157. class TestInconsistent:
  158. def test_inconsistent_tdist(self, xp):
  159. for depth in hierarchy_test_data.inconsistent_ytdist:
  160. self.check_inconsistent_tdist(depth, xp)
  161. def check_inconsistent_tdist(self, depth, xp):
  162. Z = xp.asarray(hierarchy_test_data.linkage_ytdist_single)
  163. xp_assert_close(inconsistent(Z, depth),
  164. xp.asarray(hierarchy_test_data.inconsistent_ytdist[depth]))
  165. @make_xp_test_case(cophenet)
  166. class TestCopheneticDistance:
  167. def test_linkage_cophenet_tdist_Z(self, xp):
  168. # Tests cophenet(Z) on tdist data set.
  169. expectedM = xp.asarray([268, 295, 255, 255, 295, 295, 268, 268, 295, 295,
  170. 295, 138, 219, 295, 295])
  171. Z = xp.asarray(hierarchy_test_data.linkage_ytdist_single)
  172. M = cophenet(Z)
  173. xp_assert_close(M, xp.asarray(expectedM, dtype=xp.float64), atol=1e-10)
  174. def test_linkage_cophenet_tdist_Z_Y(self, xp):
  175. # Tests cophenet(Z, Y) on tdist data set.
  176. Z = xp.asarray(hierarchy_test_data.linkage_ytdist_single)
  177. (c, M) = cophenet(Z, xp.asarray(hierarchy_test_data.ytdist))
  178. expectedM = xp.asarray([268, 295, 255, 255, 295, 295, 268, 268, 295, 295,
  179. 295, 138, 219, 295, 295], dtype=xp.float64)
  180. expectedc = xp.asarray(0.639931296433393415057366837573, dtype=xp.float64)[()]
  181. xp_assert_close(c, expectedc, atol=1e-10)
  182. xp_assert_close(M, expectedM, atol=1e-10)
  183. @skip_xp_backends("jax.numpy", reason="Can't raise inside jax.pure_callback")
  184. def test_gh_22183(self, xp):
  185. # check for lack of segfault
  186. # (out of bounds memory access)
  187. # and correct interception of
  188. # invalid linkage matrix
  189. arr=[[0.0, 1.0, 1.0, 2.0],
  190. [2.0, 12.0, 1.0, 3.0],
  191. [3.0, 4.0, 1.0, 2.0],
  192. [5.0, 14.0, 1.0, 3.0],
  193. [6.0, 7.0, 1.0, 2.0],
  194. [8.0, 16.0, 1.0, 3.0],
  195. [9.0, 10.0, 1.0, 2.0],
  196. [11.0, 18.0, 1.0, 3.0],
  197. [13.0, 15.0, 2.0, 6.0],
  198. [17.0, 20.0, 2.0, 32.0],
  199. [19.0, 21.0, 2.0, 12.0]]
  200. with pytest.raises(ValueError, match="excessive observations"):
  201. cophenet(xp.asarray(arr))
  202. @make_xp_test_case(from_mlab_linkage, to_mlab_linkage)
  203. class TestMLabLinkageConversion:
  204. def test_mlab_linkage_conversion_empty(self, xp):
  205. # Tests from/to_mlab_linkage on empty linkage array.
  206. X = xp.asarray([], dtype=xp.float64)
  207. xp_assert_equal(from_mlab_linkage(X), X)
  208. xp_assert_equal(to_mlab_linkage(X), X)
  209. def test_mlab_linkage_conversion_single_row(self, xp):
  210. # Tests from/to_mlab_linkage on linkage array with single row.
  211. Z = xp.asarray([[0., 1., 3., 2.]])
  212. Zm = xp.asarray([[1, 2, 3]])
  213. xp_assert_close(from_mlab_linkage(Zm), xp.asarray(Z, dtype=xp.float64),
  214. rtol=1e-15)
  215. xp_assert_close(to_mlab_linkage(Z), xp.asarray(Zm, dtype=xp.float64),
  216. rtol=1e-15)
  217. def test_mlab_linkage_conversion_multiple_rows(self, xp):
  218. # Tests from/to_mlab_linkage on linkage array with multiple rows.
  219. Zm = xp.asarray([[3, 6, 138], [4, 5, 219],
  220. [1, 8, 255], [2, 9, 268], [7, 10, 295]])
  221. Z = xp.asarray([[2., 5., 138., 2.],
  222. [3., 4., 219., 2.],
  223. [0., 7., 255., 3.],
  224. [1., 8., 268., 4.],
  225. [6., 9., 295., 6.]],
  226. dtype=xp.float64)
  227. xp_assert_close(from_mlab_linkage(Zm), Z, rtol=1e-15)
  228. xp_assert_close(to_mlab_linkage(Z), xp.asarray(Zm, dtype=xp.float64),
  229. rtol=1e-15)
  230. @make_xp_test_case(fclusterdata)
  231. class TestFclusterData:
  232. @make_xp_test_case(is_isomorphic)
  233. @pytest.mark.parametrize("criterion,t",
  234. [("inconsistent", t) for t in hierarchy_test_data.fcluster_inconsistent]
  235. + [("distance", t) for t in hierarchy_test_data.fcluster_distance]
  236. + [("maxclust", t) for t in hierarchy_test_data.fcluster_maxclust]
  237. )
  238. def test_fclusterdata(self, t, criterion, xp):
  239. # Tests fclusterdata(X, criterion=criterion, t=t) on a random 3-cluster data set
  240. expectedT = xp.asarray(getattr(hierarchy_test_data, 'fcluster_' + criterion)[t])
  241. X = xp.asarray(hierarchy_test_data.Q_X)
  242. T = fclusterdata(X, criterion=criterion, t=t)
  243. assert is_isomorphic(T, expectedT)
  244. @make_xp_test_case(fcluster)
  245. class TestFcluster:
  246. @make_xp_test_case(single, is_isomorphic)
  247. @pytest.mark.parametrize("criterion,t",
  248. [("inconsistent", t) for t in hierarchy_test_data.fcluster_inconsistent]
  249. + [("distance", t) for t in hierarchy_test_data.fcluster_distance]
  250. + [("maxclust", t) for t in hierarchy_test_data.fcluster_maxclust]
  251. )
  252. def test_fcluster(self, t, criterion, xp):
  253. # Tests fcluster(Z, criterion=criterion, t=t) on a random 3-cluster data set.
  254. expectedT = xp.asarray(getattr(hierarchy_test_data, 'fcluster_' + criterion)[t])
  255. Z = single(xp.asarray(hierarchy_test_data.Q_X))
  256. T = fcluster(Z, criterion=criterion, t=t)
  257. assert_(is_isomorphic(T, expectedT))
  258. @make_xp_test_case(single, is_isomorphic, maxdists)
  259. @pytest.mark.parametrize("t", hierarchy_test_data.fcluster_distance)
  260. def test_fcluster_monocrit(self, t, xp):
  261. expectedT = xp.asarray(hierarchy_test_data.fcluster_distance[t])
  262. Z = single(xp.asarray(hierarchy_test_data.Q_X))
  263. T = fcluster(Z, t, criterion='monocrit', monocrit=maxdists(Z))
  264. assert_(is_isomorphic(T, expectedT))
  265. @make_xp_test_case(single, is_isomorphic, maxdists)
  266. @pytest.mark.parametrize("t", hierarchy_test_data.fcluster_maxclust)
  267. def test_fcluster_maxclust_monocrit(self, t, xp):
  268. expectedT = xp.asarray(hierarchy_test_data.fcluster_maxclust[t])
  269. Z = single(xp.asarray(hierarchy_test_data.Q_X))
  270. T = fcluster(Z, t, criterion='maxclust_monocrit', monocrit=maxdists(Z))
  271. assert_(is_isomorphic(T, expectedT))
  272. @make_xp_test_case(single)
  273. def test_fcluster_maxclust_gh_12651(self, xp):
  274. y = xp.asarray([[1], [4], [5]])
  275. Z = single(y)
  276. assert_array_equal(fcluster(Z, t=1, criterion="maxclust"),
  277. xp.asarray([1, 1, 1]))
  278. assert_array_equal(fcluster(Z, t=2, criterion="maxclust"),
  279. xp.asarray([2, 1, 1]))
  280. assert_array_equal(fcluster(Z, t=3, criterion="maxclust"),
  281. xp.asarray([1, 2, 3]))
  282. assert_array_equal(fcluster(Z, t=5, criterion="maxclust"),
  283. xp.asarray([1, 2, 3]))
  284. @make_xp_test_case(leaders)
  285. class TestLeaders:
  286. def test_leaders_single(self, xp):
  287. # Tests leaders using a flat clustering generated by single linkage.
  288. X = hierarchy_test_data.Q_X
  289. Y = pdist(X)
  290. Z = linkage(Y)
  291. T = fcluster(Z, criterion='maxclust', t=3)
  292. Z = xp.asarray(Z)
  293. T = xp.asarray(T, dtype=xp.int32)
  294. L = leaders(Z, T)
  295. expect = xp.asarray([53, 55, 56, 2, 3, 1], dtype=xp.int32)
  296. xp_assert_close(xp.concat(L), expect, rtol=1e-15)
  297. @make_xp_test_case(is_isomorphic)
  298. class TestIsIsomorphic:
  299. def test_array_like(self):
  300. assert is_isomorphic([1, 1, 1], [2, 2, 2])
  301. assert is_isomorphic([], [])
  302. def test_is_isomorphic_1(self, xp):
  303. # Tests is_isomorphic on test case #1 (one flat cluster, different labellings)
  304. a = xp.asarray([1, 1, 1])
  305. b = xp.asarray([2, 2, 2])
  306. assert is_isomorphic(a, b)
  307. assert is_isomorphic(b, a)
  308. def test_is_isomorphic_2(self, xp):
  309. # Tests is_isomorphic on test case #2 (two flat clusters, different labelings)
  310. a = xp.asarray([1, 7, 1])
  311. b = xp.asarray([2, 3, 2])
  312. assert is_isomorphic(a, b)
  313. assert is_isomorphic(b, a)
  314. def test_is_isomorphic_3(self, xp):
  315. # Tests is_isomorphic on test case #3 (no flat clusters)
  316. a = xp.asarray([])
  317. b = xp.asarray([])
  318. assert is_isomorphic(a, b)
  319. def test_is_isomorphic_4A(self, xp):
  320. # Tests is_isomorphic on test case #4A
  321. # (3 flat clusters, different labelings, isomorphic)
  322. a = xp.asarray([1, 2, 3])
  323. b = xp.asarray([1, 3, 2])
  324. assert is_isomorphic(a, b)
  325. assert is_isomorphic(b, a)
  326. def test_is_isomorphic_4B(self, xp):
  327. # Tests is_isomorphic on test case #4B
  328. # (3 flat clusters, different labelings, nonisomorphic)
  329. a = xp.asarray([1, 2, 3, 3])
  330. b = xp.asarray([1, 3, 2, 3])
  331. assert not is_isomorphic(a, b)
  332. assert not is_isomorphic(b, a)
  333. def test_is_isomorphic_4C(self, xp):
  334. # Tests is_isomorphic on test case #4C
  335. # (3 flat clusters, different labelings, isomorphic)
  336. a = xp.asarray([7, 2, 3])
  337. b = xp.asarray([6, 3, 2])
  338. assert is_isomorphic(a, b)
  339. assert is_isomorphic(b, a)
  340. @pytest.mark.parametrize("nclusters", [2, 3, 5])
  341. def test_is_isomorphic_5(self, nclusters, xp):
  342. # Tests is_isomorphic on test case #5 (1000 observations, 2/3/5 random
  343. # clusters, random permutation of the labeling).
  344. self.is_isomorphic_randperm(1000, nclusters, xp=xp)
  345. @pytest.mark.parametrize("nclusters", [2, 3, 5])
  346. def test_is_isomorphic_6(self, nclusters, xp):
  347. # Tests is_isomorphic on test case #5A (1000 observations, 2/3/5 random
  348. # clusters, random permutation of the labeling, slightly
  349. # nonisomorphic.)
  350. self.is_isomorphic_randperm(1000, nclusters, True, 5, xp=xp)
  351. def test_is_isomorphic_7(self, xp):
  352. # Regression test for gh-6271
  353. a = xp.asarray([1, 2, 3])
  354. b = xp.asarray([1, 1, 1])
  355. assert not is_isomorphic(a, b)
  356. def is_isomorphic_randperm(self, nobs, nclusters, noniso=False, nerrors=0, *, xp):
  357. rng = np.random.default_rng()
  358. for _ in range(3):
  359. a = rng.integers(0, nclusters, size=nobs)
  360. p = rng.permutation(nclusters)
  361. b = p.take(a.astype(np.intp))
  362. if noniso:
  363. q = rng.permutation(nobs)
  364. b[q[0:nerrors]] += 1
  365. b[q[0:nerrors]] %= nclusters
  366. a = xp.asarray(a)
  367. b = xp.asarray(b)
  368. assert is_isomorphic(a, b) == (not noniso)
  369. assert is_isomorphic(b, a) == (not noniso)
  370. @make_xp_test_case(is_valid_linkage)
  371. class TestIsValidLinkage:
  372. @pytest.mark.parametrize("nrow, ncol, valid", [(2, 5, False), (2, 3, False),
  373. (1, 4, True), (2, 4, True)])
  374. def test_is_valid_linkage_various_size(self, nrow, ncol, valid, xp):
  375. # Tests is_valid_linkage(Z) with linkage matrices of various sizes
  376. Z = xp.asarray([[0, 1, 3.0, 2, 5],
  377. [3, 2, 4.0, 3, 3]], dtype=xp.float64)
  378. Z = Z[:nrow, :ncol]
  379. xp_assert_equal(is_valid_linkage(Z), valid, check_namespace=False)
  380. if not valid:
  381. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  382. def test_is_valid_linkage_int_type(self, xp):
  383. # Tests is_valid_linkage(Z) with integer type.
  384. Z = xp.asarray([[0, 1, 3.0, 2],
  385. [3, 2, 4.0, 3]], dtype=xp.int64)
  386. xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False)
  387. assert_raises(TypeError, is_valid_linkage, Z, throw=True)
  388. def test_is_valid_linkage_empty(self, xp):
  389. # Tests is_valid_linkage(Z) with empty linkage.
  390. Z = xp.zeros((0, 4), dtype=xp.float64)
  391. xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False)
  392. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  393. def test_is_valid_linkage_4_and_up(self, xp):
  394. # Tests is_valid_linkage(Z) on linkage on observation sets between
  395. # sizes 4 and 15 (step size 3).
  396. for i in range(4, 15, 3):
  397. y = np.random.rand(i*(i-1)//2)
  398. Z = xp.asarray(linkage(y))
  399. y = xp.asarray(y)
  400. xp_assert_equal(is_valid_linkage(Z), True, check_namespace=False)
  401. def test_is_valid_linkage_4_and_up_neg_index_left(self, xp):
  402. # Tests is_valid_linkage(Z) on linkage on observation sets between
  403. # sizes 4 and 15 (step size 3) with negative indices (left).
  404. for i in range(4, 15, 3):
  405. y = np.random.rand(i*(i-1)//2)
  406. Z = xp.asarray(linkage(y))
  407. y = xp.asarray(y)
  408. Z = xpx.at(Z)[i//2, 0].set(-2)
  409. xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False)
  410. with pytest.raises(ValueError):
  411. eager.is_valid_linkage(Z, throw=True)
  412. def test_is_valid_linkage_4_and_up_neg_index_right(self, xp):
  413. # Tests is_valid_linkage(Z) on linkage on observation sets between
  414. # sizes 4 and 15 (step size 3) with negative indices (right).
  415. for i in range(4, 15, 3):
  416. y = np.random.rand(i*(i-1)//2)
  417. Z = xp.asarray(linkage(y))
  418. y = xp.asarray(y)
  419. Z = xpx.at(Z)[i//2, 1].set(-2)
  420. xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False)
  421. with pytest.raises(ValueError):
  422. eager.is_valid_linkage(Z, throw=True)
  423. def test_is_valid_linkage_4_and_up_neg_dist(self, xp):
  424. # Tests is_valid_linkage(Z) on linkage on observation sets between
  425. # sizes 4 and 15 (step size 3) with negative distances.
  426. for i in range(4, 15, 3):
  427. y = np.random.rand(i*(i-1)//2)
  428. Z = xp.asarray(linkage(y))
  429. y = xp.asarray(y)
  430. Z = xpx.at(Z)[i//2, 2].set(-0.5)
  431. xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False)
  432. with pytest.raises(ValueError):
  433. eager.is_valid_linkage(Z, throw=True)
  434. def test_is_valid_linkage_4_and_up_neg_counts(self, xp):
  435. # Tests is_valid_linkage(Z) on linkage on observation sets between
  436. # sizes 4 and 15 (step size 3) with negative counts.
  437. for i in range(4, 15, 3):
  438. y = np.random.rand(i*(i-1)//2)
  439. Z = xp.asarray(linkage(y))
  440. y = xp.asarray(y)
  441. Z = xpx.at(Z)[i//2, 3].set(-2)
  442. xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False)
  443. with pytest.raises(ValueError):
  444. eager.is_valid_linkage(Z, throw=True)
  445. @make_xp_test_case(is_valid_im)
  446. class TestIsValidInconsistent:
  447. def test_is_valid_im_int_type(self, xp):
  448. # Tests is_valid_im(R) with integer type.
  449. R = xp.asarray([[0, 1, 3.0, 2],
  450. [3, 2, 4.0, 3]], dtype=xp.int64)
  451. xp_assert_equal(is_valid_im(R), False, check_namespace=False)
  452. assert_raises(TypeError, is_valid_im, R, throw=True)
  453. @pytest.mark.parametrize("nrow, ncol, valid", [(2, 5, False), (2, 3, False),
  454. (1, 4, True), (2, 4, True)])
  455. def test_is_valid_im_various_size(self, nrow, ncol, valid, xp):
  456. # Tests is_valid_im(R) with linkage matrices of various sizes
  457. R = xp.asarray([[0, 1, 3.0, 2, 5],
  458. [3, 2, 4.0, 3, 3]], dtype=xp.float64)
  459. R = R[:nrow, :ncol]
  460. xp_assert_equal(is_valid_im(R), valid, check_namespace=False)
  461. if not valid:
  462. assert_raises(ValueError, is_valid_im, R, throw=True)
  463. def test_is_valid_im_empty(self, xp):
  464. # Tests is_valid_im(R) with empty inconsistency matrix.
  465. R = xp.zeros((0, 4), dtype=xp.float64)
  466. xp_assert_equal(is_valid_im(R), False, check_namespace=False)
  467. assert_raises(ValueError, is_valid_im, R, throw=True)
  468. def test_is_valid_im_4_and_up(self, xp):
  469. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  470. # (step size 3).
  471. for i in range(4, 15, 3):
  472. y = np.random.rand(i*(i-1)//2)
  473. Z = linkage(y)
  474. R = inconsistent(Z)
  475. R = xp.asarray(R)
  476. xp_assert_equal(is_valid_im(R), True, check_namespace=False)
  477. def test_is_valid_im_4_and_up_neg_index_left(self, xp):
  478. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  479. # (step size 3) with negative link height means.
  480. for i in range(4, 15, 3):
  481. y = np.random.rand(i*(i-1)//2)
  482. Z = linkage(y)
  483. R = inconsistent(Z)
  484. R = xpx.at(R)[i//2 , 0].set(-2.0)
  485. R = xp.asarray(R)
  486. xp_assert_equal(is_valid_im(R), False, check_namespace=False)
  487. with pytest.raises(ValueError):
  488. eager.is_valid_im(R, throw=True)
  489. def test_is_valid_im_4_and_up_neg_index_right(self, xp):
  490. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  491. # (step size 3) with negative link height standard deviations.
  492. for i in range(4, 15, 3):
  493. y = np.random.rand(i*(i-1)//2)
  494. Z = linkage(y)
  495. R = inconsistent(Z)
  496. R = xpx.at(R)[i//2 , 1].set(-2.0)
  497. R = xp.asarray(R)
  498. xp_assert_equal(is_valid_im(R), False, check_namespace=False)
  499. with pytest.raises(ValueError):
  500. eager.is_valid_im(R, throw=True)
  501. def test_is_valid_im_4_and_up_neg_dist(self, xp):
  502. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  503. # (step size 3) with negative link counts.
  504. for i in range(4, 15, 3):
  505. y = np.random.rand(i*(i-1)//2)
  506. Z = linkage(y)
  507. R = inconsistent(Z)
  508. R = xpx.at(R)[i//2, 2].set(-0.5)
  509. R = xp.asarray(R)
  510. xp_assert_equal(is_valid_im(R), False, check_namespace=False)
  511. with pytest.raises(ValueError):
  512. eager.is_valid_im(R, throw=True)
  513. class TestNumObsLinkage:
  514. def test_num_obs_linkage_empty(self, xp):
  515. # Tests num_obs_linkage(Z) with empty linkage.
  516. Z = xp.zeros((0, 4), dtype=xp.float64)
  517. assert_raises(ValueError, num_obs_linkage, Z)
  518. def test_num_obs_linkage_1x4(self, xp):
  519. # Tests num_obs_linkage(Z) on linkage over 2 observations.
  520. Z = xp.asarray([[0, 1, 3.0, 2]], dtype=xp.float64)
  521. assert num_obs_linkage(Z) == 2
  522. def test_num_obs_linkage_2x4(self, xp):
  523. # Tests num_obs_linkage(Z) on linkage over 3 observations.
  524. Z = xp.asarray([[0, 1, 3.0, 2],
  525. [3, 2, 4.0, 3]], dtype=xp.float64)
  526. assert num_obs_linkage(Z) == 3
  527. def test_num_obs_linkage_4_and_up(self, xp):
  528. # Tests num_obs_linkage(Z) on linkage on observation sets between sizes
  529. # 4 and 15 (step size 3).
  530. for i in range(4, 15, 3):
  531. y = np.random.rand(i*(i-1)//2)
  532. Z = xp.asarray(linkage(y))
  533. assert num_obs_linkage(Z) == i
  534. def test_num_obs_linkage_multi_matrix(self, xp):
  535. # Tests num_obs_linkage with observation matrices of multiple sizes.
  536. for n in range(2, 10):
  537. X = np.random.rand(n, 4)
  538. Y = pdist(X)
  539. Z = xp.asarray(linkage(Y))
  540. assert num_obs_linkage(Z) == n
  541. @make_xp_test_case(leaves_list, to_tree)
  542. class TestLeavesList:
  543. def test_leaves_list_1x4(self, xp):
  544. # Tests leaves_list(Z) on a 1x4 linkage.
  545. Z = xp.asarray([[0, 1, 3.0, 2]], dtype=xp.float64)
  546. to_tree(Z)
  547. assert_allclose(leaves_list(Z), [0, 1], rtol=1e-15)
  548. def test_leaves_list_2x4(self, xp):
  549. # Tests leaves_list(Z) on a 2x4 linkage.
  550. Z = xp.asarray([[0, 1, 3.0, 2],
  551. [3, 2, 4.0, 3]], dtype=xp.float64)
  552. to_tree(Z)
  553. assert_allclose(leaves_list(Z), [0, 1, 2], rtol=1e-15)
  554. @pytest.mark.parametrize("method",
  555. ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward'])
  556. def test_leaves_list_Q(self, method, xp):
  557. # Tests leaves_list(Z) on the Q data set
  558. X = hierarchy_test_data.Q_X
  559. Z = xp.asarray(linkage(X, method))
  560. node = to_tree(Z)
  561. assert_allclose(node.pre_order(), leaves_list(Z), rtol=1e-15)
  562. def test_Q_subtree_pre_order(self, xp):
  563. # Tests that pre_order() works when called on sub-trees.
  564. X = hierarchy_test_data.Q_X
  565. Z = xp.asarray(linkage(X, 'single'))
  566. node = to_tree(Z)
  567. assert_allclose(node.pre_order(),
  568. (node.get_left().pre_order() + node.get_right().pre_order()),
  569. rtol=1e-15)
  570. @make_xp_test_case(correspond)
  571. class TestCorrespond:
  572. def test_correspond_empty(self, xp):
  573. # Tests correspond(Z, y) with empty linkage and condensed distance matrix.
  574. y = xp.zeros((0,), dtype=xp.float64)
  575. Z = xp.zeros((0,4), dtype=xp.float64)
  576. assert_raises(ValueError, correspond, Z, y)
  577. def test_correspond_2_and_up(self, xp):
  578. # Tests correspond(Z, y) on linkage and CDMs over observation sets of
  579. # different sizes.
  580. for i in range(2, 4):
  581. y = np.random.rand(i*(i-1)//2)
  582. Z = xp.asarray(linkage(y))
  583. y = xp.asarray(y)
  584. assert_(correspond(Z, y))
  585. for i in range(4, 15, 3):
  586. y = np.random.rand(i*(i-1)//2)
  587. Z = xp.asarray(linkage(y))
  588. y = xp.asarray(y)
  589. assert_(correspond(Z, y))
  590. def test_correspond_4_and_up(self, xp):
  591. # Tests correspond(Z, y) on linkage and CDMs over observation sets of
  592. # different sizes. Correspondence should be false.
  593. for (i, j) in (list(zip(list(range(2, 4)), list(range(3, 5)))) +
  594. list(zip(list(range(3, 5)), list(range(2, 4))))):
  595. y = np.random.rand(i*(i-1)//2)
  596. y2 = np.random.rand(j*(j-1)//2)
  597. Z = xp.asarray(linkage(y))
  598. Z2 = xp.asarray(linkage(y2))
  599. y = xp.asarray(y)
  600. y2 = xp.asarray(y2)
  601. assert not correspond(Z, y2)
  602. assert not correspond(Z2, y)
  603. def test_correspond_4_and_up_2(self, xp):
  604. # Tests correspond(Z, y) on linkage and CDMs over observation sets of
  605. # different sizes. Correspondence should be false.
  606. for (i, j) in (list(zip(list(range(2, 7)), list(range(16, 21)))) +
  607. list(zip(list(range(2, 7)), list(range(16, 21))))):
  608. y = np.random.rand(i*(i-1)//2)
  609. y2 = np.random.rand(j*(j-1)//2)
  610. Z = xp.asarray(linkage(y))
  611. Z2 = xp.asarray(linkage(y2))
  612. y = xp.asarray(y)
  613. y2 = xp.asarray(y2)
  614. assert not correspond(Z, y2)
  615. assert not correspond(Z2, y)
  616. @make_xp_test_case(is_monotonic)
  617. class TestIsMonotonic:
  618. def test_is_monotonic_empty(self, xp):
  619. # Tests is_monotonic(Z) on an empty linkage.
  620. Z = xp.zeros((0, 4), dtype=xp.float64)
  621. assert_raises(ValueError, is_monotonic, Z)
  622. def test_is_monotonic_1x4(self, xp):
  623. # Tests is_monotonic(Z) on 1x4 linkage. Expecting True.
  624. Z = xp.asarray([[0, 1, 0.3, 2]], dtype=xp.float64)
  625. assert is_monotonic(Z)
  626. def test_is_monotonic_2x4_T(self, xp):
  627. # Tests is_monotonic(Z) on 2x4 linkage. Expecting True.
  628. Z = xp.asarray([[0, 1, 0.3, 2],
  629. [2, 3, 0.4, 3]], dtype=xp.float64)
  630. assert is_monotonic(Z)
  631. def test_is_monotonic_2x4_F(self, xp):
  632. # Tests is_monotonic(Z) on 2x4 linkage. Expecting False.
  633. Z = xp.asarray([[0, 1, 0.4, 2],
  634. [2, 3, 0.3, 3]], dtype=xp.float64)
  635. assert not is_monotonic(Z)
  636. def test_is_monotonic_3x4_T(self, xp):
  637. # Tests is_monotonic(Z) on 3x4 linkage. Expecting True.
  638. Z = xp.asarray([[0, 1, 0.3, 2],
  639. [2, 3, 0.4, 2],
  640. [4, 5, 0.6, 4]], dtype=xp.float64)
  641. assert is_monotonic(Z)
  642. def test_is_monotonic_3x4_F1(self, xp):
  643. # Tests is_monotonic(Z) on 3x4 linkage (case 1). Expecting False.
  644. Z = xp.asarray([[0, 1, 0.3, 2],
  645. [2, 3, 0.2, 2],
  646. [4, 5, 0.6, 4]], dtype=xp.float64)
  647. assert not is_monotonic(Z)
  648. def test_is_monotonic_3x4_F2(self, xp):
  649. # Tests is_monotonic(Z) on 3x4 linkage (case 2). Expecting False.
  650. Z = xp.asarray([[0, 1, 0.8, 2],
  651. [2, 3, 0.4, 2],
  652. [4, 5, 0.6, 4]], dtype=xp.float64)
  653. assert not is_monotonic(Z)
  654. def test_is_monotonic_3x4_F3(self, xp):
  655. # Tests is_monotonic(Z) on 3x4 linkage (case 3). Expecting False
  656. Z = xp.asarray([[0, 1, 0.3, 2],
  657. [2, 3, 0.4, 2],
  658. [4, 5, 0.2, 4]], dtype=xp.float64)
  659. assert not is_monotonic(Z)
  660. def test_is_monotonic_tdist_linkage1(self, xp):
  661. # Tests is_monotonic(Z) on clustering generated by single linkage on
  662. # tdist data set. Expecting True.
  663. Z = xp.asarray(linkage(hierarchy_test_data.ytdist, 'single'))
  664. assert is_monotonic(Z)
  665. def test_is_monotonic_tdist_linkage2(self, xp):
  666. # Tests is_monotonic(Z) on clustering generated by single linkage on
  667. # tdist data set. Perturbing. Expecting False.
  668. Z = xp.asarray(linkage(hierarchy_test_data.ytdist, 'single'))
  669. Z = xpx.at(Z)[2, 2].set(0.0)
  670. assert not is_monotonic(Z)
  671. def test_is_monotonic_Q_linkage(self, xp):
  672. # Tests is_monotonic(Z) on clustering generated by single linkage on
  673. # Q data set. Expecting True.
  674. X = hierarchy_test_data.Q_X
  675. Z = xp.asarray(linkage(X, 'single'))
  676. assert is_monotonic(Z)
  677. @make_xp_test_case(maxdists)
  678. class TestMaxDists:
  679. def test_maxdists_empty_linkage(self, xp):
  680. # Tests maxdists(Z) on empty linkage. Expecting exception.
  681. Z = xp.zeros((0, 4), dtype=xp.float64)
  682. assert_raises(ValueError, maxdists, Z)
  683. def test_maxdists_one_cluster_linkage(self, xp):
  684. # Tests maxdists(Z) on linkage with one cluster.
  685. Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64)
  686. MD = maxdists(Z)
  687. expectedMD = calculate_maximum_distances(Z, xp)
  688. xp_assert_close(MD, expectedMD, atol=1e-15)
  689. @pytest.mark.parametrize(
  690. "method", ['single', 'complete', 'ward', 'centroid', 'median'])
  691. def test_maxdists_Q_linkage(self, method, xp):
  692. # Tests maxdists(Z) on the Q data set
  693. X = hierarchy_test_data.Q_X
  694. Z = xp.asarray(linkage(X, method))
  695. MD = maxdists(Z)
  696. expectedMD = calculate_maximum_distances(Z, xp)
  697. xp_assert_close(MD, expectedMD, atol=1e-15)
  698. @make_xp_test_case(maxinconsts)
  699. class TestMaxInconsts:
  700. def test_maxinconsts_empty_linkage(self, xp):
  701. # Tests maxinconsts(Z, R) on empty linkage. Expecting exception.
  702. Z = xp.zeros((0, 4), dtype=xp.float64)
  703. R = xp.zeros((0, 4), dtype=xp.float64)
  704. assert_raises(ValueError, maxinconsts, Z, R)
  705. def test_maxinconsts_difrow_linkage(self, xp):
  706. # Tests maxinconsts(Z, R) on linkage and inconsistency matrices with
  707. # different numbers of clusters. Expecting exception.
  708. Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64)
  709. R = np.random.rand(2, 4)
  710. R = xp.asarray(R)
  711. assert_raises(ValueError, maxinconsts, Z, R)
  712. def test_maxinconsts_one_cluster_linkage(self, xp):
  713. # Tests maxinconsts(Z, R) on linkage with one cluster.
  714. Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64)
  715. R = xp.asarray([[0, 0, 0, 0.3]], dtype=xp.float64)
  716. MD = maxinconsts(Z, R)
  717. expectedMD = calculate_maximum_inconsistencies(Z, R, xp=xp)
  718. xp_assert_close(MD, expectedMD, atol=1e-15)
  719. @pytest.mark.parametrize(
  720. "method", ['single', 'complete', 'ward', 'centroid', 'median'])
  721. def test_maxinconsts_Q_linkage(self, method, xp):
  722. # Tests maxinconsts(Z, R) on the Q data set
  723. X = hierarchy_test_data.Q_X
  724. Z = linkage(X, method)
  725. R = xp.asarray(inconsistent(Z))
  726. Z = xp.asarray(Z)
  727. MD = maxinconsts(Z, R)
  728. expectedMD = calculate_maximum_inconsistencies(Z, R, xp=xp)
  729. xp_assert_close(MD, expectedMD, atol=1e-15)
  730. @make_xp_test_case(maxRstat)
  731. class TestMaxRStat:
  732. def test_maxRstat_invalid_index(self, xp):
  733. # Tests maxRstat(Z, R, i). Expecting exception.
  734. Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64)
  735. R = xp.asarray([[0, 0, 0, 0.3]], dtype=xp.float64)
  736. with pytest.raises(TypeError):
  737. maxRstat(Z, R, 3.3)
  738. with pytest.raises(ValueError):
  739. maxRstat(Z, R, -1)
  740. with pytest.raises(ValueError):
  741. maxRstat(Z, R, 4)
  742. @pytest.mark.parametrize("i", range(4))
  743. def test_maxRstat_empty_linkage(self, i, xp):
  744. # Tests maxRstat(Z, R, i) on empty linkage. Expecting exception.
  745. Z = xp.zeros((0, 4), dtype=xp.float64)
  746. R = xp.zeros((0, 4), dtype=xp.float64)
  747. assert_raises(ValueError, maxRstat, Z, R, i)
  748. @pytest.mark.parametrize("i", range(4))
  749. def test_maxRstat_difrow_linkage(self, i, xp):
  750. # Tests maxRstat(Z, R, i) on linkage and inconsistency matrices with
  751. # different numbers of clusters. Expecting exception.
  752. Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64)
  753. R = np.random.rand(2, 4)
  754. R = xp.asarray(R)
  755. assert_raises(ValueError, maxRstat, Z, R, i)
  756. def test_maxRstat_one_cluster_linkage(self, xp):
  757. # Tests maxRstat(Z, R, i) on linkage with one cluster.
  758. Z = xp.asarray([[0, 1, 0.3, 4]], dtype=xp.float64)
  759. R = xp.asarray([[0, 0, 0, 0.3]], dtype=xp.float64)
  760. MD = maxRstat(Z, R, 1)
  761. expectedMD = calculate_maximum_inconsistencies(Z, R, 1, xp)
  762. xp_assert_close(MD, expectedMD, atol=1e-15)
  763. @pytest.mark.parametrize(
  764. "method", ['single', 'complete', 'ward', 'centroid', 'median'])
  765. def test_maxRstat_Q_linkage(self, method, xp):
  766. # Tests maxRstat(Z, R, 1) on the Q data set
  767. X = hierarchy_test_data.Q_X
  768. Z = linkage(X, method)
  769. R = xp.asarray(inconsistent(Z))
  770. Z = xp.asarray(Z)
  771. MD = maxRstat(Z, R, 1)
  772. expectedMD = calculate_maximum_inconsistencies(Z, R, 1, xp)
  773. xp_assert_close(MD, expectedMD, atol=1e-15)
  774. @make_xp_test_case(dendrogram)
  775. class TestDendrogram:
  776. def test_dendrogram_single_linkage_tdist(self, xp):
  777. # Tests dendrogram calculation on single linkage of the tdist data set.
  778. Z = xp.asarray(linkage(hierarchy_test_data.ytdist, 'single'))
  779. R = dendrogram(Z, no_plot=True)
  780. leaves = R["leaves"]
  781. assert_equal(leaves, [2, 5, 1, 0, 3, 4])
  782. def test_valid_orientation(self, xp):
  783. Z = xp.asarray(linkage(hierarchy_test_data.ytdist, 'single'))
  784. assert_raises(ValueError, dendrogram, Z, orientation="foo")
  785. def test_labels_as_array_or_list(self, xp):
  786. # test for gh-12418
  787. Z = xp.asarray(linkage(hierarchy_test_data.ytdist, 'single'))
  788. labels = [1, 3, 2, 6, 4, 5]
  789. result1 = dendrogram(Z, labels=xp.asarray(labels), no_plot=True)
  790. result2 = dendrogram(Z, labels=labels, no_plot=True)
  791. assert result1 == result2
  792. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  793. def test_valid_label_size(self, xp):
  794. link = xp.asarray([
  795. [0, 1, 1.0, 4],
  796. [2, 3, 1.0, 5],
  797. [4, 5, 2.0, 6],
  798. ])
  799. plt.figure()
  800. with pytest.raises(ValueError) as exc_info:
  801. dendrogram(link, labels=list(range(100)))
  802. assert "Dimensions of Z and labels must be consistent."\
  803. in str(exc_info.value)
  804. with pytest.raises(
  805. ValueError,
  806. match="Dimensions of Z and labels must be consistent."):
  807. dendrogram(link, labels=[])
  808. plt.close()
  809. @skip_xp_backends('torch',
  810. reason='MPL 3.9.2 & torch DeprecationWarning from __array_wrap__'
  811. ' and NumPy 2.0'
  812. )
  813. @skip_xp_backends('dask.array',
  814. reason='dask.array has bad interaction with matplotlib'
  815. )
  816. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  817. @pytest.mark.parametrize("orientation", ['top', 'bottom', 'left', 'right'])
  818. def test_dendrogram_plot(self, orientation, xp):
  819. # Tests dendrogram plotting.
  820. Z = xp.asarray(linkage(hierarchy_test_data.ytdist, 'single'))
  821. expected = {'color_list': ['C1', 'C0', 'C0', 'C0', 'C0'],
  822. 'dcoord': [[0.0, 138.0, 138.0, 0.0],
  823. [0.0, 219.0, 219.0, 0.0],
  824. [0.0, 255.0, 255.0, 219.0],
  825. [0.0, 268.0, 268.0, 255.0],
  826. [138.0, 295.0, 295.0, 268.0]],
  827. 'icoord': [[5.0, 5.0, 15.0, 15.0],
  828. [45.0, 45.0, 55.0, 55.0],
  829. [35.0, 35.0, 50.0, 50.0],
  830. [25.0, 25.0, 42.5, 42.5],
  831. [10.0, 10.0, 33.75, 33.75]],
  832. 'ivl': ['2', '5', '1', '0', '3', '4'],
  833. 'leaves': [2, 5, 1, 0, 3, 4],
  834. 'leaves_color_list': ['C1', 'C1', 'C0', 'C0', 'C0', 'C0'],
  835. }
  836. fig = plt.figure()
  837. ax = fig.add_subplot(221)
  838. # test that dendrogram accepts ax keyword
  839. R1 = dendrogram(Z, ax=ax, orientation=orientation)
  840. R1['dcoord'] = np.asarray(R1['dcoord'])
  841. assert_equal(R1, expected)
  842. # test that dendrogram accepts and handle the leaf_font_size and
  843. # leaf_rotation keywords
  844. dendrogram(Z, ax=ax, orientation=orientation,
  845. leaf_font_size=20, leaf_rotation=90)
  846. testlabel = (
  847. ax.get_xticklabels()[0]
  848. if orientation in ['top', 'bottom']
  849. else ax.get_yticklabels()[0]
  850. )
  851. assert_equal(testlabel.get_rotation(), 90)
  852. assert_equal(testlabel.get_size(), 20)
  853. dendrogram(Z, ax=ax, orientation=orientation,
  854. leaf_rotation=90)
  855. testlabel = (
  856. ax.get_xticklabels()[0]
  857. if orientation in ['top', 'bottom']
  858. else ax.get_yticklabels()[0]
  859. )
  860. assert_equal(testlabel.get_rotation(), 90)
  861. dendrogram(Z, ax=ax, orientation=orientation,
  862. leaf_font_size=20)
  863. testlabel = (
  864. ax.get_xticklabels()[0]
  865. if orientation in ['top', 'bottom']
  866. else ax.get_yticklabels()[0]
  867. )
  868. assert_equal(testlabel.get_size(), 20)
  869. plt.close()
  870. # test plotting to gca (will import pylab)
  871. R2 = dendrogram(Z, orientation=orientation)
  872. plt.close()
  873. R2['dcoord'] = np.asarray(R2['dcoord'])
  874. assert_equal(R2, expected)
  875. @skip_xp_backends('torch',
  876. reason='MPL 3.9.2 & torch DeprecationWarning from __array_wrap__'
  877. ' and NumPy 2.0'
  878. )
  879. @skip_xp_backends('dask.array',
  880. reason='dask.array has bad interaction with matplotlib'
  881. )
  882. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  883. def test_dendrogram_truncate_mode(self, xp):
  884. Z = xp.asarray(linkage(hierarchy_test_data.ytdist, 'single'))
  885. R = dendrogram(Z, 2, 'lastp', show_contracted=True)
  886. plt.close()
  887. R['dcoord'] = np.asarray(R['dcoord'])
  888. assert_equal(R, {'color_list': ['C0'],
  889. 'dcoord': [[0.0, 295.0, 295.0, 0.0]],
  890. 'icoord': [[5.0, 5.0, 15.0, 15.0]],
  891. 'ivl': ['(2)', '(4)'],
  892. 'leaves': [6, 9],
  893. 'leaves_color_list': ['C0', 'C0'],
  894. })
  895. R = dendrogram(Z, 2, 'mtica', show_contracted=True)
  896. plt.close()
  897. R['dcoord'] = np.asarray(R['dcoord'])
  898. assert_equal(R, {'color_list': ['C1', 'C0', 'C0', 'C0'],
  899. 'dcoord': [[0.0, 138.0, 138.0, 0.0],
  900. [0.0, 255.0, 255.0, 0.0],
  901. [0.0, 268.0, 268.0, 255.0],
  902. [138.0, 295.0, 295.0, 268.0]],
  903. 'icoord': [[5.0, 5.0, 15.0, 15.0],
  904. [35.0, 35.0, 45.0, 45.0],
  905. [25.0, 25.0, 40.0, 40.0],
  906. [10.0, 10.0, 32.5, 32.5]],
  907. 'ivl': ['2', '5', '1', '0', '(2)'],
  908. 'leaves': [2, 5, 1, 0, 7],
  909. 'leaves_color_list': ['C1', 'C1', 'C0', 'C0', 'C0'],
  910. })
  911. @pytest.fixture
  912. def dendrogram_lock(self):
  913. return Lock()
  914. def test_dendrogram_colors(self, xp, dendrogram_lock):
  915. # Tests dendrogram plots with alternate colors
  916. Z = xp.asarray(linkage(hierarchy_test_data.ytdist, 'single'))
  917. with dendrogram_lock:
  918. # Global color palette might be changed concurrently
  919. set_link_color_palette(['c', 'm', 'y', 'k'])
  920. R = dendrogram(Z, no_plot=True,
  921. above_threshold_color='g', color_threshold=250)
  922. set_link_color_palette(['g', 'r', 'c', 'm', 'y', 'k'])
  923. color_list = R['color_list']
  924. assert_equal(color_list, ['c', 'm', 'g', 'g', 'g'])
  925. # reset color palette (global list)
  926. set_link_color_palette(None)
  927. def test_dendrogram_leaf_colors_zero_dist(self, xp):
  928. # tests that the colors of leafs are correct for tree
  929. # with two identical points
  930. X = np.asarray([[1, 0, 0],
  931. [0, 0, 1],
  932. [0, 2, 0],
  933. [0, 0, 1],
  934. [0, 1, 0],
  935. [0, 1, 0]])
  936. Z = xp.asarray(linkage(X, "single"))
  937. d = dendrogram(Z, no_plot=True)
  938. exp_colors = ['C0', 'C1', 'C1', 'C0', 'C2', 'C2']
  939. colors = d["leaves_color_list"]
  940. assert_equal(colors, exp_colors)
  941. def test_dendrogram_leaf_colors(self, xp):
  942. # tests that the colors are correct for a tree
  943. # with two near points ((0, 0, 1.1) and (0, 0, 1))
  944. X = np.asarray([[1, 0, 0],
  945. [0, 0, 1.1],
  946. [0, 2, 0],
  947. [0, 0, 1],
  948. [0, 1, 0],
  949. [0, 1, 0]])
  950. Z = xp.asarray(linkage(X, "single"))
  951. d = dendrogram(Z, no_plot=True)
  952. exp_colors = ['C0', 'C1', 'C1', 'C0', 'C2', 'C2']
  953. colors = d["leaves_color_list"]
  954. assert_equal(colors, exp_colors)
  955. def calculate_maximum_distances(Z, xp):
  956. # Used for testing correctness of maxdists.
  957. n = Z.shape[0] + 1
  958. B = xp.zeros((n-1,), dtype=Z.dtype)
  959. for i in range(0, n - 1):
  960. q = xp.zeros((3,))
  961. left = Z[i, 0]
  962. right = Z[i, 1]
  963. if left >= n:
  964. b_left = B[xp.asarray(left, dtype=xp.int64) - n]
  965. q = xpx.at(q, 0).set(b_left)
  966. if right >= n:
  967. b_right = B[xp.asarray(right, dtype=xp.int64) - n]
  968. q = xpx.at(q, 1).set(b_right)
  969. q = xpx.at(q, 2).set(Z[i, 2])
  970. B = xpx.at(B, i).set(xp.max(q))
  971. return B
  972. def calculate_maximum_inconsistencies(Z, R, k=3, xp=np):
  973. # Used for testing correctness of maxinconsts.
  974. n = Z.shape[0] + 1
  975. dtype = xp.result_type(Z, R)
  976. B = xp.zeros((n-1,), dtype=dtype)
  977. for i in range(0, n - 1):
  978. q = xp.zeros((3,))
  979. left = Z[i, 0]
  980. right = Z[i, 1]
  981. if left >= n:
  982. b_left = B[xp.asarray(left, dtype=xp.int64) - n]
  983. q = xpx.at(q, 0).set(b_left)
  984. if right >= n:
  985. b_right = B[xp.asarray(right, dtype=xp.int64) - n]
  986. q = xpx.at(q, 1).set(b_right)
  987. q = xpx.at(q, 2).set(R[i, k])
  988. B = xpx.at(B, i).set(xp.max(q))
  989. return B
  990. @make_xp_test_case(to_tree)
  991. def test_node_compare(xp):
  992. np.random.seed(23)
  993. nobs = 50
  994. X = np.random.randn(nobs, 4)
  995. Z = xp.asarray(ward(X))
  996. tree = to_tree(Z)
  997. assert_(tree > tree.get_left())
  998. assert_(tree.get_right() > tree.get_left())
  999. assert_(tree.get_right() == tree.get_right())
  1000. assert_(tree.get_right() != tree.get_left())
  1001. @make_xp_test_case(cut_tree)
  1002. def test_cut_tree(xp):
  1003. np.random.seed(23)
  1004. nobs = 50
  1005. X = np.random.randn(nobs, 4)
  1006. Z = xp.asarray(ward(X))
  1007. cutree = cut_tree(Z)
  1008. # cutree.dtype varies between int32 and int64 over platforms
  1009. xp_assert_close(cutree[:, 0], xp.arange(nobs), rtol=1e-15, check_dtype=False)
  1010. xp_assert_close(cutree[:, -1], xp.zeros(nobs), rtol=1e-15, check_dtype=False)
  1011. assert_equal(np.asarray(cutree).max(0), np.arange(nobs - 1, -1, -1))
  1012. xp_assert_close(cutree[:, [-5]], cut_tree(Z, n_clusters=5), rtol=1e-15)
  1013. xp_assert_close(cutree[:, [-5, -10]], cut_tree(Z, n_clusters=[5, 10]), rtol=1e-15)
  1014. xp_assert_close(cutree[:, [-10, -5]], cut_tree(Z, n_clusters=[10, 5]), rtol=1e-15)
  1015. nodes = _order_cluster_tree(Z)
  1016. heights = xp.asarray([node.dist for node in nodes])
  1017. xp_assert_close(cutree[:, np.searchsorted(heights, [5])],
  1018. cut_tree(Z, height=5), rtol=1e-15)
  1019. xp_assert_close(cutree[:, np.searchsorted(heights, [5, 10])],
  1020. cut_tree(Z, height=[5, 10]), rtol=1e-15)
  1021. xp_assert_close(cutree[:, np.searchsorted(heights, [10, 5])],
  1022. cut_tree(Z, height=[10, 5]), rtol=1e-15)
  1023. @make_xp_test_case(optimal_leaf_ordering)
  1024. def test_optimal_leaf_ordering(xp):
  1025. # test with the distance vector y
  1026. Z = optimal_leaf_ordering(xp.asarray(linkage(hierarchy_test_data.ytdist)),
  1027. xp.asarray(hierarchy_test_data.ytdist))
  1028. expectedZ = hierarchy_test_data.linkage_ytdist_single_olo
  1029. xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-10)
  1030. # test with the observation matrix X
  1031. Z = optimal_leaf_ordering(xp.asarray(linkage(hierarchy_test_data.X, 'ward')),
  1032. xp.asarray(hierarchy_test_data.X))
  1033. expectedZ = hierarchy_test_data.linkage_X_ward_olo
  1034. xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-06)
  1035. @skip_xp_backends(np_only=True, reason='`Heap` only supports NumPy backend')
  1036. def test_Heap(xp):
  1037. values = xp.asarray([2, -1, 0, -1.5, 3])
  1038. heap = Heap(values)
  1039. pair = heap.get_min()
  1040. assert_equal(pair['key'], 3)
  1041. assert_equal(pair['value'], -1.5)
  1042. heap.remove_min()
  1043. pair = heap.get_min()
  1044. assert_equal(pair['key'], 1)
  1045. assert_equal(pair['value'], -1)
  1046. heap.change_value(1, 2.5)
  1047. pair = heap.get_min()
  1048. assert_equal(pair['key'], 2)
  1049. assert_equal(pair['value'], 0)
  1050. heap.remove_min()
  1051. heap.remove_min()
  1052. heap.change_value(1, 10)
  1053. pair = heap.get_min()
  1054. assert_equal(pair['key'], 4)
  1055. assert_equal(pair['value'], 3)
  1056. heap.remove_min()
  1057. pair = heap.get_min()
  1058. assert_equal(pair['key'], 1)
  1059. assert_equal(pair['value'], 10)