test_distance.py 83 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287
  1. #
  2. # Author: Damian Eads
  3. # Date: April 17, 2008
  4. #
  5. # Copyright (C) 2008 Damian Eads
  6. #
  7. # Redistribution and use in source and binary forms, with or without
  8. # modification, are permitted provided that the following conditions
  9. # are met:
  10. #
  11. # 1. Redistributions of source code must retain the above copyright
  12. # notice, this list of conditions and the following disclaimer.
  13. #
  14. # 2. Redistributions in binary form must reproduce the above
  15. # copyright notice, this list of conditions and the following
  16. # disclaimer in the documentation and/or other materials provided
  17. # with the distribution.
  18. #
  19. # 3. The name of the author may not be used to endorse or promote
  20. # products derived from this software without specific prior
  21. # written permission.
  22. #
  23. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  24. # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  25. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  27. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  29. # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  31. # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  32. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  33. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  34. from functools import wraps, partial
  35. import os.path
  36. import sys
  37. import sysconfig
  38. import warnings
  39. import weakref
  40. import numpy as np
  41. from numpy.linalg import norm
  42. from numpy.testing import (verbose, assert_,
  43. assert_array_equal, assert_equal,
  44. assert_almost_equal, assert_allclose,
  45. break_cycles, IS_PYPY)
  46. import pytest
  47. import scipy.spatial.distance
  48. from scipy.spatial.distance import (
  49. squareform, pdist, cdist, num_obs_y, num_obs_dm, is_valid_dm, is_valid_y,
  50. _validate_vector, _METRICS_NAMES)
  51. # these were missing: chebyshev cityblock
  52. # jensenshannon and seuclidean are referenced by string name.
  53. from scipy.spatial.distance import (braycurtis, canberra, chebyshev, cityblock,
  54. correlation, cosine, dice, euclidean,
  55. hamming, jaccard, jensenshannon, mahalanobis,
  56. minkowski, rogerstanimoto,
  57. russellrao, seuclidean, # noqa: F401
  58. sokalsneath, sqeuclidean, yule)
  59. from scipy._lib._util import np_long, np_ulong
  60. from scipy.conftest import skip_xp_invalid_arg
  61. @pytest.fixture(params=_METRICS_NAMES, scope="session")
  62. def metric(request):
  63. """
  64. Fixture for all metrics in scipy.spatial.distance
  65. """
  66. return request.param
  67. _filenames = [
  68. "cdist-X1.txt",
  69. "cdist-X2.txt",
  70. "iris.txt",
  71. "pdist-boolean-inp.txt",
  72. "pdist-chebyshev-ml-iris.txt",
  73. "pdist-chebyshev-ml.txt",
  74. "pdist-cityblock-ml-iris.txt",
  75. "pdist-cityblock-ml.txt",
  76. "pdist-correlation-ml-iris.txt",
  77. "pdist-correlation-ml.txt",
  78. "pdist-cosine-ml-iris.txt",
  79. "pdist-cosine-ml.txt",
  80. "pdist-double-inp.txt",
  81. "pdist-euclidean-ml-iris.txt",
  82. "pdist-euclidean-ml.txt",
  83. "pdist-hamming-ml.txt",
  84. "pdist-jaccard-ml.txt",
  85. "pdist-jensenshannon-ml-iris.txt",
  86. "pdist-jensenshannon-ml.txt",
  87. "pdist-minkowski-3.2-ml-iris.txt",
  88. "pdist-minkowski-3.2-ml.txt",
  89. "pdist-minkowski-5.8-ml-iris.txt",
  90. "pdist-seuclidean-ml-iris.txt",
  91. "pdist-seuclidean-ml.txt",
  92. "pdist-spearman-ml.txt",
  93. "random-bool-data.txt",
  94. "random-double-data.txt",
  95. "random-int-data.txt",
  96. "random-uint-data.txt",
  97. ]
  98. _tdist = np.array([[0, 662, 877, 255, 412, 996],
  99. [662, 0, 295, 468, 268, 400],
  100. [877, 295, 0, 754, 564, 138],
  101. [255, 468, 754, 0, 219, 869],
  102. [412, 268, 564, 219, 0, 669],
  103. [996, 400, 138, 869, 669, 0]], dtype='double')
  104. _ytdist = squareform(_tdist)
  105. # A hashmap of expected output arrays for the tests. These arrays
  106. # come from a list of text files, which are read prior to testing.
  107. # Each test loads inputs and outputs from this dictionary.
  108. eo = {}
  109. def load_testing_files():
  110. for fn in _filenames:
  111. name = fn.replace(".txt", "").replace("-ml", "")
  112. fqfn = os.path.join(os.path.dirname(__file__), 'data', fn)
  113. fp = open(fqfn)
  114. eo[name] = np.loadtxt(fp)
  115. fp.close()
  116. eo['pdist-boolean-inp'] = np.bool_(eo['pdist-boolean-inp'])
  117. eo['random-bool-data'] = np.bool_(eo['random-bool-data'])
  118. eo['random-float32-data'] = np.float32(eo['random-double-data'])
  119. eo['random-int-data'] = np_long(eo['random-int-data'])
  120. eo['random-uint-data'] = np_ulong(eo['random-uint-data'])
  121. load_testing_files()
  122. def _is_32bit():
  123. return np.intp(0).itemsize < 8
  124. def _chk_asarrays(arrays, axis=None):
  125. arrays = [np.asanyarray(a) for a in arrays]
  126. if axis is None:
  127. # np < 1.10 ravel removes subclass from arrays
  128. arrays = [np.ravel(a) if a.ndim != 1 else a
  129. for a in arrays]
  130. axis = 0
  131. arrays = tuple(np.atleast_1d(a) for a in arrays)
  132. if axis < 0:
  133. if not all(a.ndim == arrays[0].ndim for a in arrays):
  134. raise ValueError("array ndim must be the same for neg axis")
  135. axis = range(arrays[0].ndim)[axis]
  136. return arrays + (axis,)
  137. def _chk_weights(arrays, weights=None, axis=None,
  138. force_weights=False, simplify_weights=True,
  139. pos_only=False, neg_check=False,
  140. nan_screen=False, mask_screen=False,
  141. ddof=None):
  142. chked = _chk_asarrays(arrays, axis=axis)
  143. arrays, axis = chked[:-1], chked[-1]
  144. simplify_weights = simplify_weights and not force_weights
  145. if not force_weights and mask_screen:
  146. force_weights = any(np.ma.getmask(a) is not np.ma.nomask for a in arrays)
  147. if nan_screen:
  148. has_nans = [np.isnan(np.sum(a)) for a in arrays]
  149. if any(has_nans):
  150. mask_screen = True
  151. force_weights = True
  152. arrays = tuple(np.ma.masked_invalid(a) if has_nan else a
  153. for a, has_nan in zip(arrays, has_nans))
  154. if weights is not None:
  155. weights = np.asanyarray(weights)
  156. elif force_weights:
  157. weights = np.ones(arrays[0].shape[axis])
  158. else:
  159. return arrays + (weights, axis)
  160. if ddof:
  161. weights = _freq_weights(weights)
  162. if mask_screen:
  163. weights = _weight_masked(arrays, weights, axis)
  164. if not all(weights.shape == (a.shape[axis],) for a in arrays):
  165. raise ValueError("weights shape must match arrays along axis")
  166. if neg_check and (weights < 0).any():
  167. raise ValueError("weights cannot be negative")
  168. if pos_only:
  169. pos_weights = np.nonzero(weights > 0)[0]
  170. if pos_weights.size < weights.size:
  171. arrays = tuple(np.take(a, pos_weights, axis=axis) for a in arrays)
  172. weights = weights[pos_weights]
  173. if simplify_weights and (weights == 1).all():
  174. weights = None
  175. return arrays + (weights, axis)
  176. def _freq_weights(weights):
  177. if weights is None:
  178. return weights
  179. int_weights = weights.astype(int)
  180. if (weights != int_weights).any():
  181. raise ValueError(f"frequency (integer count-type) weights required {weights}")
  182. return int_weights
  183. def _weight_masked(arrays, weights, axis):
  184. if axis is None:
  185. axis = 0
  186. weights = np.asanyarray(weights)
  187. for a in arrays:
  188. axis_mask = np.ma.getmask(a)
  189. if axis_mask is np.ma.nomask:
  190. continue
  191. if a.ndim > 1:
  192. not_axes = tuple(i for i in range(a.ndim) if i != axis)
  193. axis_mask = axis_mask.any(axis=not_axes)
  194. weights *= 1 - axis_mask.astype(int)
  195. return weights
  196. def _rand_split(arrays, weights, axis, split_per, seed=None):
  197. # Coerce `arrays` to float64 if integer, to avoid nan-to-integer issues
  198. arrays = [arr.astype(np.float64) if np.issubdtype(arr.dtype, np.integer)
  199. else arr for arr in arrays]
  200. # inverse operation for stats.collapse_weights
  201. weights = np.array(weights, dtype=np.float64) # modified inplace; need a copy
  202. seeded_rand = np.random.RandomState(seed)
  203. def mytake(a, ix, axis):
  204. record = np.asanyarray(np.take(a, ix, axis=axis))
  205. return record.reshape([a.shape[i] if i != axis else 1
  206. for i in range(a.ndim)])
  207. n_obs = arrays[0].shape[axis]
  208. assert all(a.shape[axis] == n_obs for a in arrays), \
  209. "data must be aligned on sample axis"
  210. for i in range(int(split_per) * n_obs):
  211. split_ix = seeded_rand.randint(n_obs + i)
  212. prev_w = weights[split_ix]
  213. q = seeded_rand.rand()
  214. weights[split_ix] = q * prev_w
  215. weights = np.append(weights, (1. - q) * prev_w)
  216. arrays = [np.append(a, mytake(a, split_ix, axis=axis),
  217. axis=axis) for a in arrays]
  218. return arrays, weights
  219. assert_allclose_forgiving = partial(assert_allclose, atol=1e-5)
  220. def _rough_check(a, b, compare_assert=assert_allclose_forgiving,
  221. key=lambda x: x, w=None):
  222. check_a = key(a)
  223. check_b = key(b)
  224. try:
  225. if np.array(check_a != check_b).any(): # try strict equality for string types
  226. compare_assert(check_a, check_b)
  227. except AttributeError: # masked array
  228. compare_assert(check_a, check_b)
  229. except (TypeError, ValueError): # nested data structure
  230. for a_i, b_i in zip(check_a, check_b):
  231. _rough_check(a_i, b_i, compare_assert=compare_assert)
  232. # diff from test_stats:
  233. # n_args=2, weight_arg='w', default_axis=None
  234. # ma_safe = False, nan_safe = False
  235. def _weight_checked(fn, n_args=2, default_axis=None, key=lambda x: x, weight_arg='w',
  236. squeeze=True, silent=False,
  237. ones_test=True, const_test=True, dup_test=True,
  238. split_test=True, dud_test=True, ma_safe=False, ma_very_safe=False,
  239. nan_safe=False, split_per=1.0, seed=0,
  240. compare_assert=assert_allclose_forgiving):
  241. """runs fn on its arguments 2 or 3 ways, checks that the results are the same,
  242. then returns the same thing it would have returned before"""
  243. @wraps(fn)
  244. def wrapped(*args, **kwargs):
  245. result = fn(*args, **kwargs)
  246. arrays = args[:n_args]
  247. rest = args[n_args:]
  248. weights = kwargs.get(weight_arg, None)
  249. axis = kwargs.get('axis', default_axis)
  250. chked = _chk_weights(arrays, weights=weights, axis=axis,
  251. force_weights=True, mask_screen=True)
  252. arrays, weights, axis = chked[:-2], chked[-2], chked[-1]
  253. if squeeze:
  254. arrays = [np.atleast_1d(a.squeeze()) for a in arrays]
  255. try:
  256. # WEIGHTS CHECK 1: EQUAL WEIGHTED OBSERVATIONS
  257. args = tuple(arrays) + rest
  258. if ones_test:
  259. kwargs[weight_arg] = weights
  260. _rough_check(result, fn(*args, **kwargs), key=key)
  261. if const_test:
  262. kwargs[weight_arg] = weights * 101.0
  263. _rough_check(result, fn(*args, **kwargs), key=key)
  264. kwargs[weight_arg] = weights * 0.101
  265. try:
  266. _rough_check(result, fn(*args, **kwargs), key=key)
  267. except Exception as e:
  268. raise type(e)((e, arrays, weights)) from e
  269. # WEIGHTS CHECK 2: ADDL 0-WEIGHTED OBS
  270. if dud_test:
  271. # add randomly resampled rows, weighted at 0
  272. dud_arrays, dud_weights = _rand_split(arrays, weights, axis,
  273. split_per=split_per, seed=seed)
  274. dud_weights[:weights.size] = weights # not exactly 1 because of masked arrays # noqa: E501
  275. dud_weights[weights.size:] = 0
  276. dud_args = tuple(dud_arrays) + rest
  277. kwargs[weight_arg] = dud_weights
  278. _rough_check(result, fn(*dud_args, **kwargs), key=key)
  279. # increase the value of those 0-weighted rows
  280. for a in dud_arrays:
  281. indexer = [slice(None)] * a.ndim
  282. indexer[axis] = slice(weights.size, None)
  283. indexer = tuple(indexer)
  284. a[indexer] = a[indexer] * 101
  285. dud_args = tuple(dud_arrays) + rest
  286. _rough_check(result, fn(*dud_args, **kwargs), key=key)
  287. # set those 0-weighted rows to NaNs
  288. for a in dud_arrays:
  289. indexer = [slice(None)] * a.ndim
  290. indexer[axis] = slice(weights.size, None)
  291. indexer = tuple(indexer)
  292. a[indexer] = a[indexer] * np.nan
  293. if kwargs.get("nan_policy", None) == "omit" and nan_safe:
  294. dud_args = tuple(dud_arrays) + rest
  295. _rough_check(result, fn(*dud_args, **kwargs), key=key)
  296. # mask out those nan values
  297. if ma_safe:
  298. dud_arrays = [np.ma.masked_invalid(a) for a in dud_arrays]
  299. dud_args = tuple(dud_arrays) + rest
  300. _rough_check(result, fn(*dud_args, **kwargs), key=key)
  301. if ma_very_safe:
  302. kwargs[weight_arg] = None
  303. _rough_check(result, fn(*dud_args, **kwargs), key=key)
  304. del dud_arrays, dud_args, dud_weights
  305. # WEIGHTS CHECK 3: DUPLICATE DATA (DUMB SPLITTING)
  306. if dup_test:
  307. dup_arrays = [np.append(a, a, axis=axis) for a in arrays]
  308. dup_weights = np.append(weights, weights) / 2.0
  309. dup_args = tuple(dup_arrays) + rest
  310. kwargs[weight_arg] = dup_weights
  311. _rough_check(result, fn(*dup_args, **kwargs), key=key)
  312. del dup_args, dup_arrays, dup_weights
  313. # WEIGHT CHECK 3: RANDOM SPLITTING
  314. if split_test and split_per > 0:
  315. split = _rand_split(arrays, weights, axis,
  316. split_per=split_per, seed=seed)
  317. split_arrays, split_weights = split
  318. split_args = tuple(split_arrays) + rest
  319. kwargs[weight_arg] = split_weights
  320. _rough_check(result, fn(*split_args, **kwargs), key=key)
  321. except NotImplementedError as e:
  322. # when some combination of arguments makes weighting impossible,
  323. # this is the desired response
  324. if not silent:
  325. warnings.warn(f"{fn.__name__} NotImplemented weights: {e}",
  326. stacklevel=3)
  327. return result
  328. return wrapped
  329. wcdist = _weight_checked(cdist, default_axis=1, squeeze=False)
  330. wcdist_no_const = _weight_checked(cdist, default_axis=1,
  331. squeeze=False, const_test=False)
  332. wpdist = _weight_checked(pdist, default_axis=1, squeeze=False, n_args=1)
  333. wpdist_no_const = _weight_checked(pdist, default_axis=1, squeeze=False,
  334. const_test=False, n_args=1)
  335. wrogerstanimoto = _weight_checked(rogerstanimoto)
  336. wmatching = whamming = _weight_checked(hamming, dud_test=False)
  337. wyule = _weight_checked(yule)
  338. wdice = _weight_checked(dice)
  339. wcityblock = _weight_checked(cityblock)
  340. wchebyshev = _weight_checked(chebyshev)
  341. wcosine = _weight_checked(cosine)
  342. wcorrelation = _weight_checked(correlation)
  343. wjaccard = _weight_checked(jaccard)
  344. weuclidean = _weight_checked(euclidean, const_test=False)
  345. wsqeuclidean = _weight_checked(sqeuclidean, const_test=False)
  346. wbraycurtis = _weight_checked(braycurtis)
  347. wcanberra = _weight_checked(canberra, const_test=False)
  348. wsokalsneath = _weight_checked(sokalsneath)
  349. wrussellrao = _weight_checked(russellrao)
  350. class TestCdist:
  351. def setup_method(self):
  352. self.rnd_eo_names = ['random-float32-data', 'random-int-data',
  353. 'random-uint-data', 'random-double-data',
  354. 'random-bool-data']
  355. self.valid_upcasts = {'bool': [np_ulong, np_long, np.float32, np.float64],
  356. 'uint': [np_long, np.float32, np.float64],
  357. 'int': [np.float32, np.float64],
  358. 'float32': [np.float64]}
  359. def test_cdist_extra_args(self, metric):
  360. # Tests that args and kwargs are correctly handled
  361. X1 = [[1., 2., 3.], [1.2, 2.3, 3.4], [2.2, 2.3, 4.4]]
  362. X2 = [[7., 5., 8.], [7.5, 5.8, 8.4], [5.5, 5.8, 4.4]]
  363. kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(3)}
  364. args = [3.14] * 200
  365. with pytest.raises(TypeError):
  366. cdist(X1, X2, metric=metric, **kwargs)
  367. with pytest.raises(TypeError):
  368. cdist(X1, X2, metric=eval(metric), **kwargs)
  369. with pytest.raises(TypeError):
  370. cdist(X1, X2, metric="test_" + metric, **kwargs)
  371. with pytest.raises(TypeError):
  372. cdist(X1, X2, metric=metric, *args)
  373. with pytest.raises(TypeError):
  374. cdist(X1, X2, metric=eval(metric), *args)
  375. with pytest.raises(TypeError):
  376. cdist(X1, X2, metric="test_" + metric, *args)
  377. def test_cdist_extra_args_custom(self):
  378. # Tests that args and kwargs are correctly handled
  379. # also for custom metric
  380. def _my_metric(x, y, arg, kwarg=1, kwarg2=2):
  381. return arg + kwarg + kwarg2
  382. X1 = [[1., 2., 3.], [1.2, 2.3, 3.4], [2.2, 2.3, 4.4]]
  383. X2 = [[7., 5., 8.], [7.5, 5.8, 8.4], [5.5, 5.8, 4.4]]
  384. kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(3)}
  385. args = [3.14] * 200
  386. with pytest.raises(TypeError):
  387. cdist(X1, X2, _my_metric)
  388. with pytest.raises(TypeError):
  389. cdist(X1, X2, _my_metric, *args)
  390. with pytest.raises(TypeError):
  391. cdist(X1, X2, _my_metric, **kwargs)
  392. with pytest.raises(TypeError):
  393. cdist(X1, X2, _my_metric, kwarg=2.2, kwarg2=3.3)
  394. with pytest.raises(TypeError):
  395. cdist(X1, X2, _my_metric, 1, 2, kwarg=2.2)
  396. with pytest.raises(TypeError):
  397. cdist(X1, X2, _my_metric, 1, 2, kwarg=2.2)
  398. with pytest.raises(TypeError):
  399. cdist(X1, X2, _my_metric, 1.1, 2.2, 3.3)
  400. with pytest.raises(TypeError):
  401. cdist(X1, X2, _my_metric, 1.1, 2.2)
  402. with pytest.raises(TypeError):
  403. cdist(X1, X2, _my_metric, 1.1)
  404. with pytest.raises(TypeError):
  405. cdist(X1, X2, _my_metric, 1.1, kwarg=2.2, kwarg2=3.3)
  406. # this should work
  407. assert_allclose(cdist(X1, X2, metric=_my_metric,
  408. arg=1.1, kwarg2=3.3), 5.4)
  409. def test_cdist_euclidean_random_unicode(self):
  410. eps = 1e-15
  411. X1 = eo['cdist-X1']
  412. X2 = eo['cdist-X2']
  413. Y1 = wcdist_no_const(X1, X2, 'euclidean')
  414. Y2 = wcdist_no_const(X1, X2, 'test_euclidean')
  415. assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)
  416. @pytest.mark.parametrize("p", [0.1, 0.25, 1.0, 1.23,
  417. 2.0, 3.8, 4.6, np.inf])
  418. def test_cdist_minkowski_random(self, p):
  419. eps = 1e-13
  420. X1 = eo['cdist-X1']
  421. X2 = eo['cdist-X2']
  422. Y1 = wcdist_no_const(X1, X2, 'minkowski', p=p)
  423. Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=p)
  424. assert_allclose(Y1, Y2, atol=0, rtol=eps, verbose=verbose > 2)
  425. def test_cdist_cosine_random(self):
  426. eps = 1e-14
  427. X1 = eo['cdist-X1']
  428. X2 = eo['cdist-X2']
  429. Y1 = wcdist(X1, X2, 'cosine')
  430. # Naive implementation
  431. def norms(X):
  432. return np.linalg.norm(X, axis=1).reshape(-1, 1)
  433. Y2 = 1 - np.dot((X1 / norms(X1)), (X2 / norms(X2)).T)
  434. assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)
  435. def test_cdist_mahalanobis(self):
  436. # 1-dimensional observations
  437. x1 = np.array([[2], [3]])
  438. x2 = np.array([[2], [5]])
  439. dist = cdist(x1, x2, metric='mahalanobis')
  440. assert_allclose(dist, [[0.0, np.sqrt(4.5)], [np.sqrt(0.5), np.sqrt(2)]])
  441. # 2-dimensional observations
  442. x1 = np.array([[0, 0], [-1, 0]])
  443. x2 = np.array([[0, 2], [1, 0], [0, -2]])
  444. dist = cdist(x1, x2, metric='mahalanobis')
  445. rt2 = np.sqrt(2)
  446. assert_allclose(dist, [[rt2, rt2, rt2], [2, 2 * rt2, 2]])
  447. # Too few observations
  448. with pytest.raises(ValueError):
  449. cdist([[0, 1]], [[2, 3]], metric='mahalanobis')
  450. def test_cdist_custom_notdouble(self):
  451. class myclass:
  452. pass
  453. def _my_metric(x, y):
  454. if not isinstance(x[0], myclass) or not isinstance(y[0], myclass):
  455. raise ValueError("Type has been changed")
  456. return 1.123
  457. data = np.array([[myclass()]], dtype=object)
  458. cdist_y = cdist(data, data, metric=_my_metric)
  459. right_y = 1.123
  460. assert_equal(cdist_y, right_y, verbose=verbose > 2)
  461. def _check_calling_conventions(self, X1, X2, metric, eps=1e-07, **kwargs):
  462. # helper function for test_cdist_calling_conventions
  463. try:
  464. y1 = cdist(X1, X2, metric=metric, **kwargs)
  465. y2 = cdist(X1, X2, metric=eval(metric), **kwargs)
  466. y3 = cdist(X1, X2, metric="test_" + metric, **kwargs)
  467. except Exception as e:
  468. e_cls = e.__class__
  469. if verbose > 2:
  470. print(e_cls.__name__)
  471. print(e)
  472. with pytest.raises(e_cls):
  473. cdist(X1, X2, metric=metric, **kwargs)
  474. with pytest.raises(e_cls):
  475. cdist(X1, X2, metric=eval(metric), **kwargs)
  476. with pytest.raises(e_cls):
  477. cdist(X1, X2, metric="test_" + metric, **kwargs)
  478. else:
  479. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  480. assert_allclose(y1, y3, rtol=eps, verbose=verbose > 2)
  481. def test_cdist_calling_conventions(self, metric):
  482. # Ensures that specifying the metric with a str or scipy function
  483. # gives the same behaviour (i.e. same result or same exception).
  484. # NOTE: The correctness should be checked within each metric tests.
  485. for eo_name in self.rnd_eo_names:
  486. # subsampling input data to speed-up tests
  487. # NOTE: num samples needs to be > than dimensions for mahalanobis
  488. X1 = eo[eo_name][::5, ::-2]
  489. X2 = eo[eo_name][1::5, ::2]
  490. if verbose > 2:
  491. print("testing: ", metric, " with: ", eo_name)
  492. if (metric in {'dice', 'yule', 'rogerstanimoto', 'russellrao',
  493. 'sokalsneath'}
  494. and 'bool' not in eo_name):
  495. # python version permits non-bools e.g. for fuzzy logic
  496. continue
  497. self._check_calling_conventions(X1, X2, metric)
  498. # Testing built-in metrics with extra args
  499. if metric == "seuclidean":
  500. X12 = np.vstack([X1, X2]).astype(np.float64)
  501. V = np.var(X12, axis=0, ddof=1)
  502. self._check_calling_conventions(X1, X2, metric, V=V)
  503. elif metric == "mahalanobis":
  504. X12 = np.vstack([X1, X2]).astype(np.float64)
  505. V = np.atleast_2d(np.cov(X12.T))
  506. VI = np.array(np.linalg.inv(V).T)
  507. self._check_calling_conventions(X1, X2, metric, VI=VI)
  508. def test_cdist_dtype_equivalence(self, metric):
  509. # Tests that the result is not affected by type up-casting
  510. eps = 1e-07
  511. tests = [(eo['random-bool-data'], self.valid_upcasts['bool']),
  512. (eo['random-uint-data'], self.valid_upcasts['uint']),
  513. (eo['random-int-data'], self.valid_upcasts['int']),
  514. (eo['random-float32-data'], self.valid_upcasts['float32'])]
  515. for test in tests:
  516. X1 = test[0][::5, ::-2]
  517. X2 = test[0][1::5, ::2]
  518. try:
  519. y1 = cdist(X1, X2, metric=metric)
  520. except Exception as e:
  521. e_cls = e.__class__
  522. if verbose > 2:
  523. print(e_cls.__name__)
  524. print(e)
  525. for new_type in test[1]:
  526. X1new = new_type(X1)
  527. X2new = new_type(X2)
  528. with pytest.raises(e_cls):
  529. cdist(X1new, X2new, metric=metric)
  530. else:
  531. for new_type in test[1]:
  532. y2 = cdist(new_type(X1), new_type(X2), metric=metric)
  533. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  534. @pytest.mark.skipif(sysconfig.get_platform() == 'win-arm64', reason="numpy#29442")
  535. def test_cdist_out(self, metric):
  536. # Test that out parameter works properly
  537. eps = 1e-15
  538. X1 = eo['cdist-X1']
  539. X2 = eo['cdist-X2']
  540. out_r, out_c = X1.shape[0], X2.shape[0]
  541. kwargs = dict()
  542. if metric == 'minkowski':
  543. kwargs['p'] = 1.23
  544. out1 = np.empty((out_r, out_c), dtype=np.float64)
  545. Y1 = cdist(X1, X2, metric, **kwargs)
  546. Y2 = cdist(X1, X2, metric, out=out1, **kwargs)
  547. # test that output is numerically equivalent
  548. assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)
  549. # test that Y_test1 and out1 are the same object
  550. assert_(Y2 is out1)
  551. # test for incorrect shape
  552. out2 = np.empty((out_r-1, out_c+1), dtype=np.float64)
  553. with pytest.raises(ValueError):
  554. cdist(X1, X2, metric, out=out2, **kwargs)
  555. # test for C-contiguous order
  556. out3 = np.empty(
  557. (2 * out_r, 2 * out_c), dtype=np.float64)[::2, ::2]
  558. out4 = np.empty((out_r, out_c), dtype=np.float64, order='F')
  559. with pytest.raises(ValueError):
  560. cdist(X1, X2, metric, out=out3, **kwargs)
  561. with pytest.raises(ValueError):
  562. cdist(X1, X2, metric, out=out4, **kwargs)
  563. # test for incorrect dtype
  564. out5 = np.empty((out_r, out_c), dtype=np.int64)
  565. with pytest.raises(ValueError):
  566. cdist(X1, X2, metric, out=out5, **kwargs)
  567. def test_striding(self, metric):
  568. # test that striding is handled correct with calls to
  569. # _copy_array_if_base_present
  570. eps = 1e-15
  571. X1 = eo['cdist-X1'][::2, ::2]
  572. X2 = eo['cdist-X2'][::2, ::2]
  573. X1_copy = X1.copy()
  574. X2_copy = X2.copy()
  575. # confirm equivalence
  576. assert_equal(X1, X1_copy)
  577. assert_equal(X2, X2_copy)
  578. # confirm contiguity
  579. assert_(not X1.flags.c_contiguous)
  580. assert_(not X2.flags.c_contiguous)
  581. assert_(X1_copy.flags.c_contiguous)
  582. assert_(X2_copy.flags.c_contiguous)
  583. kwargs = dict()
  584. if metric == 'minkowski':
  585. kwargs['p'] = 1.23
  586. Y1 = cdist(X1, X2, metric, **kwargs)
  587. Y2 = cdist(X1_copy, X2_copy, metric, **kwargs)
  588. # test that output is numerically equivalent
  589. assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)
  590. def test_cdist_refcount(self, metric):
  591. x1 = np.random.rand(10, 10)
  592. x2 = np.random.rand(10, 10)
  593. kwargs = dict()
  594. if metric == 'minkowski':
  595. kwargs['p'] = 1.23
  596. out = cdist(x1, x2, metric=metric, **kwargs)
  597. # Check reference counts aren't messed up. If we only hold weak
  598. # references, the arrays should be deallocated.
  599. weak_refs = [weakref.ref(v) for v in (x1, x2, out)]
  600. del x1, x2, out
  601. if IS_PYPY:
  602. break_cycles()
  603. assert all(weak_ref() is None for weak_ref in weak_refs)
  604. class TestPdist:
  605. def setup_method(self):
  606. self.rnd_eo_names = ['random-float32-data', 'random-int-data',
  607. 'random-uint-data', 'random-double-data',
  608. 'random-bool-data']
  609. self.valid_upcasts = {'bool': [np_ulong, np_long, np.float32, np.float64],
  610. 'uint': [np_long, np.float32, np.float64],
  611. 'int': [np.float32, np.float64],
  612. 'float32': [np.float64]}
  613. def test_pdist_extra_args(self, metric):
  614. # Tests that args and kwargs are correctly handled
  615. X1 = [[1., 2.], [1.2, 2.3], [2.2, 2.3]]
  616. kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(2)}
  617. args = [3.14] * 200
  618. with pytest.raises(TypeError):
  619. pdist(X1, metric=metric, **kwargs)
  620. with pytest.raises(TypeError):
  621. pdist(X1, metric=eval(metric), **kwargs)
  622. with pytest.raises(TypeError):
  623. pdist(X1, metric="test_" + metric, **kwargs)
  624. with pytest.raises(TypeError):
  625. pdist(X1, metric=metric, *args)
  626. with pytest.raises(TypeError):
  627. pdist(X1, metric=eval(metric), *args)
  628. with pytest.raises(TypeError):
  629. pdist(X1, metric="test_" + metric, *args)
  630. def test_pdist_extra_args_custom(self):
  631. # Tests that args and kwargs are correctly handled
  632. # also for custom metric
  633. def _my_metric(x, y, arg, kwarg=1, kwarg2=2):
  634. return arg + kwarg + kwarg2
  635. X1 = [[1., 2.], [1.2, 2.3], [2.2, 2.3]]
  636. kwargs = {"N0tV4l1D_p4raM": 3.14, "w": np.arange(2)}
  637. args = [3.14] * 200
  638. with pytest.raises(TypeError):
  639. pdist(X1, _my_metric)
  640. with pytest.raises(TypeError):
  641. pdist(X1, _my_metric, *args)
  642. with pytest.raises(TypeError):
  643. pdist(X1, _my_metric, **kwargs)
  644. with pytest.raises(TypeError):
  645. pdist(X1, _my_metric, kwarg=2.2, kwarg2=3.3)
  646. with pytest.raises(TypeError):
  647. pdist(X1, _my_metric, 1, 2, kwarg=2.2)
  648. with pytest.raises(TypeError):
  649. pdist(X1, _my_metric, 1, 2, kwarg=2.2)
  650. with pytest.raises(TypeError):
  651. pdist(X1, _my_metric, 1.1, 2.2, 3.3)
  652. with pytest.raises(TypeError):
  653. pdist(X1, _my_metric, 1.1, 2.2)
  654. with pytest.raises(TypeError):
  655. pdist(X1, _my_metric, 1.1)
  656. with pytest.raises(TypeError):
  657. pdist(X1, _my_metric, 1.1, kwarg=2.2, kwarg2=3.3)
  658. # these should work
  659. assert_allclose(pdist(X1, metric=_my_metric,
  660. arg=1.1, kwarg2=3.3), 5.4)
  661. def test_pdist_euclidean_random(self):
  662. eps = 1e-07
  663. X = eo['pdist-double-inp']
  664. Y_right = eo['pdist-euclidean']
  665. Y_test1 = wpdist_no_const(X, 'euclidean')
  666. assert_allclose(Y_test1, Y_right, rtol=eps)
  667. def test_pdist_euclidean_random_u(self):
  668. eps = 1e-07
  669. X = eo['pdist-double-inp']
  670. Y_right = eo['pdist-euclidean']
  671. Y_test1 = wpdist_no_const(X, 'euclidean')
  672. assert_allclose(Y_test1, Y_right, rtol=eps)
  673. def test_pdist_euclidean_random_float32(self):
  674. eps = 1e-07
  675. X = np.float32(eo['pdist-double-inp'])
  676. Y_right = eo['pdist-euclidean']
  677. Y_test1 = wpdist_no_const(X, 'euclidean')
  678. assert_allclose(Y_test1, Y_right, rtol=eps)
  679. def test_pdist_euclidean_random_nonC(self):
  680. eps = 1e-07
  681. X = eo['pdist-double-inp']
  682. Y_right = eo['pdist-euclidean']
  683. Y_test2 = wpdist_no_const(X, 'test_euclidean')
  684. assert_allclose(Y_test2, Y_right, rtol=eps)
  685. @pytest.mark.slow
  686. def test_pdist_euclidean_iris_double(self):
  687. eps = 1e-7
  688. X = eo['iris']
  689. Y_right = eo['pdist-euclidean-iris']
  690. Y_test1 = wpdist_no_const(X, 'euclidean')
  691. assert_allclose(Y_test1, Y_right, rtol=eps)
  692. @pytest.mark.slow
  693. def test_pdist_euclidean_iris_float32(self):
  694. eps = 1e-5
  695. X = np.float32(eo['iris'])
  696. Y_right = eo['pdist-euclidean-iris']
  697. Y_test1 = wpdist_no_const(X, 'euclidean')
  698. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  699. @pytest.mark.slow
  700. def test_pdist_euclidean_iris_nonC(self):
  701. # Test pdist(X, 'test_euclidean') [the non-C implementation] on the
  702. # Iris data set.
  703. eps = 1e-7
  704. X = eo['iris']
  705. Y_right = eo['pdist-euclidean-iris']
  706. Y_test2 = wpdist_no_const(X, 'test_euclidean')
  707. assert_allclose(Y_test2, Y_right, rtol=eps)
  708. def test_pdist_seuclidean_random(self):
  709. eps = 1e-7
  710. X = eo['pdist-double-inp']
  711. Y_right = eo['pdist-seuclidean']
  712. Y_test1 = pdist(X, 'seuclidean')
  713. assert_allclose(Y_test1, Y_right, rtol=eps)
  714. def test_pdist_seuclidean_random_float32(self):
  715. eps = 1e-7
  716. X = np.float32(eo['pdist-double-inp'])
  717. Y_right = eo['pdist-seuclidean']
  718. Y_test1 = pdist(X, 'seuclidean')
  719. assert_allclose(Y_test1, Y_right, rtol=eps)
  720. # Check no error is raise when V has float32 dtype (#11171).
  721. V = np.var(X, axis=0, ddof=1)
  722. Y_test2 = pdist(X, 'seuclidean', V=V)
  723. assert_allclose(Y_test2, Y_right, rtol=eps)
  724. def test_pdist_seuclidean_random_nonC(self):
  725. # Test pdist(X, 'test_sqeuclidean') [the non-C implementation]
  726. eps = 1e-07
  727. X = eo['pdist-double-inp']
  728. Y_right = eo['pdist-seuclidean']
  729. Y_test2 = pdist(X, 'test_seuclidean')
  730. assert_allclose(Y_test2, Y_right, rtol=eps)
  731. def test_pdist_seuclidean_iris(self):
  732. eps = 1e-7
  733. X = eo['iris']
  734. Y_right = eo['pdist-seuclidean-iris']
  735. Y_test1 = pdist(X, 'seuclidean')
  736. assert_allclose(Y_test1, Y_right, rtol=eps)
  737. def test_pdist_seuclidean_iris_float32(self):
  738. # Tests pdist(X, 'seuclidean') on the Iris data set (float32).
  739. eps = 1e-5
  740. X = np.float32(eo['iris'])
  741. Y_right = eo['pdist-seuclidean-iris']
  742. Y_test1 = pdist(X, 'seuclidean')
  743. assert_allclose(Y_test1, Y_right, rtol=eps)
  744. def test_pdist_seuclidean_iris_nonC(self):
  745. # Test pdist(X, 'test_seuclidean') [the non-C implementation] on the
  746. # Iris data set.
  747. eps = 1e-7
  748. X = eo['iris']
  749. Y_right = eo['pdist-seuclidean-iris']
  750. Y_test2 = pdist(X, 'test_seuclidean')
  751. assert_allclose(Y_test2, Y_right, rtol=eps)
  752. def test_pdist_cosine_random(self):
  753. eps = 1e-7
  754. X = eo['pdist-double-inp']
  755. Y_right = eo['pdist-cosine']
  756. Y_test1 = wpdist(X, 'cosine')
  757. assert_allclose(Y_test1, Y_right, rtol=eps)
  758. def test_pdist_cosine_random_float32(self):
  759. eps = 1e-7
  760. X = np.float32(eo['pdist-double-inp'])
  761. Y_right = eo['pdist-cosine']
  762. Y_test1 = wpdist(X, 'cosine')
  763. assert_allclose(Y_test1, Y_right, rtol=eps)
  764. def test_pdist_cosine_random_nonC(self):
  765. # Test pdist(X, 'test_cosine') [the non-C implementation]
  766. eps = 1e-7
  767. X = eo['pdist-double-inp']
  768. Y_right = eo['pdist-cosine']
  769. Y_test2 = wpdist(X, 'test_cosine')
  770. assert_allclose(Y_test2, Y_right, rtol=eps)
  771. @pytest.mark.slow
  772. def test_pdist_cosine_iris(self):
  773. eps = 1e-05
  774. X = eo['iris']
  775. Y_right = eo['pdist-cosine-iris']
  776. Y_test1 = wpdist(X, 'cosine')
  777. assert_allclose(Y_test1, Y_right, atol=eps)
  778. @pytest.mark.slow
  779. def test_pdist_cosine_iris_float32(self):
  780. eps = 1e-05
  781. X = np.float32(eo['iris'])
  782. Y_right = eo['pdist-cosine-iris']
  783. Y_test1 = wpdist(X, 'cosine')
  784. assert_allclose(Y_test1, Y_right, atol=eps, verbose=verbose > 2)
  785. @pytest.mark.slow
  786. def test_pdist_cosine_iris_nonC(self):
  787. eps = 1e-05
  788. X = eo['iris']
  789. Y_right = eo['pdist-cosine-iris']
  790. Y_test2 = wpdist(X, 'test_cosine')
  791. assert_allclose(Y_test2, Y_right, atol=eps)
  792. def test_pdist_cosine_bounds(self):
  793. # Test adapted from @joernhees's example at gh-5208: case where
  794. # cosine distance used to be negative. XXX: very sensitive to the
  795. # specific norm computation.
  796. x = np.abs(np.random.RandomState(1337).rand(91))
  797. X = np.vstack([x, x])
  798. assert_(wpdist(X, 'cosine')[0] >= 0,
  799. msg='cosine distance should be non-negative')
  800. def test_pdist_cityblock_random(self):
  801. eps = 1e-7
  802. X = eo['pdist-double-inp']
  803. Y_right = eo['pdist-cityblock']
  804. Y_test1 = wpdist_no_const(X, 'cityblock')
  805. assert_allclose(Y_test1, Y_right, rtol=eps)
  806. def test_pdist_cityblock_random_float32(self):
  807. eps = 1e-7
  808. X = np.float32(eo['pdist-double-inp'])
  809. Y_right = eo['pdist-cityblock']
  810. Y_test1 = wpdist_no_const(X, 'cityblock')
  811. assert_allclose(Y_test1, Y_right, rtol=eps)
  812. def test_pdist_cityblock_random_nonC(self):
  813. eps = 1e-7
  814. X = eo['pdist-double-inp']
  815. Y_right = eo['pdist-cityblock']
  816. Y_test2 = wpdist_no_const(X, 'test_cityblock')
  817. assert_allclose(Y_test2, Y_right, rtol=eps)
  818. @pytest.mark.slow
  819. def test_pdist_cityblock_iris(self):
  820. eps = 1e-14
  821. X = eo['iris']
  822. Y_right = eo['pdist-cityblock-iris']
  823. Y_test1 = wpdist_no_const(X, 'cityblock')
  824. assert_allclose(Y_test1, Y_right, rtol=eps)
  825. @pytest.mark.slow
  826. def test_pdist_cityblock_iris_float32(self):
  827. eps = 1e-5
  828. X = np.float32(eo['iris'])
  829. Y_right = eo['pdist-cityblock-iris']
  830. Y_test1 = wpdist_no_const(X, 'cityblock')
  831. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  832. @pytest.mark.slow
  833. def test_pdist_cityblock_iris_nonC(self):
  834. # Test pdist(X, 'test_cityblock') [the non-C implementation] on the
  835. # Iris data set.
  836. eps = 1e-14
  837. X = eo['iris']
  838. Y_right = eo['pdist-cityblock-iris']
  839. Y_test2 = wpdist_no_const(X, 'test_cityblock')
  840. assert_allclose(Y_test2, Y_right, rtol=eps)
  841. def test_pdist_correlation_random(self):
  842. eps = 1e-7
  843. X = eo['pdist-double-inp']
  844. Y_right = eo['pdist-correlation']
  845. Y_test1 = wpdist(X, 'correlation')
  846. assert_allclose(Y_test1, Y_right, rtol=eps)
  847. def test_pdist_correlation_random_float32(self):
  848. eps = 1e-7
  849. X = np.float32(eo['pdist-double-inp'])
  850. Y_right = eo['pdist-correlation']
  851. Y_test1 = wpdist(X, 'correlation')
  852. assert_allclose(Y_test1, Y_right, rtol=eps)
  853. def test_pdist_correlation_random_nonC(self):
  854. eps = 1e-7
  855. X = eo['pdist-double-inp']
  856. Y_right = eo['pdist-correlation']
  857. Y_test2 = wpdist(X, 'test_correlation')
  858. assert_allclose(Y_test2, Y_right, rtol=eps)
  859. @pytest.mark.slow
  860. def test_pdist_correlation_iris(self):
  861. eps = 1e-7
  862. X = eo['iris']
  863. Y_right = eo['pdist-correlation-iris']
  864. Y_test1 = wpdist(X, 'correlation')
  865. assert_allclose(Y_test1, Y_right, rtol=eps)
  866. @pytest.mark.slow
  867. def test_pdist_correlation_iris_float32(self):
  868. eps = 1e-7
  869. X = eo['iris']
  870. Y_right = np.float32(eo['pdist-correlation-iris'])
  871. Y_test1 = wpdist(X, 'correlation')
  872. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  873. @pytest.mark.slow
  874. def test_pdist_correlation_iris_nonC(self):
  875. if sys.maxsize > 2**32:
  876. eps = 1e-7
  877. else:
  878. pytest.skip("see gh-16456")
  879. X = eo['iris']
  880. Y_right = eo['pdist-correlation-iris']
  881. Y_test2 = wpdist(X, 'test_correlation')
  882. assert_allclose(Y_test2, Y_right, rtol=eps)
  883. @pytest.mark.parametrize("p", [0.1, 0.25, 1.0, 2.0, 3.2, np.inf])
  884. def test_pdist_minkowski_random_p(self, p):
  885. eps = 1e-13
  886. X = eo['pdist-double-inp']
  887. Y1 = wpdist_no_const(X, 'minkowski', p=p)
  888. Y2 = wpdist_no_const(X, 'test_minkowski', p=p)
  889. assert_allclose(Y1, Y2, atol=0, rtol=eps)
  890. def test_pdist_minkowski_random(self):
  891. eps = 1e-7
  892. X = eo['pdist-double-inp']
  893. Y_right = eo['pdist-minkowski-3.2']
  894. Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
  895. assert_allclose(Y_test1, Y_right, rtol=eps)
  896. def test_pdist_minkowski_random_float32(self):
  897. eps = 1e-7
  898. X = np.float32(eo['pdist-double-inp'])
  899. Y_right = eo['pdist-minkowski-3.2']
  900. Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
  901. assert_allclose(Y_test1, Y_right, rtol=eps)
  902. def test_pdist_minkowski_random_nonC(self):
  903. eps = 1e-7
  904. X = eo['pdist-double-inp']
  905. Y_right = eo['pdist-minkowski-3.2']
  906. Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2)
  907. assert_allclose(Y_test2, Y_right, rtol=eps)
  908. @pytest.mark.slow
  909. def test_pdist_minkowski_3_2_iris(self):
  910. eps = 1e-7
  911. X = eo['iris']
  912. Y_right = eo['pdist-minkowski-3.2-iris']
  913. Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
  914. assert_allclose(Y_test1, Y_right, rtol=eps)
  915. @pytest.mark.slow
  916. def test_pdist_minkowski_3_2_iris_float32(self):
  917. eps = 1e-5
  918. X = np.float32(eo['iris'])
  919. Y_right = eo['pdist-minkowski-3.2-iris']
  920. Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
  921. assert_allclose(Y_test1, Y_right, rtol=eps)
  922. @pytest.mark.slow
  923. def test_pdist_minkowski_3_2_iris_nonC(self):
  924. eps = 1e-7
  925. X = eo['iris']
  926. Y_right = eo['pdist-minkowski-3.2-iris']
  927. Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2)
  928. assert_allclose(Y_test2, Y_right, rtol=eps)
  929. @pytest.mark.slow
  930. def test_pdist_minkowski_5_8_iris(self):
  931. eps = 1e-7
  932. X = eo['iris']
  933. Y_right = eo['pdist-minkowski-5.8-iris']
  934. Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8)
  935. assert_allclose(Y_test1, Y_right, rtol=eps)
  936. @pytest.mark.slow
  937. def test_pdist_minkowski_5_8_iris_float32(self):
  938. eps = 1e-5
  939. X = np.float32(eo['iris'])
  940. Y_right = eo['pdist-minkowski-5.8-iris']
  941. Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8)
  942. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  943. @pytest.mark.slow
  944. def test_pdist_minkowski_5_8_iris_nonC(self):
  945. eps = 1e-7
  946. X = eo['iris']
  947. Y_right = eo['pdist-minkowski-5.8-iris']
  948. Y_test2 = wpdist_no_const(X, 'test_minkowski', p=5.8)
  949. assert_allclose(Y_test2, Y_right, rtol=eps)
  950. def test_pdist_mahalanobis(self):
  951. # 1-dimensional observations
  952. x = np.array([2.0, 2.0, 3.0, 5.0]).reshape(-1, 1)
  953. dist = pdist(x, metric='mahalanobis')
  954. assert_allclose(dist, [0.0, np.sqrt(0.5), np.sqrt(4.5),
  955. np.sqrt(0.5), np.sqrt(4.5), np.sqrt(2.0)])
  956. # 2-dimensional observations
  957. x = np.array([[0, 0], [-1, 0], [0, 2], [1, 0], [0, -2]])
  958. dist = pdist(x, metric='mahalanobis')
  959. rt2 = np.sqrt(2)
  960. assert_allclose(dist, [rt2, rt2, rt2, rt2, 2, 2 * rt2, 2, 2, 2 * rt2, 2])
  961. # Too few observations
  962. with pytest.raises(ValueError):
  963. wpdist([[0, 1], [2, 3]], metric='mahalanobis')
  964. def test_pdist_hamming_random(self):
  965. eps = 1e-15
  966. X = eo['pdist-boolean-inp']
  967. Y_right = eo['pdist-hamming']
  968. Y_test1 = wpdist(X, 'hamming')
  969. assert_allclose(Y_test1, Y_right, rtol=eps)
  970. def test_pdist_hamming_random_float32(self):
  971. eps = 1e-15
  972. X = np.float32(eo['pdist-boolean-inp'])
  973. Y_right = eo['pdist-hamming']
  974. Y_test1 = wpdist(X, 'hamming')
  975. assert_allclose(Y_test1, Y_right, rtol=eps)
  976. def test_pdist_hamming_random_nonC(self):
  977. eps = 1e-15
  978. X = eo['pdist-boolean-inp']
  979. Y_right = eo['pdist-hamming']
  980. Y_test2 = wpdist(X, 'test_hamming')
  981. assert_allclose(Y_test2, Y_right, rtol=eps)
  982. def test_pdist_dhamming_random(self):
  983. eps = 1e-15
  984. X = np.float64(eo['pdist-boolean-inp'])
  985. Y_right = eo['pdist-hamming']
  986. Y_test1 = wpdist(X, 'hamming')
  987. assert_allclose(Y_test1, Y_right, rtol=eps)
  988. def test_pdist_dhamming_random_float32(self):
  989. eps = 1e-15
  990. X = np.float32(eo['pdist-boolean-inp'])
  991. Y_right = eo['pdist-hamming']
  992. Y_test1 = wpdist(X, 'hamming')
  993. assert_allclose(Y_test1, Y_right, rtol=eps)
  994. def test_pdist_dhamming_random_nonC(self):
  995. eps = 1e-15
  996. X = np.float64(eo['pdist-boolean-inp'])
  997. Y_right = eo['pdist-hamming']
  998. Y_test2 = wpdist(X, 'test_hamming')
  999. assert_allclose(Y_test2, Y_right, rtol=eps)
  1000. def test_pdist_jensenshannon_random(self):
  1001. eps = 1e-11
  1002. X = eo['pdist-double-inp']
  1003. Y_right = eo['pdist-jensenshannon']
  1004. Y_test1 = pdist(X, 'jensenshannon')
  1005. assert_allclose(Y_test1, Y_right, rtol=eps)
  1006. def test_pdist_jensenshannon_random_float32(self):
  1007. eps = 1e-8
  1008. X = np.float32(eo['pdist-double-inp'])
  1009. Y_right = eo['pdist-jensenshannon']
  1010. Y_test1 = pdist(X, 'jensenshannon')
  1011. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  1012. def test_pdist_jensenshannon_random_nonC(self):
  1013. eps = 1e-11
  1014. X = eo['pdist-double-inp']
  1015. Y_right = eo['pdist-jensenshannon']
  1016. Y_test2 = pdist(X, 'test_jensenshannon')
  1017. assert_allclose(Y_test2, Y_right, rtol=eps)
  1018. def test_pdist_jensenshannon_iris(self):
  1019. if _is_32bit():
  1020. # Test failing on 32-bit Linux on Azure otherwise, see gh-12810
  1021. eps = 2.5e-10
  1022. else:
  1023. eps = 1e-12
  1024. X = eo['iris']
  1025. Y_right = eo['pdist-jensenshannon-iris']
  1026. Y_test1 = pdist(X, 'jensenshannon')
  1027. assert_allclose(Y_test1, Y_right, atol=eps)
  1028. def test_pdist_jensenshannon_iris_float32(self):
  1029. eps = 1e-06
  1030. X = np.float32(eo['iris'])
  1031. Y_right = eo['pdist-jensenshannon-iris']
  1032. Y_test1 = pdist(X, 'jensenshannon')
  1033. assert_allclose(Y_test1, Y_right, atol=eps, verbose=verbose > 2)
  1034. def test_pdist_jensenshannon_iris_nonC(self):
  1035. eps = 5e-5
  1036. X = eo['iris']
  1037. Y_right = eo['pdist-jensenshannon-iris']
  1038. Y_test2 = pdist(X, 'test_jensenshannon')
  1039. assert_allclose(Y_test2, Y_right, rtol=eps)
  1040. def test_pdist_matching_mtica1(self):
  1041. # Test matching(*,*) with mtica example #1 (nums).
  1042. m = wmatching(np.array([1, 0, 1, 1, 0]),
  1043. np.array([1, 1, 0, 1, 1]))
  1044. m2 = wmatching(np.array([1, 0, 1, 1, 0], dtype=bool),
  1045. np.array([1, 1, 0, 1, 1], dtype=bool))
  1046. assert_allclose(m, 0.6, rtol=0, atol=1e-10)
  1047. assert_allclose(m2, 0.6, rtol=0, atol=1e-10)
  1048. def test_pdist_matching_mtica2(self):
  1049. # Test matching(*,*) with mtica example #2.
  1050. m = wmatching(np.array([1, 0, 1]),
  1051. np.array([1, 1, 0]))
  1052. m2 = wmatching(np.array([1, 0, 1], dtype=bool),
  1053. np.array([1, 1, 0], dtype=bool))
  1054. assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
  1055. assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)
  1056. def test_pdist_yule_mtica1(self):
  1057. m = wyule(np.array([1, 0, 1, 1, 0]),
  1058. np.array([1, 1, 0, 1, 1]))
  1059. m2 = wyule(np.array([1, 0, 1, 1, 0], dtype=bool),
  1060. np.array([1, 1, 0, 1, 1], dtype=bool))
  1061. if verbose > 2:
  1062. print(m)
  1063. assert_allclose(m, 2, rtol=0, atol=1e-10)
  1064. assert_allclose(m2, 2, rtol=0, atol=1e-10)
  1065. def test_pdist_yule_mtica2(self):
  1066. m = wyule(np.array([1, 0, 1]),
  1067. np.array([1, 1, 0]))
  1068. m2 = wyule(np.array([1, 0, 1], dtype=bool),
  1069. np.array([1, 1, 0], dtype=bool))
  1070. if verbose > 2:
  1071. print(m)
  1072. assert_allclose(m, 2, rtol=0, atol=1e-10)
  1073. assert_allclose(m2, 2, rtol=0, atol=1e-10)
  1074. def test_pdist_dice_mtica1(self):
  1075. m = wdice(np.array([1, 0, 1, 1, 0]),
  1076. np.array([1, 1, 0, 1, 1]))
  1077. m2 = wdice(np.array([1, 0, 1, 1, 0], dtype=bool),
  1078. np.array([1, 1, 0, 1, 1], dtype=bool))
  1079. if verbose > 2:
  1080. print(m)
  1081. assert_allclose(m, 3 / 7, rtol=0, atol=1e-10)
  1082. assert_allclose(m2, 3 / 7, rtol=0, atol=1e-10)
  1083. def test_pdist_dice_mtica2(self):
  1084. m = wdice(np.array([1, 0, 1]),
  1085. np.array([1, 1, 0]))
  1086. m2 = wdice(np.array([1, 0, 1], dtype=bool),
  1087. np.array([1, 1, 0], dtype=bool))
  1088. if verbose > 2:
  1089. print(m)
  1090. assert_allclose(m, 0.5, rtol=0, atol=1e-10)
  1091. assert_allclose(m2, 0.5, rtol=0, atol=1e-10)
  1092. def test_pdist_sokalsneath_mtica1(self):
  1093. m = sokalsneath(np.array([1, 0, 1, 1, 0]),
  1094. np.array([1, 1, 0, 1, 1]))
  1095. m2 = sokalsneath(np.array([1, 0, 1, 1, 0], dtype=bool),
  1096. np.array([1, 1, 0, 1, 1], dtype=bool))
  1097. if verbose > 2:
  1098. print(m)
  1099. assert_allclose(m, 3 / 4, rtol=0, atol=1e-10)
  1100. assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10)
  1101. def test_pdist_sokalsneath_mtica2(self):
  1102. m = wsokalsneath(np.array([1, 0, 1]),
  1103. np.array([1, 1, 0]))
  1104. m2 = wsokalsneath(np.array([1, 0, 1], dtype=bool),
  1105. np.array([1, 1, 0], dtype=bool))
  1106. if verbose > 2:
  1107. print(m)
  1108. assert_allclose(m, 4 / 5, rtol=0, atol=1e-10)
  1109. assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10)
  1110. def test_pdist_rogerstanimoto_mtica1(self):
  1111. m = wrogerstanimoto(np.array([1, 0, 1, 1, 0]),
  1112. np.array([1, 1, 0, 1, 1]))
  1113. m2 = wrogerstanimoto(np.array([1, 0, 1, 1, 0], dtype=bool),
  1114. np.array([1, 1, 0, 1, 1], dtype=bool))
  1115. if verbose > 2:
  1116. print(m)
  1117. assert_allclose(m, 3 / 4, rtol=0, atol=1e-10)
  1118. assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10)
  1119. def test_pdist_rogerstanimoto_mtica2(self):
  1120. m = wrogerstanimoto(np.array([1, 0, 1]),
  1121. np.array([1, 1, 0]))
  1122. m2 = wrogerstanimoto(np.array([1, 0, 1], dtype=bool),
  1123. np.array([1, 1, 0], dtype=bool))
  1124. if verbose > 2:
  1125. print(m)
  1126. assert_allclose(m, 4 / 5, rtol=0, atol=1e-10)
  1127. assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10)
  1128. def test_pdist_russellrao_mtica1(self):
  1129. m = wrussellrao(np.array([1, 0, 1, 1, 0]),
  1130. np.array([1, 1, 0, 1, 1]))
  1131. m2 = wrussellrao(np.array([1, 0, 1, 1, 0], dtype=bool),
  1132. np.array([1, 1, 0, 1, 1], dtype=bool))
  1133. if verbose > 2:
  1134. print(m)
  1135. assert_allclose(m, 3 / 5, rtol=0, atol=1e-10)
  1136. assert_allclose(m2, 3 / 5, rtol=0, atol=1e-10)
  1137. def test_pdist_russellrao_mtica2(self):
  1138. m = wrussellrao(np.array([1, 0, 1]),
  1139. np.array([1, 1, 0]))
  1140. m2 = wrussellrao(np.array([1, 0, 1], dtype=bool),
  1141. np.array([1, 1, 0], dtype=bool))
  1142. if verbose > 2:
  1143. print(m)
  1144. assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
  1145. assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)
  1146. @pytest.mark.slow
  1147. def test_pdist_canberra_match(self):
  1148. D = eo['iris']
  1149. if verbose > 2:
  1150. print(D.shape, D.dtype)
  1151. eps = 1e-15
  1152. y1 = wpdist_no_const(D, "canberra")
  1153. y2 = wpdist_no_const(D, "test_canberra")
  1154. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  1155. def test_pdist_canberra_ticket_711(self):
  1156. # Test pdist(X, 'canberra') to see if Canberra gives the right result
  1157. # as reported on gh-1238.
  1158. eps = 1e-8
  1159. pdist_y = wpdist_no_const(([3.3], [3.4]), "canberra")
  1160. right_y = 0.01492537
  1161. assert_allclose(pdist_y, right_y, atol=eps, verbose=verbose > 2)
  1162. @skip_xp_invalid_arg
  1163. def test_pdist_custom_notdouble(self):
  1164. # tests that when using a custom metric the data type is not altered
  1165. class myclass:
  1166. pass
  1167. def _my_metric(x, y):
  1168. if not isinstance(x[0], myclass) or not isinstance(y[0], myclass):
  1169. raise ValueError("Type has been changed")
  1170. return 1.123
  1171. data = np.array([[myclass()], [myclass()]], dtype=object)
  1172. pdist_y = pdist(data, metric=_my_metric)
  1173. right_y = 1.123
  1174. assert_equal(pdist_y, right_y, verbose=verbose > 2)
  1175. def _check_calling_conventions(self, X, metric, eps=1e-07, **kwargs):
  1176. # helper function for test_pdist_calling_conventions
  1177. try:
  1178. y1 = pdist(X, metric=metric, **kwargs)
  1179. y2 = pdist(X, metric=eval(metric), **kwargs)
  1180. y3 = pdist(X, metric="test_" + metric, **kwargs)
  1181. except Exception as e:
  1182. e_cls = e.__class__
  1183. if verbose > 2:
  1184. print(e_cls.__name__)
  1185. print(e)
  1186. with pytest.raises(e_cls):
  1187. pdist(X, metric=metric, **kwargs)
  1188. with pytest.raises(e_cls):
  1189. pdist(X, metric=eval(metric), **kwargs)
  1190. with pytest.raises(e_cls):
  1191. pdist(X, metric="test_" + metric, **kwargs)
  1192. else:
  1193. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  1194. assert_allclose(y1, y3, rtol=eps, verbose=verbose > 2)
  1195. def test_pdist_calling_conventions(self, metric):
  1196. # Ensures that specifying the metric with a str or scipy function
  1197. # gives the same behaviour (i.e. same result or same exception).
  1198. # NOTE: The correctness should be checked within each metric tests.
  1199. # NOTE: Extra args should be checked with a dedicated test
  1200. for eo_name in self.rnd_eo_names:
  1201. # subsampling input data to speed-up tests
  1202. # NOTE: num samples needs to be > than dimensions for mahalanobis
  1203. X = eo[eo_name][::5, ::2]
  1204. if verbose > 2:
  1205. print("testing: ", metric, " with: ", eo_name)
  1206. if metric in {'dice', 'yule', 'matching', 'rogerstanimoto', 'russellrao',
  1207. 'sokalsneath'} and 'bool' not in eo_name:
  1208. # python version permits non-bools e.g. for fuzzy logic
  1209. continue
  1210. self._check_calling_conventions(X, metric)
  1211. # Testing built-in metrics with extra args
  1212. if metric == "seuclidean":
  1213. V = np.var(X.astype(np.float64), axis=0, ddof=1)
  1214. self._check_calling_conventions(X, metric, V=V)
  1215. elif metric == "mahalanobis":
  1216. V = np.atleast_2d(np.cov(X.astype(np.float64).T))
  1217. VI = np.array(np.linalg.inv(V).T)
  1218. self._check_calling_conventions(X, metric, VI=VI)
  1219. def test_pdist_dtype_equivalence(self, metric):
  1220. # Tests that the result is not affected by type up-casting
  1221. eps = 1e-07
  1222. tests = [(eo['random-bool-data'], self.valid_upcasts['bool']),
  1223. (eo['random-uint-data'], self.valid_upcasts['uint']),
  1224. (eo['random-int-data'], self.valid_upcasts['int']),
  1225. (eo['random-float32-data'], self.valid_upcasts['float32'])]
  1226. for test in tests:
  1227. X1 = test[0][::5, ::2]
  1228. try:
  1229. y1 = pdist(X1, metric=metric)
  1230. except Exception as e:
  1231. e_cls = e.__class__
  1232. if verbose > 2:
  1233. print(e_cls.__name__)
  1234. print(e)
  1235. for new_type in test[1]:
  1236. X2 = new_type(X1)
  1237. with pytest.raises(e_cls):
  1238. pdist(X2, metric=metric)
  1239. else:
  1240. for new_type in test[1]:
  1241. y2 = pdist(new_type(X1), metric=metric)
  1242. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  1243. def test_pdist_out(self, metric):
  1244. # Test that out parameter works properly
  1245. eps = 1e-15
  1246. X = eo['random-float32-data'][::5, ::2]
  1247. out_size = int((X.shape[0] * (X.shape[0] - 1)) / 2)
  1248. kwargs = dict()
  1249. if metric == 'minkowski':
  1250. kwargs['p'] = 1.23
  1251. out1 = np.empty(out_size, dtype=np.float64)
  1252. Y_right = pdist(X, metric, **kwargs)
  1253. Y_test1 = pdist(X, metric, out=out1, **kwargs)
  1254. # test that output is numerically equivalent
  1255. assert_allclose(Y_test1, Y_right, rtol=eps)
  1256. # test that Y_test1 and out1 are the same object
  1257. assert_(Y_test1 is out1)
  1258. # test for incorrect shape
  1259. out2 = np.empty(out_size + 3, dtype=np.float64)
  1260. with pytest.raises(ValueError):
  1261. pdist(X, metric, out=out2, **kwargs)
  1262. # test for (C-)contiguous output
  1263. out3 = np.empty(2 * out_size, dtype=np.float64)[::2]
  1264. with pytest.raises(ValueError):
  1265. pdist(X, metric, out=out3, **kwargs)
  1266. # test for incorrect dtype
  1267. out5 = np.empty(out_size, dtype=np.int64)
  1268. with pytest.raises(ValueError):
  1269. pdist(X, metric, out=out5, **kwargs)
  1270. def test_striding(self, metric):
  1271. # test that striding is handled correct with calls to
  1272. # _copy_array_if_base_present
  1273. eps = 1e-15
  1274. X = eo['random-float32-data'][::5, ::2]
  1275. X_copy = X.copy()
  1276. # confirm contiguity
  1277. assert_(not X.flags.c_contiguous)
  1278. assert_(X_copy.flags.c_contiguous)
  1279. kwargs = dict()
  1280. if metric == 'minkowski':
  1281. kwargs['p'] = 1.23
  1282. Y1 = pdist(X, metric, **kwargs)
  1283. Y2 = pdist(X_copy, metric, **kwargs)
  1284. # test that output is numerically equivalent
  1285. assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)
  1286. class TestSomeDistanceFunctions:
  1287. def setup_method(self):
  1288. # 1D arrays
  1289. x = np.array([1.0, 2.0, 3.0])
  1290. y = np.array([1.0, 1.0, 5.0])
  1291. self.cases = [(x, y)]
  1292. def test_minkowski(self):
  1293. for x, y in self.cases:
  1294. dist1 = minkowski(x, y, p=1)
  1295. assert_almost_equal(dist1, 3.0)
  1296. dist1p5 = minkowski(x, y, p=1.5)
  1297. assert_almost_equal(dist1p5, (1.0 + 2.0**1.5)**(2. / 3))
  1298. dist2 = minkowski(x, y, p=2)
  1299. assert_almost_equal(dist2, 5.0 ** 0.5)
  1300. dist0p25 = minkowski(x, y, p=0.25)
  1301. assert_almost_equal(dist0p25, (1.0 + 2.0 ** 0.25) ** 4)
  1302. # Check that casting input to minimum scalar type doesn't affect result
  1303. # (issue #10262). This could be extended to more test inputs with
  1304. # np.min_scalar_type(np.max(input_matrix)).
  1305. a = np.array([352, 916])
  1306. b = np.array([350, 660])
  1307. assert_equal(minkowski(a, b),
  1308. minkowski(a.astype('uint16'), b.astype('uint16')))
  1309. def test_euclidean(self):
  1310. for x, y in self.cases:
  1311. dist = weuclidean(x, y)
  1312. assert_almost_equal(dist, np.sqrt(5))
  1313. def test_sqeuclidean(self):
  1314. for x, y in self.cases:
  1315. dist = wsqeuclidean(x, y)
  1316. assert_almost_equal(dist, 5.0)
  1317. def test_cosine(self):
  1318. for x, y in self.cases:
  1319. dist = wcosine(x, y)
  1320. assert_almost_equal(dist, 1.0 - 18.0 / (np.sqrt(14) * np.sqrt(27)))
  1321. def test_cosine_output_dtype(self):
  1322. # Regression test for gh-19541
  1323. assert isinstance(wcorrelation([1, 1], [1, 1], centered=False), float)
  1324. assert isinstance(wcosine([1, 1], [1, 1]), float)
  1325. def test_correlation(self):
  1326. xm = np.array([-1.0, 0, 1.0])
  1327. ym = np.array([-4.0 / 3, -4.0 / 3, 5.0 - 7.0 / 3])
  1328. for x, y in self.cases:
  1329. dist = wcorrelation(x, y)
  1330. assert_almost_equal(dist, 1.0 - np.dot(xm, ym) / (norm(xm) * norm(ym)))
  1331. def test_correlation_positive(self):
  1332. # Regression test for gh-12320 (negative return value due to rounding
  1333. x = np.array([0., 0., 0., 0., 0., 0., -2., 0., 0., 0., -2., -2., -2.,
  1334. 0., -2., 0., -2., 0., 0., -1., -2., 0., 1., 0., 0., -2.,
  1335. 0., 0., -2., 0., -2., -2., -2., -2., -2., -2., 0.])
  1336. y = np.array([1., 1., 1., 1., 1., 1., -1., 1., 1., 1., -1., -1., -1.,
  1337. 1., -1., 1., -1., 1., 1., 0., -1., 1., 2., 1., 1., -1.,
  1338. 1., 1., -1., 1., -1., -1., -1., -1., -1., -1., 1.])
  1339. dist = correlation(x, y)
  1340. assert 0 <= dist <= 10 * np.finfo(np.float64).eps
  1341. @pytest.mark.filterwarnings('ignore:Casting complex')
  1342. @pytest.mark.parametrize("func", [correlation, cosine])
  1343. def test_corr_dep_complex(self, func):
  1344. x = [1+0j, 2+0j]
  1345. y = [3+0j, 4+0j]
  1346. with pytest.raises(TypeError, match="real"):
  1347. func(x, y)
  1348. def test_mahalanobis(self):
  1349. x = np.array([1.0, 2.0, 3.0])
  1350. y = np.array([1.0, 1.0, 5.0])
  1351. vi = np.array([[2.0, 1.0, 0.0], [1.0, 2.0, 1.0], [0.0, 1.0, 2.0]])
  1352. for x, y in self.cases:
  1353. dist = mahalanobis(x, y, vi)
  1354. assert_almost_equal(dist, np.sqrt(6.0))
  1355. class TestSquareForm:
  1356. checked_dtypes = [np.float64, np.float32, np.int32, np.int8, bool]
  1357. def test_squareform_matrix(self):
  1358. for dtype in self.checked_dtypes:
  1359. self.check_squareform_matrix(dtype)
  1360. def test_squareform_vector(self):
  1361. for dtype in self.checked_dtypes:
  1362. self.check_squareform_vector(dtype)
  1363. def check_squareform_matrix(self, dtype):
  1364. A = np.zeros((0, 0), dtype=dtype)
  1365. rA = squareform(A)
  1366. assert_equal(rA.shape, (0,))
  1367. assert_equal(rA.dtype, dtype)
  1368. A = np.zeros((1, 1), dtype=dtype)
  1369. rA = squareform(A)
  1370. assert_equal(rA.shape, (0,))
  1371. assert_equal(rA.dtype, dtype)
  1372. A = np.array([[0, 4.2], [4.2, 0]], dtype=dtype)
  1373. rA = squareform(A)
  1374. assert_equal(rA.shape, (1,))
  1375. assert_equal(rA.dtype, dtype)
  1376. assert_array_equal(rA, np.array([4.2], dtype=dtype))
  1377. def check_squareform_vector(self, dtype):
  1378. v = np.zeros((0,), dtype=dtype)
  1379. rv = squareform(v)
  1380. assert_equal(rv.shape, (1, 1))
  1381. assert_equal(rv.dtype, dtype)
  1382. assert_array_equal(rv, [[0]])
  1383. v = np.array([8.3], dtype=dtype)
  1384. rv = squareform(v)
  1385. assert_equal(rv.shape, (2, 2))
  1386. assert_equal(rv.dtype, dtype)
  1387. assert_array_equal(rv, np.array([[0, 8.3], [8.3, 0]], dtype=dtype))
  1388. def test_squareform_multi_matrix(self):
  1389. for n in range(2, 5):
  1390. self.check_squareform_multi_matrix(n)
  1391. def check_squareform_multi_matrix(self, n):
  1392. X = np.random.rand(n, 4)
  1393. Y = wpdist_no_const(X)
  1394. assert_equal(len(Y.shape), 1)
  1395. A = squareform(Y)
  1396. Yr = squareform(A)
  1397. s = A.shape
  1398. k = 0
  1399. if verbose >= 3:
  1400. print(A.shape, Y.shape, Yr.shape)
  1401. assert_equal(len(s), 2)
  1402. assert_equal(len(Yr.shape), 1)
  1403. assert_equal(s[0], s[1])
  1404. for i in range(0, s[0]):
  1405. for j in range(i + 1, s[1]):
  1406. if i != j:
  1407. assert_equal(A[i, j], Y[k])
  1408. k += 1
  1409. else:
  1410. assert_equal(A[i, j], 0)
  1411. class TestNumObsY:
  1412. def test_num_obs_y_multi_matrix(self):
  1413. for n in range(2, 10):
  1414. X = np.random.rand(n, 4)
  1415. Y = wpdist_no_const(X)
  1416. assert_equal(num_obs_y(Y), n)
  1417. def test_num_obs_y_1(self):
  1418. # Tests num_obs_y(y) on a condensed distance matrix over 1
  1419. # observations. Expecting exception.
  1420. with pytest.raises(ValueError):
  1421. self.check_y(1)
  1422. def test_num_obs_y_2(self):
  1423. # Tests num_obs_y(y) on a condensed distance matrix over 2
  1424. # observations.
  1425. assert_(self.check_y(2))
  1426. def test_num_obs_y_3(self):
  1427. assert_(self.check_y(3))
  1428. def test_num_obs_y_4(self):
  1429. assert_(self.check_y(4))
  1430. def test_num_obs_y_5_10(self):
  1431. for i in range(5, 16):
  1432. self.minit(i)
  1433. def test_num_obs_y_2_100(self):
  1434. # Tests num_obs_y(y) on 100 improper condensed distance matrices.
  1435. # Expecting exception.
  1436. a = set()
  1437. for n in range(2, 16):
  1438. a.add(n * (n - 1) / 2)
  1439. for i in range(5, 105):
  1440. if i not in a:
  1441. with pytest.raises(ValueError):
  1442. self.bad_y(i)
  1443. def minit(self, n):
  1444. assert_(self.check_y(n))
  1445. def bad_y(self, n):
  1446. y = np.random.rand(n)
  1447. return num_obs_y(y)
  1448. def check_y(self, n):
  1449. return num_obs_y(self.make_y(n)) == n
  1450. def make_y(self, n):
  1451. return np.random.rand((n * (n - 1)) // 2)
  1452. class TestNumObsDM:
  1453. def test_num_obs_dm_multi_matrix(self):
  1454. for n in range(1, 10):
  1455. X = np.random.rand(n, 4)
  1456. Y = wpdist_no_const(X)
  1457. A = squareform(Y)
  1458. if verbose >= 3:
  1459. print(A.shape, Y.shape)
  1460. assert_equal(num_obs_dm(A), n)
  1461. def test_num_obs_dm_0(self):
  1462. # Tests num_obs_dm(D) on a 0x0 distance matrix. Expecting exception.
  1463. assert_(self.check_D(0))
  1464. def test_num_obs_dm_1(self):
  1465. # Tests num_obs_dm(D) on a 1x1 distance matrix.
  1466. assert_(self.check_D(1))
  1467. def test_num_obs_dm_2(self):
  1468. assert_(self.check_D(2))
  1469. def test_num_obs_dm_3(self):
  1470. assert_(self.check_D(2))
  1471. def test_num_obs_dm_4(self):
  1472. assert_(self.check_D(4))
  1473. def check_D(self, n):
  1474. return num_obs_dm(self.make_D(n)) == n
  1475. def make_D(self, n):
  1476. return np.random.rand(n, n)
  1477. def is_valid_dm_throw(D):
  1478. return is_valid_dm(D, throw=True)
  1479. class TestIsValidDM:
  1480. def test_is_valid_dm_improper_shape_1D_E(self):
  1481. D = np.zeros((5,), dtype=np.float64)
  1482. with pytest.raises(ValueError):
  1483. is_valid_dm_throw(D)
  1484. def test_is_valid_dm_improper_shape_1D_F(self):
  1485. D = np.zeros((5,), dtype=np.float64)
  1486. assert_equal(is_valid_dm(D), False)
  1487. def test_is_valid_dm_improper_shape_3D_E(self):
  1488. D = np.zeros((3, 3, 3), dtype=np.float64)
  1489. with pytest.raises(ValueError):
  1490. is_valid_dm_throw(D)
  1491. def test_is_valid_dm_improper_shape_3D_F(self):
  1492. D = np.zeros((3, 3, 3), dtype=np.float64)
  1493. assert_equal(is_valid_dm(D), False)
  1494. def test_is_valid_dm_nonzero_diagonal_E(self):
  1495. y = np.random.rand(10)
  1496. D = squareform(y)
  1497. for i in range(0, 5):
  1498. D[i, i] = 2.0
  1499. with pytest.raises(ValueError):
  1500. is_valid_dm_throw(D)
  1501. def test_is_valid_dm_nonzero_diagonal_F(self):
  1502. y = np.random.rand(10)
  1503. D = squareform(y)
  1504. for i in range(0, 5):
  1505. D[i, i] = 2.0
  1506. assert_equal(is_valid_dm(D), False)
  1507. def test_is_valid_dm_asymmetric_E(self):
  1508. y = np.random.rand(10)
  1509. D = squareform(y)
  1510. D[1, 3] = D[3, 1] + 1
  1511. with pytest.raises(ValueError):
  1512. is_valid_dm_throw(D)
  1513. def test_is_valid_dm_asymmetric_F(self):
  1514. y = np.random.rand(10)
  1515. D = squareform(y)
  1516. D[1, 3] = D[3, 1] + 1
  1517. assert_equal(is_valid_dm(D), False)
  1518. def test_is_valid_dm_correct_1_by_1(self):
  1519. D = np.zeros((1, 1), dtype=np.float64)
  1520. assert_equal(is_valid_dm(D), True)
  1521. def test_is_valid_dm_correct_2_by_2(self):
  1522. y = np.random.rand(1)
  1523. D = squareform(y)
  1524. assert_equal(is_valid_dm(D), True)
  1525. def test_is_valid_dm_correct_3_by_3(self):
  1526. y = np.random.rand(3)
  1527. D = squareform(y)
  1528. assert_equal(is_valid_dm(D), True)
  1529. def test_is_valid_dm_correct_4_by_4(self):
  1530. y = np.random.rand(6)
  1531. D = squareform(y)
  1532. assert_equal(is_valid_dm(D), True)
  1533. def test_is_valid_dm_correct_5_by_5(self):
  1534. y = np.random.rand(10)
  1535. D = squareform(y)
  1536. assert_equal(is_valid_dm(D), True)
  1537. def is_valid_y_throw(y):
  1538. return is_valid_y(y, throw=True)
  1539. class TestIsValidY:
  1540. # If test case name ends on "_E" then an exception is expected for the
  1541. # given input, if it ends in "_F" then False is expected for the is_valid_y
  1542. # check. Otherwise the input is expected to be valid.
  1543. def test_is_valid_y_improper_shape_2D_E(self):
  1544. y = np.zeros((3, 3,), dtype=np.float64)
  1545. with pytest.raises(ValueError):
  1546. is_valid_y_throw(y)
  1547. def test_is_valid_y_improper_shape_2D_F(self):
  1548. y = np.zeros((3, 3,), dtype=np.float64)
  1549. assert_equal(is_valid_y(y), False)
  1550. def test_is_valid_y_improper_shape_3D_E(self):
  1551. y = np.zeros((3, 3, 3), dtype=np.float64)
  1552. with pytest.raises(ValueError):
  1553. is_valid_y_throw(y)
  1554. def test_is_valid_y_improper_shape_3D_F(self):
  1555. y = np.zeros((3, 3, 3), dtype=np.float64)
  1556. assert_equal(is_valid_y(y), False)
  1557. def test_is_valid_y_correct_2_by_2(self):
  1558. y = self.correct_n_by_n(2)
  1559. assert_equal(is_valid_y(y), True)
  1560. def test_is_valid_y_correct_3_by_3(self):
  1561. y = self.correct_n_by_n(3)
  1562. assert_equal(is_valid_y(y), True)
  1563. def test_is_valid_y_correct_4_by_4(self):
  1564. y = self.correct_n_by_n(4)
  1565. assert_equal(is_valid_y(y), True)
  1566. def test_is_valid_y_correct_5_by_5(self):
  1567. y = self.correct_n_by_n(5)
  1568. assert_equal(is_valid_y(y), True)
  1569. def test_is_valid_y_2_100(self):
  1570. a = set()
  1571. for n in range(2, 16):
  1572. a.add(n * (n - 1) / 2)
  1573. for i in range(5, 105):
  1574. if i not in a:
  1575. with pytest.raises(ValueError):
  1576. self.bad_y(i)
  1577. def bad_y(self, n):
  1578. y = np.random.rand(n)
  1579. return is_valid_y(y, throw=True)
  1580. def correct_n_by_n(self, n):
  1581. y = np.random.rand((n * (n - 1)) // 2)
  1582. return y
  1583. @pytest.mark.parametrize("p", [-10.0, -0.5, 0.0])
  1584. def test_bad_p(p):
  1585. # Raise ValueError if p <=0.
  1586. with pytest.raises(ValueError):
  1587. minkowski([1, 2], [3, 4], p)
  1588. with pytest.raises(ValueError):
  1589. minkowski([1, 2], [3, 4], p, [1, 1])
  1590. def test_sokalsneath_all_false():
  1591. # Regression test for ticket #876
  1592. with pytest.raises(ValueError):
  1593. sokalsneath([False, False, False], [False, False, False])
  1594. def test_canberra():
  1595. # Regression test for ticket #1430.
  1596. assert_equal(wcanberra([1, 2, 3], [2, 4, 6]), 1)
  1597. assert_equal(wcanberra([1, 1, 0, 0], [1, 0, 1, 0]), 2)
  1598. def test_braycurtis():
  1599. # Regression test for ticket #1430.
  1600. assert_almost_equal(wbraycurtis([1, 2, 3], [2, 4, 6]), 1. / 3, decimal=15)
  1601. assert_almost_equal(wbraycurtis([1, 1, 0, 0], [1, 0, 1, 0]), 0.5, decimal=15)
  1602. def test_euclideans():
  1603. # Regression test for ticket #1328.
  1604. x1 = np.array([1, 1, 1])
  1605. x2 = np.array([0, 0, 0])
  1606. # Basic test of the calculation.
  1607. assert_almost_equal(wsqeuclidean(x1, x2), 3.0, decimal=14)
  1608. assert_almost_equal(weuclidean(x1, x2), np.sqrt(3), decimal=14)
  1609. # Check flattening for (1, N) or (N, 1) inputs
  1610. with pytest.raises(ValueError, match="Input vector should be 1-D"):
  1611. weuclidean(x1[np.newaxis, :], x2[np.newaxis, :]), np.sqrt(3)
  1612. with pytest.raises(ValueError, match="Input vector should be 1-D"):
  1613. wsqeuclidean(x1[np.newaxis, :], x2[np.newaxis, :])
  1614. with pytest.raises(ValueError, match="Input vector should be 1-D"):
  1615. wsqeuclidean(x1[:, np.newaxis], x2[:, np.newaxis])
  1616. # Distance metrics only defined for vectors (= 1-D)
  1617. x = np.arange(4).reshape(2, 2)
  1618. with pytest.raises(ValueError):
  1619. weuclidean(x, x)
  1620. with pytest.raises(ValueError):
  1621. wsqeuclidean(x, x)
  1622. # Another check, with random data.
  1623. rs = np.random.RandomState(1234567890)
  1624. x = rs.rand(10)
  1625. y = rs.rand(10)
  1626. d1 = weuclidean(x, y)
  1627. d2 = wsqeuclidean(x, y)
  1628. assert_almost_equal(d1**2, d2, decimal=14)
  1629. def test_hamming_unequal_length():
  1630. # Regression test for gh-4290.
  1631. x = [0, 0, 1]
  1632. y = [1, 0, 1, 0]
  1633. # Used to give an AttributeError from ndarray.mean called on bool
  1634. with pytest.raises(ValueError):
  1635. whamming(x, y)
  1636. def test_hamming_unequal_length_with_w():
  1637. u = [0, 0, 1]
  1638. v = [0, 0, 1]
  1639. w = [1, 0, 1, 0]
  1640. msg = "'w' should have the same length as 'u' and 'v'."
  1641. with pytest.raises(ValueError, match=msg):
  1642. whamming(u, v, w)
  1643. def test_hamming_string_array():
  1644. # https://github.com/scikit-learn/scikit-learn/issues/4014
  1645. a = np.array(['eggs', 'spam', 'spam', 'eggs', 'spam', 'spam', 'spam',
  1646. 'spam', 'spam', 'spam', 'spam', 'eggs', 'eggs', 'spam',
  1647. 'eggs', 'eggs', 'eggs', 'eggs', 'eggs', 'spam'],
  1648. dtype='|S4')
  1649. b = np.array(['eggs', 'spam', 'spam', 'eggs', 'eggs', 'spam', 'spam',
  1650. 'spam', 'spam', 'eggs', 'spam', 'eggs', 'spam', 'eggs',
  1651. 'spam', 'spam', 'eggs', 'spam', 'spam', 'eggs'],
  1652. dtype='|S4')
  1653. desired = 0.45
  1654. assert_allclose(whamming(a, b), desired)
  1655. def test_minkowski_w():
  1656. # Regression test for gh-8142.
  1657. arr_in = np.array([[83.33333333, 100., 83.33333333, 100., 36.,
  1658. 60., 90., 150., 24., 48.],
  1659. [83.33333333, 100., 83.33333333, 100., 36.,
  1660. 60., 90., 150., 24., 48.]])
  1661. p0 = pdist(arr_in, metric='minkowski', p=1, w=None)
  1662. c0 = cdist(arr_in, arr_in, metric='minkowski', p=1, w=None)
  1663. p1 = pdist(arr_in, metric='minkowski', p=1)
  1664. c1 = cdist(arr_in, arr_in, metric='minkowski', p=1)
  1665. assert_allclose(p0, p1, rtol=1e-15)
  1666. assert_allclose(c0, c1, rtol=1e-15)
  1667. def test_sqeuclidean_dtypes():
  1668. # Assert that sqeuclidean returns the right types of values.
  1669. # Integer types should be converted to floating for stability.
  1670. # Floating point types should be the same as the input.
  1671. x = [1, 2, 3]
  1672. y = [4, 5, 6]
  1673. for dtype in [np.int8, np.int16, np.int32, np.int64]:
  1674. d = wsqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype))
  1675. assert_(np.issubdtype(d.dtype, np.floating))
  1676. for dtype in [np.uint8, np.uint16, np.uint32, np.uint64]:
  1677. umax = np.iinfo(dtype).max
  1678. d1 = wsqeuclidean([0], np.asarray([umax], dtype=dtype))
  1679. d2 = wsqeuclidean(np.asarray([umax], dtype=dtype), [0])
  1680. assert_equal(d1, d2)
  1681. assert_equal(d1, np.float64(umax)**2)
  1682. dtypes = [np.float32, np.float64, np.complex64, np.complex128]
  1683. for dtype in ['float16', 'float128']:
  1684. # These aren't present in older numpy versions; float128 may also not
  1685. # be present on all platforms.
  1686. if hasattr(np, dtype):
  1687. dtypes.append(getattr(np, dtype))
  1688. for dtype in dtypes:
  1689. d = wsqeuclidean(np.asarray(x, dtype=dtype), np.asarray(y, dtype=dtype))
  1690. assert_equal(d.dtype, dtype)
  1691. def test_modifies_input(metric):
  1692. # test whether cdist or pdist modifies input arrays
  1693. X1 = np.asarray([[1., 2., 3.],
  1694. [1.2, 2.3, 3.4],
  1695. [2.2, 2.3, 4.4],
  1696. [22.2, 23.3, 44.4]])
  1697. X1_copy = X1.copy()
  1698. cdist(X1, X1, metric)
  1699. pdist(X1, metric)
  1700. assert_array_equal(X1, X1_copy)
  1701. def test_Xdist_deprecated_args(metric):
  1702. # testing both cdist and pdist deprecated warnings
  1703. X1 = np.asarray([[1., 2., 3.],
  1704. [1.2, 2.3, 3.4],
  1705. [2.2, 2.3, 4.4],
  1706. [22.2, 23.3, 44.4]])
  1707. with pytest.raises(TypeError):
  1708. cdist(X1, X1, metric, 2.)
  1709. with pytest.raises(TypeError):
  1710. pdist(X1, metric, 2.)
  1711. for arg in ["p", "V", "VI"]:
  1712. kwargs = {arg: np.asarray(1.)}
  1713. if ((arg == "V" and metric == "seuclidean")
  1714. or (arg == "VI" and metric == "mahalanobis")
  1715. or (arg == "p" and metric == "minkowski")):
  1716. continue
  1717. with pytest.raises(TypeError):
  1718. cdist(X1, X1, metric, **kwargs)
  1719. with pytest.raises(TypeError):
  1720. pdist(X1, metric, **kwargs)
  1721. def test_Xdist_non_negative_weights(metric):
  1722. X = eo['random-float32-data'][::5, ::2]
  1723. w = np.ones(X.shape[1])
  1724. w[::5] = -w[::5]
  1725. if metric in ['seuclidean', 'mahalanobis', 'jensenshannon']:
  1726. pytest.skip("not applicable")
  1727. for m in [metric, eval(metric), "test_" + metric]:
  1728. with pytest.raises(ValueError):
  1729. pdist(X, m, w=w)
  1730. with pytest.raises(ValueError):
  1731. cdist(X, X, m, w=w)
  1732. def test__validate_vector():
  1733. x = [1, 2, 3]
  1734. y = _validate_vector(x)
  1735. assert_array_equal(y, x)
  1736. y = _validate_vector(x, dtype=np.float64)
  1737. assert_array_equal(y, x)
  1738. assert_equal(y.dtype, np.float64)
  1739. x = [1]
  1740. y = _validate_vector(x)
  1741. assert_equal(y.ndim, 1)
  1742. assert_equal(y, x)
  1743. x = 1
  1744. with pytest.raises(ValueError, match="Input vector should be 1-D"):
  1745. _validate_vector(x)
  1746. x = np.arange(5).reshape(1, -1, 1)
  1747. with pytest.raises(ValueError, match="Input vector should be 1-D"):
  1748. _validate_vector(x)
  1749. x = [[1, 2], [3, 4]]
  1750. with pytest.raises(ValueError, match="Input vector should be 1-D"):
  1751. _validate_vector(x)
  1752. def test_yule_all_same():
  1753. # Test yule avoids a divide by zero when exactly equal
  1754. x = np.ones((2, 6), dtype=bool)
  1755. d = wyule(x[0], x[0])
  1756. assert d == 0.0
  1757. d = pdist(x, 'yule')
  1758. assert_equal(d, [0.0])
  1759. d = cdist(x[:1], x[:1], 'yule')
  1760. assert_equal(d, [[0.0]])
  1761. def test_jensenshannon():
  1762. assert_almost_equal(jensenshannon([1.0, 0.0, 0.0], [0.0, 1.0, 0.0], 2.0),
  1763. 1.0)
  1764. assert_almost_equal(jensenshannon([1.0, 0.0], [0.5, 0.5]),
  1765. 0.46450140402245893)
  1766. assert_almost_equal(jensenshannon([1.0, 0.0, 0.0], [1.0, 0.0, 0.0]), 0.0)
  1767. assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=0),
  1768. [0.0, 0.0])
  1769. assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=1),
  1770. [0.0649045])
  1771. assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=0,
  1772. keepdims=True), [[0.0, 0.0]])
  1773. assert_almost_equal(jensenshannon([[1.0, 2.0]], [[0.5, 1.5]], axis=1,
  1774. keepdims=True), [[0.0649045]])
  1775. a = np.array([[1, 2, 3, 4],
  1776. [5, 6, 7, 8],
  1777. [9, 10, 11, 12]])
  1778. b = np.array([[13, 14, 15, 16],
  1779. [17, 18, 19, 20],
  1780. [21, 22, 23, 24]])
  1781. assert_almost_equal(jensenshannon(a, b, axis=0),
  1782. [0.1954288, 0.1447697, 0.1138377, 0.0927636])
  1783. assert_almost_equal(jensenshannon(a, b, axis=1),
  1784. [0.1402339, 0.0399106, 0.0201815])
  1785. def test_gh_17703():
  1786. arr_1 = np.array([1, 0, 0])
  1787. arr_2 = np.array([2, 0, 0])
  1788. expected = dice(arr_1, arr_2)
  1789. actual = pdist([arr_1, arr_2], metric='dice')
  1790. assert_allclose(actual, expected)
  1791. actual = cdist(np.atleast_2d(arr_1),
  1792. np.atleast_2d(arr_2), metric='dice')
  1793. assert_allclose(actual, expected)
  1794. def test_immutable_input(metric):
  1795. if metric in ("jensenshannon", "mahalanobis", "seuclidean"):
  1796. pytest.skip("not applicable")
  1797. x = np.arange(10, dtype=np.float64)
  1798. x.setflags(write=False)
  1799. getattr(scipy.spatial.distance, metric)(x, x, w=x)
  1800. def test_gh_23109():
  1801. a = np.array([0, 0, 1, 1])
  1802. b = np.array([0, 1, 1, 0])
  1803. w = np.asarray([1.5, 1.2, 0.7, 1.3])
  1804. expected = yule(a, b, w=w)
  1805. assert_allclose(expected, 1.1954022988505748)
  1806. actual = cdist(np.atleast_2d(a),
  1807. np.atleast_2d(b),
  1808. metric='yule', w=w)
  1809. assert_allclose(actual, expected)
  1810. class TestJaccard:
  1811. def test_pdist_jaccard_random(self):
  1812. eps = 1e-8
  1813. X = eo['pdist-boolean-inp']
  1814. Y_right = eo['pdist-jaccard']
  1815. Y_test1 = wpdist(X, 'jaccard')
  1816. assert_allclose(Y_test1, Y_right, rtol=eps)
  1817. def test_pdist_jaccard_random_float32(self):
  1818. eps = 1e-8
  1819. X = np.float32(eo['pdist-boolean-inp'])
  1820. Y_right = eo['pdist-jaccard']
  1821. Y_test1 = wpdist(X, 'jaccard')
  1822. assert_allclose(Y_test1, Y_right, rtol=eps)
  1823. def test_pdist_jaccard_random_nonC(self):
  1824. eps = 1e-8
  1825. X = eo['pdist-boolean-inp']
  1826. Y_right = eo['pdist-jaccard']
  1827. Y_test2 = wpdist(X, 'test_jaccard')
  1828. assert_allclose(Y_test2, Y_right, rtol=eps)
  1829. def test_pdist_djaccard_random(self):
  1830. eps = 1e-8
  1831. X = np.float64(eo['pdist-boolean-inp'])
  1832. Y_right = eo['pdist-jaccard']
  1833. Y_test1 = wpdist(X, 'jaccard')
  1834. assert_allclose(Y_test1, Y_right, rtol=eps)
  1835. def test_pdist_djaccard_random_float32(self):
  1836. eps = 1e-8
  1837. X = np.float32(eo['pdist-boolean-inp'])
  1838. Y_right = eo['pdist-jaccard']
  1839. Y_test1 = wpdist(X, 'jaccard')
  1840. assert_allclose(Y_test1, Y_right, rtol=eps)
  1841. def test_pdist_djaccard_allzeros(self):
  1842. eps = 1e-15
  1843. Y = pdist(np.zeros((5, 3)), 'jaccard')
  1844. assert_allclose(np.zeros(10), Y, rtol=eps)
  1845. def test_pdist_djaccard_random_nonC(self):
  1846. eps = 1e-8
  1847. X = np.float64(eo['pdist-boolean-inp'])
  1848. Y_right = eo['pdist-jaccard']
  1849. Y_test2 = wpdist(X, 'test_jaccard')
  1850. assert_allclose(Y_test2, Y_right, rtol=eps)
  1851. def test_pdist_djaccard_allzeros_nonC(self):
  1852. eps = 1e-15
  1853. Y = pdist(np.zeros((5, 3)), 'test_jaccard')
  1854. assert_allclose(np.zeros(10), Y, rtol=eps)
  1855. def test_pdist_jaccard_mtica1(self):
  1856. m = wjaccard(np.array([1, 0, 1, 1, 0]),
  1857. np.array([1, 1, 0, 1, 1]))
  1858. m2 = wjaccard(np.array([1, 0, 1, 1, 0], dtype=bool),
  1859. np.array([1, 1, 0, 1, 1], dtype=bool))
  1860. assert_allclose(m, 0.6, rtol=0, atol=1e-10)
  1861. assert_allclose(m2, 0.6, rtol=0, atol=1e-10)
  1862. def test_pdist_jaccard_mtica2(self):
  1863. m = wjaccard(np.array([1, 0, 1]),
  1864. np.array([1, 1, 0]))
  1865. m2 = wjaccard(np.array([1, 0, 1], dtype=bool),
  1866. np.array([1, 1, 0], dtype=bool))
  1867. assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
  1868. assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)
  1869. def test_non_01_input(self):
  1870. # Non-0/1 numeric input should be cast to bool before computation.
  1871. # See gh-21176.
  1872. x = np.array([-10, 2.5, 0]) # [True, True, False]
  1873. y = np.array([ 2, -5, 2]) # [True, True, True]
  1874. eps = np.finfo(float).eps
  1875. assert_allclose(jaccard(x, y), 1/3, rtol=eps)
  1876. assert_allclose(cdist([x], [y], 'jaccard'), [[1/3]])
  1877. assert_allclose(pdist([x, y], 'jaccard'), [1/3])
  1878. class TestChebyshev:
  1879. def test_pdist_chebyshev_random(self):
  1880. eps = 1e-8
  1881. X = eo['pdist-double-inp']
  1882. Y_right = eo['pdist-chebyshev']
  1883. Y_test1 = pdist(X, 'chebyshev')
  1884. assert_allclose(Y_test1, Y_right, rtol=eps)
  1885. def test_pdist_chebyshev_random_float32(self):
  1886. eps = 1e-7
  1887. X = np.float32(eo['pdist-double-inp'])
  1888. Y_right = eo['pdist-chebyshev']
  1889. Y_test1 = pdist(X, 'chebyshev')
  1890. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  1891. def test_pdist_chebyshev_random_nonC(self):
  1892. eps = 1e-8
  1893. X = eo['pdist-double-inp']
  1894. Y_right = eo['pdist-chebyshev']
  1895. Y_test2 = pdist(X, 'test_chebyshev')
  1896. assert_allclose(Y_test2, Y_right, rtol=eps)
  1897. def test_pdist_chebyshev_iris(self):
  1898. eps = 1e-14
  1899. X = eo['iris']
  1900. Y_right = eo['pdist-chebyshev-iris']
  1901. Y_test1 = pdist(X, 'chebyshev')
  1902. assert_allclose(Y_test1, Y_right, rtol=eps)
  1903. def test_pdist_chebyshev_iris_float32(self):
  1904. eps = 1e-5
  1905. X = np.float32(eo['iris'])
  1906. Y_right = eo['pdist-chebyshev-iris']
  1907. Y_test1 = pdist(X, 'chebyshev')
  1908. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  1909. def test_pdist_chebyshev_iris_nonC(self):
  1910. eps = 1e-14
  1911. X = eo['iris']
  1912. Y_right = eo['pdist-chebyshev-iris']
  1913. Y_test2 = pdist(X, 'test_chebyshev')
  1914. assert_allclose(Y_test2, Y_right, rtol=eps)
  1915. def test_weighted(self):
  1916. # Basic test for weighted Chebyshev. Only components with non-zero
  1917. # weight participate in the 'max'.
  1918. x = [1, 2, 3]
  1919. y = [6, 5, 4]
  1920. w = [0, 1, 5]
  1921. assert_equal(chebyshev(x, y, w), 3)
  1922. assert_equal(pdist([x, y], 'chebyshev', w=w), [3])
  1923. assert_equal(cdist([x], [y], 'chebyshev', w=w), [[3]])
  1924. def test_zero_weight(self):
  1925. # If the weight is identically zero, the distance should be zero.
  1926. x = [1, 2, 3]
  1927. y = [6, 5, 4]
  1928. w = [0, 0, 0]
  1929. assert_equal(chebyshev(x, y, w), 0)
  1930. assert_equal(pdist([x, y], 'chebyshev', w=w), [0])
  1931. assert_equal(cdist([x], [y], 'chebyshev', w=w), [[0]])