test_arraysetops.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000
  1. """Test functions for 1D array set operations.
  2. """
  3. import numpy as np
  4. from numpy import (
  5. ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, isin
  6. )
  7. from numpy.exceptions import AxisError
  8. from numpy.testing import (assert_array_equal, assert_equal,
  9. assert_raises, assert_raises_regex)
  10. import pytest
  11. class TestSetOps:
  12. def test_intersect1d(self):
  13. # unique inputs
  14. a = np.array([5, 7, 1, 2])
  15. b = np.array([2, 4, 3, 1, 5])
  16. ec = np.array([1, 2, 5])
  17. c = intersect1d(a, b, assume_unique=True)
  18. assert_array_equal(c, ec)
  19. # non-unique inputs
  20. a = np.array([5, 5, 7, 1, 2])
  21. b = np.array([2, 1, 4, 3, 3, 1, 5])
  22. ed = np.array([1, 2, 5])
  23. c = intersect1d(a, b)
  24. assert_array_equal(c, ed)
  25. assert_array_equal([], intersect1d([], []))
  26. def test_intersect1d_array_like(self):
  27. # See gh-11772
  28. class Test:
  29. def __array__(self, dtype=None, copy=None):
  30. return np.arange(3)
  31. a = Test()
  32. res = intersect1d(a, a)
  33. assert_array_equal(res, a)
  34. res = intersect1d([1, 2, 3], [1, 2, 3])
  35. assert_array_equal(res, [1, 2, 3])
  36. def test_intersect1d_indices(self):
  37. # unique inputs
  38. a = np.array([1, 2, 3, 4])
  39. b = np.array([2, 1, 4, 6])
  40. c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
  41. ee = np.array([1, 2, 4])
  42. assert_array_equal(c, ee)
  43. assert_array_equal(a[i1], ee)
  44. assert_array_equal(b[i2], ee)
  45. # non-unique inputs
  46. a = np.array([1, 2, 2, 3, 4, 3, 2])
  47. b = np.array([1, 8, 4, 2, 2, 3, 2, 3])
  48. c, i1, i2 = intersect1d(a, b, return_indices=True)
  49. ef = np.array([1, 2, 3, 4])
  50. assert_array_equal(c, ef)
  51. assert_array_equal(a[i1], ef)
  52. assert_array_equal(b[i2], ef)
  53. # non1d, unique inputs
  54. a = np.array([[2, 4, 5, 6], [7, 8, 1, 15]])
  55. b = np.array([[3, 2, 7, 6], [10, 12, 8, 9]])
  56. c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
  57. ui1 = np.unravel_index(i1, a.shape)
  58. ui2 = np.unravel_index(i2, b.shape)
  59. ea = np.array([2, 6, 7, 8])
  60. assert_array_equal(ea, a[ui1])
  61. assert_array_equal(ea, b[ui2])
  62. # non1d, not assumed to be uniqueinputs
  63. a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]])
  64. b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]])
  65. c, i1, i2 = intersect1d(a, b, return_indices=True)
  66. ui1 = np.unravel_index(i1, a.shape)
  67. ui2 = np.unravel_index(i2, b.shape)
  68. ea = np.array([2, 7, 8])
  69. assert_array_equal(ea, a[ui1])
  70. assert_array_equal(ea, b[ui2])
  71. def test_setxor1d(self):
  72. a = np.array([5, 7, 1, 2])
  73. b = np.array([2, 4, 3, 1, 5])
  74. ec = np.array([3, 4, 7])
  75. c = setxor1d(a, b)
  76. assert_array_equal(c, ec)
  77. a = np.array([1, 2, 3])
  78. b = np.array([6, 5, 4])
  79. ec = np.array([1, 2, 3, 4, 5, 6])
  80. c = setxor1d(a, b)
  81. assert_array_equal(c, ec)
  82. a = np.array([1, 8, 2, 3])
  83. b = np.array([6, 5, 4, 8])
  84. ec = np.array([1, 2, 3, 4, 5, 6])
  85. c = setxor1d(a, b)
  86. assert_array_equal(c, ec)
  87. assert_array_equal([], setxor1d([], []))
  88. def test_setxor1d_unique(self):
  89. a = np.array([1, 8, 2, 3])
  90. b = np.array([6, 5, 4, 8])
  91. ec = np.array([1, 2, 3, 4, 5, 6])
  92. c = setxor1d(a, b, assume_unique=True)
  93. assert_array_equal(c, ec)
  94. a = np.array([[1], [8], [2], [3]])
  95. b = np.array([[6, 5], [4, 8]])
  96. ec = np.array([1, 2, 3, 4, 5, 6])
  97. c = setxor1d(a, b, assume_unique=True)
  98. assert_array_equal(c, ec)
  99. def test_ediff1d(self):
  100. zero_elem = np.array([])
  101. one_elem = np.array([1])
  102. two_elem = np.array([1, 2])
  103. assert_array_equal([], ediff1d(zero_elem))
  104. assert_array_equal([0], ediff1d(zero_elem, to_begin=0))
  105. assert_array_equal([0], ediff1d(zero_elem, to_end=0))
  106. assert_array_equal([-1, 0], ediff1d(zero_elem, to_begin=-1, to_end=0))
  107. assert_array_equal([], ediff1d(one_elem))
  108. assert_array_equal([1], ediff1d(two_elem))
  109. assert_array_equal([7, 1, 9], ediff1d(two_elem, to_begin=7, to_end=9))
  110. assert_array_equal([5, 6, 1, 7, 8],
  111. ediff1d(two_elem, to_begin=[5, 6], to_end=[7, 8]))
  112. assert_array_equal([1, 9], ediff1d(two_elem, to_end=9))
  113. assert_array_equal([1, 7, 8], ediff1d(two_elem, to_end=[7, 8]))
  114. assert_array_equal([7, 1], ediff1d(two_elem, to_begin=7))
  115. assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6]))
  116. @pytest.mark.parametrize("ary, prepend, append, expected", [
  117. # should fail because trying to cast
  118. # np.nan standard floating point value
  119. # into an integer array:
  120. (np.array([1, 2, 3], dtype=np.int64),
  121. None,
  122. np.nan,
  123. 'to_end'),
  124. # should fail because attempting
  125. # to downcast to int type:
  126. (np.array([1, 2, 3], dtype=np.int64),
  127. np.array([5, 7, 2], dtype=np.float32),
  128. None,
  129. 'to_begin'),
  130. # should fail because attempting to cast
  131. # two special floating point values
  132. # to integers (on both sides of ary),
  133. # `to_begin` is in the error message as the impl checks this first:
  134. (np.array([1., 3., 9.], dtype=np.int8),
  135. np.nan,
  136. np.nan,
  137. 'to_begin'),
  138. ])
  139. def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected):
  140. # verify resolution of gh-11490
  141. # specifically, raise an appropriate
  142. # Exception when attempting to append or
  143. # prepend with an incompatible type
  144. msg = 'dtype of `{}` must be compatible'.format(expected)
  145. with assert_raises_regex(TypeError, msg):
  146. ediff1d(ary=ary,
  147. to_end=append,
  148. to_begin=prepend)
  149. @pytest.mark.parametrize(
  150. "ary,prepend,append,expected",
  151. [
  152. (np.array([1, 2, 3], dtype=np.int16),
  153. 2**16, # will be cast to int16 under same kind rule.
  154. 2**16 + 4,
  155. np.array([0, 1, 1, 4], dtype=np.int16)),
  156. (np.array([1, 2, 3], dtype=np.float32),
  157. np.array([5], dtype=np.float64),
  158. None,
  159. np.array([5, 1, 1], dtype=np.float32)),
  160. (np.array([1, 2, 3], dtype=np.int32),
  161. 0,
  162. 0,
  163. np.array([0, 1, 1, 0], dtype=np.int32)),
  164. (np.array([1, 2, 3], dtype=np.int64),
  165. 3,
  166. -9,
  167. np.array([3, 1, 1, -9], dtype=np.int64)),
  168. ]
  169. )
  170. def test_ediff1d_scalar_handling(self,
  171. ary,
  172. prepend,
  173. append,
  174. expected):
  175. # maintain backwards-compatibility
  176. # of scalar prepend / append behavior
  177. # in ediff1d following fix for gh-11490
  178. actual = np.ediff1d(ary=ary,
  179. to_end=append,
  180. to_begin=prepend)
  181. assert_equal(actual, expected)
  182. assert actual.dtype == expected.dtype
  183. @pytest.mark.parametrize("kind", [None, "sort", "table"])
  184. def test_isin(self, kind):
  185. def _isin_slow(a, b):
  186. b = np.asarray(b).flatten().tolist()
  187. return a in b
  188. isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})
  189. def assert_isin_equal(a, b):
  190. x = isin(a, b, kind=kind)
  191. y = isin_slow(a, b)
  192. assert_array_equal(x, y)
  193. # multidimensional arrays in both arguments
  194. a = np.arange(24).reshape([2, 3, 4])
  195. b = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]])
  196. assert_isin_equal(a, b)
  197. # array-likes as both arguments
  198. c = [(9, 8), (7, 6)]
  199. d = (9, 7)
  200. assert_isin_equal(c, d)
  201. # zero-d array:
  202. f = np.array(3)
  203. assert_isin_equal(f, b)
  204. assert_isin_equal(a, f)
  205. assert_isin_equal(f, f)
  206. # scalar:
  207. assert_isin_equal(5, b)
  208. assert_isin_equal(a, 6)
  209. assert_isin_equal(5, 6)
  210. # empty array-like:
  211. if kind != "table":
  212. # An empty list will become float64,
  213. # which is invalid for kind="table"
  214. x = []
  215. assert_isin_equal(x, b)
  216. assert_isin_equal(a, x)
  217. assert_isin_equal(x, x)
  218. # empty array with various types:
  219. for dtype in [bool, np.int64, np.float64]:
  220. if kind == "table" and dtype == np.float64:
  221. continue
  222. if dtype in {np.int64, np.float64}:
  223. ar = np.array([10, 20, 30], dtype=dtype)
  224. elif dtype in {bool}:
  225. ar = np.array([True, False, False])
  226. empty_array = np.array([], dtype=dtype)
  227. assert_isin_equal(empty_array, ar)
  228. assert_isin_equal(ar, empty_array)
  229. assert_isin_equal(empty_array, empty_array)
  230. @pytest.mark.parametrize("kind", [None, "sort", "table"])
  231. def test_isin_additional(self, kind):
  232. # we use two different sizes for the b array here to test the
  233. # two different paths in isin().
  234. for mult in (1, 10):
  235. # One check without np.array to make sure lists are handled correct
  236. a = [5, 7, 1, 2]
  237. b = [2, 4, 3, 1, 5] * mult
  238. ec = np.array([True, False, True, True])
  239. c = isin(a, b, assume_unique=True, kind=kind)
  240. assert_array_equal(c, ec)
  241. a[0] = 8
  242. ec = np.array([False, False, True, True])
  243. c = isin(a, b, assume_unique=True, kind=kind)
  244. assert_array_equal(c, ec)
  245. a[0], a[3] = 4, 8
  246. ec = np.array([True, False, True, False])
  247. c = isin(a, b, assume_unique=True, kind=kind)
  248. assert_array_equal(c, ec)
  249. a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
  250. b = [2, 3, 4] * mult
  251. ec = [False, True, False, True, True, True, True, True, True,
  252. False, True, False, False, False]
  253. c = isin(a, b, kind=kind)
  254. assert_array_equal(c, ec)
  255. b = b + [5, 5, 4] * mult
  256. ec = [True, True, True, True, True, True, True, True, True, True,
  257. True, False, True, True]
  258. c = isin(a, b, kind=kind)
  259. assert_array_equal(c, ec)
  260. a = np.array([5, 7, 1, 2])
  261. b = np.array([2, 4, 3, 1, 5] * mult)
  262. ec = np.array([True, False, True, True])
  263. c = isin(a, b, kind=kind)
  264. assert_array_equal(c, ec)
  265. a = np.array([5, 7, 1, 1, 2])
  266. b = np.array([2, 4, 3, 3, 1, 5] * mult)
  267. ec = np.array([True, False, True, True, True])
  268. c = isin(a, b, kind=kind)
  269. assert_array_equal(c, ec)
  270. a = np.array([5, 5])
  271. b = np.array([2, 2] * mult)
  272. ec = np.array([False, False])
  273. c = isin(a, b, kind=kind)
  274. assert_array_equal(c, ec)
  275. a = np.array([5])
  276. b = np.array([2])
  277. ec = np.array([False])
  278. c = isin(a, b, kind=kind)
  279. assert_array_equal(c, ec)
  280. if kind in {None, "sort"}:
  281. assert_array_equal(isin([], [], kind=kind), [])
  282. def test_isin_char_array(self):
  283. a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
  284. b = np.array(['a', 'c'])
  285. ec = np.array([True, False, True, False, False, True, False, False])
  286. c = isin(a, b)
  287. assert_array_equal(c, ec)
  288. @pytest.mark.parametrize("kind", [None, "sort", "table"])
  289. def test_isin_invert(self, kind):
  290. "Test isin's invert parameter"
  291. # We use two different sizes for the b array here to test the
  292. # two different paths in isin().
  293. for mult in (1, 10):
  294. a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
  295. b = [2, 3, 4] * mult
  296. assert_array_equal(np.invert(isin(a, b, kind=kind)),
  297. isin(a, b, invert=True, kind=kind))
  298. # float:
  299. if kind in {None, "sort"}:
  300. for mult in (1, 10):
  301. a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5],
  302. dtype=np.float32)
  303. b = [2, 3, 4] * mult
  304. b = np.array(b, dtype=np.float32)
  305. assert_array_equal(np.invert(isin(a, b, kind=kind)),
  306. isin(a, b, invert=True, kind=kind))
  307. def test_isin_hit_alternate_algorithm(self):
  308. """Hit the standard isin code with integers"""
  309. # Need extreme range to hit standard code
  310. # This hits it without the use of kind='table'
  311. a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64)
  312. b = np.array([2, 3, 4, 1e9], dtype=np.int64)
  313. expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool)
  314. assert_array_equal(expected, isin(a, b))
  315. assert_array_equal(np.invert(expected), isin(a, b, invert=True))
  316. a = np.array([5, 7, 1, 2], dtype=np.int64)
  317. b = np.array([2, 4, 3, 1, 5, 1e9], dtype=np.int64)
  318. ec = np.array([True, False, True, True])
  319. c = isin(a, b, assume_unique=True)
  320. assert_array_equal(c, ec)
  321. @pytest.mark.parametrize("kind", [None, "sort", "table"])
  322. def test_isin_boolean(self, kind):
  323. """Test that isin works for boolean input"""
  324. a = np.array([True, False])
  325. b = np.array([False, False, False])
  326. expected = np.array([False, True])
  327. assert_array_equal(expected,
  328. isin(a, b, kind=kind))
  329. assert_array_equal(np.invert(expected),
  330. isin(a, b, invert=True, kind=kind))
  331. @pytest.mark.parametrize("kind", [None, "sort"])
  332. def test_isin_timedelta(self, kind):
  333. """Test that isin works for timedelta input"""
  334. rstate = np.random.RandomState(0)
  335. a = rstate.randint(0, 100, size=10)
  336. b = rstate.randint(0, 100, size=10)
  337. truth = isin(a, b)
  338. a_timedelta = a.astype("timedelta64[s]")
  339. b_timedelta = b.astype("timedelta64[s]")
  340. assert_array_equal(truth, isin(a_timedelta, b_timedelta, kind=kind))
  341. def test_isin_table_timedelta_fails(self):
  342. a = np.array([0, 1, 2], dtype="timedelta64[s]")
  343. b = a
  344. # Make sure it raises a value error:
  345. with pytest.raises(ValueError):
  346. isin(a, b, kind="table")
  347. @pytest.mark.parametrize(
  348. "dtype1,dtype2",
  349. [
  350. (np.int8, np.int16),
  351. (np.int16, np.int8),
  352. (np.uint8, np.uint16),
  353. (np.uint16, np.uint8),
  354. (np.uint8, np.int16),
  355. (np.int16, np.uint8),
  356. (np.uint64, np.int64),
  357. ]
  358. )
  359. @pytest.mark.parametrize("kind", [None, "sort", "table"])
  360. def test_isin_mixed_dtype(self, dtype1, dtype2, kind):
  361. """Test that isin works as expected for mixed dtype input."""
  362. is_dtype2_signed = np.issubdtype(dtype2, np.signedinteger)
  363. ar1 = np.array([0, 0, 1, 1], dtype=dtype1)
  364. if is_dtype2_signed:
  365. ar2 = np.array([-128, 0, 127], dtype=dtype2)
  366. else:
  367. ar2 = np.array([127, 0, 255], dtype=dtype2)
  368. expected = np.array([True, True, False, False])
  369. expect_failure = kind == "table" and (
  370. dtype1 == np.int16 and dtype2 == np.int8)
  371. if expect_failure:
  372. with pytest.raises(RuntimeError, match="exceed the maximum"):
  373. isin(ar1, ar2, kind=kind)
  374. else:
  375. assert_array_equal(isin(ar1, ar2, kind=kind), expected)
  376. @pytest.mark.parametrize("data", [
  377. np.array([2**63, 2**63+1], dtype=np.uint64),
  378. np.array([-2**62, -2**62-1], dtype=np.int64),
  379. ])
  380. @pytest.mark.parametrize("kind", [None, "sort", "table"])
  381. def test_isin_mixed_huge_vals(self, kind, data):
  382. """Test values outside intp range (negative ones if 32bit system)"""
  383. query = data[1]
  384. res = np.isin(data, query, kind=kind)
  385. assert_array_equal(res, [False, True])
  386. # Also check that nothing weird happens for values can't possibly
  387. # in range.
  388. data = data.astype(np.int32) # clearly different values
  389. res = np.isin(data, query, kind=kind)
  390. assert_array_equal(res, [False, False])
  391. @pytest.mark.parametrize("kind", [None, "sort", "table"])
  392. def test_isin_mixed_boolean(self, kind):
  393. """Test that isin works as expected for bool/int input."""
  394. for dtype in np.typecodes["AllInteger"]:
  395. a = np.array([True, False, False], dtype=bool)
  396. b = np.array([0, 0, 0, 0], dtype=dtype)
  397. expected = np.array([False, True, True], dtype=bool)
  398. assert_array_equal(isin(a, b, kind=kind), expected)
  399. a, b = b, a
  400. expected = np.array([True, True, True, True], dtype=bool)
  401. assert_array_equal(isin(a, b, kind=kind), expected)
  402. def test_isin_first_array_is_object(self):
  403. ar1 = [None]
  404. ar2 = np.array([1]*10)
  405. expected = np.array([False])
  406. result = np.isin(ar1, ar2)
  407. assert_array_equal(result, expected)
  408. def test_isin_second_array_is_object(self):
  409. ar1 = 1
  410. ar2 = np.array([None]*10)
  411. expected = np.array([False])
  412. result = np.isin(ar1, ar2)
  413. assert_array_equal(result, expected)
  414. def test_isin_both_arrays_are_object(self):
  415. ar1 = [None]
  416. ar2 = np.array([None]*10)
  417. expected = np.array([True])
  418. result = np.isin(ar1, ar2)
  419. assert_array_equal(result, expected)
  420. def test_isin_both_arrays_have_structured_dtype(self):
  421. # Test arrays of a structured data type containing an integer field
  422. # and a field of dtype `object` allowing for arbitrary Python objects
  423. dt = np.dtype([('field1', int), ('field2', object)])
  424. ar1 = np.array([(1, None)], dtype=dt)
  425. ar2 = np.array([(1, None)]*10, dtype=dt)
  426. expected = np.array([True])
  427. result = np.isin(ar1, ar2)
  428. assert_array_equal(result, expected)
  429. def test_isin_with_arrays_containing_tuples(self):
  430. ar1 = np.array([(1,), 2], dtype=object)
  431. ar2 = np.array([(1,), 2], dtype=object)
  432. expected = np.array([True, True])
  433. result = np.isin(ar1, ar2)
  434. assert_array_equal(result, expected)
  435. result = np.isin(ar1, ar2, invert=True)
  436. assert_array_equal(result, np.invert(expected))
  437. # An integer is added at the end of the array to make sure
  438. # that the array builder will create the array with tuples
  439. # and after it's created the integer is removed.
  440. # There's a bug in the array constructor that doesn't handle
  441. # tuples properly and adding the integer fixes that.
  442. ar1 = np.array([(1,), (2, 1), 1], dtype=object)
  443. ar1 = ar1[:-1]
  444. ar2 = np.array([(1,), (2, 1), 1], dtype=object)
  445. ar2 = ar2[:-1]
  446. expected = np.array([True, True])
  447. result = np.isin(ar1, ar2)
  448. assert_array_equal(result, expected)
  449. result = np.isin(ar1, ar2, invert=True)
  450. assert_array_equal(result, np.invert(expected))
  451. ar1 = np.array([(1,), (2, 3), 1], dtype=object)
  452. ar1 = ar1[:-1]
  453. ar2 = np.array([(1,), 2], dtype=object)
  454. expected = np.array([True, False])
  455. result = np.isin(ar1, ar2)
  456. assert_array_equal(result, expected)
  457. result = np.isin(ar1, ar2, invert=True)
  458. assert_array_equal(result, np.invert(expected))
  459. def test_isin_errors(self):
  460. """Test that isin raises expected errors."""
  461. # Error 1: `kind` is not one of 'sort' 'table' or None.
  462. ar1 = np.array([1, 2, 3, 4, 5])
  463. ar2 = np.array([2, 4, 6, 8, 10])
  464. assert_raises(ValueError, isin, ar1, ar2, kind='quicksort')
  465. # Error 2: `kind="table"` does not work for non-integral arrays.
  466. obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object)
  467. obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object)
  468. assert_raises(ValueError, isin, obj_ar1, obj_ar2, kind='table')
  469. for dtype in [np.int32, np.int64]:
  470. ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype)
  471. # The range of this array will overflow:
  472. overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype)
  473. # Error 3: `kind="table"` will trigger a runtime error
  474. # if there is an integer overflow expected when computing the
  475. # range of ar2
  476. assert_raises(
  477. RuntimeError,
  478. isin, ar1, overflow_ar2, kind='table'
  479. )
  480. # Non-error: `kind=None` will *not* trigger a runtime error
  481. # if there is an integer overflow, it will switch to
  482. # the `sort` algorithm.
  483. result = np.isin(ar1, overflow_ar2, kind=None)
  484. assert_array_equal(result, [True] + [False] * 4)
  485. result = np.isin(ar1, overflow_ar2, kind='sort')
  486. assert_array_equal(result, [True] + [False] * 4)
  487. def test_union1d(self):
  488. a = np.array([5, 4, 7, 1, 2])
  489. b = np.array([2, 4, 3, 3, 2, 1, 5])
  490. ec = np.array([1, 2, 3, 4, 5, 7])
  491. c = union1d(a, b)
  492. assert_array_equal(c, ec)
  493. # Tests gh-10340, arguments to union1d should be
  494. # flattened if they are not already 1D
  495. x = np.array([[0, 1, 2], [3, 4, 5]])
  496. y = np.array([0, 1, 2, 3, 4])
  497. ez = np.array([0, 1, 2, 3, 4, 5])
  498. z = union1d(x, y)
  499. assert_array_equal(z, ez)
  500. assert_array_equal([], union1d([], []))
  501. def test_setdiff1d(self):
  502. a = np.array([6, 5, 4, 7, 1, 2, 7, 4])
  503. b = np.array([2, 4, 3, 3, 2, 1, 5])
  504. ec = np.array([6, 7])
  505. c = setdiff1d(a, b)
  506. assert_array_equal(c, ec)
  507. a = np.arange(21)
  508. b = np.arange(19)
  509. ec = np.array([19, 20])
  510. c = setdiff1d(a, b)
  511. assert_array_equal(c, ec)
  512. assert_array_equal([], setdiff1d([], []))
  513. a = np.array((), np.uint32)
  514. assert_equal(setdiff1d(a, []).dtype, np.uint32)
  515. def test_setdiff1d_unique(self):
  516. a = np.array([3, 2, 1])
  517. b = np.array([7, 5, 2])
  518. expected = np.array([3, 1])
  519. actual = setdiff1d(a, b, assume_unique=True)
  520. assert_equal(actual, expected)
  521. def test_setdiff1d_char_array(self):
  522. a = np.array(['a', 'b', 'c'])
  523. b = np.array(['a', 'b', 's'])
  524. assert_array_equal(setdiff1d(a, b), np.array(['c']))
  525. def test_manyways(self):
  526. a = np.array([5, 7, 1, 2, 8])
  527. b = np.array([9, 8, 2, 4, 3, 1, 5])
  528. c1 = setxor1d(a, b)
  529. aux1 = intersect1d(a, b)
  530. aux2 = union1d(a, b)
  531. c2 = setdiff1d(aux2, aux1)
  532. assert_array_equal(c1, c2)
  533. class TestUnique:
  534. def test_unique_1d(self):
  535. def check_all(a, b, i1, i2, c, dt):
  536. base_msg = 'check {0} failed for type {1}'
  537. msg = base_msg.format('values', dt)
  538. v = unique(a)
  539. assert_array_equal(v, b, msg)
  540. msg = base_msg.format('return_index', dt)
  541. v, j = unique(a, True, False, False)
  542. assert_array_equal(v, b, msg)
  543. assert_array_equal(j, i1, msg)
  544. msg = base_msg.format('return_inverse', dt)
  545. v, j = unique(a, False, True, False)
  546. assert_array_equal(v, b, msg)
  547. assert_array_equal(j, i2, msg)
  548. msg = base_msg.format('return_counts', dt)
  549. v, j = unique(a, False, False, True)
  550. assert_array_equal(v, b, msg)
  551. assert_array_equal(j, c, msg)
  552. msg = base_msg.format('return_index and return_inverse', dt)
  553. v, j1, j2 = unique(a, True, True, False)
  554. assert_array_equal(v, b, msg)
  555. assert_array_equal(j1, i1, msg)
  556. assert_array_equal(j2, i2, msg)
  557. msg = base_msg.format('return_index and return_counts', dt)
  558. v, j1, j2 = unique(a, True, False, True)
  559. assert_array_equal(v, b, msg)
  560. assert_array_equal(j1, i1, msg)
  561. assert_array_equal(j2, c, msg)
  562. msg = base_msg.format('return_inverse and return_counts', dt)
  563. v, j1, j2 = unique(a, False, True, True)
  564. assert_array_equal(v, b, msg)
  565. assert_array_equal(j1, i2, msg)
  566. assert_array_equal(j2, c, msg)
  567. msg = base_msg.format(('return_index, return_inverse '
  568. 'and return_counts'), dt)
  569. v, j1, j2, j3 = unique(a, True, True, True)
  570. assert_array_equal(v, b, msg)
  571. assert_array_equal(j1, i1, msg)
  572. assert_array_equal(j2, i2, msg)
  573. assert_array_equal(j3, c, msg)
  574. a = [5, 7, 1, 2, 1, 5, 7]*10
  575. b = [1, 2, 5, 7]
  576. i1 = [2, 3, 0, 1]
  577. i2 = [2, 3, 0, 1, 0, 2, 3]*10
  578. c = np.multiply([2, 1, 2, 2], 10)
  579. # test for numeric arrays
  580. types = []
  581. types.extend(np.typecodes['AllInteger'])
  582. types.extend(np.typecodes['AllFloat'])
  583. types.append('datetime64[D]')
  584. types.append('timedelta64[D]')
  585. for dt in types:
  586. aa = np.array(a, dt)
  587. bb = np.array(b, dt)
  588. check_all(aa, bb, i1, i2, c, dt)
  589. # test for object arrays
  590. dt = 'O'
  591. aa = np.empty(len(a), dt)
  592. aa[:] = a
  593. bb = np.empty(len(b), dt)
  594. bb[:] = b
  595. check_all(aa, bb, i1, i2, c, dt)
  596. # test for structured arrays
  597. dt = [('', 'i'), ('', 'i')]
  598. aa = np.array(list(zip(a, a)), dt)
  599. bb = np.array(list(zip(b, b)), dt)
  600. check_all(aa, bb, i1, i2, c, dt)
  601. # test for ticket #2799
  602. aa = [1. + 0.j, 1 - 1.j, 1]
  603. assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])
  604. # test for ticket #4785
  605. a = [(1, 2), (1, 2), (2, 3)]
  606. unq = [1, 2, 3]
  607. inv = [[0, 1], [0, 1], [1, 2]]
  608. a1 = unique(a)
  609. assert_array_equal(a1, unq)
  610. a2, a2_inv = unique(a, return_inverse=True)
  611. assert_array_equal(a2, unq)
  612. assert_array_equal(a2_inv, inv)
  613. # test for chararrays with return_inverse (gh-5099)
  614. a = np.char.chararray(5)
  615. a[...] = ''
  616. a2, a2_inv = np.unique(a, return_inverse=True)
  617. assert_array_equal(a2_inv, np.zeros(5))
  618. # test for ticket #9137
  619. a = []
  620. a1_idx = np.unique(a, return_index=True)[1]
  621. a2_inv = np.unique(a, return_inverse=True)[1]
  622. a3_idx, a3_inv = np.unique(a, return_index=True,
  623. return_inverse=True)[1:]
  624. assert_equal(a1_idx.dtype, np.intp)
  625. assert_equal(a2_inv.dtype, np.intp)
  626. assert_equal(a3_idx.dtype, np.intp)
  627. assert_equal(a3_inv.dtype, np.intp)
  628. # test for ticket 2111 - float
  629. a = [2.0, np.nan, 1.0, np.nan]
  630. ua = [1.0, 2.0, np.nan]
  631. ua_idx = [2, 0, 1]
  632. ua_inv = [1, 2, 0, 2]
  633. ua_cnt = [1, 1, 2]
  634. assert_equal(np.unique(a), ua)
  635. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  636. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  637. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  638. # test for ticket 2111 - complex
  639. a = [2.0-1j, np.nan, 1.0+1j, complex(0.0, np.nan), complex(1.0, np.nan)]
  640. ua = [1.0+1j, 2.0-1j, complex(0.0, np.nan)]
  641. ua_idx = [2, 0, 3]
  642. ua_inv = [1, 2, 0, 2, 2]
  643. ua_cnt = [1, 1, 3]
  644. assert_equal(np.unique(a), ua)
  645. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  646. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  647. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  648. # test for ticket 2111 - datetime64
  649. nat = np.datetime64('nat')
  650. a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat]
  651. ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat]
  652. ua_idx = [2, 0, 1]
  653. ua_inv = [1, 2, 0, 2]
  654. ua_cnt = [1, 1, 2]
  655. assert_equal(np.unique(a), ua)
  656. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  657. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  658. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  659. # test for ticket 2111 - timedelta
  660. nat = np.timedelta64('nat')
  661. a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat]
  662. ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat]
  663. ua_idx = [2, 0, 1]
  664. ua_inv = [1, 2, 0, 2]
  665. ua_cnt = [1, 1, 2]
  666. assert_equal(np.unique(a), ua)
  667. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  668. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  669. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  670. # test for gh-19300
  671. all_nans = [np.nan] * 4
  672. ua = [np.nan]
  673. ua_idx = [0]
  674. ua_inv = [0, 0, 0, 0]
  675. ua_cnt = [4]
  676. assert_equal(np.unique(all_nans), ua)
  677. assert_equal(np.unique(all_nans, return_index=True), (ua, ua_idx))
  678. assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv))
  679. assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt))
  680. def test_unique_axis_errors(self):
  681. assert_raises(TypeError, self._run_axis_tests, object)
  682. assert_raises(TypeError, self._run_axis_tests,
  683. [('a', int), ('b', object)])
  684. assert_raises(AxisError, unique, np.arange(10), axis=2)
  685. assert_raises(AxisError, unique, np.arange(10), axis=-2)
  686. def test_unique_axis_list(self):
  687. msg = "Unique failed on list of lists"
  688. inp = [[0, 1, 0], [0, 1, 0]]
  689. inp_arr = np.asarray(inp)
  690. assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg)
  691. assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)
  def test_unique_axis(self):
      """Run the shared axis checks per dtype, then two equality edge cases."""
      dtypes = [
          *np.typecodes['AllInteger'],
          *np.typecodes['AllFloat'],
          'datetime64[D]',
          'timedelta64[D]',
          [('a', int), ('b', int)],
          [('a', int), ('b', float)],
      ]
      for dt in dtypes:
          self._run_axis_tests(dt)

      # uint8 values 0..9 reinterpreted as bool: several distinct byte
      # values all count as True.
      msg = 'Non-bitwise-equal booleans test failed'
      odd_bools = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
      expected = np.array([[False, True], [True, True]], dtype=bool)
      assert_array_equal(unique(odd_bools, axis=0), expected, msg)

      # Rows that differ only in the sign of zero must collapse to one row.
      msg = 'Negative zero equality test failed'
      signed_zeros = np.array([[-0.0, 0.0], [0.0, -0.0]] * 2)
      expected = np.array([[-0.0, 0.0]])
      assert_array_equal(unique(signed_zeros, axis=0), expected, msg)
  @pytest.mark.parametrize("axis", [0, -1])
  def test_unique_1d_with_axis(self, axis):
      """Both spellings of a 1-D array's only axis give sorted uniques."""
      values = np.array([4, 3, 2, 3, 2, 1, 2, 2])
      assert_array_equal(unique(values, axis=axis), [1, 2, 3, 4])
  @pytest.mark.parametrize("axis", [None, 0, -1])
  def test_unique_inverse_with_axis(self, axis):
      """The inverse indices must rebuild the input through ``np.take``."""
      x = np.array([[4, 4, 3], [2, 2, 1], [2, 2, 1], [4, 4, 3]])
      uniq, inv = unique(x, return_inverse=True, axis=axis)
      # With axis=None the inverse keeps the input's dimensionality;
      # with an explicit axis it is expected to be 1-D.
      if axis is None:
          expected_ndim = x.ndim
      else:
          expected_ndim = 1
      assert_equal(inv.ndim, expected_ndim)
      rebuilt = np.take(uniq, inv, axis=axis)
      assert_array_equal(x, rebuilt)
  721. def test_unique_axis_zeros(self):
  722. # issue 15559
  723. single_zero = np.empty(shape=(2, 0), dtype=np.int8)
  724. uniq, idx, inv, cnt = unique(single_zero, axis=0, return_index=True,
  725. return_inverse=True, return_counts=True)
  726. # there's 1 element of shape (0,) along axis 0
  727. assert_equal(uniq.dtype, single_zero.dtype)
  728. assert_array_equal(uniq, np.empty(shape=(1, 0)))
  729. assert_array_equal(idx, np.array([0]))
  730. assert_array_equal(inv, np.array([0, 0]))
  731. assert_array_equal(cnt, np.array([2]))
  732. # there's 0 elements of shape (2,) along axis 1
  733. uniq, idx, inv, cnt = unique(single_zero, axis=1, return_index=True,
  734. return_inverse=True, return_counts=True)
  735. assert_equal(uniq.dtype, single_zero.dtype)
  736. assert_array_equal(uniq, np.empty(shape=(2, 0)))
  737. assert_array_equal(idx, np.array([]))
  738. assert_array_equal(inv, np.array([]))
  739. assert_array_equal(cnt, np.array([]))
  740. # test a "complicated" shape
  741. shape = (0, 2, 0, 3, 0, 4, 0)
  742. multiple_zeros = np.empty(shape=shape)
  743. for axis in range(len(shape)):
  744. expected_shape = list(shape)
  745. if shape[axis] == 0:
  746. expected_shape[axis] = 0
  747. else:
  748. expected_shape[axis] = 1
  749. assert_array_equal(unique(multiple_zeros, axis=axis),
  750. np.empty(shape=expected_shape))
  751. def test_unique_masked(self):
  752. # issue 8664
  753. x = np.array([64, 0, 1, 2, 3, 63, 63, 0, 0, 0, 1, 2, 0, 63, 0],
  754. dtype='uint8')
  755. y = np.ma.masked_equal(x, 0)
  756. v = np.unique(y)
  757. v2, i, c = np.unique(y, return_index=True, return_counts=True)
  758. msg = 'Unique returned different results when asked for index'
  759. assert_array_equal(v.data, v2.data, msg)
  760. assert_array_equal(v.mask, v2.mask, msg)
  761. def test_unique_sort_order_with_axis(self):
  762. # These tests fail if sorting along axis is done by treating subarrays
  763. # as unsigned byte strings. See gh-10495.
  764. fmt = "sort order incorrect for integer type '%s'"
  765. for dt in 'bhilq':
  766. a = np.array([[-1], [0]], dt)
  767. b = np.unique(a, axis=0)
  768. assert_array_equal(a, b, fmt % dt)
  769. def _run_axis_tests(self, dtype):
  770. data = np.array([[0, 1, 0, 0],
  771. [1, 0, 0, 0],
  772. [0, 1, 0, 0],
  773. [1, 0, 0, 0]]).astype(dtype)
  774. msg = 'Unique with 1d array and axis=0 failed'
  775. result = np.array([0, 1])
  776. assert_array_equal(unique(data), result.astype(dtype), msg)
  777. msg = 'Unique with 2d array and axis=0 failed'
  778. result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
  779. assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)
  780. msg = 'Unique with 2d array and axis=1 failed'
  781. result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
  782. assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)
  783. msg = 'Unique with 3d array and axis=2 failed'
  784. data3d = np.array([[[1, 1],
  785. [1, 0]],
  786. [[0, 1],
  787. [0, 0]]]).astype(dtype)
  788. result = np.take(data3d, [1, 0], axis=2)
  789. assert_array_equal(unique(data3d, axis=2), result, msg)
  790. uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
  791. return_inverse=True, return_counts=True)
  792. msg = "Unique's return_index=True failed with axis=0"
  793. assert_array_equal(data[idx], uniq, msg)
  794. msg = "Unique's return_inverse=True failed with axis=0"
  795. assert_array_equal(np.take(uniq, inv, axis=0), data)
  796. msg = "Unique's return_counts=True failed with axis=0"
  797. assert_array_equal(cnt, np.array([2, 2]), msg)
  798. uniq, idx, inv, cnt = unique(data, axis=1, return_index=True,
  799. return_inverse=True, return_counts=True)
  800. msg = "Unique's return_index=True failed with axis=1"
  801. assert_array_equal(data[:, idx], uniq)
  802. msg = "Unique's return_inverse=True failed with axis=1"
  803. assert_array_equal(np.take(uniq, inv, axis=1), data)
  804. msg = "Unique's return_counts=True failed with axis=1"
  805. assert_array_equal(cnt, np.array([2, 1, 1]), msg)
  806. def test_unique_nanequals(self):
  807. # issue 20326
  808. a = np.array([1, 1, np.nan, np.nan, np.nan])
  809. unq = np.unique(a)
  810. not_unq = np.unique(a, equal_nan=False)
  811. assert_array_equal(unq, np.array([1, np.nan]))
  812. assert_array_equal(not_unq, np.array([1, np.nan, np.nan, np.nan]))
  813. def test_unique_array_api_functions(self):
  814. arr = np.array([np.nan, 1, 4, 1, 3, 4, np.nan, 5, 1])
  815. for res_unique_array_api, res_unique in [
  816. (
  817. np.unique_values(arr),
  818. np.unique(arr, equal_nan=False)
  819. ),
  820. (
  821. np.unique_counts(arr),
  822. np.unique(arr, return_counts=True, equal_nan=False)
  823. ),
  824. (
  825. np.unique_inverse(arr),
  826. np.unique(arr, return_inverse=True, equal_nan=False)
  827. ),
  828. (
  829. np.unique_all(arr),
  830. np.unique(
  831. arr,
  832. return_index=True,
  833. return_inverse=True,
  834. return_counts=True,
  835. equal_nan=False
  836. )
  837. )
  838. ]:
  839. assert len(res_unique_array_api) == len(res_unique)
  840. for actual, expected in zip(res_unique_array_api, res_unique):
  841. assert_array_equal(actual, expected)
  842. def test_unique_inverse_shape(self):
  843. # Regression test for https://github.com/numpy/numpy/issues/25552
  844. arr = np.array([[1, 2, 3], [2, 3, 1]])
  845. expected_values, expected_inverse = np.unique(arr, return_inverse=True)
  846. expected_inverse = expected_inverse.reshape(arr.shape)
  847. for func in np.unique_inverse, np.unique_all:
  848. result = func(arr)
  849. assert_array_equal(expected_values, result.values)
  850. assert_array_equal(expected_inverse, result.inverse_indices)
  851. assert_array_equal(arr, result.values[result.inverse_indices])