test_arraysetops.py 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302
  1. """Test functions for 1D array set operations.
  2. """
  3. import pytest
  4. import numpy as np
  5. from numpy import ediff1d, intersect1d, isin, setdiff1d, setxor1d, union1d, unique
  6. from numpy.dtypes import StringDType
  7. from numpy.exceptions import AxisError
  8. from numpy.testing import (
  9. assert_array_equal,
  10. assert_equal,
  11. assert_raises,
  12. assert_raises_regex,
  13. )
  14. class TestSetOps:
  15. def test_intersect1d(self):
  16. # unique inputs
  17. a = np.array([5, 7, 1, 2])
  18. b = np.array([2, 4, 3, 1, 5])
  19. ec = np.array([1, 2, 5])
  20. c = intersect1d(a, b, assume_unique=True)
  21. assert_array_equal(c, ec)
  22. # non-unique inputs
  23. a = np.array([5, 5, 7, 1, 2])
  24. b = np.array([2, 1, 4, 3, 3, 1, 5])
  25. ed = np.array([1, 2, 5])
  26. c = intersect1d(a, b)
  27. assert_array_equal(c, ed)
  28. assert_array_equal([], intersect1d([], []))
  29. def test_intersect1d_array_like(self):
  30. # See gh-11772
  31. class Test:
  32. def __array__(self, dtype=None, copy=None):
  33. return np.arange(3)
  34. a = Test()
  35. res = intersect1d(a, a)
  36. assert_array_equal(res, a)
  37. res = intersect1d([1, 2, 3], [1, 2, 3])
  38. assert_array_equal(res, [1, 2, 3])
  39. def test_intersect1d_indices(self):
  40. # unique inputs
  41. a = np.array([1, 2, 3, 4])
  42. b = np.array([2, 1, 4, 6])
  43. c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
  44. ee = np.array([1, 2, 4])
  45. assert_array_equal(c, ee)
  46. assert_array_equal(a[i1], ee)
  47. assert_array_equal(b[i2], ee)
  48. # non-unique inputs
  49. a = np.array([1, 2, 2, 3, 4, 3, 2])
  50. b = np.array([1, 8, 4, 2, 2, 3, 2, 3])
  51. c, i1, i2 = intersect1d(a, b, return_indices=True)
  52. ef = np.array([1, 2, 3, 4])
  53. assert_array_equal(c, ef)
  54. assert_array_equal(a[i1], ef)
  55. assert_array_equal(b[i2], ef)
  56. # non1d, unique inputs
  57. a = np.array([[2, 4, 5, 6], [7, 8, 1, 15]])
  58. b = np.array([[3, 2, 7, 6], [10, 12, 8, 9]])
  59. c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
  60. ui1 = np.unravel_index(i1, a.shape)
  61. ui2 = np.unravel_index(i2, b.shape)
  62. ea = np.array([2, 6, 7, 8])
  63. assert_array_equal(ea, a[ui1])
  64. assert_array_equal(ea, b[ui2])
  65. # non1d, not assumed to be uniqueinputs
  66. a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]])
  67. b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]])
  68. c, i1, i2 = intersect1d(a, b, return_indices=True)
  69. ui1 = np.unravel_index(i1, a.shape)
  70. ui2 = np.unravel_index(i2, b.shape)
  71. ea = np.array([2, 7, 8])
  72. assert_array_equal(ea, a[ui1])
  73. assert_array_equal(ea, b[ui2])
  74. def test_setxor1d(self):
  75. a = np.array([5, 7, 1, 2])
  76. b = np.array([2, 4, 3, 1, 5])
  77. ec = np.array([3, 4, 7])
  78. c = setxor1d(a, b)
  79. assert_array_equal(c, ec)
  80. a = np.array([1, 2, 3])
  81. b = np.array([6, 5, 4])
  82. ec = np.array([1, 2, 3, 4, 5, 6])
  83. c = setxor1d(a, b)
  84. assert_array_equal(c, ec)
  85. a = np.array([1, 8, 2, 3])
  86. b = np.array([6, 5, 4, 8])
  87. ec = np.array([1, 2, 3, 4, 5, 6])
  88. c = setxor1d(a, b)
  89. assert_array_equal(c, ec)
  90. assert_array_equal([], setxor1d([], []))
  91. def test_setxor1d_unique(self):
  92. a = np.array([1, 8, 2, 3])
  93. b = np.array([6, 5, 4, 8])
  94. ec = np.array([1, 2, 3, 4, 5, 6])
  95. c = setxor1d(a, b, assume_unique=True)
  96. assert_array_equal(c, ec)
  97. a = np.array([[1], [8], [2], [3]])
  98. b = np.array([[6, 5], [4, 8]])
  99. ec = np.array([1, 2, 3, 4, 5, 6])
  100. c = setxor1d(a, b, assume_unique=True)
  101. assert_array_equal(c, ec)
  102. def test_ediff1d(self):
  103. zero_elem = np.array([])
  104. one_elem = np.array([1])
  105. two_elem = np.array([1, 2])
  106. assert_array_equal([], ediff1d(zero_elem))
  107. assert_array_equal([0], ediff1d(zero_elem, to_begin=0))
  108. assert_array_equal([0], ediff1d(zero_elem, to_end=0))
  109. assert_array_equal([-1, 0], ediff1d(zero_elem, to_begin=-1, to_end=0))
  110. assert_array_equal([], ediff1d(one_elem))
  111. assert_array_equal([1], ediff1d(two_elem))
  112. assert_array_equal([7, 1, 9], ediff1d(two_elem, to_begin=7, to_end=9))
  113. assert_array_equal([5, 6, 1, 7, 8],
  114. ediff1d(two_elem, to_begin=[5, 6], to_end=[7, 8]))
  115. assert_array_equal([1, 9], ediff1d(two_elem, to_end=9))
  116. assert_array_equal([1, 7, 8], ediff1d(two_elem, to_end=[7, 8]))
  117. assert_array_equal([7, 1], ediff1d(two_elem, to_begin=7))
  118. assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6]))
  @pytest.mark.parametrize("ary, prepend, append, expected", [
      # should fail: np.nan is a floating point value and is being
      # appended to an integer array
      (np.array([1, 2, 3], dtype=np.int64), None, np.nan, 'to_end'),
      # should fail: prepending float32 values means downcasting
      # them to an int type
      (np.array([1, 2, 3], dtype=np.int64),
       np.array([5, 7, 2], dtype=np.float32),
       None,
       'to_begin'),
      # should fail: special floating point values on both sides of an
      # integer ary; `to_begin` is in the error message as the impl
      # checks this first
      (np.array([1., 3., 9.], dtype=np.int8), np.nan, np.nan, 'to_begin'),
  ])
  def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected):
      """Check ediff1d raises TypeError for incompatible to_begin/to_end.

      Verifies the resolution of gh-11490: prepending or appending a value
      of an incompatible type must raise an appropriate exception that
      names the offending argument.
      """
      pattern = 'dtype of `{}` must be compatible'.format(expected)
      with assert_raises_regex(TypeError, pattern):
          ediff1d(ary=ary, to_begin=prepend, to_end=append)
  @pytest.mark.parametrize(
      "ary,prepend,append,expected",
      [
          # 2**16 and 2**16 + 4 will be cast to int16 under same kind rule.
          (np.array([1, 2, 3], dtype=np.int16),
           2**16,
           2**16 + 4,
           np.array([0, 1, 1, 4], dtype=np.int16)),
          (np.array([1, 2, 3], dtype=np.float32),
           np.array([5], dtype=np.float64),
           None,
           np.array([5, 1, 1], dtype=np.float32)),
          (np.array([1, 2, 3], dtype=np.int32),
           0,
           0,
           np.array([0, 1, 1, 0], dtype=np.int32)),
          (np.array([1, 2, 3], dtype=np.int64),
           3,
           -9,
           np.array([3, 1, 1, -9], dtype=np.int64)),
      ]
  )
  def test_ediff1d_scalar_handling(self, ary, prepend, append, expected):
      """Check scalar to_begin/to_end values keep the dtype of ``ary``.

      Guards the backwards-compatible scalar prepend / append behaviour of
      ediff1d following the fix for gh-11490.
      """
      result = np.ediff1d(ary=ary, to_begin=prepend, to_end=append)
      assert_equal(result, expected)
      assert result.dtype == expected.dtype
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_isin(self, kind):
      """Compare isin against a slow element-wise reference implementation."""
      def _contained(element, candidates):
          # Plain Python membership test against the flattened candidates.
          candidates = np.asarray(candidates).flatten().tolist()
          return element in candidates

      # Vectorise over the first argument only; positional argument 1 is
      # excluded and therefore handed to the helper unchanged.
      reference = np.vectorize(_contained, otypes=[bool], excluded={1})

      def check(elements, test_elements):
          fast = isin(elements, test_elements, kind=kind)
          slow = reference(elements, test_elements)
          assert_array_equal(fast, slow)

      # multidimensional arrays in both arguments
      cube = np.arange(24).reshape([2, 3, 4])
      grid = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]])
      check(cube, grid)

      # array-likes as both arguments
      check([(9, 8), (7, 6)], (9, 7))

      # zero-d array:
      zero_d = np.array(3)
      for pair in ((zero_d, grid), (cube, zero_d), (zero_d, zero_d)):
          check(*pair)

      # scalar:
      for pair in ((5, grid), (cube, 6), (5, 6)):
          check(*pair)

      # empty array-like:
      if kind != "table":
          # An empty list will become float64,
          # which is invalid for kind="table"
          nothing = []
          for pair in ((nothing, grid), (cube, nothing), (nothing, nothing)):
              check(*pair)

      # empty array with various types:
      for dtype in [bool, np.int64, np.float64]:
          if kind == "table" and dtype == np.float64:
              continue

          if dtype is bool:
              filled = np.array([True, False, False])
          else:
              filled = np.array([10, 20, 30], dtype=dtype)

          hollow = np.array([], dtype=dtype)
          check(hollow, filled)
          check(filled, hollow)
          check(hollow, hollow)
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_isin_additional(self, kind):
      """Exercise isin with lists, arrays, duplicates and two sizes of b."""
      repeated = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])

      # Two different sizes are used for the second argument in order to
      # test the two different paths in isin().
      for mult in (1, 10):
          # Plain lists (no np.array) to make sure lists are handled.
          pool = [2, 4, 3, 1, 5] * mult
          list_cases = [
              ([5, 7, 1, 2], [True, False, True, True]),
              ([8, 7, 1, 2], [False, False, True, True]),
              ([4, 7, 1, 8], [True, False, True, False]),
          ]
          for elements, wanted in list_cases:
              found = isin(elements, pool, assume_unique=True, kind=kind)
              assert_array_equal(found, np.array(wanted))

          pool = [2, 3, 4] * mult
          wanted = [False, True, False, True, True, True, True, True, True,
                    False, True, False, False, False]
          assert_array_equal(isin(repeated, pool, kind=kind), wanted)

          pool = pool + [5, 5, 4] * mult
          wanted = [True, True, True, True, True, True, True, True, True,
                    True, True, False, True, True]
          assert_array_equal(isin(repeated, pool, kind=kind), wanted)

          # (elements, base of the second argument, expected mask)
          array_cases = [
              ([5, 7, 1, 2], [2, 4, 3, 1, 5],
               [True, False, True, True]),
              ([5, 7, 1, 1, 2], [2, 4, 3, 3, 1, 5],
               [True, False, True, True, True]),
              ([5, 5], [2, 2],
               [False, False]),
          ]
          for elements, base, wanted in array_cases:
              found = isin(np.array(elements), np.array(base * mult),
                           kind=kind)
              assert_array_equal(found, np.array(wanted))

      # single-element arrays
      found = isin(np.array([5]), np.array([2]), kind=kind)
      assert_array_equal(found, np.array([False]))

      if kind in {None, "sort"}:
          assert_array_equal(isin([], [], kind=kind), [])
  285. def test_isin_char_array(self):
  286. a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
  287. b = np.array(['a', 'c'])
  288. ec = np.array([True, False, True, False, False, True, False, False])
  289. c = isin(a, b)
  290. assert_array_equal(c, ec)
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_isin_invert(self, kind):
      "Test isin's invert parameter"
      values = [5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5]

      def check(elements, test_elements):
          # invert=True must equal the negation of the regular result.
          assert_array_equal(
              np.invert(isin(elements, test_elements, kind=kind)),
              isin(elements, test_elements, invert=True, kind=kind))

      # Two different sizes are used for the second argument in order to
      # test the two different paths in isin().
      for mult in (1, 10):
          check(np.array(values), [2, 3, 4] * mult)

      # float:
      if kind in {None, "sort"}:
          for mult in (1, 10):
              check(np.array(values, dtype=np.float32),
                    np.array([2, 3, 4] * mult, dtype=np.float32))
  310. def test_isin_hit_alternate_algorithm(self):
  311. """Hit the standard isin code with integers"""
  312. # Need extreme range to hit standard code
  313. # This hits it without the use of kind='table'
  314. a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64)
  315. b = np.array([2, 3, 4, 1e9], dtype=np.int64)
  316. expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool)
  317. assert_array_equal(expected, isin(a, b))
  318. assert_array_equal(np.invert(expected), isin(a, b, invert=True))
  319. a = np.array([5, 7, 1, 2], dtype=np.int64)
  320. b = np.array([2, 4, 3, 1, 5, 1e9], dtype=np.int64)
  321. ec = np.array([True, False, True, True])
  322. c = isin(a, b, assume_unique=True)
  323. assert_array_equal(c, ec)
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_isin_boolean(self, kind):
      """Test that isin works for boolean input"""
      flags = np.array([True, False])
      all_false = np.array([False, False, False])
      mask = np.array([False, True])

      assert_array_equal(mask, isin(flags, all_false, kind=kind))
      assert_array_equal(np.invert(mask),
                         isin(flags, all_false, invert=True, kind=kind))
  @pytest.mark.parametrize("kind", [None, "sort"])
  def test_isin_timedelta(self, kind):
      """Test that isin works for timedelta input"""
      rng = np.random.RandomState(0)
      first = rng.randint(0, 100, size=10)
      second = rng.randint(0, 100, size=10)

      # The integer result is the reference for the timedelta64 result.
      reference = isin(first, second)
      got = isin(first.astype("timedelta64[s]"),
                 second.astype("timedelta64[s]"),
                 kind=kind)
      assert_array_equal(reference, got)
  def test_isin_table_timedelta_fails(self):
      """Check kind="table" raises ValueError for timedelta64 input."""
      deltas = np.array([0, 1, 2], dtype="timedelta64[s]")
      # The same array is used for both arguments.
      with pytest.raises(ValueError):
          isin(deltas, deltas, kind="table")
  @pytest.mark.parametrize(
      "dtype1,dtype2",
      [
          (np.int8, np.int16),
          (np.int16, np.int8),
          (np.uint8, np.uint16),
          (np.uint16, np.uint8),
          (np.uint8, np.int16),
          (np.int16, np.uint8),
          (np.uint64, np.int64),
      ]
  )
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_isin_mixed_dtype(self, dtype1, dtype2, kind):
      """Test that isin works as expected for mixed dtype input."""
      elements = np.array([0, 0, 1, 1], dtype=dtype1)

      # Pick test values that span the range representable by dtype2.
      if np.issubdtype(dtype2, np.signedinteger):
          test_values = [-128, 0, 127]
      else:
          test_values = [127, 0, 255]
      test_elements = np.array(test_values, dtype=dtype2)

      # Only the int16/int8 pairing is expected to fail with kind="table".
      table_fails = (kind == "table"
                     and dtype1 == np.int16
                     and dtype2 == np.int8)
      if not table_fails:
          wanted = np.array([True, True, False, False])
          assert_array_equal(isin(elements, test_elements, kind=kind), wanted)
          return

      with pytest.raises(RuntimeError, match="exceed the maximum"):
          isin(elements, test_elements, kind=kind)
  @pytest.mark.parametrize("data", [
      np.array([2**63, 2**63 + 1], dtype=np.uint64),
      np.array([-2**62, -2**62 - 1], dtype=np.int64),
  ])
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_isin_mixed_huge_vals(self, kind, data):
      """Test values outside intp range (negative ones if 32bit system)"""
      needle = data[1]
      assert_array_equal(np.isin(data, needle, kind=kind), [False, True])

      # Also check that nothing weird happens for values that can't
      # possibly be in range: the int32 cast gives clearly different values.
      narrowed = data.astype(np.int32)
      assert_array_equal(np.isin(narrowed, needle, kind=kind), [False, False])
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_isin_mixed_boolean(self, kind):
      """Test that isin works as expected for bool/int input."""
      for dtype in np.typecodes["AllInteger"]:
          flags = np.array([True, False, False], dtype=bool)
          zeros = np.array([0, 0, 0, 0], dtype=dtype)

          # booleans looked up in integer zeros
          wanted = np.array([False, True, True], dtype=bool)
          assert_array_equal(isin(flags, zeros, kind=kind), wanted)

          # integer zeros looked up in booleans
          wanted = np.array([True, True, True, True], dtype=bool)
          assert_array_equal(isin(zeros, flags, kind=kind), wanted)
  405. def test_isin_first_array_is_object(self):
  406. ar1 = [None]
  407. ar2 = np.array([1] * 10)
  408. expected = np.array([False])
  409. result = np.isin(ar1, ar2)
  410. assert_array_equal(result, expected)
  411. def test_isin_second_array_is_object(self):
  412. ar1 = 1
  413. ar2 = np.array([None] * 10)
  414. expected = np.array([False])
  415. result = np.isin(ar1, ar2)
  416. assert_array_equal(result, expected)
  417. def test_isin_both_arrays_are_object(self):
  418. ar1 = [None]
  419. ar2 = np.array([None] * 10)
  420. expected = np.array([True])
  421. result = np.isin(ar1, ar2)
  422. assert_array_equal(result, expected)
  423. def test_isin_both_arrays_have_structured_dtype(self):
  424. # Test arrays of a structured data type containing an integer field
  425. # and a field of dtype `object` allowing for arbitrary Python objects
  426. dt = np.dtype([('field1', int), ('field2', object)])
  427. ar1 = np.array([(1, None)], dtype=dt)
  428. ar2 = np.array([(1, None)] * 10, dtype=dt)
  429. expected = np.array([True])
  430. result = np.isin(ar1, ar2)
  431. assert_array_equal(result, expected)
  432. def test_isin_with_arrays_containing_tuples(self):
  433. ar1 = np.array([(1,), 2], dtype=object)
  434. ar2 = np.array([(1,), 2], dtype=object)
  435. expected = np.array([True, True])
  436. result = np.isin(ar1, ar2)
  437. assert_array_equal(result, expected)
  438. result = np.isin(ar1, ar2, invert=True)
  439. assert_array_equal(result, np.invert(expected))
  440. # An integer is added at the end of the array to make sure
  441. # that the array builder will create the array with tuples
  442. # and after it's created the integer is removed.
  443. # There's a bug in the array constructor that doesn't handle
  444. # tuples properly and adding the integer fixes that.
  445. ar1 = np.array([(1,), (2, 1), 1], dtype=object)
  446. ar1 = ar1[:-1]
  447. ar2 = np.array([(1,), (2, 1), 1], dtype=object)
  448. ar2 = ar2[:-1]
  449. expected = np.array([True, True])
  450. result = np.isin(ar1, ar2)
  451. assert_array_equal(result, expected)
  452. result = np.isin(ar1, ar2, invert=True)
  453. assert_array_equal(result, np.invert(expected))
  454. ar1 = np.array([(1,), (2, 3), 1], dtype=object)
  455. ar1 = ar1[:-1]
  456. ar2 = np.array([(1,), 2], dtype=object)
  457. expected = np.array([True, False])
  458. result = np.isin(ar1, ar2)
  459. assert_array_equal(result, expected)
  460. result = np.isin(ar1, ar2, invert=True)
  461. assert_array_equal(result, np.invert(expected))
  462. def test_isin_errors(self):
  463. """Test that isin raises expected errors."""
  464. # Error 1: `kind` is not one of 'sort' 'table' or None.
  465. ar1 = np.array([1, 2, 3, 4, 5])
  466. ar2 = np.array([2, 4, 6, 8, 10])
  467. assert_raises(ValueError, isin, ar1, ar2, kind='quicksort')
  468. # Error 2: `kind="table"` does not work for non-integral arrays.
  469. obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object)
  470. obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object)
  471. assert_raises(ValueError, isin, obj_ar1, obj_ar2, kind='table')
  472. for dtype in [np.int32, np.int64]:
  473. ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype)
  474. # The range of this array will overflow:
  475. overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype)
  476. # Error 3: `kind="table"` will trigger a runtime error
  477. # if there is an integer overflow expected when computing the
  478. # range of ar2
  479. assert_raises(
  480. RuntimeError,
  481. isin, ar1, overflow_ar2, kind='table'
  482. )
  483. # Non-error: `kind=None` will *not* trigger a runtime error
  484. # if there is an integer overflow, it will switch to
  485. # the `sort` algorithm.
  486. result = np.isin(ar1, overflow_ar2, kind=None)
  487. assert_array_equal(result, [True] + [False] * 4)
  488. result = np.isin(ar1, overflow_ar2, kind='sort')
  489. assert_array_equal(result, [True] + [False] * 4)
  490. def test_union1d(self):
  491. a = np.array([5, 4, 7, 1, 2])
  492. b = np.array([2, 4, 3, 3, 2, 1, 5])
  493. ec = np.array([1, 2, 3, 4, 5, 7])
  494. c = union1d(a, b)
  495. assert_array_equal(c, ec)
  496. # Tests gh-10340, arguments to union1d should be
  497. # flattened if they are not already 1D
  498. x = np.array([[0, 1, 2], [3, 4, 5]])
  499. y = np.array([0, 1, 2, 3, 4])
  500. ez = np.array([0, 1, 2, 3, 4, 5])
  501. z = union1d(x, y)
  502. assert_array_equal(z, ez)
  503. assert_array_equal([], union1d([], []))
  504. def test_setdiff1d(self):
  505. a = np.array([6, 5, 4, 7, 1, 2, 7, 4])
  506. b = np.array([2, 4, 3, 3, 2, 1, 5])
  507. ec = np.array([6, 7])
  508. c = setdiff1d(a, b)
  509. assert_array_equal(c, ec)
  510. a = np.arange(21)
  511. b = np.arange(19)
  512. ec = np.array([19, 20])
  513. c = setdiff1d(a, b)
  514. assert_array_equal(c, ec)
  515. assert_array_equal([], setdiff1d([], []))
  516. a = np.array((), np.uint32)
  517. assert_equal(setdiff1d(a, []).dtype, np.uint32)
  518. def test_setdiff1d_unique(self):
  519. a = np.array([3, 2, 1])
  520. b = np.array([7, 5, 2])
  521. expected = np.array([3, 1])
  522. actual = setdiff1d(a, b, assume_unique=True)
  523. assert_equal(actual, expected)
  524. def test_setdiff1d_char_array(self):
  525. a = np.array(['a', 'b', 'c'])
  526. b = np.array(['a', 'b', 's'])
  527. assert_array_equal(setdiff1d(a, b), np.array(['c']))
  528. def test_manyways(self):
  529. a = np.array([5, 7, 1, 2, 8])
  530. b = np.array([9, 8, 2, 4, 3, 1, 5])
  531. c1 = setxor1d(a, b)
  532. aux1 = intersect1d(a, b)
  533. aux2 = union1d(a, b)
  534. c2 = setdiff1d(aux2, aux1)
  535. assert_array_equal(c1, c2)
  536. class TestUnique:
  537. def check_all(self, a, b, i1, i2, c, dt):
  538. base_msg = 'check {0} failed for type {1}'
  539. msg = base_msg.format('values', dt)
  540. v = unique(a)
  541. assert_array_equal(v, b, msg)
  542. assert type(v) == type(b)
  543. msg = base_msg.format('return_index', dt)
  544. v, j = unique(a, True, False, False)
  545. assert_array_equal(v, b, msg)
  546. assert_array_equal(j, i1, msg)
  547. assert type(v) == type(b)
  548. msg = base_msg.format('return_inverse', dt)
  549. v, j = unique(a, False, True, False)
  550. assert_array_equal(v, b, msg)
  551. assert_array_equal(j, i2, msg)
  552. assert type(v) == type(b)
  553. msg = base_msg.format('return_counts', dt)
  554. v, j = unique(a, False, False, True)
  555. assert_array_equal(v, b, msg)
  556. assert_array_equal(j, c, msg)
  557. assert type(v) == type(b)
  558. msg = base_msg.format('return_index and return_inverse', dt)
  559. v, j1, j2 = unique(a, True, True, False)
  560. assert_array_equal(v, b, msg)
  561. assert_array_equal(j1, i1, msg)
  562. assert_array_equal(j2, i2, msg)
  563. assert type(v) == type(b)
  564. msg = base_msg.format('return_index and return_counts', dt)
  565. v, j1, j2 = unique(a, True, False, True)
  566. assert_array_equal(v, b, msg)
  567. assert_array_equal(j1, i1, msg)
  568. assert_array_equal(j2, c, msg)
  569. assert type(v) == type(b)
  570. msg = base_msg.format('return_inverse and return_counts', dt)
  571. v, j1, j2 = unique(a, False, True, True)
  572. assert_array_equal(v, b, msg)
  573. assert_array_equal(j1, i2, msg)
  574. assert_array_equal(j2, c, msg)
  575. assert type(v) == type(b)
  576. msg = base_msg.format(('return_index, return_inverse '
  577. 'and return_counts'), dt)
  578. v, j1, j2, j3 = unique(a, True, True, True)
  579. assert_array_equal(v, b, msg)
  580. assert_array_equal(j1, i1, msg)
  581. assert_array_equal(j2, i2, msg)
  582. assert_array_equal(j3, c, msg)
  583. assert type(v) == type(b)
  def get_types(self):
      """Return a new list of dtype specifiers: integer and float
      typecodes followed by day-resolution datetime64 and timedelta64."""
      return [
          *np.typecodes['AllInteger'],
          *np.typecodes['AllFloat'],
          'datetime64[D]',
          'timedelta64[D]',
      ]
  591. def test_unique_1d(self):
  592. a = [5, 7, 1, 2, 1, 5, 7] * 10
  593. b = [1, 2, 5, 7]
  594. i1 = [2, 3, 0, 1]
  595. i2 = [2, 3, 0, 1, 0, 2, 3] * 10
  596. c = np.multiply([2, 1, 2, 2], 10)
  597. # test for numeric arrays
  598. types = self.get_types()
  599. for dt in types:
  600. aa = np.array(a, dt)
  601. bb = np.array(b, dt)
  602. self.check_all(aa, bb, i1, i2, c, dt)
  603. # test for object arrays
  604. dt = 'O'
  605. aa = np.empty(len(a), dt)
  606. aa[:] = a
  607. bb = np.empty(len(b), dt)
  608. bb[:] = b
  609. self.check_all(aa, bb, i1, i2, c, dt)
  610. # test for structured arrays
  611. dt = [('', 'i'), ('', 'i')]
  612. aa = np.array(list(zip(a, a)), dt)
  613. bb = np.array(list(zip(b, b)), dt)
  614. self.check_all(aa, bb, i1, i2, c, dt)
  615. # test for ticket #2799
  616. aa = [1. + 0.j, 1 - 1.j, 1]
  617. assert_array_equal(
  618. np.sort(np.unique(aa)),
  619. [1. - 1.j, 1.],
  620. )
  621. # test for ticket #4785
  622. a = [(1, 2), (1, 2), (2, 3)]
  623. unq = [1, 2, 3]
  624. inv = [[0, 1], [0, 1], [1, 2]]
  625. a1 = unique(a)
  626. assert_array_equal(a1, unq)
  627. a2, a2_inv = unique(a, return_inverse=True)
  628. assert_array_equal(a2, unq)
  629. assert_array_equal(a2_inv, inv)
  630. # test for chararrays with return_inverse (gh-5099)
  631. a = np.char.chararray(5)
  632. a[...] = ''
  633. a2, a2_inv = np.unique(a, return_inverse=True)
  634. assert_array_equal(a2_inv, np.zeros(5))
  635. # test for ticket #9137
  636. a = []
  637. a1_idx = np.unique(a, return_index=True)[1]
  638. a2_inv = np.unique(a, return_inverse=True)[1]
  639. a3_idx, a3_inv = np.unique(a, return_index=True,
  640. return_inverse=True)[1:]
  641. assert_equal(a1_idx.dtype, np.intp)
  642. assert_equal(a2_inv.dtype, np.intp)
  643. assert_equal(a3_idx.dtype, np.intp)
  644. assert_equal(a3_inv.dtype, np.intp)
  645. # test for ticket 2111 - float
  646. a = [2.0, np.nan, 1.0, np.nan]
  647. ua = [1.0, 2.0, np.nan]
  648. ua_idx = [2, 0, 1]
  649. ua_inv = [1, 2, 0, 2]
  650. ua_cnt = [1, 1, 2]
  651. # order of unique values is not guaranteed
  652. assert_equal(np.sort(np.unique(a)), np.sort(ua))
  653. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  654. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  655. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  656. # test for ticket 2111 - complex
  657. a = [2.0 - 1j, np.nan, 1.0 + 1j, complex(0.0, np.nan), complex(1.0, np.nan)]
  658. ua = [1.0 + 1j, 2.0 - 1j, complex(0.0, np.nan)]
  659. ua_idx = [2, 0, 3]
  660. ua_inv = [1, 2, 0, 2, 2]
  661. ua_cnt = [1, 1, 3]
  662. # order of unique values is not guaranteed
  663. assert_equal(np.sort(np.unique(a)), np.sort(ua))
  664. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  665. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  666. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  667. # test for ticket 2111 - datetime64
  668. nat = np.datetime64('nat')
  669. a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat]
  670. ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat]
  671. ua_idx = [2, 0, 1]
  672. ua_inv = [1, 2, 0, 2]
  673. ua_cnt = [1, 1, 2]
  674. assert_equal(np.unique(a), ua)
  675. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  676. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  677. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  678. # test for ticket 2111 - timedelta
  679. nat = np.timedelta64('nat')
  680. a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat]
  681. ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat]
  682. ua_idx = [2, 0, 1]
  683. ua_inv = [1, 2, 0, 2]
  684. ua_cnt = [1, 1, 2]
  685. assert_equal(np.unique(a), ua)
  686. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  687. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  688. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  689. # test for gh-19300
  690. all_nans = [np.nan] * 4
  691. ua = [np.nan]
  692. ua_idx = [0]
  693. ua_inv = [0, 0, 0, 0]
  694. ua_cnt = [4]
  695. assert_equal(np.unique(all_nans), ua)
  696. assert_equal(np.unique(all_nans, return_index=True), (ua, ua_idx))
  697. assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv))
  698. assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt))
  def test_unique_zero_sized(self):
      """Run the shared ``check_all`` checks on zero-sized arrays."""
      # Types supplied by ``self.get_types()``, extended with the 'S' and
      # 'U' string type codes.
      dtypes = self.get_types()
      dtypes.extend(['S', 'U'])

      for dt in dtypes:
          # Two independent empty arrays of the dtype under test ...
          data, expected = (np.array([], dtype=dt) for _ in range(2))
          # ... and three independent empty int64 arrays.
          idx_a, idx_b, cnt = (
              np.array([], dtype=np.int64) for _ in range(3)
          )
          self.check_all(data, expected, idx_a, idx_b, cnt, dt)
  def test_unique_subclass(self):
      """Run the shared ``check_all`` checks on ``ndarray`` subclass inputs."""

      class Subclass(np.ndarray):
          pass

      def wrap(arr, dt):
          # Build a Subclass instance on top of ``arr``'s existing buffer.
          return Subclass(arr.shape, dtype=dt, buffer=arr)

      raw = [5, 7, 1, 2, 1, 5, 7] * 10
      distinct = [1, 2, 5, 7]
      # Position of the first occurrence of each distinct value in ``raw``.
      first_idx = [2, 3, 0, 1]
      # For every element of ``raw``, its position in ``distinct``.
      inverse_idx = [2, 3, 0, 1, 0, 2, 3] * 10
      # Number of occurrences of each distinct value in ``raw``.
      counts = 10 * np.array([2, 1, 2, 2])

      # test for numeric arrays
      for dt in self.get_types():
          values = np.array(raw, dtype=dt)
          expected = np.array(distinct, dtype=dt)
          self.check_all(wrap(values, dt), wrap(expected, dt),
                         first_idx, inverse_idx, counts, dt)
  724. def test_unique_byte_string_hash_based(self):
  725. # test for byte string arrays
  726. arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape']
  727. unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape']
  728. a1 = unique(arr, sorted=False)
  729. # the result varies depending on the impl of std::unordered_set,
  730. # so we check them by sorting
  731. assert_array_equal(sorted(a1.tolist()), unq_sorted)
  732. def test_unique_unicode_string_hash_based(self):
  733. # test for unicode string arrays
  734. arr = [
  735. 'café', 'cafe', 'café', 'naïve', 'naive',
  736. 'résumé', 'naïve', 'resume', 'résumé',
  737. ]
  738. unq_sorted = ['cafe', 'café', 'naive', 'naïve', 'resume', 'résumé']
  739. a1 = unique(arr, sorted=False)
  740. # the result varies depending on the impl of std::unordered_set,
  741. # so we check them by sorting
  742. assert_array_equal(sorted(a1.tolist()), unq_sorted)
  743. def test_unique_vstring_hash_based_equal_nan(self):
  744. # test for unicode and nullable string arrays (equal_nan=True)
  745. a = np.array([
  746. # short strings
  747. 'straße',
  748. None,
  749. 'strasse',
  750. 'straße',
  751. None,
  752. 'niño',
  753. 'nino',
  754. 'élève',
  755. 'eleve',
  756. 'niño',
  757. 'élève',
  758. # medium strings
  759. 'b' * 20,
  760. 'ß' * 30,
  761. None,
  762. 'é' * 30,
  763. 'e' * 20,
  764. 'ß' * 30,
  765. 'n' * 30,
  766. 'ñ' * 20,
  767. None,
  768. 'e' * 20,
  769. 'ñ' * 20,
  770. # long strings
  771. 'b' * 300,
  772. 'ß' * 400,
  773. None,
  774. 'é' * 400,
  775. 'e' * 300,
  776. 'ß' * 400,
  777. 'n' * 400,
  778. 'ñ' * 300,
  779. None,
  780. 'e' * 300,
  781. 'ñ' * 300,
  782. ],
  783. dtype=StringDType(na_object=None)
  784. )
  785. unq_sorted_wo_none = [
  786. 'b' * 20,
  787. 'b' * 300,
  788. 'e' * 20,
  789. 'e' * 300,
  790. 'eleve',
  791. 'nino',
  792. 'niño',
  793. 'n' * 30,
  794. 'n' * 400,
  795. 'strasse',
  796. 'straße',
  797. 'ß' * 30,
  798. 'ß' * 400,
  799. 'élève',
  800. 'é' * 30,
  801. 'é' * 400,
  802. 'ñ' * 20,
  803. 'ñ' * 300,
  804. ]
  805. a1 = unique(a, sorted=False, equal_nan=True)
  806. # the result varies depending on the impl of std::unordered_set,
  807. # so we check them by sorting
  808. # a1 should have exactly one None
  809. count_none = sum(x is None for x in a1)
  810. assert_equal(count_none, 1)
  811. a1_wo_none = sorted(x for x in a1 if x is not None)
  812. assert_array_equal(a1_wo_none, unq_sorted_wo_none)
  813. def test_unique_vstring_hash_based_not_equal_nan(self):
  814. # test for unicode and nullable string arrays (equal_nan=False)
  815. a = np.array([
  816. # short strings
  817. 'straße',
  818. None,
  819. 'strasse',
  820. 'straße',
  821. None,
  822. 'niño',
  823. 'nino',
  824. 'élève',
  825. 'eleve',
  826. 'niño',
  827. 'élève',
  828. # medium strings
  829. 'b' * 20,
  830. 'ß' * 30,
  831. None,
  832. 'é' * 30,
  833. 'e' * 20,
  834. 'ß' * 30,
  835. 'n' * 30,
  836. 'ñ' * 20,
  837. None,
  838. 'e' * 20,
  839. 'ñ' * 20,
  840. # long strings
  841. 'b' * 300,
  842. 'ß' * 400,
  843. None,
  844. 'é' * 400,
  845. 'e' * 300,
  846. 'ß' * 400,
  847. 'n' * 400,
  848. 'ñ' * 300,
  849. None,
  850. 'e' * 300,
  851. 'ñ' * 300,
  852. ],
  853. dtype=StringDType(na_object=None)
  854. )
  855. unq_sorted_wo_none = [
  856. 'b' * 20,
  857. 'b' * 300,
  858. 'e' * 20,
  859. 'e' * 300,
  860. 'eleve',
  861. 'nino',
  862. 'niño',
  863. 'n' * 30,
  864. 'n' * 400,
  865. 'strasse',
  866. 'straße',
  867. 'ß' * 30,
  868. 'ß' * 400,
  869. 'élève',
  870. 'é' * 30,
  871. 'é' * 400,
  872. 'ñ' * 20,
  873. 'ñ' * 300,
  874. ]
  875. a1 = unique(a, sorted=False, equal_nan=False)
  876. # the result varies depending on the impl of std::unordered_set,
  877. # so we check them by sorting
  878. # a1 should have exactly one None
  879. count_none = sum(x is None for x in a1)
  880. assert_equal(count_none, 6)
  881. a1_wo_none = sorted(x for x in a1 if x is not None)
  882. assert_array_equal(a1_wo_none, unq_sorted_wo_none)
  883. def test_unique_vstring_errors(self):
  884. a = np.array(
  885. [
  886. 'apple', 'banana', 'apple', None, 'cherry',
  887. 'date', 'banana', 'fig', None, 'grape',
  888. ] * 2,
  889. dtype=StringDType(na_object=None)
  890. )
  891. assert_raises(ValueError, unique, a, equal_nan=False)
  @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])
  def test_unsupported_hash_based(self, arg):
      """Compare ``sorted=False`` with ``sorted=True`` when a ``return_*`` flag is set.

      These currently never use the hash-based solution.  However,
      it seems easier to just allow it.

      When the hash-based solution is added, this test should fail and be
      replaced with something more comprehensive.
      """
      # Unsorted input with several distinct values and duplicates, so that
      # an ordering difference between the two calls would actually show up
      # in the comparison below (a two-element constant input cannot).
      a = np.array([1, 5, 2, 3, 4, 8, 199, 1, 3, 5])
      res_not_sorted = np.unique(a, sorted=False, **{arg: True})
      res_sorted = np.unique(a, sorted=True, **{arg: True})
      # The following should fail without first sorting `res_not_sorted`.
      for arr, expected in zip(res_not_sorted, res_sorted):
          assert_array_equal(arr, expected)
  def test_unique_axis_errors(self):
      """Check the errors raised for unsupported dtypes and invalid axes."""
      # The axis test battery must raise TypeError for an object dtype and
      # for a structured dtype that contains an object field.
      for bad_dtype in (object, [('a', int), ('b', object)]):
          with assert_raises(TypeError):
              self._run_axis_tests(bad_dtype)

      # A 1-D array only has axis 0 / -1; anything beyond is out of bounds.
      for bad_axis in (2, -2):
          with assert_raises(AxisError):
              unique(np.arange(10), axis=bad_axis)
  911. def test_unique_axis_list(self):
  912. msg = "Unique failed on list of lists"
  913. inp = [[0, 1, 0], [0, 1, 0]]
  914. inp_arr = np.asarray(inp)
  915. assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg)
  916. assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)
  def test_unique_axis(self):
      """Run the axis test battery per dtype, then two equality edge cases."""
      dtypes = [
          *np.typecodes['AllInteger'],
          *np.typecodes['AllFloat'],
          'datetime64[D]',
          'timedelta64[D]',
          [('a', int), ('b', int)],
          [('a', int), ('b', float)],
      ]
      for dtype in dtypes:
          self._run_axis_tests(dtype)

      # uint8 values 0..9 reinterpreted as bool: rows that compare equal as
      # booleans are not necessarily identical byte for byte.
      raw_bools = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
      expected_bools = np.array([[False, True], [True, True]], dtype=bool)
      assert_array_equal(unique(raw_bools, axis=0), expected_bools,
                         'Non-bitwise-equal booleans test failed')

      # Rows that differ only in the sign of zero collapse to a single row.
      signed_zeros = np.array(
          [[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]]
      )
      expected_zeros = np.array([[-0.0, 0.0]])
      assert_array_equal(unique(signed_zeros, axis=0), expected_zeros,
                         'Negative zero equality test failed')
  @pytest.mark.parametrize("axis", [0, -1])
  def test_unique_1d_with_axis(self, axis):
      """``unique`` on a 1-D array with an explicit axis returns sorted values."""
      values = np.array([4, 3, 2, 3, 2, 1, 2, 2])
      assert_array_equal(unique(values, axis=axis), [1, 2, 3, 4])
  @pytest.mark.parametrize("axis", [None, 0, -1])
  def test_unique_inverse_with_axis(self, axis):
      """The inverse indices rebuild the input through ``np.take``."""
      x = np.array([[4, 4, 3], [2, 2, 1], [2, 2, 1], [4, 4, 3]])
      uniq, inv = unique(x, return_inverse=True, axis=axis)

      # Without an axis the inverse has the input's dimensionality;
      # with an axis it is one-dimensional.
      if axis is None:
          expected_ndim = x.ndim
      else:
          expected_ndim = 1
      assert_equal(inv.ndim, expected_ndim)

      rebuilt = np.take(uniq, inv, axis=axis)
      assert_array_equal(x, rebuilt)
  946. def test_unique_axis_zeros(self):
  947. # issue 15559
  948. single_zero = np.empty(shape=(2, 0), dtype=np.int8)
  949. uniq, idx, inv, cnt = unique(single_zero, axis=0, return_index=True,
  950. return_inverse=True, return_counts=True)
  951. # there's 1 element of shape (0,) along axis 0
  952. assert_equal(uniq.dtype, single_zero.dtype)
  953. assert_array_equal(uniq, np.empty(shape=(1, 0)))
  954. assert_array_equal(idx, np.array([0]))
  955. assert_array_equal(inv, np.array([0, 0]))
  956. assert_array_equal(cnt, np.array([2]))
  957. # there's 0 elements of shape (2,) along axis 1
  958. uniq, idx, inv, cnt = unique(single_zero, axis=1, return_index=True,
  959. return_inverse=True, return_counts=True)
  960. assert_equal(uniq.dtype, single_zero.dtype)
  961. assert_array_equal(uniq, np.empty(shape=(2, 0)))
  962. assert_array_equal(idx, np.array([]))
  963. assert_array_equal(inv, np.array([]))
  964. assert_array_equal(cnt, np.array([]))
  965. # test a "complicated" shape
  966. shape = (0, 2, 0, 3, 0, 4, 0)
  967. multiple_zeros = np.empty(shape=shape)
  968. for axis in range(len(shape)):
  969. expected_shape = list(shape)
  970. if shape[axis] == 0:
  971. expected_shape[axis] = 0
  972. else:
  973. expected_shape[axis] = 1
  974. assert_array_equal(unique(multiple_zeros, axis=axis),
  975. np.empty(shape=expected_shape))
  976. def test_unique_masked(self):
  977. # issue 8664
  978. x = np.array([64, 0, 1, 2, 3, 63, 63, 0, 0, 0, 1, 2, 0, 63, 0],
  979. dtype='uint8')
  980. y = np.ma.masked_equal(x, 0)
  981. v = np.unique(y)
  982. v2, i, c = np.unique(y, return_index=True, return_counts=True)
  983. msg = 'Unique returned different results when asked for index'
  984. assert_array_equal(v.data, v2.data, msg)
  985. assert_array_equal(v.mask, v2.mask, msg)
  986. def test_unique_sort_order_with_axis(self):
  987. # These tests fail if sorting along axis is done by treating subarrays
  988. # as unsigned byte strings. See gh-10495.
  989. fmt = "sort order incorrect for integer type '%s'"
  990. for dt in 'bhilq':
  991. a = np.array([[-1], [0]], dt)
  992. b = np.unique(a, axis=0)
  993. assert_array_equal(a, b, fmt % dt)
  994. def _run_axis_tests(self, dtype):
  995. data = np.array([[0, 1, 0, 0],
  996. [1, 0, 0, 0],
  997. [0, 1, 0, 0],
  998. [1, 0, 0, 0]]).astype(dtype)
  999. msg = 'Unique with 1d array and axis=0 failed'
  1000. result = np.array([0, 1])
  1001. assert_array_equal(unique(data), result.astype(dtype), msg)
  1002. msg = 'Unique with 2d array and axis=0 failed'
  1003. result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
  1004. assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)
  1005. msg = 'Unique with 2d array and axis=1 failed'
  1006. result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
  1007. assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)
  1008. msg = 'Unique with 3d array and axis=2 failed'
  1009. data3d = np.array([[[1, 1],
  1010. [1, 0]],
  1011. [[0, 1],
  1012. [0, 0]]]).astype(dtype)
  1013. result = np.take(data3d, [1, 0], axis=2)
  1014. assert_array_equal(unique(data3d, axis=2), result, msg)
  1015. uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
  1016. return_inverse=True, return_counts=True)
  1017. msg = "Unique's return_index=True failed with axis=0"
  1018. assert_array_equal(data[idx], uniq, msg)
  1019. msg = "Unique's return_inverse=True failed with axis=0"
  1020. assert_array_equal(np.take(uniq, inv, axis=0), data)
  1021. msg = "Unique's return_counts=True failed with axis=0"
  1022. assert_array_equal(cnt, np.array([2, 2]), msg)
  1023. uniq, idx, inv, cnt = unique(data, axis=1, return_index=True,
  1024. return_inverse=True, return_counts=True)
  1025. msg = "Unique's return_index=True failed with axis=1"
  1026. assert_array_equal(data[:, idx], uniq)
  1027. msg = "Unique's return_inverse=True failed with axis=1"
  1028. assert_array_equal(np.take(uniq, inv, axis=1), data)
  1029. msg = "Unique's return_counts=True failed with axis=1"
  1030. assert_array_equal(cnt, np.array([2, 1, 1]), msg)
  1031. def test_unique_nanequals(self):
  1032. # issue 20326
  1033. a = np.array([1, 1, np.nan, np.nan, np.nan])
  1034. unq = np.unique(a)
  1035. not_unq = np.unique(a, equal_nan=False)
  1036. assert_array_equal(unq, np.array([1, np.nan]))
  1037. assert_array_equal(not_unq, np.array([1, np.nan, np.nan, np.nan]))
  1038. def test_unique_array_api_functions(self):
  1039. arr = np.array(
  1040. [
  1041. np.nan, 1.0, 0.0, 4.0, -np.nan,
  1042. -0.0, 1.0, 3.0, 4.0, np.nan,
  1043. 5.0, -0.0, 1.0, -np.nan, 0.0,
  1044. ],
  1045. )
  1046. for res_unique_array_api, res_unique in [
  1047. (
  1048. np.unique_values(arr),
  1049. np.unique(arr, equal_nan=False)
  1050. ),
  1051. (
  1052. np.unique_counts(arr),
  1053. np.unique(arr, return_counts=True, equal_nan=False)
  1054. ),
  1055. (
  1056. np.unique_inverse(arr),
  1057. np.unique(arr, return_inverse=True, equal_nan=False)
  1058. ),
  1059. (
  1060. np.unique_all(arr),
  1061. np.unique(
  1062. arr,
  1063. return_index=True,
  1064. return_inverse=True,
  1065. return_counts=True,
  1066. equal_nan=False
  1067. )
  1068. )
  1069. ]:
  1070. assert len(res_unique_array_api) == len(res_unique)
  1071. if not isinstance(res_unique_array_api, tuple):
  1072. res_unique_array_api = (res_unique_array_api,)
  1073. if not isinstance(res_unique, tuple):
  1074. res_unique = (res_unique,)
  1075. for actual, expected in zip(res_unique_array_api, res_unique):
  1076. # Order of output is not guaranteed
  1077. assert_equal(np.sort(actual), np.sort(expected))
  1078. def test_unique_inverse_shape(self):
  1079. # Regression test for https://github.com/numpy/numpy/issues/25552
  1080. arr = np.array([[1, 2, 3], [2, 3, 1]])
  1081. expected_values, expected_inverse = np.unique(arr, return_inverse=True)
  1082. expected_inverse = expected_inverse.reshape(arr.shape)
  1083. for func in np.unique_inverse, np.unique_all:
  1084. result = func(arr)
  1085. assert_array_equal(expected_values, result.values)
  1086. assert_array_equal(expected_inverse, result.inverse_indices)
  1087. assert_array_equal(arr, result.values[result.inverse_indices])
  @pytest.mark.parametrize(
      'data',
      [[[1, 1, 1],
        [1, 1, 1]],
       [1, 3, 2],
       1],
  )
  @pytest.mark.parametrize('transpose', [False, True])
  @pytest.mark.parametrize('dtype', [np.int32, np.float64])
  def test_unique_with_matrix(self, data, transpose, dtype):
      """``unique`` on an ``np.matrix`` matches ``unique`` on its array form."""
      mat = np.matrix(data).astype(dtype)
      mat = mat.T if transpose else mat

      from_matrix = np.unique(mat)
      from_array = np.unique(np.asarray(mat))
      # strict=True is passed through to the comparison unchanged.
      assert_array_equal(from_matrix, from_array, strict=True)
  1104. def test_unique_axis0_equal_nan_on_1d_array(self):
  1105. # Test Issue #29336
  1106. arr1d = np.array([np.nan, 0, 0, np.nan])
  1107. expected = np.array([0., np.nan])
  1108. result = np.unique(arr1d, axis=0, equal_nan=True)
  1109. assert_array_equal(result, expected)
  1110. def test_unique_axis_minus1_eq_on_1d_array(self):
  1111. arr1d = np.array([np.nan, 0, 0, np.nan])
  1112. expected = np.array([0., np.nan])
  1113. result = np.unique(arr1d, axis=-1, equal_nan=True)
  1114. assert_array_equal(result, expected)
  def test_unique_axis_float_raises_typeerror(self):
      """A float ``axis`` is rejected with a ``TypeError``."""
      values = np.array([np.nan, 0, 0, np.nan])
      expected_error = pytest.raises(
          TypeError, match="integer argument expected"
      )
      with expected_error:
          np.unique(values, axis=0.0, equal_nan=False)
  @pytest.mark.parametrize('dt', [np.dtype('F'), np.dtype('D')])
  @pytest.mark.parametrize(
      'values',
      [
          [complex(0.0, -1), complex(-0.0, -1), 0],
          [-200, complex(-200, -0.0), -1],
          [-25, 3, -5j, complex(-25, -0.0), 3j],
      ],
  )
  def test_unique_complex_signed_zeros(self, dt, values):
      """Complex values differing only in the sign of a zero part are merged."""
      # Each parameter set contains exactly one pair that differs only in
      # the sign of a zero component, so one entry is expected to drop out.
      distinct = np.unique(np.array(values, dtype=dt))
      assert distinct.shape[0] == len(values) - 1