defchararray.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414
  1. """
  2. This module contains a set of functions for vectorized string
  3. operations and methods.
  4. .. note::
  5. The `chararray` class exists for backwards compatibility with
  6. Numarray, it is not recommended for new development. Starting from numpy
  7. 1.4, if one needs arrays of strings, it is recommended to use arrays of
  8. `dtype` `object_`, `bytes_` or `str_`, and use the free functions
  9. in the `numpy.char` module for fast vectorized string operations.
  10. Some methods will only be available if the corresponding string method is
  11. available in your version of Python.
  12. The preferred alias for `defchararray` is `numpy.char`.
  13. """
  14. import functools
  15. import numpy as np
  16. from .._utils import set_module
  17. from .numerictypes import bytes_, str_, character
  18. from .numeric import ndarray, array as narray, asarray as asnarray
  19. from numpy._core.multiarray import compare_chararrays
  20. from numpy._core import overrides
  21. from numpy.strings import *
  22. from numpy.strings import (
  23. multiply as strings_multiply,
  24. partition as strings_partition,
  25. rpartition as strings_rpartition,
  26. )
  27. from numpy._core.strings import (
  28. _split as split,
  29. _rsplit as rsplit,
  30. _splitlines as splitlines,
  31. _join as join,
  32. )
# Names exported by ``from <this module> import *``.  The module docstring
# states that the preferred alias for this module is `numpy.char`.
__all__ = [
    'equal', 'not_equal', 'greater_equal', 'less_equal',
    'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
    'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
    'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
    'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
    'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
    'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
    'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
    'array', 'asarray', 'compare_chararrays', 'chararray'
    ]


# `overrides.array_function_dispatch` with the ``module`` keyword pre-bound
# to 'numpy.char'; used as the decorator for the comparison functions below.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy.char')
  46. def _binary_op_dispatcher(x1, x2):
  47. return (x1, x2)
  48. @array_function_dispatch(_binary_op_dispatcher)
  49. def equal(x1, x2):
  50. """
  51. Return (x1 == x2) element-wise.
  52. Unlike `numpy.equal`, this comparison is performed by first
  53. stripping whitespace characters from the end of the string. This
  54. behavior is provided for backward-compatibility with numarray.
  55. Parameters
  56. ----------
  57. x1, x2 : array_like of str or unicode
  58. Input arrays of the same shape.
  59. Returns
  60. -------
  61. out : ndarray
  62. Output array of bools.
  63. Examples
  64. --------
  65. >>> import numpy as np
  66. >>> y = "aa "
  67. >>> x = "aa"
  68. >>> np.char.equal(x, y)
  69. array(True)
  70. See Also
  71. --------
  72. not_equal, greater_equal, less_equal, greater, less
  73. """
  74. return compare_chararrays(x1, x2, '==', True)
  75. @array_function_dispatch(_binary_op_dispatcher)
  76. def not_equal(x1, x2):
  77. """
  78. Return (x1 != x2) element-wise.
  79. Unlike `numpy.not_equal`, this comparison is performed by first
  80. stripping whitespace characters from the end of the string. This
  81. behavior is provided for backward-compatibility with numarray.
  82. Parameters
  83. ----------
  84. x1, x2 : array_like of str or unicode
  85. Input arrays of the same shape.
  86. Returns
  87. -------
  88. out : ndarray
  89. Output array of bools.
  90. See Also
  91. --------
  92. equal, greater_equal, less_equal, greater, less
  93. Examples
  94. --------
  95. >>> import numpy as np
  96. >>> x1 = np.array(['a', 'b', 'c'])
  97. >>> np.char.not_equal(x1, 'b')
  98. array([ True, False, True])
  99. """
  100. return compare_chararrays(x1, x2, '!=', True)
  101. @array_function_dispatch(_binary_op_dispatcher)
  102. def greater_equal(x1, x2):
  103. """
  104. Return (x1 >= x2) element-wise.
  105. Unlike `numpy.greater_equal`, this comparison is performed by
  106. first stripping whitespace characters from the end of the string.
  107. This behavior is provided for backward-compatibility with
  108. numarray.
  109. Parameters
  110. ----------
  111. x1, x2 : array_like of str or unicode
  112. Input arrays of the same shape.
  113. Returns
  114. -------
  115. out : ndarray
  116. Output array of bools.
  117. See Also
  118. --------
  119. equal, not_equal, less_equal, greater, less
  120. Examples
  121. --------
  122. >>> import numpy as np
  123. >>> x1 = np.array(['a', 'b', 'c'])
  124. >>> np.char.greater_equal(x1, 'b')
  125. array([False, True, True])
  126. """
  127. return compare_chararrays(x1, x2, '>=', True)
  128. @array_function_dispatch(_binary_op_dispatcher)
  129. def less_equal(x1, x2):
  130. """
  131. Return (x1 <= x2) element-wise.
  132. Unlike `numpy.less_equal`, this comparison is performed by first
  133. stripping whitespace characters from the end of the string. This
  134. behavior is provided for backward-compatibility with numarray.
  135. Parameters
  136. ----------
  137. x1, x2 : array_like of str or unicode
  138. Input arrays of the same shape.
  139. Returns
  140. -------
  141. out : ndarray
  142. Output array of bools.
  143. See Also
  144. --------
  145. equal, not_equal, greater_equal, greater, less
  146. Examples
  147. --------
  148. >>> import numpy as np
  149. >>> x1 = np.array(['a', 'b', 'c'])
  150. >>> np.char.less_equal(x1, 'b')
  151. array([ True, True, False])
  152. """
  153. return compare_chararrays(x1, x2, '<=', True)
  154. @array_function_dispatch(_binary_op_dispatcher)
  155. def greater(x1, x2):
  156. """
  157. Return (x1 > x2) element-wise.
  158. Unlike `numpy.greater`, this comparison is performed by first
  159. stripping whitespace characters from the end of the string. This
  160. behavior is provided for backward-compatibility with numarray.
  161. Parameters
  162. ----------
  163. x1, x2 : array_like of str or unicode
  164. Input arrays of the same shape.
  165. Returns
  166. -------
  167. out : ndarray
  168. Output array of bools.
  169. See Also
  170. --------
  171. equal, not_equal, greater_equal, less_equal, less
  172. Examples
  173. --------
  174. >>> import numpy as np
  175. >>> x1 = np.array(['a', 'b', 'c'])
  176. >>> np.char.greater(x1, 'b')
  177. array([False, False, True])
  178. """
  179. return compare_chararrays(x1, x2, '>', True)
  180. @array_function_dispatch(_binary_op_dispatcher)
  181. def less(x1, x2):
  182. """
  183. Return (x1 < x2) element-wise.
  184. Unlike `numpy.greater`, this comparison is performed by first
  185. stripping whitespace characters from the end of the string. This
  186. behavior is provided for backward-compatibility with numarray.
  187. Parameters
  188. ----------
  189. x1, x2 : array_like of str or unicode
  190. Input arrays of the same shape.
  191. Returns
  192. -------
  193. out : ndarray
  194. Output array of bools.
  195. See Also
  196. --------
  197. equal, not_equal, greater_equal, less_equal, greater
  198. Examples
  199. --------
  200. >>> import numpy as np
  201. >>> x1 = np.array(['a', 'b', 'c'])
  202. >>> np.char.less(x1, 'b')
  203. array([True, False, False])
  204. """
  205. return compare_chararrays(x1, x2, '<', True)
  206. @set_module("numpy.char")
  207. def multiply(a, i):
  208. """
  209. Return (a * i), that is string multiple concatenation,
  210. element-wise.
  211. Values in ``i`` of less than 0 are treated as 0 (which yields an
  212. empty string).
  213. Parameters
  214. ----------
  215. a : array_like, with `np.bytes_` or `np.str_` dtype
  216. i : array_like, with any integer dtype
  217. Returns
  218. -------
  219. out : ndarray
  220. Output array of str or unicode, depending on input types
  221. Notes
  222. -----
  223. This is a thin wrapper around np.strings.multiply that raises
  224. `ValueError` when ``i`` is not an integer. It only
  225. exists for backwards-compatibility.
  226. Examples
  227. --------
  228. >>> import numpy as np
  229. >>> a = np.array(["a", "b", "c"])
  230. >>> np.strings.multiply(a, 3)
  231. array(['aaa', 'bbb', 'ccc'], dtype='<U3')
  232. >>> i = np.array([1, 2, 3])
  233. >>> np.strings.multiply(a, i)
  234. array(['a', 'bb', 'ccc'], dtype='<U3')
  235. >>> np.strings.multiply(np.array(['a']), i)
  236. array(['a', 'aa', 'aaa'], dtype='<U3')
  237. >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
  238. >>> np.strings.multiply(a, 3)
  239. array([['aaa', 'bbb', 'ccc'],
  240. ['ddd', 'eee', 'fff']], dtype='<U3')
  241. >>> np.strings.multiply(a, i)
  242. array([['a', 'bb', 'ccc'],
  243. ['d', 'ee', 'fff']], dtype='<U3')
  244. """
  245. try:
  246. return strings_multiply(a, i)
  247. except TypeError:
  248. raise ValueError("Can only multiply by integers")
  249. @set_module("numpy.char")
  250. def partition(a, sep):
  251. """
  252. Partition each element in `a` around `sep`.
  253. Calls :meth:`str.partition` element-wise.
  254. For each element in `a`, split the element as the first
  255. occurrence of `sep`, and return 3 strings containing the part
  256. before the separator, the separator itself, and the part after
  257. the separator. If the separator is not found, return 3 strings
  258. containing the string itself, followed by two empty strings.
  259. Parameters
  260. ----------
  261. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  262. Input array
  263. sep : {str, unicode}
  264. Separator to split each string element in `a`.
  265. Returns
  266. -------
  267. out : ndarray
  268. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  269. depending on input types. The output array will have an extra
  270. dimension with 3 elements per input element.
  271. Examples
  272. --------
  273. >>> import numpy as np
  274. >>> x = np.array(["Numpy is nice!"])
  275. >>> np.char.partition(x, " ")
  276. array([['Numpy', ' ', 'is nice!']], dtype='<U8')
  277. See Also
  278. --------
  279. str.partition
  280. """
  281. return np.stack(strings_partition(a, sep), axis=-1)
  282. @set_module("numpy.char")
  283. def rpartition(a, sep):
  284. """
  285. Partition (split) each element around the right-most separator.
  286. Calls :meth:`str.rpartition` element-wise.
  287. For each element in `a`, split the element as the last
  288. occurrence of `sep`, and return 3 strings containing the part
  289. before the separator, the separator itself, and the part after
  290. the separator. If the separator is not found, return 3 strings
  291. containing the string itself, followed by two empty strings.
  292. Parameters
  293. ----------
  294. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  295. Input array
  296. sep : str or unicode
  297. Right-most separator to split each element in array.
  298. Returns
  299. -------
  300. out : ndarray
  301. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  302. depending on input types. The output array will have an extra
  303. dimension with 3 elements per input element.
  304. See Also
  305. --------
  306. str.rpartition
  307. Examples
  308. --------
  309. >>> import numpy as np
  310. >>> a = np.array(['aAaAaA', ' aA ', 'abBABba'])
  311. >>> np.char.rpartition(a, 'A')
  312. array([['aAaAa', 'A', ''],
  313. [' a', 'A', ' '],
  314. ['abB', 'A', 'Bba']], dtype='<U5')
  315. """
  316. return np.stack(strings_rpartition(a, sep), axis=-1)
  317. @set_module("numpy.char")
  318. class chararray(ndarray):
  319. """
  320. chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
  321. strides=None, order='C')
  322. Provides a convenient view on arrays of string and unicode values.
  323. .. note::
  324. The `chararray` class exists for backwards compatibility with
  325. Numarray, it is not recommended for new development. Starting from numpy
  326. 1.4, if one needs arrays of strings, it is recommended to use arrays of
  327. `dtype` `~numpy.object_`, `~numpy.bytes_` or `~numpy.str_`, and use
  328. the free functions in the `numpy.char` module for fast vectorized
  329. string operations.
  330. Versus a NumPy array of dtype `~numpy.bytes_` or `~numpy.str_`, this
  331. class adds the following functionality:
  332. 1) values automatically have whitespace removed from the end
  333. when indexed
  334. 2) comparison operators automatically remove whitespace from the
  335. end when comparing values
  336. 3) vectorized string operations are provided as methods
  337. (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
  338. chararrays should be created using `numpy.char.array` or
  339. `numpy.char.asarray`, rather than this constructor directly.
  340. This constructor creates the array, using `buffer` (with `offset`
  341. and `strides`) if it is not ``None``. If `buffer` is ``None``, then
  342. constructs a new array with `strides` in "C order", unless both
  343. ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
  344. is in "Fortran order".
  345. Methods
  346. -------
  347. astype
  348. argsort
  349. copy
  350. count
  351. decode
  352. dump
  353. dumps
  354. encode
  355. endswith
  356. expandtabs
  357. fill
  358. find
  359. flatten
  360. getfield
  361. index
  362. isalnum
  363. isalpha
  364. isdecimal
  365. isdigit
  366. islower
  367. isnumeric
  368. isspace
  369. istitle
  370. isupper
  371. item
  372. join
  373. ljust
  374. lower
  375. lstrip
  376. nonzero
  377. put
  378. ravel
  379. repeat
  380. replace
  381. reshape
  382. resize
  383. rfind
  384. rindex
  385. rjust
  386. rsplit
  387. rstrip
  388. searchsorted
  389. setfield
  390. setflags
  391. sort
  392. split
  393. splitlines
  394. squeeze
  395. startswith
  396. strip
  397. swapaxes
  398. swapcase
  399. take
  400. title
  401. tofile
  402. tolist
  403. tostring
  404. translate
  405. transpose
  406. upper
  407. view
  408. zfill
  409. Parameters
  410. ----------
  411. shape : tuple
  412. Shape of the array.
  413. itemsize : int, optional
  414. Length of each array element, in number of characters. Default is 1.
  415. unicode : bool, optional
  416. Are the array elements of type unicode (True) or string (False).
  417. Default is False.
  418. buffer : object exposing the buffer interface or str, optional
  419. Memory address of the start of the array data. Default is None,
  420. in which case a new array is created.
  421. offset : int, optional
  422. Fixed stride displacement from the beginning of an axis?
  423. Default is 0. Needs to be >=0.
  424. strides : array_like of ints, optional
  425. Strides for the array (see `~numpy.ndarray.strides` for
  426. full description). Default is None.
  427. order : {'C', 'F'}, optional
  428. The order in which the array data is stored in memory: 'C' ->
  429. "row major" order (the default), 'F' -> "column major"
  430. (Fortran) order.
  431. Examples
  432. --------
  433. >>> import numpy as np
  434. >>> charar = np.char.chararray((3, 3))
  435. >>> charar[:] = 'a'
  436. >>> charar
  437. chararray([[b'a', b'a', b'a'],
  438. [b'a', b'a', b'a'],
  439. [b'a', b'a', b'a']], dtype='|S1')
  440. >>> charar = np.char.chararray(charar.shape, itemsize=5)
  441. >>> charar[:] = 'abc'
  442. >>> charar
  443. chararray([[b'abc', b'abc', b'abc'],
  444. [b'abc', b'abc', b'abc'],
  445. [b'abc', b'abc', b'abc']], dtype='|S5')
  446. """
  447. def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
  448. offset=0, strides=None, order='C'):
  449. if unicode:
  450. dtype = str_
  451. else:
  452. dtype = bytes_
  453. # force itemsize to be a Python int, since using NumPy integer
  454. # types results in itemsize.itemsize being used as the size of
  455. # strings in the new array.
  456. itemsize = int(itemsize)
  457. if isinstance(buffer, str):
  458. # unicode objects do not have the buffer interface
  459. filler = buffer
  460. buffer = None
  461. else:
  462. filler = None
  463. if buffer is None:
  464. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  465. order=order)
  466. else:
  467. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  468. buffer=buffer,
  469. offset=offset, strides=strides,
  470. order=order)
  471. if filler is not None:
  472. self[...] = filler
  473. return self
  474. def __array_wrap__(self, arr, context=None, return_scalar=False):
  475. # When calling a ufunc (and some other functions), we return a
  476. # chararray if the ufunc output is a string-like array,
  477. # or an ndarray otherwise
  478. if arr.dtype.char in "SUbc":
  479. return arr.view(type(self))
  480. return arr
  481. def __array_finalize__(self, obj):
  482. # The b is a special case because it is used for reconstructing.
  483. if self.dtype.char not in 'VSUbc':
  484. raise ValueError("Can only create a chararray from string data.")
  485. def __getitem__(self, obj):
  486. val = ndarray.__getitem__(self, obj)
  487. if isinstance(val, character):
  488. return val.rstrip()
  489. return val
  490. # IMPLEMENTATION NOTE: Most of the methods of this class are
  491. # direct delegations to the free functions in this module.
  492. # However, those that return an array of strings should instead
  493. # return a chararray, so some extra wrapping is required.
  494. def __eq__(self, other):
  495. """
  496. Return (self == other) element-wise.
  497. See Also
  498. --------
  499. equal
  500. """
  501. return equal(self, other)
  502. def __ne__(self, other):
  503. """
  504. Return (self != other) element-wise.
  505. See Also
  506. --------
  507. not_equal
  508. """
  509. return not_equal(self, other)
  510. def __ge__(self, other):
  511. """
  512. Return (self >= other) element-wise.
  513. See Also
  514. --------
  515. greater_equal
  516. """
  517. return greater_equal(self, other)
  518. def __le__(self, other):
  519. """
  520. Return (self <= other) element-wise.
  521. See Also
  522. --------
  523. less_equal
  524. """
  525. return less_equal(self, other)
  526. def __gt__(self, other):
  527. """
  528. Return (self > other) element-wise.
  529. See Also
  530. --------
  531. greater
  532. """
  533. return greater(self, other)
  534. def __lt__(self, other):
  535. """
  536. Return (self < other) element-wise.
  537. See Also
  538. --------
  539. less
  540. """
  541. return less(self, other)
  542. def __add__(self, other):
  543. """
  544. Return (self + other), that is string concatenation,
  545. element-wise for a pair of array_likes of str or unicode.
  546. See Also
  547. --------
  548. add
  549. """
  550. return add(self, other)
  551. def __radd__(self, other):
  552. """
  553. Return (other + self), that is string concatenation,
  554. element-wise for a pair of array_likes of `bytes_` or `str_`.
  555. See Also
  556. --------
  557. add
  558. """
  559. return add(other, self)
  560. def __mul__(self, i):
  561. """
  562. Return (self * i), that is string multiple concatenation,
  563. element-wise.
  564. See Also
  565. --------
  566. multiply
  567. """
  568. return asarray(multiply(self, i))
  569. def __rmul__(self, i):
  570. """
  571. Return (self * i), that is string multiple concatenation,
  572. element-wise.
  573. See Also
  574. --------
  575. multiply
  576. """
  577. return asarray(multiply(self, i))
  578. def __mod__(self, i):
  579. """
  580. Return (self % i), that is pre-Python 2.6 string formatting
  581. (interpolation), element-wise for a pair of array_likes of `bytes_`
  582. or `str_`.
  583. See Also
  584. --------
  585. mod
  586. """
  587. return asarray(mod(self, i))
  588. def __rmod__(self, other):
  589. return NotImplemented
  590. def argsort(self, axis=-1, kind=None, order=None):
  591. """
  592. Return the indices that sort the array lexicographically.
  593. For full documentation see `numpy.argsort`, for which this method is
  594. in fact merely a "thin wrapper."
  595. Examples
  596. --------
  597. >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
  598. >>> c = c.view(np.char.chararray); c
  599. chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
  600. dtype='|S5')
  601. >>> c[c.argsort()]
  602. chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
  603. dtype='|S5')
  604. """
  605. return self.__array__().argsort(axis, kind, order)
  606. argsort.__doc__ = ndarray.argsort.__doc__
  607. def capitalize(self):
  608. """
  609. Return a copy of `self` with only the first character of each element
  610. capitalized.
  611. See Also
  612. --------
  613. char.capitalize
  614. """
  615. return asarray(capitalize(self))
  616. def center(self, width, fillchar=' '):
  617. """
  618. Return a copy of `self` with its elements centered in a
  619. string of length `width`.
  620. See Also
  621. --------
  622. center
  623. """
  624. return asarray(center(self, width, fillchar))
  625. def count(self, sub, start=0, end=None):
  626. """
  627. Returns an array with the number of non-overlapping occurrences of
  628. substring `sub` in the range [`start`, `end`].
  629. See Also
  630. --------
  631. char.count
  632. """
  633. return count(self, sub, start, end)
  634. def decode(self, encoding=None, errors=None):
  635. """
  636. Calls ``bytes.decode`` element-wise.
  637. See Also
  638. --------
  639. char.decode
  640. """
  641. return decode(self, encoding, errors)
  642. def encode(self, encoding=None, errors=None):
  643. """
  644. Calls :meth:`str.encode` element-wise.
  645. See Also
  646. --------
  647. char.encode
  648. """
  649. return encode(self, encoding, errors)
  650. def endswith(self, suffix, start=0, end=None):
  651. """
  652. Returns a boolean array which is `True` where the string element
  653. in `self` ends with `suffix`, otherwise `False`.
  654. See Also
  655. --------
  656. char.endswith
  657. """
  658. return endswith(self, suffix, start, end)
  659. def expandtabs(self, tabsize=8):
  660. """
  661. Return a copy of each string element where all tab characters are
  662. replaced by one or more spaces.
  663. See Also
  664. --------
  665. char.expandtabs
  666. """
  667. return asarray(expandtabs(self, tabsize))
  668. def find(self, sub, start=0, end=None):
  669. """
  670. For each element, return the lowest index in the string where
  671. substring `sub` is found.
  672. See Also
  673. --------
  674. char.find
  675. """
  676. return find(self, sub, start, end)
  677. def index(self, sub, start=0, end=None):
  678. """
  679. Like `find`, but raises :exc:`ValueError` when the substring is not
  680. found.
  681. See Also
  682. --------
  683. char.index
  684. """
  685. return index(self, sub, start, end)
  686. def isalnum(self):
  687. """
  688. Returns true for each element if all characters in the string
  689. are alphanumeric and there is at least one character, false
  690. otherwise.
  691. See Also
  692. --------
  693. char.isalnum
  694. """
  695. return isalnum(self)
  696. def isalpha(self):
  697. """
  698. Returns true for each element if all characters in the string
  699. are alphabetic and there is at least one character, false
  700. otherwise.
  701. See Also
  702. --------
  703. char.isalpha
  704. """
  705. return isalpha(self)
  706. def isdigit(self):
  707. """
  708. Returns true for each element if all characters in the string are
  709. digits and there is at least one character, false otherwise.
  710. See Also
  711. --------
  712. char.isdigit
  713. """
  714. return isdigit(self)
  715. def islower(self):
  716. """
  717. Returns true for each element if all cased characters in the
  718. string are lowercase and there is at least one cased character,
  719. false otherwise.
  720. See Also
  721. --------
  722. char.islower
  723. """
  724. return islower(self)
  725. def isspace(self):
  726. """
  727. Returns true for each element if there are only whitespace
  728. characters in the string and there is at least one character,
  729. false otherwise.
  730. See Also
  731. --------
  732. char.isspace
  733. """
  734. return isspace(self)
  735. def istitle(self):
  736. """
  737. Returns true for each element if the element is a titlecased
  738. string and there is at least one character, false otherwise.
  739. See Also
  740. --------
  741. char.istitle
  742. """
  743. return istitle(self)
  744. def isupper(self):
  745. """
  746. Returns true for each element if all cased characters in the
  747. string are uppercase and there is at least one character, false
  748. otherwise.
  749. See Also
  750. --------
  751. char.isupper
  752. """
  753. return isupper(self)
  754. def join(self, seq):
  755. """
  756. Return a string which is the concatenation of the strings in the
  757. sequence `seq`.
  758. See Also
  759. --------
  760. char.join
  761. """
  762. return join(self, seq)
  763. def ljust(self, width, fillchar=' '):
  764. """
  765. Return an array with the elements of `self` left-justified in a
  766. string of length `width`.
  767. See Also
  768. --------
  769. char.ljust
  770. """
  771. return asarray(ljust(self, width, fillchar))
  772. def lower(self):
  773. """
  774. Return an array with the elements of `self` converted to
  775. lowercase.
  776. See Also
  777. --------
  778. char.lower
  779. """
  780. return asarray(lower(self))
  781. def lstrip(self, chars=None):
  782. """
  783. For each element in `self`, return a copy with the leading characters
  784. removed.
  785. See Also
  786. --------
  787. char.lstrip
  788. """
  789. return lstrip(self, chars)
  790. def partition(self, sep):
  791. """
  792. Partition each element in `self` around `sep`.
  793. See Also
  794. --------
  795. partition
  796. """
  797. return asarray(partition(self, sep))
  798. def replace(self, old, new, count=None):
  799. """
  800. For each element in `self`, return a copy of the string with all
  801. occurrences of substring `old` replaced by `new`.
  802. See Also
  803. --------
  804. char.replace
  805. """
  806. return replace(self, old, new, count if count is not None else -1)
  807. def rfind(self, sub, start=0, end=None):
  808. """
  809. For each element in `self`, return the highest index in the string
  810. where substring `sub` is found, such that `sub` is contained
  811. within [`start`, `end`].
  812. See Also
  813. --------
  814. char.rfind
  815. """
  816. return rfind(self, sub, start, end)
  817. def rindex(self, sub, start=0, end=None):
  818. """
  819. Like `rfind`, but raises :exc:`ValueError` when the substring `sub` is
  820. not found.
  821. See Also
  822. --------
  823. char.rindex
  824. """
  825. return rindex(self, sub, start, end)
  826. def rjust(self, width, fillchar=' '):
  827. """
  828. Return an array with the elements of `self`
  829. right-justified in a string of length `width`.
  830. See Also
  831. --------
  832. char.rjust
  833. """
  834. return asarray(rjust(self, width, fillchar))
  835. def rpartition(self, sep):
  836. """
  837. Partition each element in `self` around `sep`.
  838. See Also
  839. --------
  840. rpartition
  841. """
  842. return asarray(rpartition(self, sep))
  def rsplit(self, sep=None, maxsplit=None):
      """
      Break every element of `self` into a list of words, using `sep`
      as the delimiter string.

      Method form of the module-level `rsplit` function; `self`, `sep`
      and `maxsplit` are forwarded and its result is returned unchanged.

      See Also
      --------
      char.rsplit

      """
      words = rsplit(self, sep, maxsplit)
      return words
  def rstrip(self, chars=None):
      """
      Strip trailing characters from every element of `self`.

      Method form of the module-level `rstrip` function: `self` and
      `chars` are forwarded to it and its result is returned as is.

      See Also
      --------
      char.rstrip

      """
      stripped = rstrip(self, chars)
      return stripped
  def split(self, sep=None, maxsplit=None):
      """
      Break every element of `self` into a list of words, using `sep`
      as the delimiter string.

      Method form of the module-level `split` function; `self`, `sep`
      and `maxsplit` are forwarded and its result is returned unchanged.

      See Also
      --------
      char.split

      """
      words = split(self, sep, maxsplit)
      return words
  def splitlines(self, keepends=None):
      """
      Break every element of `self` into a list of its lines, splitting
      at line boundaries.

      Method form of the module-level `splitlines` function; `self` and
      `keepends` are forwarded and its result is returned unchanged.

      See Also
      --------
      char.splitlines

      """
      lines = splitlines(self, keepends)
      return lines
  def startswith(self, prefix, start=0, end=None):
      """
      Test whether each element of `self` begins with `prefix`.

      Returns a boolean array that is `True` where the string element
      starts with `prefix` and `False` elsewhere. The check itself is
      performed by the module-level `startswith` function.

      See Also
      --------
      char.startswith

      """
      matches = startswith(self, prefix, start, end)
      return matches
  def strip(self, chars=None):
      """
      Strip leading and trailing characters from every element of
      `self`.

      Method form of the module-level `strip` function: `self` and
      `chars` are forwarded to it and its result is returned as is.

      See Also
      --------
      char.strip

      """
      stripped = strip(self, chars)
      return stripped
  def swapcase(self):
      """
      Return a copy of each element of `self` with uppercase characters
      turned into lowercase ones and vice versa.

      The module-level `swapcase` function does the work; its result is
      passed through `asarray` before being returned.

      See Also
      --------
      char.swapcase

      """
      swapped = swapcase(self)
      return asarray(swapped)
  def title(self):
      """
      Return a titlecased version of each element of `self`.

      In the result, words start with uppercase characters and all
      remaining cased characters are lowercase. The module-level
      `title` function does the work; its result is passed through
      `asarray` before being returned.

      See Also
      --------
      char.title

      """
      titled = title(self)
      return asarray(titled)
  def translate(self, table, deletechars=None):
      """
      Return a copy of each element of `self` with the characters listed
      in the optional `deletechars` removed and the remaining characters
      mapped through the translation table `table`.

      The module-level `translate` function does the work; its result is
      passed through `asarray` before being returned.

      See Also
      --------
      char.translate

      """
      translated = translate(self, table, deletechars)
      return asarray(translated)
  def upper(self):
      """
      Convert every element of `self` to uppercase.

      The module-level `upper` function does the work; its result is
      passed through `asarray` before being returned.

      See Also
      --------
      char.upper

      """
      uppercased = upper(self)
      return asarray(uppercased)
  def zfill(self, width):
      """
      Left-fill the numeric string in each element of `self` with zeros
      to a string of length `width`.

      The module-level `zfill` function does the work; its result is
      passed through `asarray` before being returned.

      See Also
      --------
      char.zfill

      """
      padded = zfill(self, width)
      return asarray(padded)
  def isnumeric(self):
      """
      Report, for each element of `self`, whether it contains only
      numeric characters.

      Method form of the module-level `isnumeric` function, whose result
      is returned unchanged.

      See Also
      --------
      char.isnumeric

      """
      flags = isnumeric(self)
      return flags
  def isdecimal(self):
      """
      Report, for each element of `self`, whether it contains only
      decimal characters.

      Method form of the module-level `isdecimal` function, whose result
      is returned unchanged.

      See Also
      --------
      char.isdecimal

      """
      flags = isdecimal(self)
      return flags
  963. @set_module("numpy.char")
  964. def array(obj, itemsize=None, copy=True, unicode=None, order=None):
  965. """
  966. Create a `~numpy.char.chararray`.
  967. .. note::
  968. This class is provided for numarray backward-compatibility.
  969. New code (not concerned with numarray compatibility) should use
  970. arrays of type `bytes_` or `str_` and use the free functions
  971. in :mod:`numpy.char` for fast vectorized string operations instead.
  972. Versus a NumPy array of dtype `bytes_` or `str_`, this
  973. class adds the following functionality:
  974. 1) values automatically have whitespace removed from the end
  975. when indexed
  976. 2) comparison operators automatically remove whitespace from the
  977. end when comparing values
  978. 3) vectorized string operations are provided as methods
  979. (e.g. `chararray.endswith <numpy.char.chararray.endswith>`)
  980. and infix operators (e.g. ``+, *, %``)
  981. Parameters
  982. ----------
  983. obj : array of str or unicode-like
  984. itemsize : int, optional
  985. `itemsize` is the number of characters per scalar in the
  986. resulting array. If `itemsize` is None, and `obj` is an
  987. object array or a Python list, the `itemsize` will be
  988. automatically determined. If `itemsize` is provided and `obj`
  989. is of type str or unicode, then the `obj` string will be
  990. chunked into `itemsize` pieces.
  991. copy : bool, optional
  992. If true (default), then the object is copied. Otherwise, a copy
  993. will only be made if ``__array__`` returns a copy, if obj is a
  994. nested sequence, or if a copy is needed to satisfy any of the other
  995. requirements (`itemsize`, unicode, `order`, etc.).
  996. unicode : bool, optional
  997. When true, the resulting `~numpy.char.chararray` can contain Unicode
  998. characters, when false only 8-bit characters. If unicode is
  999. None and `obj` is one of the following:
  1000. - a `~numpy.char.chararray`,
  1001. - an ndarray of type :class:`str_` or :class:`bytes_`
  1002. - a Python :class:`str` or :class:`bytes` object,
  1003. then the unicode setting of the output array will be
  1004. automatically determined.
  1005. order : {'C', 'F', 'A'}, optional
  1006. Specify the order of the array. If order is 'C' (default), then the
  1007. array will be in C-contiguous order (last-index varies the
  1008. fastest). If order is 'F', then the returned array
  1009. will be in Fortran-contiguous order (first-index varies the
  1010. fastest). If order is 'A', then the returned array may
  1011. be in any order (either C-, Fortran-contiguous, or even
  1012. discontiguous).
  1013. Examples
  1014. --------
  1015. >>> import numpy as np
  1016. >>> char_array = np.char.array(['hello', 'world', 'numpy','array'])
  1017. >>> char_array
  1018. chararray(['hello', 'world', 'numpy', 'array'], dtype='<U5')
  1019. """
  1020. if isinstance(obj, (bytes, str)):
  1021. if unicode is None:
  1022. if isinstance(obj, str):
  1023. unicode = True
  1024. else:
  1025. unicode = False
  1026. if itemsize is None:
  1027. itemsize = len(obj)
  1028. shape = len(obj) // itemsize
  1029. return chararray(shape, itemsize=itemsize, unicode=unicode,
  1030. buffer=obj, order=order)
  1031. if isinstance(obj, (list, tuple)):
  1032. obj = asnarray(obj)
  1033. if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
  1034. # If we just have a vanilla chararray, create a chararray
  1035. # view around it.
  1036. if not isinstance(obj, chararray):
  1037. obj = obj.view(chararray)
  1038. if itemsize is None:
  1039. itemsize = obj.itemsize
  1040. # itemsize is in 8-bit chars, so for Unicode, we need
  1041. # to divide by the size of a single Unicode character,
  1042. # which for NumPy is always 4
  1043. if issubclass(obj.dtype.type, str_):
  1044. itemsize //= 4
  1045. if unicode is None:
  1046. if issubclass(obj.dtype.type, str_):
  1047. unicode = True
  1048. else:
  1049. unicode = False
  1050. if unicode:
  1051. dtype = str_
  1052. else:
  1053. dtype = bytes_
  1054. if order is not None:
  1055. obj = asnarray(obj, order=order)
  1056. if (copy or
  1057. (itemsize != obj.itemsize) or
  1058. (not unicode and isinstance(obj, str_)) or
  1059. (unicode and isinstance(obj, bytes_))):
  1060. obj = obj.astype((dtype, int(itemsize)))
  1061. return obj
  1062. if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
  1063. if itemsize is None:
  1064. # Since no itemsize was specified, convert the input array to
  1065. # a list so the ndarray constructor will automatically
  1066. # determine the itemsize for us.
  1067. obj = obj.tolist()
  1068. # Fall through to the default case
  1069. if unicode:
  1070. dtype = str_
  1071. else:
  1072. dtype = bytes_
  1073. if itemsize is None:
  1074. val = narray(obj, dtype=dtype, order=order, subok=True)
  1075. else:
  1076. val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
  1077. return val.view(chararray)
  1078. @set_module("numpy.char")
  1079. def asarray(obj, itemsize=None, unicode=None, order=None):
  1080. """
  1081. Convert the input to a `~numpy.char.chararray`, copying the data only if
  1082. necessary.
  1083. Versus a NumPy array of dtype `bytes_` or `str_`, this
  1084. class adds the following functionality:
  1085. 1) values automatically have whitespace removed from the end
  1086. when indexed
  1087. 2) comparison operators automatically remove whitespace from the
  1088. end when comparing values
  1089. 3) vectorized string operations are provided as methods
  1090. (e.g. `chararray.endswith <numpy.char.chararray.endswith>`)
  1091. and infix operators (e.g. ``+``, ``*``, ``%``)
  1092. Parameters
  1093. ----------
  1094. obj : array of str or unicode-like
  1095. itemsize : int, optional
  1096. `itemsize` is the number of characters per scalar in the
  1097. resulting array. If `itemsize` is None, and `obj` is an
  1098. object array or a Python list, the `itemsize` will be
  1099. automatically determined. If `itemsize` is provided and `obj`
  1100. is of type str or unicode, then the `obj` string will be
  1101. chunked into `itemsize` pieces.
  1102. unicode : bool, optional
  1103. When true, the resulting `~numpy.char.chararray` can contain Unicode
  1104. characters, when false only 8-bit characters. If unicode is
  1105. None and `obj` is one of the following:
  1106. - a `~numpy.char.chararray`,
  1107. - an ndarray of type `str_` or `unicode_`
  1108. - a Python str or unicode object,
  1109. then the unicode setting of the output array will be
  1110. automatically determined.
  1111. order : {'C', 'F'}, optional
  1112. Specify the order of the array. If order is 'C' (default), then the
  1113. array will be in C-contiguous order (last-index varies the
  1114. fastest). If order is 'F', then the returned array
  1115. will be in Fortran-contiguous order (first-index varies the
  1116. fastest).
  1117. Examples
  1118. --------
  1119. >>> import numpy as np
  1120. >>> np.char.asarray(['hello', 'world'])
  1121. chararray(['hello', 'world'], dtype='<U5')
  1122. """
  1123. return array(obj, itemsize, copy=False,
  1124. unicode=unicode, order=order)