strings.py 45 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641
  1. """
  2. This module contains a set of functions for vectorized string
  3. operations.
  4. """
  5. import sys
  6. import numpy as np
  7. from numpy import (
  8. equal, not_equal, less, less_equal, greater, greater_equal,
  9. add, multiply as _multiply_ufunc,
  10. )
  11. from numpy._core.multiarray import _vec_string
  12. from numpy._core.overrides import set_module
  13. from numpy._core.umath import (
  14. isalpha,
  15. isdigit,
  16. isspace,
  17. isalnum,
  18. islower,
  19. isupper,
  20. istitle,
  21. isdecimal,
  22. isnumeric,
  23. str_len,
  24. find as _find_ufunc,
  25. rfind as _rfind_ufunc,
  26. index as _index_ufunc,
  27. rindex as _rindex_ufunc,
  28. count as _count_ufunc,
  29. startswith as _startswith_ufunc,
  30. endswith as _endswith_ufunc,
  31. _lstrip_whitespace,
  32. _lstrip_chars,
  33. _rstrip_whitespace,
  34. _rstrip_chars,
  35. _strip_whitespace,
  36. _strip_chars,
  37. _replace,
  38. _expandtabs_length,
  39. _expandtabs,
  40. _center,
  41. _ljust,
  42. _rjust,
  43. _zfill,
  44. _partition,
  45. _partition_index,
  46. _rpartition,
  47. _rpartition_index,
  48. )
  49. def _override___module__():
  50. for ufunc in [
  51. isalnum, isalpha, isdecimal, isdigit, islower, isnumeric, isspace,
  52. istitle, isupper, str_len,
  53. ]:
  54. ufunc.__module__ = "numpy.strings"
  55. ufunc.__qualname__ = ufunc.__name__
  56. _override___module__()
  57. __all__ = [
  58. # UFuncs
  59. "equal", "not_equal", "less", "less_equal", "greater", "greater_equal",
  60. "add", "multiply", "isalpha", "isdigit", "isspace", "isalnum", "islower",
  61. "isupper", "istitle", "isdecimal", "isnumeric", "str_len", "find",
  62. "rfind", "index", "rindex", "count", "startswith", "endswith", "lstrip",
  63. "rstrip", "strip", "replace", "expandtabs", "center", "ljust", "rjust",
  64. "zfill", "partition", "rpartition",
  65. # _vec_string - Will gradually become ufuncs as well
  66. "upper", "lower", "swapcase", "capitalize", "title",
  67. # _vec_string - Will probably not become ufuncs
  68. "mod", "decode", "encode", "translate",
  69. # Removed from namespace until behavior has been crystallized
  70. # "join", "split", "rsplit", "splitlines",
  71. ]
  72. MAX = np.iinfo(np.int64).max
  73. def _get_num_chars(a):
  74. """
  75. Helper function that returns the number of characters per field in
  76. a string or unicode array. This is to abstract out the fact that
  77. for a unicode array this is itemsize / 4.
  78. """
  79. if issubclass(a.dtype.type, np.str_):
  80. return a.itemsize // 4
  81. return a.itemsize
  82. def _to_bytes_or_str_array(result, output_dtype_like):
  83. """
  84. Helper function to cast a result back into an array
  85. with the appropriate dtype if an object array must be used
  86. as an intermediary.
  87. """
  88. output_dtype_like = np.asarray(output_dtype_like)
  89. if result.size == 0:
  90. # Calling asarray & tolist in an empty array would result
  91. # in losing shape information
  92. return result.astype(output_dtype_like.dtype)
  93. ret = np.asarray(result.tolist())
  94. if isinstance(output_dtype_like.dtype, np.dtypes.StringDType):
  95. return ret.astype(type(output_dtype_like.dtype))
  96. return ret.astype(type(output_dtype_like.dtype)(_get_num_chars(ret)))
  97. def _clean_args(*args):
  98. """
  99. Helper function for delegating arguments to Python string
  100. functions.
  101. Many of the Python string operations that have optional arguments
  102. do not use 'None' to indicate a default value. In these cases,
  103. we need to remove all None arguments, and those following them.
  104. """
  105. newargs = []
  106. for chk in args:
  107. if chk is None:
  108. break
  109. newargs.append(chk)
  110. return newargs
  111. @set_module("numpy.strings")
  112. def multiply(a, i):
  113. """
  114. Return (a * i), that is string multiple concatenation,
  115. element-wise.
  116. Values in ``i`` of less than 0 are treated as 0 (which yields an
  117. empty string).
  118. Parameters
  119. ----------
  120. a : array_like, with ``StringDType``, ``bytes_`` or ``str_`` dtype
  121. i : array_like, with any integer dtype
  122. Returns
  123. -------
  124. out : ndarray
  125. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  126. depending on input types
  127. Examples
  128. --------
  129. >>> import numpy as np
  130. >>> a = np.array(["a", "b", "c"])
  131. >>> np.strings.multiply(a, 3)
  132. array(['aaa', 'bbb', 'ccc'], dtype='<U3')
  133. >>> i = np.array([1, 2, 3])
  134. >>> np.strings.multiply(a, i)
  135. array(['a', 'bb', 'ccc'], dtype='<U3')
  136. >>> np.strings.multiply(np.array(['a']), i)
  137. array(['a', 'aa', 'aaa'], dtype='<U3')
  138. >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
  139. >>> np.strings.multiply(a, 3)
  140. array([['aaa', 'bbb', 'ccc'],
  141. ['ddd', 'eee', 'fff']], dtype='<U3')
  142. >>> np.strings.multiply(a, i)
  143. array([['a', 'bb', 'ccc'],
  144. ['d', 'ee', 'fff']], dtype='<U3')
  145. """
  146. a = np.asanyarray(a)
  147. i = np.asanyarray(i)
  148. if not np.issubdtype(i.dtype, np.integer):
  149. raise TypeError(f"unsupported type {i.dtype} for operand 'i'")
  150. i = np.maximum(i, 0)
  151. # delegate to stringdtype loops that also do overflow checking
  152. if a.dtype.char == "T":
  153. return a * i
  154. a_len = str_len(a)
  155. # Ensure we can do a_len * i without overflow.
  156. if np.any(a_len > sys.maxsize / np.maximum(i, 1)):
  157. raise MemoryError("repeated string is too long")
  158. buffersizes = a_len * i
  159. out_dtype = f"{a.dtype.char}{buffersizes.max()}"
  160. out = np.empty_like(a, shape=buffersizes.shape, dtype=out_dtype)
  161. return _multiply_ufunc(a, i, out=out)
  162. @set_module("numpy.strings")
  163. def mod(a, values):
  164. """
  165. Return (a % i), that is pre-Python 2.6 string formatting
  166. (interpolation), element-wise for a pair of array_likes of str
  167. or unicode.
  168. Parameters
  169. ----------
  170. a : array_like, with `np.bytes_` or `np.str_` dtype
  171. values : array_like of values
  172. These values will be element-wise interpolated into the string.
  173. Returns
  174. -------
  175. out : ndarray
  176. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  177. depending on input types
  178. Examples
  179. --------
  180. >>> import numpy as np
  181. >>> a = np.array(["NumPy is a %s library"])
  182. >>> np.strings.mod(a, values=["Python"])
  183. array(['NumPy is a Python library'], dtype='<U25')
  184. >>> a = np.array([b'%d bytes', b'%d bits'])
  185. >>> values = np.array([8, 64])
  186. >>> np.strings.mod(a, values)
  187. array([b'8 bytes', b'64 bits'], dtype='|S7')
  188. """
  189. return _to_bytes_or_str_array(
  190. _vec_string(a, np.object_, '__mod__', (values,)), a)
  191. @set_module("numpy.strings")
  192. def find(a, sub, start=0, end=None):
  193. """
  194. For each element, return the lowest index in the string where
  195. substring ``sub`` is found, such that ``sub`` is contained in the
  196. range [``start``, ``end``).
  197. Parameters
  198. ----------
  199. a : array_like, with ``StringDType``, ``bytes_`` or ``str_`` dtype
  200. sub : array_like, with `np.bytes_` or `np.str_` dtype
  201. The substring to search for.
  202. start, end : array_like, with any integer dtype
  203. The range to look in, interpreted as in slice notation.
  204. Returns
  205. -------
  206. y : ndarray
  207. Output array of ints
  208. See Also
  209. --------
  210. str.find
  211. Examples
  212. --------
  213. >>> import numpy as np
  214. >>> a = np.array(["NumPy is a Python library"])
  215. >>> np.strings.find(a, "Python")
  216. array([11])
  217. """
  218. end = end if end is not None else MAX
  219. return _find_ufunc(a, sub, start, end)
  220. @set_module("numpy.strings")
  221. def rfind(a, sub, start=0, end=None):
  222. """
  223. For each element, return the highest index in the string where
  224. substring ``sub`` is found, such that ``sub`` is contained in the
  225. range [``start``, ``end``).
  226. Parameters
  227. ----------
  228. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  229. sub : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  230. The substring to search for.
  231. start, end : array_like, with any integer dtype
  232. The range to look in, interpreted as in slice notation.
  233. Returns
  234. -------
  235. y : ndarray
  236. Output array of ints
  237. See Also
  238. --------
  239. str.rfind
  240. Examples
  241. --------
  242. >>> import numpy as np
  243. >>> a = np.array(["Computer Science"])
  244. >>> np.strings.rfind(a, "Science", start=0, end=None)
  245. array([9])
  246. >>> np.strings.rfind(a, "Science", start=0, end=8)
  247. array([-1])
  248. >>> b = np.array(["Computer Science", "Science"])
  249. >>> np.strings.rfind(b, "Science", start=0, end=None)
  250. array([9, 0])
  251. """
  252. end = end if end is not None else MAX
  253. return _rfind_ufunc(a, sub, start, end)
  254. @set_module("numpy.strings")
  255. def index(a, sub, start=0, end=None):
  256. """
  257. Like `find`, but raises :exc:`ValueError` when the substring is not found.
  258. Parameters
  259. ----------
  260. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  261. sub : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  262. start, end : array_like, with any integer dtype, optional
  263. Returns
  264. -------
  265. out : ndarray
  266. Output array of ints.
  267. See Also
  268. --------
  269. find, str.index
  270. Examples
  271. --------
  272. >>> import numpy as np
  273. >>> a = np.array(["Computer Science"])
  274. >>> np.strings.index(a, "Science", start=0, end=None)
  275. array([9])
  276. """
  277. end = end if end is not None else MAX
  278. return _index_ufunc(a, sub, start, end)
  279. @set_module("numpy.strings")
  280. def rindex(a, sub, start=0, end=None):
  281. """
  282. Like `rfind`, but raises :exc:`ValueError` when the substring `sub` is
  283. not found.
  284. Parameters
  285. ----------
  286. a : array-like, with `np.bytes_` or `np.str_` dtype
  287. sub : array-like, with `np.bytes_` or `np.str_` dtype
  288. start, end : array-like, with any integer dtype, optional
  289. Returns
  290. -------
  291. out : ndarray
  292. Output array of ints.
  293. See Also
  294. --------
  295. rfind, str.rindex
  296. Examples
  297. --------
  298. >>> a = np.array(["Computer Science"])
  299. >>> np.strings.rindex(a, "Science", start=0, end=None)
  300. array([9])
  301. """
  302. end = end if end is not None else MAX
  303. return _rindex_ufunc(a, sub, start, end)
  304. @set_module("numpy.strings")
  305. def count(a, sub, start=0, end=None):
  306. """
  307. Returns an array with the number of non-overlapping occurrences of
  308. substring ``sub`` in the range [``start``, ``end``).
  309. Parameters
  310. ----------
  311. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  312. sub : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  313. The substring to search for.
  314. start, end : array_like, with any integer dtype
  315. The range to look in, interpreted as in slice notation.
  316. Returns
  317. -------
  318. y : ndarray
  319. Output array of ints
  320. See Also
  321. --------
  322. str.count
  323. Examples
  324. --------
  325. >>> import numpy as np
  326. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  327. >>> c
  328. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  329. >>> np.strings.count(c, 'A')
  330. array([3, 1, 1])
  331. >>> np.strings.count(c, 'aA')
  332. array([3, 1, 0])
  333. >>> np.strings.count(c, 'A', start=1, end=4)
  334. array([2, 1, 1])
  335. >>> np.strings.count(c, 'A', start=1, end=3)
  336. array([1, 0, 0])
  337. """
  338. end = end if end is not None else MAX
  339. return _count_ufunc(a, sub, start, end)
  340. @set_module("numpy.strings")
  341. def startswith(a, prefix, start=0, end=None):
  342. """
  343. Returns a boolean array which is `True` where the string element
  344. in ``a`` starts with ``prefix``, otherwise `False`.
  345. Parameters
  346. ----------
  347. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  348. prefix : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  349. start, end : array_like, with any integer dtype
  350. With ``start``, test beginning at that position. With ``end``,
  351. stop comparing at that position.
  352. Returns
  353. -------
  354. out : ndarray
  355. Output array of bools
  356. See Also
  357. --------
  358. str.startswith
  359. Examples
  360. --------
  361. >>> import numpy as np
  362. >>> s = np.array(['foo', 'bar'])
  363. >>> s
  364. array(['foo', 'bar'], dtype='<U3')
  365. >>> np.strings.startswith(s, 'fo')
  366. array([True, False])
  367. >>> np.strings.startswith(s, 'o', start=1, end=2)
  368. array([True, False])
  369. """
  370. end = end if end is not None else MAX
  371. return _startswith_ufunc(a, prefix, start, end)
  372. @set_module("numpy.strings")
  373. def endswith(a, suffix, start=0, end=None):
  374. """
  375. Returns a boolean array which is `True` where the string element
  376. in ``a`` ends with ``suffix``, otherwise `False`.
  377. Parameters
  378. ----------
  379. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  380. suffix : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  381. start, end : array_like, with any integer dtype
  382. With ``start``, test beginning at that position. With ``end``,
  383. stop comparing at that position.
  384. Returns
  385. -------
  386. out : ndarray
  387. Output array of bools
  388. See Also
  389. --------
  390. str.endswith
  391. Examples
  392. --------
  393. >>> import numpy as np
  394. >>> s = np.array(['foo', 'bar'])
  395. >>> s
  396. array(['foo', 'bar'], dtype='<U3')
  397. >>> np.strings.endswith(s, 'ar')
  398. array([False, True])
  399. >>> np.strings.endswith(s, 'a', start=1, end=2)
  400. array([False, True])
  401. """
  402. end = end if end is not None else MAX
  403. return _endswith_ufunc(a, suffix, start, end)
  404. @set_module("numpy.strings")
  405. def decode(a, encoding=None, errors=None):
  406. r"""
  407. Calls :meth:`bytes.decode` element-wise.
  408. The set of available codecs comes from the Python standard library,
  409. and may be extended at runtime. For more information, see the
  410. :mod:`codecs` module.
  411. Parameters
  412. ----------
  413. a : array_like, with ``bytes_`` dtype
  414. encoding : str, optional
  415. The name of an encoding
  416. errors : str, optional
  417. Specifies how to handle encoding errors
  418. Returns
  419. -------
  420. out : ndarray
  421. See Also
  422. --------
  423. :py:meth:`bytes.decode`
  424. Notes
  425. -----
  426. The type of the result will depend on the encoding specified.
  427. Examples
  428. --------
  429. >>> import numpy as np
  430. >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
  431. ... b'\x81\x82\xc2\xc1\xc2\x82\x81'])
  432. >>> c
  433. array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
  434. b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
  435. >>> np.strings.decode(c, encoding='cp037')
  436. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  437. """
  438. return _to_bytes_or_str_array(
  439. _vec_string(a, np.object_, 'decode', _clean_args(encoding, errors)),
  440. np.str_(''))
  441. @set_module("numpy.strings")
  442. def encode(a, encoding=None, errors=None):
  443. """
  444. Calls :meth:`str.encode` element-wise.
  445. The set of available codecs comes from the Python standard library,
  446. and may be extended at runtime. For more information, see the
  447. :mod:`codecs` module.
  448. Parameters
  449. ----------
  450. a : array_like, with ``StringDType`` or ``str_`` dtype
  451. encoding : str, optional
  452. The name of an encoding
  453. errors : str, optional
  454. Specifies how to handle encoding errors
  455. Returns
  456. -------
  457. out : ndarray
  458. See Also
  459. --------
  460. str.encode
  461. Notes
  462. -----
  463. The type of the result will depend on the encoding specified.
  464. Examples
  465. --------
  466. >>> import numpy as np
  467. >>> a = np.array(['aAaAaA', ' aA ', 'abBABba'])
  468. >>> np.strings.encode(a, encoding='cp037')
  469. array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
  470. b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
  471. """
  472. return _to_bytes_or_str_array(
  473. _vec_string(a, np.object_, 'encode', _clean_args(encoding, errors)),
  474. np.bytes_(b''))
  475. @set_module("numpy.strings")
  476. def expandtabs(a, tabsize=8):
  477. """
  478. Return a copy of each string element where all tab characters are
  479. replaced by one or more spaces.
  480. Calls :meth:`str.expandtabs` element-wise.
  481. Return a copy of each string element where all tab characters are
  482. replaced by one or more spaces, depending on the current column
  483. and the given `tabsize`. The column number is reset to zero after
  484. each newline occurring in the string. This doesn't understand other
  485. non-printing characters or escape sequences.
  486. Parameters
  487. ----------
  488. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  489. Input array
  490. tabsize : int, optional
  491. Replace tabs with `tabsize` number of spaces. If not given defaults
  492. to 8 spaces.
  493. Returns
  494. -------
  495. out : ndarray
  496. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  497. depending on input type
  498. See Also
  499. --------
  500. str.expandtabs
  501. Examples
  502. --------
  503. >>> import numpy as np
  504. >>> a = np.array(['\t\tHello\tworld'])
  505. >>> np.strings.expandtabs(a, tabsize=4) # doctest: +SKIP
  506. array([' Hello world'], dtype='<U21') # doctest: +SKIP
  507. """
  508. a = np.asanyarray(a)
  509. tabsize = np.asanyarray(tabsize)
  510. if a.dtype.char == "T":
  511. return _expandtabs(a, tabsize)
  512. buffersizes = _expandtabs_length(a, tabsize)
  513. out_dtype = f"{a.dtype.char}{buffersizes.max()}"
  514. out = np.empty_like(a, shape=buffersizes.shape, dtype=out_dtype)
  515. return _expandtabs(a, tabsize, out=out)
  516. @set_module("numpy.strings")
  517. def center(a, width, fillchar=' '):
  518. """
  519. Return a copy of `a` with its elements centered in a string of
  520. length `width`.
  521. Parameters
  522. ----------
  523. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  524. width : array_like, with any integer dtype
  525. The length of the resulting strings, unless ``width < str_len(a)``.
  526. fillchar : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  527. Optional padding character to use (default is space).
  528. Returns
  529. -------
  530. out : ndarray
  531. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  532. depending on input types
  533. See Also
  534. --------
  535. str.center
  536. Notes
  537. -----
  538. While it is possible for ``a`` and ``fillchar`` to have different dtypes,
  539. passing a non-ASCII character in ``fillchar`` when ``a`` is of dtype "S"
  540. is not allowed, and a ``ValueError`` is raised.
  541. Examples
  542. --------
  543. >>> import numpy as np
  544. >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c
  545. array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
  546. >>> np.strings.center(c, width=9)
  547. array([' a1b2 ', ' 1b2a ', ' b2a1 ', ' 2a1b '], dtype='<U9')
  548. >>> np.strings.center(c, width=9, fillchar='*')
  549. array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
  550. >>> np.strings.center(c, width=1)
  551. array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
  552. """
  553. width = np.asanyarray(width)
  554. if not np.issubdtype(width.dtype, np.integer):
  555. raise TypeError(f"unsupported type {width.dtype} for operand 'width'")
  556. a = np.asanyarray(a)
  557. fillchar = np.asanyarray(fillchar)
  558. if np.any(str_len(fillchar) != 1):
  559. raise TypeError(
  560. "The fill character must be exactly one character long")
  561. if np.result_type(a, fillchar).char == "T":
  562. return _center(a, width, fillchar)
  563. fillchar = fillchar.astype(a.dtype, copy=False)
  564. width = np.maximum(str_len(a), width)
  565. out_dtype = f"{a.dtype.char}{width.max()}"
  566. shape = np.broadcast_shapes(a.shape, width.shape, fillchar.shape)
  567. out = np.empty_like(a, shape=shape, dtype=out_dtype)
  568. return _center(a, width, fillchar, out=out)
  569. @set_module("numpy.strings")
  570. def ljust(a, width, fillchar=' '):
  571. """
  572. Return an array with the elements of `a` left-justified in a
  573. string of length `width`.
  574. Parameters
  575. ----------
  576. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  577. width : array_like, with any integer dtype
  578. The length of the resulting strings, unless ``width < str_len(a)``.
  579. fillchar : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  580. Optional character to use for padding (default is space).
  581. Returns
  582. -------
  583. out : ndarray
  584. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  585. depending on input types
  586. See Also
  587. --------
  588. str.ljust
  589. Notes
  590. -----
  591. While it is possible for ``a`` and ``fillchar`` to have different dtypes,
  592. passing a non-ASCII character in ``fillchar`` when ``a`` is of dtype "S"
  593. is not allowed, and a ``ValueError`` is raised.
  594. Examples
  595. --------
  596. >>> import numpy as np
  597. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  598. >>> np.strings.ljust(c, width=3)
  599. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  600. >>> np.strings.ljust(c, width=9)
  601. array(['aAaAaA ', ' aA ', 'abBABba '], dtype='<U9')
  602. """
  603. width = np.asanyarray(width)
  604. if not np.issubdtype(width.dtype, np.integer):
  605. raise TypeError(f"unsupported type {width.dtype} for operand 'width'")
  606. a = np.asanyarray(a)
  607. fillchar = np.asanyarray(fillchar)
  608. if np.any(str_len(fillchar) != 1):
  609. raise TypeError(
  610. "The fill character must be exactly one character long")
  611. if np.result_type(a, fillchar).char == "T":
  612. return _ljust(a, width, fillchar)
  613. fillchar = fillchar.astype(a.dtype, copy=False)
  614. width = np.maximum(str_len(a), width)
  615. shape = np.broadcast_shapes(a.shape, width.shape, fillchar.shape)
  616. out_dtype = f"{a.dtype.char}{width.max()}"
  617. out = np.empty_like(a, shape=shape, dtype=out_dtype)
  618. return _ljust(a, width, fillchar, out=out)
  619. @set_module("numpy.strings")
  620. def rjust(a, width, fillchar=' '):
  621. """
  622. Return an array with the elements of `a` right-justified in a
  623. string of length `width`.
  624. Parameters
  625. ----------
  626. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  627. width : array_like, with any integer dtype
  628. The length of the resulting strings, unless ``width < str_len(a)``.
  629. fillchar : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  630. Optional padding character to use (default is space).
  631. Returns
  632. -------
  633. out : ndarray
  634. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  635. depending on input types
  636. See Also
  637. --------
  638. str.rjust
  639. Notes
  640. -----
  641. While it is possible for ``a`` and ``fillchar`` to have different dtypes,
  642. passing a non-ASCII character in ``fillchar`` when ``a`` is of dtype "S"
  643. is not allowed, and a ``ValueError`` is raised.
  644. Examples
  645. --------
  646. >>> import numpy as np
  647. >>> a = np.array(['aAaAaA', ' aA ', 'abBABba'])
  648. >>> np.strings.rjust(a, width=3)
  649. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  650. >>> np.strings.rjust(a, width=9)
  651. array([' aAaAaA', ' aA ', ' abBABba'], dtype='<U9')
  652. """
  653. width = np.asanyarray(width)
  654. if not np.issubdtype(width.dtype, np.integer):
  655. raise TypeError(f"unsupported type {width.dtype} for operand 'width'")
  656. a = np.asanyarray(a)
  657. fillchar = np.asanyarray(fillchar)
  658. if np.any(str_len(fillchar) != 1):
  659. raise TypeError(
  660. "The fill character must be exactly one character long")
  661. if np.result_type(a, fillchar).char == "T":
  662. return _rjust(a, width, fillchar)
  663. fillchar = fillchar.astype(a.dtype, copy=False)
  664. width = np.maximum(str_len(a), width)
  665. shape = np.broadcast_shapes(a.shape, width.shape, fillchar.shape)
  666. out_dtype = f"{a.dtype.char}{width.max()}"
  667. out = np.empty_like(a, shape=shape, dtype=out_dtype)
  668. return _rjust(a, width, fillchar, out=out)
  669. @set_module("numpy.strings")
  670. def zfill(a, width):
  671. """
  672. Return the numeric string left-filled with zeros. A leading
  673. sign prefix (``+``/``-``) is handled by inserting the padding
  674. after the sign character rather than before.
  675. Parameters
  676. ----------
  677. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  678. width : array_like, with any integer dtype
  679. Width of string to left-fill elements in `a`.
  680. Returns
  681. -------
  682. out : ndarray
  683. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  684. depending on input type
  685. See Also
  686. --------
  687. str.zfill
  688. Examples
  689. --------
  690. >>> import numpy as np
  691. >>> np.strings.zfill(['1', '-1', '+1'], 3)
  692. array(['001', '-01', '+01'], dtype='<U3')
  693. """
  694. width = np.asanyarray(width)
  695. if not np.issubdtype(width.dtype, np.integer):
  696. raise TypeError(f"unsupported type {width.dtype} for operand 'width'")
  697. a = np.asanyarray(a)
  698. if a.dtype.char == "T":
  699. return _zfill(a, width)
  700. width = np.maximum(str_len(a), width)
  701. shape = np.broadcast_shapes(a.shape, width.shape)
  702. out_dtype = f"{a.dtype.char}{width.max()}"
  703. out = np.empty_like(a, shape=shape, dtype=out_dtype)
  704. return _zfill(a, width, out=out)
  705. @set_module("numpy.strings")
  706. def lstrip(a, chars=None):
  707. """
  708. For each element in `a`, return a copy with the leading characters
  709. removed.
  710. Parameters
  711. ----------
  712. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  713. chars : scalar with the same dtype as ``a``, optional
  714. The ``chars`` argument is a string specifying the set of
  715. characters to be removed. If ``None``, the ``chars``
  716. argument defaults to removing whitespace. The ``chars`` argument
  717. is not a prefix or suffix; rather, all combinations of its
  718. values are stripped.
  719. Returns
  720. -------
  721. out : ndarray
  722. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  723. depending on input types
  724. See Also
  725. --------
  726. str.lstrip
  727. Examples
  728. --------
  729. >>> import numpy as np
  730. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  731. >>> c
  732. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  733. # The 'a' variable is unstripped from c[1] because of leading whitespace.
  734. >>> np.strings.lstrip(c, 'a')
  735. array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7')
  736. >>> np.strings.lstrip(c, 'A') # leaves c unchanged
  737. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  738. >>> (np.strings.lstrip(c, ' ') == np.strings.lstrip(c, '')).all()
  739. np.False_
  740. >>> (np.strings.lstrip(c, ' ') == np.strings.lstrip(c)).all()
  741. np.True_
  742. """
  743. if chars is None:
  744. return _lstrip_whitespace(a)
  745. return _lstrip_chars(a, chars)
  746. @set_module("numpy.strings")
  747. def rstrip(a, chars=None):
  748. """
  749. For each element in `a`, return a copy with the trailing characters
  750. removed.
  751. Parameters
  752. ----------
  753. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  754. chars : scalar with the same dtype as ``a``, optional
  755. The ``chars`` argument is a string specifying the set of
  756. characters to be removed. If ``None``, the ``chars``
  757. argument defaults to removing whitespace. The ``chars`` argument
  758. is not a prefix or suffix; rather, all combinations of its
  759. values are stripped.
  760. Returns
  761. -------
  762. out : ndarray
  763. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  764. depending on input types
  765. See Also
  766. --------
  767. str.rstrip
  768. Examples
  769. --------
  770. >>> import numpy as np
  771. >>> c = np.array(['aAaAaA', 'abBABba'])
  772. >>> c
  773. array(['aAaAaA', 'abBABba'], dtype='<U7')
  774. >>> np.strings.rstrip(c, 'a')
  775. array(['aAaAaA', 'abBABb'], dtype='<U7')
  776. >>> np.strings.rstrip(c, 'A')
  777. array(['aAaAa', 'abBABba'], dtype='<U7')
  778. """
  779. if chars is None:
  780. return _rstrip_whitespace(a)
  781. return _rstrip_chars(a, chars)
  782. @set_module("numpy.strings")
  783. def strip(a, chars=None):
  784. """
  785. For each element in `a`, return a copy with the leading and
  786. trailing characters removed.
  787. Parameters
  788. ----------
  789. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  790. chars : scalar with the same dtype as ``a``, optional
  791. The ``chars`` argument is a string specifying the set of
  792. characters to be removed. If ``None``, the ``chars``
  793. argument defaults to removing whitespace. The ``chars`` argument
  794. is not a prefix or suffix; rather, all combinations of its
  795. values are stripped.
  796. Returns
  797. -------
  798. out : ndarray
  799. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  800. depending on input types
  801. See Also
  802. --------
  803. str.strip
  804. Examples
  805. --------
  806. >>> import numpy as np
  807. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  808. >>> c
  809. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  810. >>> np.strings.strip(c)
  811. array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
  812. # 'a' unstripped from c[1] because of leading whitespace.
  813. >>> np.strings.strip(c, 'a')
  814. array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
  815. # 'A' unstripped from c[1] because of trailing whitespace.
  816. >>> np.strings.strip(c, 'A')
  817. array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')
  818. """
  819. if chars is None:
  820. return _strip_whitespace(a)
  821. return _strip_chars(a, chars)
  822. @set_module("numpy.strings")
  823. def upper(a):
  824. """
  825. Return an array with the elements converted to uppercase.
  826. Calls :meth:`str.upper` element-wise.
  827. For 8-bit strings, this method is locale-dependent.
  828. Parameters
  829. ----------
  830. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  831. Input array.
  832. Returns
  833. -------
  834. out : ndarray
  835. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  836. depending on input types
  837. See Also
  838. --------
  839. str.upper
  840. Examples
  841. --------
  842. >>> import numpy as np
  843. >>> c = np.array(['a1b c', '1bca', 'bca1']); c
  844. array(['a1b c', '1bca', 'bca1'], dtype='<U5')
  845. >>> np.strings.upper(c)
  846. array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
  847. """
  848. a_arr = np.asarray(a)
  849. return _vec_string(a_arr, a_arr.dtype, 'upper')
  850. @set_module("numpy.strings")
  851. def lower(a):
  852. """
  853. Return an array with the elements converted to lowercase.
  854. Call :meth:`str.lower` element-wise.
  855. For 8-bit strings, this method is locale-dependent.
  856. Parameters
  857. ----------
  858. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  859. Input array.
  860. Returns
  861. -------
  862. out : ndarray
  863. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  864. depending on input types
  865. See Also
  866. --------
  867. str.lower
  868. Examples
  869. --------
  870. >>> import numpy as np
  871. >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
  872. array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
  873. >>> np.strings.lower(c)
  874. array(['a1b c', '1bca', 'bca1'], dtype='<U5')
  875. """
  876. a_arr = np.asarray(a)
  877. return _vec_string(a_arr, a_arr.dtype, 'lower')
  878. @set_module("numpy.strings")
  879. def swapcase(a):
  880. """
  881. Return element-wise a copy of the string with
  882. uppercase characters converted to lowercase and vice versa.
  883. Calls :meth:`str.swapcase` element-wise.
  884. For 8-bit strings, this method is locale-dependent.
  885. Parameters
  886. ----------
  887. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  888. Input array.
  889. Returns
  890. -------
  891. out : ndarray
  892. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  893. depending on input types
  894. See Also
  895. --------
  896. str.swapcase
  897. Examples
  898. --------
  899. >>> import numpy as np
  900. >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
  901. array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
  902. dtype='|S5')
  903. >>> np.strings.swapcase(c)
  904. array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
  905. dtype='|S5')
  906. """
  907. a_arr = np.asarray(a)
  908. return _vec_string(a_arr, a_arr.dtype, 'swapcase')
  909. @set_module("numpy.strings")
  910. def capitalize(a):
  911. """
  912. Return a copy of ``a`` with only the first character of each element
  913. capitalized.
  914. Calls :meth:`str.capitalize` element-wise.
  915. For byte strings, this method is locale-dependent.
  916. Parameters
  917. ----------
  918. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  919. Input array of strings to capitalize.
  920. Returns
  921. -------
  922. out : ndarray
  923. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  924. depending on input types
  925. See Also
  926. --------
  927. str.capitalize
  928. Examples
  929. --------
  930. >>> import numpy as np
  931. >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
  932. array(['a1b2', '1b2a', 'b2a1', '2a1b'],
  933. dtype='|S4')
  934. >>> np.strings.capitalize(c)
  935. array(['A1b2', '1b2a', 'B2a1', '2a1b'],
  936. dtype='|S4')
  937. """
  938. a_arr = np.asarray(a)
  939. return _vec_string(a_arr, a_arr.dtype, 'capitalize')
  940. @set_module("numpy.strings")
  941. def title(a):
  942. """
  943. Return element-wise title cased version of string or unicode.
  944. Title case words start with uppercase characters, all remaining cased
  945. characters are lowercase.
  946. Calls :meth:`str.title` element-wise.
  947. For 8-bit strings, this method is locale-dependent.
  948. Parameters
  949. ----------
  950. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  951. Input array.
  952. Returns
  953. -------
  954. out : ndarray
  955. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  956. depending on input types
  957. See Also
  958. --------
  959. str.title
  960. Examples
  961. --------
  962. >>> import numpy as np
  963. >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
  964. array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
  965. dtype='|S5')
  966. >>> np.strings.title(c)
  967. array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
  968. dtype='|S5')
  969. """
  970. a_arr = np.asarray(a)
  971. return _vec_string(a_arr, a_arr.dtype, 'title')
  972. @set_module("numpy.strings")
  973. def replace(a, old, new, count=-1):
  974. """
  975. For each element in ``a``, return a copy of the string with
  976. occurrences of substring ``old`` replaced by ``new``.
  977. Parameters
  978. ----------
  979. a : array_like, with ``bytes_`` or ``str_`` dtype
  980. old, new : array_like, with ``bytes_`` or ``str_`` dtype
  981. count : array_like, with ``int_`` dtype
  982. If the optional argument ``count`` is given, only the first
  983. ``count`` occurrences are replaced.
  984. Returns
  985. -------
  986. out : ndarray
  987. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  988. depending on input types
  989. See Also
  990. --------
  991. str.replace
  992. Examples
  993. --------
  994. >>> import numpy as np
  995. >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
  996. >>> np.strings.replace(a, 'mango', 'banana')
  997. array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')
  998. >>> a = np.array(["The dish is fresh", "This is it"])
  999. >>> np.strings.replace(a, 'is', 'was')
  1000. array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
  1001. """
  1002. count = np.asanyarray(count)
  1003. if not np.issubdtype(count.dtype, np.integer):
  1004. raise TypeError(f"unsupported type {count.dtype} for operand 'count'")
  1005. arr = np.asanyarray(a)
  1006. old_dtype = getattr(old, 'dtype', None)
  1007. old = np.asanyarray(old)
  1008. new_dtype = getattr(new, 'dtype', None)
  1009. new = np.asanyarray(new)
  1010. if np.result_type(arr, old, new).char == "T":
  1011. return _replace(arr, old, new, count)
  1012. a_dt = arr.dtype
  1013. old = old.astype(old_dtype if old_dtype else a_dt, copy=False)
  1014. new = new.astype(new_dtype if new_dtype else a_dt, copy=False)
  1015. max_int64 = np.iinfo(np.int64).max
  1016. counts = _count_ufunc(arr, old, 0, max_int64)
  1017. counts = np.where(count < 0, counts, np.minimum(counts, count))
  1018. buffersizes = str_len(arr) + counts * (str_len(new) - str_len(old))
  1019. out_dtype = f"{arr.dtype.char}{buffersizes.max()}"
  1020. out = np.empty_like(arr, shape=buffersizes.shape, dtype=out_dtype)
  1021. return _replace(arr, old, new, counts, out=out)
  1022. def _join(sep, seq):
  1023. """
  1024. Return a string which is the concatenation of the strings in the
  1025. sequence `seq`.
  1026. Calls :meth:`str.join` element-wise.
  1027. Parameters
  1028. ----------
  1029. sep : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  1030. seq : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  1031. Returns
  1032. -------
  1033. out : ndarray
  1034. Output array of ``StringDType``, ``bytes_`` or ``str_`` dtype,
  1035. depending on input types
  1036. See Also
  1037. --------
  1038. str.join
  1039. Examples
  1040. --------
  1041. >>> import numpy as np
  1042. >>> np.strings.join('-', 'osd') # doctest: +SKIP
  1043. array('o-s-d', dtype='<U5') # doctest: +SKIP
  1044. >>> np.strings.join(['-', '.'], ['ghc', 'osd']) # doctest: +SKIP
  1045. array(['g-h-c', 'o.s.d'], dtype='<U5') # doctest: +SKIP
  1046. """
  1047. return _to_bytes_or_str_array(
  1048. _vec_string(sep, np.object_, 'join', (seq,)), seq)
  1049. def _split(a, sep=None, maxsplit=None):
  1050. """
  1051. For each element in `a`, return a list of the words in the
  1052. string, using `sep` as the delimiter string.
  1053. Calls :meth:`str.split` element-wise.
  1054. Parameters
  1055. ----------
  1056. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  1057. sep : str or unicode, optional
  1058. If `sep` is not specified or None, any whitespace string is a
  1059. separator.
  1060. maxsplit : int, optional
  1061. If `maxsplit` is given, at most `maxsplit` splits are done.
  1062. Returns
  1063. -------
  1064. out : ndarray
  1065. Array of list objects
  1066. Examples
  1067. --------
  1068. >>> import numpy as np
  1069. >>> x = np.array("Numpy is nice!")
  1070. >>> np.strings.split(x, " ") # doctest: +SKIP
  1071. array(list(['Numpy', 'is', 'nice!']), dtype=object) # doctest: +SKIP
  1072. >>> np.strings.split(x, " ", 1) # doctest: +SKIP
  1073. array(list(['Numpy', 'is nice!']), dtype=object) # doctest: +SKIP
  1074. See Also
  1075. --------
  1076. str.split, rsplit
  1077. """
  1078. # This will return an array of lists of different sizes, so we
  1079. # leave it as an object array
  1080. return _vec_string(
  1081. a, np.object_, 'split', [sep] + _clean_args(maxsplit))
  1082. def _rsplit(a, sep=None, maxsplit=None):
  1083. """
  1084. For each element in `a`, return a list of the words in the
  1085. string, using `sep` as the delimiter string.
  1086. Calls :meth:`str.rsplit` element-wise.
  1087. Except for splitting from the right, `rsplit`
  1088. behaves like `split`.
  1089. Parameters
  1090. ----------
  1091. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  1092. sep : str or unicode, optional
  1093. If `sep` is not specified or None, any whitespace string
  1094. is a separator.
  1095. maxsplit : int, optional
  1096. If `maxsplit` is given, at most `maxsplit` splits are done,
  1097. the rightmost ones.
  1098. Returns
  1099. -------
  1100. out : ndarray
  1101. Array of list objects
  1102. See Also
  1103. --------
  1104. str.rsplit, split
  1105. Examples
  1106. --------
  1107. >>> import numpy as np
  1108. >>> a = np.array(['aAaAaA', 'abBABba'])
  1109. >>> np.strings.rsplit(a, 'A') # doctest: +SKIP
  1110. array([list(['a', 'a', 'a', '']), # doctest: +SKIP
  1111. list(['abB', 'Bba'])], dtype=object) # doctest: +SKIP
  1112. """
  1113. # This will return an array of lists of different sizes, so we
  1114. # leave it as an object array
  1115. return _vec_string(
  1116. a, np.object_, 'rsplit', [sep] + _clean_args(maxsplit))
  1117. def _splitlines(a, keepends=None):
  1118. """
  1119. For each element in `a`, return a list of the lines in the
  1120. element, breaking at line boundaries.
  1121. Calls :meth:`str.splitlines` element-wise.
  1122. Parameters
  1123. ----------
  1124. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  1125. keepends : bool, optional
  1126. Line breaks are not included in the resulting list unless
  1127. keepends is given and true.
  1128. Returns
  1129. -------
  1130. out : ndarray
  1131. Array of list objects
  1132. See Also
  1133. --------
  1134. str.splitlines
  1135. Examples
  1136. --------
  1137. >>> np.char.splitlines("first line\\nsecond line")
  1138. array(list(['first line', 'second line']), dtype=object)
  1139. >>> a = np.array(["first\\nsecond", "third\\nfourth"])
  1140. >>> np.char.splitlines(a)
  1141. array([list(['first', 'second']), list(['third', 'fourth'])], dtype=object)
  1142. """
  1143. return _vec_string(
  1144. a, np.object_, 'splitlines', _clean_args(keepends))
  1145. @set_module("numpy.strings")
  1146. def partition(a, sep):
  1147. """
  1148. Partition each element in ``a`` around ``sep``.
  1149. For each element in ``a``, split the element at the first
  1150. occurrence of ``sep``, and return a 3-tuple containing the part
  1151. before the separator, the separator itself, and the part after
  1152. the separator. If the separator is not found, the first item of
  1153. the tuple will contain the whole string, and the second and third
  1154. ones will be the empty string.
  1155. Parameters
  1156. ----------
  1157. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  1158. Input array
  1159. sep : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  1160. Separator to split each string element in ``a``.
  1161. Returns
  1162. -------
  1163. out : 3-tuple:
  1164. - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
  1165. part before the separator
  1166. - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
  1167. separator
  1168. - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
  1169. part after the separator
  1170. See Also
  1171. --------
  1172. str.partition
  1173. Examples
  1174. --------
  1175. >>> import numpy as np
  1176. >>> x = np.array(["Numpy is nice!"])
  1177. >>> np.strings.partition(x, " ")
  1178. (array(['Numpy'], dtype='<U5'),
  1179. array([' '], dtype='<U1'),
  1180. array(['is nice!'], dtype='<U8'))
  1181. """
  1182. a = np.asanyarray(a)
  1183. sep = np.asanyarray(sep)
  1184. if np.result_type(a, sep).char == "T":
  1185. return _partition(a, sep)
  1186. sep = sep.astype(a.dtype, copy=False)
  1187. pos = _find_ufunc(a, sep, 0, MAX)
  1188. a_len = str_len(a)
  1189. sep_len = str_len(sep)
  1190. not_found = pos < 0
  1191. buffersizes1 = np.where(not_found, a_len, pos)
  1192. buffersizes3 = np.where(not_found, 0, a_len - pos - sep_len)
  1193. out_dtype = ",".join([f"{a.dtype.char}{n}" for n in (
  1194. buffersizes1.max(),
  1195. 1 if np.all(not_found) else sep_len.max(),
  1196. buffersizes3.max(),
  1197. )])
  1198. shape = np.broadcast_shapes(a.shape, sep.shape)
  1199. out = np.empty_like(a, shape=shape, dtype=out_dtype)
  1200. return _partition_index(a, sep, pos, out=(out["f0"], out["f1"], out["f2"]))
  1201. @set_module("numpy.strings")
  1202. def rpartition(a, sep):
  1203. """
  1204. Partition (split) each element around the right-most separator.
  1205. For each element in ``a``, split the element at the last
  1206. occurrence of ``sep``, and return a 3-tuple containing the part
  1207. before the separator, the separator itself, and the part after
  1208. the separator. If the separator is not found, the third item of
  1209. the tuple will contain the whole string, and the first and second
  1210. ones will be the empty string.
  1211. Parameters
  1212. ----------
  1213. a : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  1214. Input array
  1215. sep : array-like, with ``StringDType``, ``bytes_``, or ``str_`` dtype
  1216. Separator to split each string element in ``a``.
  1217. Returns
  1218. -------
  1219. out : 3-tuple:
  1220. - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
  1221. part before the separator
  1222. - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
  1223. separator
  1224. - array with ``StringDType``, ``bytes_`` or ``str_`` dtype with the
  1225. part after the separator
  1226. See Also
  1227. --------
  1228. str.rpartition
  1229. Examples
  1230. --------
  1231. >>> import numpy as np
  1232. >>> a = np.array(['aAaAaA', ' aA ', 'abBABba'])
  1233. >>> np.strings.rpartition(a, 'A')
  1234. (array(['aAaAa', ' a', 'abB'], dtype='<U5'),
  1235. array(['A', 'A', 'A'], dtype='<U1'),
  1236. array(['', ' ', 'Bba'], dtype='<U3'))
  1237. """
  1238. a = np.asanyarray(a)
  1239. sep = np.asanyarray(sep)
  1240. if np.result_type(a, sep).char == "T":
  1241. return _rpartition(a, sep)
  1242. sep = sep.astype(a.dtype, copy=False)
  1243. pos = _rfind_ufunc(a, sep, 0, MAX)
  1244. a_len = str_len(a)
  1245. sep_len = str_len(sep)
  1246. not_found = pos < 0
  1247. buffersizes1 = np.where(not_found, 0, pos)
  1248. buffersizes3 = np.where(not_found, a_len, a_len - pos - sep_len)
  1249. out_dtype = ",".join([f"{a.dtype.char}{n}" for n in (
  1250. buffersizes1.max(),
  1251. 1 if np.all(not_found) else sep_len.max(),
  1252. buffersizes3.max(),
  1253. )])
  1254. shape = np.broadcast_shapes(a.shape, sep.shape)
  1255. out = np.empty_like(a, shape=shape, dtype=out_dtype)
  1256. return _rpartition_index(
  1257. a, sep, pos, out=(out["f0"], out["f1"], out["f2"]))
  1258. @set_module("numpy.strings")
  1259. def translate(a, table, deletechars=None):
  1260. """
  1261. For each element in `a`, return a copy of the string where all
  1262. characters occurring in the optional argument `deletechars` are
  1263. removed, and the remaining characters have been mapped through the
  1264. given translation table.
  1265. Calls :meth:`str.translate` element-wise.
  1266. Parameters
  1267. ----------
  1268. a : array-like, with `np.bytes_` or `np.str_` dtype
  1269. table : str of length 256
  1270. deletechars : str
  1271. Returns
  1272. -------
  1273. out : ndarray
  1274. Output array of str or unicode, depending on input type
  1275. See Also
  1276. --------
  1277. str.translate
  1278. Examples
  1279. --------
  1280. >>> import numpy as np
  1281. >>> a = np.array(['a1b c', '1bca', 'bca1'])
  1282. >>> table = a[0].maketrans('abc', '123')
  1283. >>> deletechars = ' '
  1284. >>> np.char.translate(a, table, deletechars)
  1285. array(['112 3', '1231', '2311'], dtype='<U5')
  1286. """
  1287. a_arr = np.asarray(a)
  1288. if issubclass(a_arr.dtype.type, np.str_):
  1289. return _vec_string(
  1290. a_arr, a_arr.dtype, 'translate', (table,))
  1291. else:
  1292. return _vec_string(
  1293. a_arr,
  1294. a_arr.dtype,
  1295. 'translate',
  1296. [table] + _clean_args(deletechars)
  1297. )