records.py 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091
  1. """
  2. This module contains a set of functions for record arrays.
  3. """
  4. import os
  5. import warnings
  6. from collections import Counter
  7. from contextlib import nullcontext
  8. from .._utils import set_module
  9. from . import numeric as sb
  10. from . import numerictypes as nt
  11. from .arrayprint import _get_legacy_print_mode
# All of the functions allow formats to be a dtype
__all__ = [
    'record', 'recarray', 'format_parser', 'fromarrays', 'fromrecords',
    'fromstring', 'fromfile', 'array', 'find_duplicate',
]


# Local alias for the base array class that ``recarray`` derives from.
ndarray = sb.ndarray

# Lookup table keyed by the *first character* of a byte-order specifier.
# ``format_parser._createdtype`` indexes it with ``byteorder[0]`` and hands
# the resulting code to ``dtype.newbyteorder``.  Both upper- and lower-case
# spellings are listed, together with the symbolic codes themselves.
# NOTE(review): 'S'/'s' presumably request a byte swap -- confirm against
# the `dtype.newbyteorder` documentation.
_byteorderconv = {'b': '>',
                  'l': '<',
                  'n': '=',
                  'B': '>',
                  'L': '<',
                  'N': '=',
                  'S': 's',
                  's': 's',
                  '>': '>',
                  '<': '<',
                  '=': '=',
                  '|': '|',
                  'I': '|',
                  'i': '|'}

# formats regular expression
# allows multidimensional spec with a tuple syntax in front
# of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
# are equally allowed
# NOTE(review): no regular expression follows in the visible part of this
# file; the comment above may be a leftover -- confirm before relying on it.

# Alias for the scalar-type dictionary exposed by ``numerictypes``.
numfmt = nt.sctypeDict
  37. @set_module('numpy.rec')
  38. def find_duplicate(list):
  39. """Find duplication in a list, return a list of duplicated elements"""
  40. return [
  41. item
  42. for item, counts in Counter(list).items()
  43. if counts > 1
  44. ]
  45. @set_module('numpy.rec')
  46. class format_parser:
  47. """
  48. Class to convert formats, names, titles description to a dtype.
  49. After constructing the format_parser object, the dtype attribute is
  50. the converted data-type:
  51. ``dtype = format_parser(formats, names, titles).dtype``
  52. Attributes
  53. ----------
  54. dtype : dtype
  55. The converted data-type.
  56. Parameters
  57. ----------
  58. formats : str or list of str
  59. The format description, either specified as a string with
  60. comma-separated format descriptions in the form ``'f8, i4, S5'``, or
  61. a list of format description strings in the form
  62. ``['f8', 'i4', 'S5']``.
  63. names : str or list/tuple of str
  64. The field names, either specified as a comma-separated string in the
  65. form ``'col1, col2, col3'``, or as a list or tuple of strings in the
  66. form ``['col1', 'col2', 'col3']``.
  67. An empty list can be used, in that case default field names
  68. ('f0', 'f1', ...) are used.
  69. titles : sequence
  70. Sequence of title strings. An empty list can be used to leave titles
  71. out.
  72. aligned : bool, optional
  73. If True, align the fields by padding as the C-compiler would.
  74. Default is False.
  75. byteorder : str, optional
  76. If specified, all the fields will be changed to the
  77. provided byte-order. Otherwise, the default byte-order is
  78. used. For all available string specifiers, see `dtype.newbyteorder`.
  79. See Also
  80. --------
  81. numpy.dtype, numpy.typename
  82. Examples
  83. --------
  84. >>> import numpy as np
  85. >>> np.rec.format_parser(['<f8', '<i4'], ['col1', 'col2'],
  86. ... ['T1', 'T2']).dtype
  87. dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4')])
  88. `names` and/or `titles` can be empty lists. If `titles` is an empty list,
  89. titles will simply not appear. If `names` is empty, default field names
  90. will be used.
  91. >>> np.rec.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
  92. ... []).dtype
  93. dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
  94. >>> np.rec.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
  95. dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])
  96. """
  97. def __init__(self, formats, names, titles, aligned=False, byteorder=None):
  98. self._parseFormats(formats, aligned)
  99. self._setfieldnames(names, titles)
  100. self._createdtype(byteorder)
  101. def _parseFormats(self, formats, aligned=False):
  102. """ Parse the field formats """
  103. if formats is None:
  104. raise ValueError("Need formats argument")
  105. if isinstance(formats, list):
  106. dtype = sb.dtype(
  107. [
  108. ('f{}'.format(i), format_)
  109. for i, format_ in enumerate(formats)
  110. ],
  111. aligned,
  112. )
  113. else:
  114. dtype = sb.dtype(formats, aligned)
  115. fields = dtype.fields
  116. if fields is None:
  117. dtype = sb.dtype([('f1', dtype)], aligned)
  118. fields = dtype.fields
  119. keys = dtype.names
  120. self._f_formats = [fields[key][0] for key in keys]
  121. self._offsets = [fields[key][1] for key in keys]
  122. self._nfields = len(keys)
  123. def _setfieldnames(self, names, titles):
  124. """convert input field names into a list and assign to the _names
  125. attribute """
  126. if names:
  127. if type(names) in [list, tuple]:
  128. pass
  129. elif isinstance(names, str):
  130. names = names.split(',')
  131. else:
  132. raise NameError("illegal input names %s" % repr(names))
  133. self._names = [n.strip() for n in names[:self._nfields]]
  134. else:
  135. self._names = []
  136. # if the names are not specified, they will be assigned as
  137. # "f0, f1, f2,..."
  138. # if not enough names are specified, they will be assigned as "f[n],
  139. # f[n+1],..." etc. where n is the number of specified names..."
  140. self._names += ['f%d' % i for i in range(len(self._names),
  141. self._nfields)]
  142. # check for redundant names
  143. _dup = find_duplicate(self._names)
  144. if _dup:
  145. raise ValueError("Duplicate field names: %s" % _dup)
  146. if titles:
  147. self._titles = [n.strip() for n in titles[:self._nfields]]
  148. else:
  149. self._titles = []
  150. titles = []
  151. if self._nfields > len(titles):
  152. self._titles += [None] * (self._nfields - len(titles))
  153. def _createdtype(self, byteorder):
  154. dtype = sb.dtype({
  155. 'names': self._names,
  156. 'formats': self._f_formats,
  157. 'offsets': self._offsets,
  158. 'titles': self._titles,
  159. })
  160. if byteorder is not None:
  161. byteorder = _byteorderconv[byteorder[0]]
  162. dtype = dtype.newbyteorder(byteorder)
  163. self.dtype = dtype
  164. class record(nt.void):
  165. """A data-type scalar that allows field access as attribute lookup.
  166. """
  167. # manually set name and module so that this class's type shows up
  168. # as numpy.record when printed
  169. __name__ = 'record'
  170. __module__ = 'numpy'
  171. def __repr__(self):
  172. if _get_legacy_print_mode() <= 113:
  173. return self.__str__()
  174. return super().__repr__()
  175. def __str__(self):
  176. if _get_legacy_print_mode() <= 113:
  177. return str(self.item())
  178. return super().__str__()
  179. def __getattribute__(self, attr):
  180. if attr in ('setfield', 'getfield', 'dtype'):
  181. return nt.void.__getattribute__(self, attr)
  182. try:
  183. return nt.void.__getattribute__(self, attr)
  184. except AttributeError:
  185. pass
  186. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  187. res = fielddict.get(attr, None)
  188. if res:
  189. obj = self.getfield(*res[:2])
  190. # if it has fields return a record,
  191. # otherwise return the object
  192. try:
  193. dt = obj.dtype
  194. except AttributeError:
  195. #happens if field is Object type
  196. return obj
  197. if dt.names is not None:
  198. return obj.view((self.__class__, obj.dtype))
  199. return obj
  200. else:
  201. raise AttributeError("'record' object has no "
  202. "attribute '%s'" % attr)
  203. def __setattr__(self, attr, val):
  204. if attr in ('setfield', 'getfield', 'dtype'):
  205. raise AttributeError("Cannot set '%s' attribute" % attr)
  206. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  207. res = fielddict.get(attr, None)
  208. if res:
  209. return self.setfield(val, *res[:2])
  210. else:
  211. if getattr(self, attr, None):
  212. return nt.void.__setattr__(self, attr, val)
  213. else:
  214. raise AttributeError("'record' object has no "
  215. "attribute '%s'" % attr)
  216. def __getitem__(self, indx):
  217. obj = nt.void.__getitem__(self, indx)
  218. # copy behavior of record.__getattribute__,
  219. if isinstance(obj, nt.void) and obj.dtype.names is not None:
  220. return obj.view((self.__class__, obj.dtype))
  221. else:
  222. # return a single element
  223. return obj
  224. def pprint(self):
  225. """Pretty-print all fields."""
  226. # pretty-print all fields
  227. names = self.dtype.names
  228. maxlen = max(len(name) for name in names)
  229. fmt = '%% %ds: %%s' % maxlen
  230. rows = [fmt % (name, getattr(self, name)) for name in names]
  231. return "\n".join(rows)
# The recarray is almost identical to a standard array (which supports
#   named fields already).  The biggest difference is that it can use
#   attribute-lookup to find the fields and it is constructed using
#   a record.

# If byteorder is given it forces a particular byteorder on all
#  the fields (and any subfields)
@set_module("numpy.rec")
class recarray(ndarray):
    """Construct an ndarray that allows field access using attributes.

    Arrays may have a data-types containing fields, analogous
    to columns in a spread sheet.  An example is ``[(x, int), (y, float)]``,
    where each entry in the array is a pair of ``(int, float)``.  Normally,
    these attributes are accessed using dictionary lookups such as ``arr['x']``
    and ``arr['y']``.  Record arrays allow the fields to be accessed as members
    of the array, using ``arr.x`` and ``arr.y``.

    Parameters
    ----------
    shape : tuple
        Shape of output array.
    dtype : data-type, optional
        The desired data-type.  By default, the data-type is determined
        from `formats`, `names`, `titles`, `aligned` and `byteorder`.
    formats : list of data-types, optional
        A list containing the data-types for the different columns, e.g.
        ``['i4', 'f8', 'i4']``.  `formats` does *not* support the new
        convention of using types directly, i.e. ``(int, float, int)``.
        Note that `formats` must be a list, not a tuple.
        Given that `formats` is somewhat limited, we recommend specifying
        `dtype` instead.
    names : tuple of str, optional
        The name of each column, e.g. ``('x', 'y', 'z')``.
    buf : buffer, optional
        By default, a new array is created of the given shape and data-type.
        If `buf` is specified and is an object exposing the buffer interface,
        the array will use the memory from the existing buffer.  In this case,
        the `offset` and `strides` keywords are available.

    Other Parameters
    ----------------
    titles : tuple of str, optional
        Aliases for column names.  For example, if `names` were
        ``('x', 'y', 'z')`` and `titles` is
        ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
        ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
    byteorder : {'<', '>', '='}, optional
        Byte-order for all fields.
    aligned : bool, optional
        Align the fields in memory as the C-compiler would.
    strides : tuple of ints, optional
        Buffer (`buf`) is interpreted according to these strides (strides
        define how many bytes each array element, row, column, etc.
        occupy in memory).
    offset : int, optional
        Start reading buffer (`buf`) from this offset onwards.
    order : {'C', 'F'}, optional
        Row-major (C-style) or column-major (Fortran-style) order.

    Returns
    -------
    rec : recarray
        Empty array of the given shape and type.

    See Also
    --------
    numpy.rec.fromrecords : Construct a record array from data.
    numpy.record : fundamental data-type for `recarray`.
    numpy.rec.format_parser : determine data-type from formats, names, titles.

    Notes
    -----
    This constructor can be compared to ``empty``: it creates a new record
    array but does not fill it with data.  To create a record array from data,
    use one of the following methods:

    1. Create a standard ndarray and convert it to a record array,
       using ``arr.view(np.recarray)``
    2. Use the `buf` keyword.
    3. Use `np.rec.fromrecords`.

    Examples
    --------
    Create an array with two fields, ``x`` and ``y``:

    >>> import numpy as np
    >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
    >>> x
    array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])

    >>> x['x']
    array([1., 3.])

    View the array as a record array:

    >>> x = x.view(np.recarray)

    >>> x.x
    array([1., 3.])

    >>> x.y
    array([2, 4])

    Create a new, empty record array:

    >>> np.recarray((2,),
    ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
    rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
           (3471280, 1.2134086255804012e-316, 0)],
          dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])

    """

    def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
                formats=None, names=None, titles=None,
                byteorder=None, aligned=False, order='C'):
        """Allocate the array; see the class docstring for the parameters.

        When `dtype` is given it is used directly and `formats`, `names`,
        `titles`, `aligned` and `byteorder` are not consulted; otherwise
        the dtype is built from them with `format_parser`.
        """

        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(
                formats, names, titles, aligned, byteorder
            ).dtype

        # `offset` and `strides` are only forwarded when a buffer is given
        if buf is None:
            self = ndarray.__new__(
                subtype, shape, (record, descr), order=order
            )
        else:
            self = ndarray.__new__(
                subtype, shape, (record, descr), buffer=buf,
                offset=offset, strides=strides, order=order
            )
        return self

    def __array_finalize__(self, obj):
        """Re-assign the dtype of structured arrays that are not yet records.

        The assignment is routed through `__setattr__`, which performs the
        conversion to a `record` dtype.
        """
        if self.dtype.type is not record and self.dtype.names is not None:
            # if self.dtype is not np.record, invoke __setattr__ which will
            # convert it to a record if it is a void dtype.
            self.dtype = self.dtype

    def __getattribute__(self, attr):
        """Return a regular attribute or, failing that, the field `attr`.

        Raises AttributeError if `attr` is neither.
        """
        # See if ndarray has this attr, and return it if so. (note that this
        # means a field with the same name as an ndarray attr cannot be
        # accessed by attribute).
        try:
            return object.__getattribute__(self, attr)
        except AttributeError:  # attr must be a fieldname
            pass

        # look for a field with this name
        fielddict = ndarray.__getattribute__(self, 'dtype').fields
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError) as e:
            # TypeError covers ``fielddict`` being None (no fields at all)
            raise AttributeError("recarray has no attribute %s" % attr) from e
        obj = self.getfield(*res)

        # At this point obj will always be a recarray, since (see
        # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
        # non-structured, convert it to an ndarray. Then if obj is structured
        # with void type convert it to the same dtype.type (eg to preserve
        # numpy.record type if present), since nested structured fields do not
        # inherit type. Don't do this for non-void structures though.
        if obj.dtype.names is not None:
            if issubclass(obj.dtype.type, nt.void):
                return obj.view(dtype=(self.dtype.type, obj.dtype))
            return obj
        else:
            return obj.view(ndarray)

    # Save the dictionary.
    # If the attr is a field name and not in the saved dictionary
    # Undo any "setting" of the attribute and do a setfield
    # Thus, you can't create attributes on-the-fly that are field names.
    def __setattr__(self, attr, val):
        """Set a regular attribute, or the field `attr` if it names one.

        The regular assignment is attempted first.  If `attr` is a field
        name, a freshly created instance attribute is removed again and
        the value is written into the field with ``setfield`` instead.
        """

        # Automatically convert (void) structured types to records
        # (but not non-void structures, subarrays, or non-structured voids)
        # NOTE(review): this reads ``val.type``/``val.names``, so `val` is
        # assumed to already be a dtype object when ``attr == 'dtype'``.
        if (
            attr == 'dtype' and
            issubclass(val.type, nt.void) and
            val.names is not None
        ):
            val = sb.dtype((record, val))

        # remember whether the attribute existed before the assignment
        newattr = attr not in self.__dict__
        try:
            ret = object.__setattr__(self, attr, val)
        except Exception:
            # the plain assignment failed: only fall through to the field
            # assignment below when `attr` names a field
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                raise
        else:
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                return ret
            if newattr:
                # We just added this one or this setattr worked on an
                # internal attribute.
                try:
                    object.__delattr__(self, attr)
                except Exception:
                    return ret
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError) as e:
            raise AttributeError(
                "record array has no attribute %s" % attr
            ) from e
        return self.setfield(val, *res)

    def __getitem__(self, indx):
        """Index the array, adjusting the type of array results.

        Structured array results are viewed as this array's class, and
        non-structured array results as plain ndarrays.  Results that are
        not arrays are returned unchanged.
        """
        obj = super().__getitem__(indx)

        # copy behavior of getattr, except that here
        # we might also be returning a single element
        if isinstance(obj, ndarray):
            if obj.dtype.names is not None:
                obj = obj.view(type(self))
                if issubclass(obj.dtype.type, nt.void):
                    return obj.view(dtype=(self.dtype.type, obj.dtype))
                return obj
            else:
                return obj.view(type=ndarray)
        else:
            # return a single element
            return obj

    def __repr__(self):
        """Return a ``rec.array(...)`` or ``array(...).view(...)`` string."""

        repr_dtype = self.dtype
        if (
            self.dtype.type is record or
            not issubclass(self.dtype.type, nt.void)
        ):
            # If this is a full record array (has numpy.record dtype),
            # or if it has a scalar (non-void) dtype with no records,
            # represent it using the rec.array function. Since rec.array
            # converts dtype to a numpy.record for us, convert back
            # to non-record before printing
            if repr_dtype.type is record:
                repr_dtype = sb.dtype((nt.void, repr_dtype))
            prefix = "rec.array("
            fmt = 'rec.array(%s,%sdtype=%s)'
        else:
            # otherwise represent it using np.array plus a view
            # This should only happen if the user is playing
            # strange games with dtypes.
            prefix = "array("
            fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'

        # get data/shape string. logic taken from numeric.array_repr
        if self.size > 0 or self.shape == (0,):
            lst = sb.array2string(
                self, separator=', ', prefix=prefix, suffix=',')
        else:
            # show zero-length shape unless it is (0,)
            lst = "[], shape=%s" % (repr(self.shape),)

        # the dtype goes on its own line, indented to align with the data
        lf = '\n'+' '*len(prefix)
        if _get_legacy_print_mode() <= 113:
            lf = ' ' + lf  # trailing space
        return fmt % (lst, lf, repr_dtype)

    def field(self, attr, val=None):
        """Get or set a single field.

        Parameters
        ----------
        attr : int or str
            Field name, or the integer position of the field in
            ``dtype.names``.
        val : optional
            If None (the default) the field is returned; otherwise `val`
            is written into the field with ``setfield``.  As a consequence
            None itself cannot be assigned through this method.

        Returns
        -------
        The field when `val` is None (viewed as a plain ndarray unless the
        field is itself structured), else the result of ``setfield``.
        """
        if isinstance(attr, int):
            names = ndarray.__getattribute__(self, 'dtype').names
            attr = names[attr]

        fielddict = ndarray.__getattribute__(self, 'dtype').fields

        res = fielddict[attr][:2]

        if val is None:
            obj = self.getfield(*res)
            if obj.dtype.names is not None:
                return obj
            return obj.view(ndarray)
        else:
            return self.setfield(val, *res)
  476. def _deprecate_shape_0_as_None(shape):
  477. if shape == 0:
  478. warnings.warn(
  479. "Passing `shape=0` to have the shape be inferred is deprecated, "
  480. "and in future will be equivalent to `shape=(0,)`. To infer "
  481. "the shape and suppress this warning, pass `shape=None` instead.",
  482. FutureWarning, stacklevel=3)
  483. return None
  484. else:
  485. return shape
  486. @set_module("numpy.rec")
  487. def fromarrays(arrayList, dtype=None, shape=None, formats=None,
  488. names=None, titles=None, aligned=False, byteorder=None):
  489. """Create a record array from a (flat) list of arrays
  490. Parameters
  491. ----------
  492. arrayList : list or tuple
  493. List of array-like objects (such as lists, tuples,
  494. and ndarrays).
  495. dtype : data-type, optional
  496. valid dtype for all arrays
  497. shape : int or tuple of ints, optional
  498. Shape of the resulting array. If not provided, inferred from
  499. ``arrayList[0]``.
  500. formats, names, titles, aligned, byteorder :
  501. If `dtype` is ``None``, these arguments are passed to
  502. `numpy.rec.format_parser` to construct a dtype. See that function for
  503. detailed documentation.
  504. Returns
  505. -------
  506. np.recarray
  507. Record array consisting of given arrayList columns.
  508. Examples
  509. --------
  510. >>> x1=np.array([1,2,3,4])
  511. >>> x2=np.array(['a','dd','xyz','12'])
  512. >>> x3=np.array([1.1,2,3,4])
  513. >>> r = np.rec.fromarrays([x1,x2,x3],names='a,b,c')
  514. >>> print(r[1])
  515. (2, 'dd', 2.0) # may vary
  516. >>> x1[1]=34
  517. >>> r.a
  518. array([1, 2, 3, 4])
  519. >>> x1 = np.array([1, 2, 3, 4])
  520. >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
  521. >>> x3 = np.array([1.1, 2, 3,4])
  522. >>> r = np.rec.fromarrays(
  523. ... [x1, x2, x3],
  524. ... dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
  525. >>> r
  526. rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
  527. (4, b'12', 4. )],
  528. dtype=[('a', '<i4'), ('b', 'S3'), ('c', '<f4')])
  529. """
  530. arrayList = [sb.asarray(x) for x in arrayList]
  531. # NumPy 1.19.0, 2020-01-01
  532. shape = _deprecate_shape_0_as_None(shape)
  533. if shape is None:
  534. shape = arrayList[0].shape
  535. elif isinstance(shape, int):
  536. shape = (shape,)
  537. if formats is None and dtype is None:
  538. # go through each object in the list to see if it is an ndarray
  539. # and determine the formats.
  540. formats = [obj.dtype for obj in arrayList]
  541. if dtype is not None:
  542. descr = sb.dtype(dtype)
  543. else:
  544. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  545. _names = descr.names
  546. # Determine shape from data-type.
  547. if len(descr) != len(arrayList):
  548. raise ValueError("mismatch between the number of fields "
  549. "and the number of arrays")
  550. d0 = descr[0].shape
  551. nn = len(d0)
  552. if nn > 0:
  553. shape = shape[:-nn]
  554. _array = recarray(shape, descr)
  555. # populate the record array (makes a copy)
  556. for k, obj in enumerate(arrayList):
  557. nn = descr[k].ndim
  558. testshape = obj.shape[:obj.ndim - nn]
  559. name = _names[k]
  560. if testshape != shape:
  561. raise ValueError(f'array-shape mismatch in array {k} ("{name}")')
  562. _array[name] = obj
  563. return _array
  564. @set_module("numpy.rec")
  565. def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
  566. titles=None, aligned=False, byteorder=None):
  567. """Create a recarray from a list of records in text form.
  568. Parameters
  569. ----------
  570. recList : sequence
  571. data in the same field may be heterogeneous - they will be promoted
  572. to the highest data type.
  573. dtype : data-type, optional
  574. valid dtype for all arrays
  575. shape : int or tuple of ints, optional
  576. shape of each array.
  577. formats, names, titles, aligned, byteorder :
  578. If `dtype` is ``None``, these arguments are passed to
  579. `numpy.format_parser` to construct a dtype. See that function for
  580. detailed documentation.
  581. If both `formats` and `dtype` are None, then this will auto-detect
  582. formats. Use list of tuples rather than list of lists for faster
  583. processing.
  584. Returns
  585. -------
  586. np.recarray
  587. record array consisting of given recList rows.
  588. Examples
  589. --------
  590. >>> r=np.rec.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
  591. ... names='col1,col2,col3')
  592. >>> print(r[0])
  593. (456, 'dbe', 1.2)
  594. >>> r.col1
  595. array([456, 2])
  596. >>> r.col2
  597. array(['dbe', 'de'], dtype='<U3')
  598. >>> import pickle
  599. >>> pickle.loads(pickle.dumps(r))
  600. rec.array([(456, 'dbe', 1.2), ( 2, 'de', 1.3)],
  601. dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
  602. """
  603. if formats is None and dtype is None: # slower
  604. obj = sb.array(recList, dtype=object)
  605. arrlist = [
  606. sb.array(obj[..., i].tolist()) for i in range(obj.shape[-1])
  607. ]
  608. return fromarrays(arrlist, formats=formats, shape=shape, names=names,
  609. titles=titles, aligned=aligned, byteorder=byteorder)
  610. if dtype is not None:
  611. descr = sb.dtype((record, dtype))
  612. else:
  613. descr = format_parser(
  614. formats, names, titles, aligned, byteorder
  615. ).dtype
  616. try:
  617. retval = sb.array(recList, dtype=descr)
  618. except (TypeError, ValueError):
  619. # NumPy 1.19.0, 2020-01-01
  620. shape = _deprecate_shape_0_as_None(shape)
  621. if shape is None:
  622. shape = len(recList)
  623. if isinstance(shape, int):
  624. shape = (shape,)
  625. if len(shape) > 1:
  626. raise ValueError("Can only deal with 1-d array.")
  627. _array = recarray(shape, descr)
  628. for k in range(_array.size):
  629. _array[k] = tuple(recList[k])
  630. # list of lists instead of list of tuples ?
  631. # 2018-02-07, 1.14.1
  632. warnings.warn(
  633. "fromrecords expected a list of tuples, may have received a list "
  634. "of lists instead. In the future that will raise an error",
  635. FutureWarning, stacklevel=2)
  636. return _array
  637. else:
  638. if shape is not None and retval.shape != shape:
  639. retval.shape = shape
  640. res = retval.view(recarray)
  641. return res
  642. @set_module("numpy.rec")
  643. def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
  644. names=None, titles=None, aligned=False, byteorder=None):
  645. r"""Create a record array from binary data
  646. Note that despite the name of this function it does not accept `str`
  647. instances.
  648. Parameters
  649. ----------
  650. datastring : bytes-like
  651. Buffer of binary data
  652. dtype : data-type, optional
  653. Valid dtype for all arrays
  654. shape : int or tuple of ints, optional
  655. Shape of each array.
  656. offset : int, optional
  657. Position in the buffer to start reading from.
  658. formats, names, titles, aligned, byteorder :
  659. If `dtype` is ``None``, these arguments are passed to
  660. `numpy.format_parser` to construct a dtype. See that function for
  661. detailed documentation.
  662. Returns
  663. -------
  664. np.recarray
  665. Record array view into the data in datastring. This will be readonly
  666. if `datastring` is readonly.
  667. See Also
  668. --------
  669. numpy.frombuffer
  670. Examples
  671. --------
  672. >>> a = b'\x01\x02\x03abc'
  673. >>> np.rec.fromstring(a, dtype='u1,u1,u1,S3')
  674. rec.array([(1, 2, 3, b'abc')],
  675. dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])
  676. >>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
  677. ... ('GradeLevel', np.int32)]
  678. >>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
  679. ... ('Aadi', 66.6, 6)], dtype=grades_dtype)
  680. >>> np.rec.fromstring(grades_array.tobytes(), dtype=grades_dtype)
  681. rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
  682. dtype=[('Name', '<U10'), ('Marks', '<f8'), ('GradeLevel', '<i4')])
  683. >>> s = '\x01\x02\x03abc'
  684. >>> np.rec.fromstring(s, dtype='u1,u1,u1,S3')
  685. Traceback (most recent call last):
  686. ...
  687. TypeError: a bytes-like object is required, not 'str'
  688. """
  689. if dtype is None and formats is None:
  690. raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
  691. if dtype is not None:
  692. descr = sb.dtype(dtype)
  693. else:
  694. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  695. itemsize = descr.itemsize
  696. # NumPy 1.19.0, 2020-01-01
  697. shape = _deprecate_shape_0_as_None(shape)
  698. if shape in (None, -1):
  699. shape = (len(datastring) - offset) // itemsize
  700. _array = recarray(shape, descr, buf=datastring, offset=offset)
  701. return _array
  702. def get_remaining_size(fd):
  703. pos = fd.tell()
  704. try:
  705. fd.seek(0, 2)
  706. return fd.tell() - pos
  707. finally:
  708. fd.seek(pos, 0)
  709. @set_module("numpy.rec")
  710. def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
  711. names=None, titles=None, aligned=False, byteorder=None):
  712. """Create an array from binary file data
  713. Parameters
  714. ----------
  715. fd : str or file type
  716. If file is a string or a path-like object then that file is opened,
  717. else it is assumed to be a file object. The file object must
  718. support random access (i.e. it must have tell and seek methods).
  719. dtype : data-type, optional
  720. valid dtype for all arrays
  721. shape : int or tuple of ints, optional
  722. shape of each array.
  723. offset : int, optional
  724. Position in the file to start reading from.
  725. formats, names, titles, aligned, byteorder :
  726. If `dtype` is ``None``, these arguments are passed to
  727. `numpy.format_parser` to construct a dtype. See that function for
  728. detailed documentation
  729. Returns
  730. -------
  731. np.recarray
  732. record array consisting of data enclosed in file.
  733. Examples
  734. --------
  735. >>> from tempfile import TemporaryFile
  736. >>> a = np.empty(10,dtype='f8,i4,a5')
  737. >>> a[5] = (0.5,10,'abcde')
  738. >>>
  739. >>> fd=TemporaryFile()
  740. >>> a = a.view(a.dtype.newbyteorder('<'))
  741. >>> a.tofile(fd)
  742. >>>
  743. >>> _ = fd.seek(0)
  744. >>> r=np.rec.fromfile(fd, formats='f8,i4,a5', shape=10,
  745. ... byteorder='<')
  746. >>> print(r[5])
  747. (0.5, 10, b'abcde')
  748. >>> r.shape
  749. (10,)
  750. """
  751. if dtype is None and formats is None:
  752. raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")
  753. # NumPy 1.19.0, 2020-01-01
  754. shape = _deprecate_shape_0_as_None(shape)
  755. if shape is None:
  756. shape = (-1,)
  757. elif isinstance(shape, int):
  758. shape = (shape,)
  759. if hasattr(fd, 'readinto'):
  760. # GH issue 2504. fd supports io.RawIOBase or io.BufferedIOBase
  761. # interface. Example of fd: gzip, BytesIO, BufferedReader
  762. # file already opened
  763. ctx = nullcontext(fd)
  764. else:
  765. # open file
  766. ctx = open(os.fspath(fd), 'rb')
  767. with ctx as fd:
  768. if offset > 0:
  769. fd.seek(offset, 1)
  770. size = get_remaining_size(fd)
  771. if dtype is not None:
  772. descr = sb.dtype(dtype)
  773. else:
  774. descr = format_parser(
  775. formats, names, titles, aligned, byteorder
  776. ).dtype
  777. itemsize = descr.itemsize
  778. shapeprod = sb.array(shape).prod(dtype=nt.intp)
  779. shapesize = shapeprod * itemsize
  780. if shapesize < 0:
  781. shape = list(shape)
  782. shape[shape.index(-1)] = size // -shapesize
  783. shape = tuple(shape)
  784. shapeprod = sb.array(shape).prod(dtype=nt.intp)
  785. nbytes = shapeprod * itemsize
  786. if nbytes > size:
  787. raise ValueError(
  788. "Not enough bytes left in file for specified "
  789. "shape and type."
  790. )
  791. # create the array
  792. _array = recarray(shape, descr)
  793. nbytesread = fd.readinto(_array.data)
  794. if nbytesread != nbytes:
  795. raise OSError("Didn't read as many bytes as expected")
  796. return _array
  797. @set_module("numpy.rec")
  798. def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
  799. names=None, titles=None, aligned=False, byteorder=None, copy=True):
  800. """
  801. Construct a record array from a wide-variety of objects.
  802. A general-purpose record array constructor that dispatches to the
  803. appropriate `recarray` creation function based on the inputs (see Notes).
  804. Parameters
  805. ----------
  806. obj : any
  807. Input object. See Notes for details on how various input types are
  808. treated.
  809. dtype : data-type, optional
  810. Valid dtype for array.
  811. shape : int or tuple of ints, optional
  812. Shape of each array.
  813. offset : int, optional
  814. Position in the file or buffer to start reading from.
  815. strides : tuple of ints, optional
  816. Buffer (`buf`) is interpreted according to these strides (strides
  817. define how many bytes each array element, row, column, etc.
  818. occupy in memory).
  819. formats, names, titles, aligned, byteorder :
  820. If `dtype` is ``None``, these arguments are passed to
  821. `numpy.format_parser` to construct a dtype. See that function for
  822. detailed documentation.
  823. copy : bool, optional
  824. Whether to copy the input object (True), or to use a reference instead.
  825. This option only applies when the input is an ndarray or recarray.
  826. Defaults to True.
  827. Returns
  828. -------
  829. np.recarray
  830. Record array created from the specified object.
  831. Notes
  832. -----
  833. If `obj` is ``None``, then call the `~numpy.recarray` constructor. If
  834. `obj` is a string, then call the `fromstring` constructor. If `obj` is a
  835. list or a tuple, then if the first object is an `~numpy.ndarray`, call
  836. `fromarrays`, otherwise call `fromrecords`. If `obj` is a
  837. `~numpy.recarray`, then make a copy of the data in the recarray
  838. (if ``copy=True``) and use the new formats, names, and titles. If `obj`
  839. is a file, then call `fromfile`. Finally, if obj is an `ndarray`, then
  840. return ``obj.view(recarray)``, making a copy of the data if ``copy=True``.
  841. Examples
  842. --------
  843. >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
  844. >>> a
  845. array([[1, 2, 3],
  846. [4, 5, 6],
  847. [7, 8, 9]])
  848. >>> np.rec.array(a)
  849. rec.array([[1, 2, 3],
  850. [4, 5, 6],
  851. [7, 8, 9]],
  852. dtype=int64)
  853. >>> b = [(1, 1), (2, 4), (3, 9)]
  854. >>> c = np.rec.array(b, formats = ['i2', 'f2'], names = ('x', 'y'))
  855. >>> c
  856. rec.array([(1, 1.), (2, 4.), (3, 9.)],
  857. dtype=[('x', '<i2'), ('y', '<f2')])
  858. >>> c.x
  859. array([1, 2, 3], dtype=int16)
  860. >>> c.y
  861. array([1., 4., 9.], dtype=float16)
  862. >>> r = np.rec.array(['abc','def'], names=['col1','col2'])
  863. >>> print(r.col1)
  864. abc
  865. >>> r.col1
  866. array('abc', dtype='<U3')
  867. >>> r.col2
  868. array('def', dtype='<U3')
  869. """
  870. if ((isinstance(obj, (type(None), str)) or hasattr(obj, 'readinto')) and
  871. formats is None and dtype is None):
  872. raise ValueError("Must define formats (or dtype) if object is "
  873. "None, string, or an open file")
  874. kwds = {}
  875. if dtype is not None:
  876. dtype = sb.dtype(dtype)
  877. elif formats is not None:
  878. dtype = format_parser(formats, names, titles,
  879. aligned, byteorder).dtype
  880. else:
  881. kwds = {'formats': formats,
  882. 'names': names,
  883. 'titles': titles,
  884. 'aligned': aligned,
  885. 'byteorder': byteorder
  886. }
  887. if obj is None:
  888. if shape is None:
  889. raise ValueError("Must define a shape if obj is None")
  890. return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)
  891. elif isinstance(obj, bytes):
  892. return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)
  893. elif isinstance(obj, (list, tuple)):
  894. if isinstance(obj[0], (tuple, list)):
  895. return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
  896. else:
  897. return fromarrays(obj, dtype=dtype, shape=shape, **kwds)
  898. elif isinstance(obj, recarray):
  899. if dtype is not None and (obj.dtype != dtype):
  900. new = obj.view(dtype)
  901. else:
  902. new = obj
  903. if copy:
  904. new = new.copy()
  905. return new
  906. elif hasattr(obj, 'readinto'):
  907. return fromfile(obj, dtype=dtype, shape=shape, offset=offset)
  908. elif isinstance(obj, ndarray):
  909. if dtype is not None and (obj.dtype != dtype):
  910. new = obj.view(dtype)
  911. else:
  912. new = obj
  913. if copy:
  914. new = new.copy()
  915. return new.view(recarray)
  916. else:
  917. interface = getattr(obj, "__array_interface__", None)
  918. if interface is None or not isinstance(interface, dict):
  919. raise ValueError("Unknown input type")
  920. obj = sb.array(obj)
  921. if dtype is not None and (obj.dtype != dtype):
  922. obj = obj.view(dtype)
  923. return obj.view(recarray)