_netcdf.py 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104
  1. """
  2. NetCDF reader/writer module.
  3. This module is used to read and create NetCDF files. NetCDF files are
  4. accessed through the `netcdf_file` object. Data written to and from NetCDF
  5. files are contained in `netcdf_variable` objects. Attributes are given
  6. as member variables of the `netcdf_file` and `netcdf_variable` objects.
  7. This module implements the Scientific.IO.NetCDF API to read and create
  8. NetCDF files. The same API is also used in the PyNIO and pynetcdf
  9. modules, allowing these modules to be used interchangeably when working
  10. with NetCDF files.
  11. Only NetCDF3 is supported here; for NetCDF4 see
  12. `netCDF4-python <http://unidata.github.io/netcdf4-python/>`__,
  13. which has a similar API.
  14. """
  15. # TODO:
  16. # * properly implement ``_FillValue``.
  17. # * fix character variables.
  18. # * implement PAGESIZE for Python 2.6?
  19. # The Scientific.IO.NetCDF API allows attributes to be added directly to
  20. # instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
  21. # between user-set attributes and instance attributes, user-set attributes
  22. # are automatically stored in the ``_attributes`` attribute by overloading
  23. #``__setattr__``. This is the reason why the code sometimes uses
  24. #``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
  25. # otherwise the key would be inserted into userspace attributes.
  26. __all__ = ['netcdf_file', 'netcdf_variable']
  27. import warnings
  28. import weakref
  29. from operator import mul
  30. from platform import python_implementation
  31. import mmap as mm
  32. import numpy as np
  33. from numpy import frombuffer, dtype, empty, array, asarray
  34. from numpy import little_endian as LITTLE_ENDIAN
  35. from functools import reduce
  # True when running under PyPy; ``netcdf_file.__init__`` uses it to choose
  # the default for ``mmap``.
  IS_PYPY = python_implementation() == 'PyPy'

  # Header markers.  ABSENT (eight zero bytes) is written in place of an empty
  # dimension/attribute/variable list; ZERO is the four-byte tag the readers
  # accept for such an empty list before unpacking its count.
  ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00'
  ZERO = b'\x00\x00\x00\x00'

  # Four-byte tags identifying the on-disk data types.
  NC_BYTE = b'\x00\x00\x00\x01'
  NC_CHAR = b'\x00\x00\x00\x02'
  NC_SHORT = b'\x00\x00\x00\x03'
  NC_INT = b'\x00\x00\x00\x04'
  NC_FLOAT = b'\x00\x00\x00\x05'
  NC_DOUBLE = b'\x00\x00\x00\x06'

  # Four-byte tags introducing the dimension, variable and attribute lists.
  NC_DIMENSION = b'\x00\x00\x00\n'
  NC_VARIABLE = b'\x00\x00\x00\x0b'
  NC_ATTRIBUTE = b'\x00\x00\x00\x0c'

  # Fill-value byte patterns, one per data type.
  FILL_BYTE = b'\x81'
  FILL_CHAR = b'\x00'
  FILL_SHORT = b'\x80\x01'
  FILL_INT = b'\x80\x00\x00\x01'
  FILL_FLOAT = b'\x7C\xF0\x00\x00'
  FILL_DOUBLE = b'\x47\x9E\x00\x00\x00\x00\x00\x00'

  # Type tag -> (NumPy type character, item size in bytes).
  TYPEMAP = {NC_BYTE: ('b', 1),
             NC_CHAR: ('c', 1),
             NC_SHORT: ('h', 2),
             NC_INT: ('i', 4),
             NC_FLOAT: ('f', 4),
             NC_DOUBLE: ('d', 8)}

  # Type tag -> fill-value byte pattern.
  # NOTE(review): not referenced by the code visible here; presumably used by
  # ``netcdf_variable`` -- confirm.
  FILLMAP = {NC_BYTE: FILL_BYTE,
             NC_CHAR: FILL_CHAR,
             NC_SHORT: FILL_SHORT,
             NC_INT: FILL_INT,
             NC_FLOAT: FILL_FLOAT,
             NC_DOUBLE: FILL_DOUBLE}

  # (NumPy type character, item size) -> type tag; the inverse lookup used
  # when writing variables and attributes.
  REVERSE = {('b', 1): NC_BYTE,
             ('B', 1): NC_CHAR,
             ('c', 1): NC_CHAR,
             ('h', 2): NC_SHORT,
             ('i', 4): NC_INT,
             ('f', 4): NC_FLOAT,
             ('d', 8): NC_DOUBLE,

             # these come from asarray(1).dtype.char and asarray('foo').dtype.char,
             # used when getting the types from generic attributes.
             ('l', 4): NC_INT,
             ('S', 1): NC_CHAR}
  77. class netcdf_file:
  78. """
  79. A file object for NetCDF data.
  80. A `netcdf_file` object has two standard attributes: `dimensions` and
  81. `variables`. The values of both are dictionaries, mapping dimension
  82. names to their associated lengths and variable names to variables,
  83. respectively. Application programs should never modify these
  84. dictionaries.
  85. All other attributes correspond to global attributes defined in the
  86. NetCDF file. Global file attributes are created by assigning to an
  87. attribute of the `netcdf_file` object.
  88. Parameters
  89. ----------
  90. filename : string or file-like
  91. string -> filename
  92. mode : {'r', 'w', 'a'}, optional
  93. read-write-append mode, default is 'r'
  94. mmap : None or bool, optional
  95. Whether to mmap `filename` when reading. Default is True
  96. when `filename` is a file name, False when `filename` is a
  97. file-like object. Note that when mmap is in use, data arrays
  98. returned refer directly to the mmapped data on disk, and the
  99. file cannot be closed as long as references to it exist.
  100. version : {1, 2}, optional
  101. version of netcdf to read / write, where 1 means *Classic
  102. format* and 2 means *64-bit offset format*. Default is 1. See
  103. `here <https://docs.unidata.ucar.edu/nug/current/netcdf_introduction.html#select_format>`__
  104. for more info.
  105. maskandscale : bool, optional
  106. Whether to automatically scale and/or mask data based on attributes.
  107. Default is False.
  108. Notes
  109. -----
  110. This module is derived from
  111. `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_.
  112. The major advantage of this module over other modules is that it doesn't
  113. require the code to be linked to the NetCDF libraries. However, for a more
  114. recent version of the NetCDF standard and additional features, please consider
  115. the permissively-licensed
  116. `netcdf4-python <https://unidata.github.io/netcdf4-python/>`_.
  117. NetCDF files are a self-describing binary data format. The file contains
  118. metadata that describes the dimensions and variables in the file. More
  119. details about NetCDF files can be found `here
  120. <https://www.unidata.ucar.edu/software/netcdf/guide_toc.html>`__. There
  121. are three main sections to a NetCDF data structure:
  122. 1. Dimensions
  123. 2. Variables
  124. 3. Attributes
  125. The dimensions section records the name and length of each dimension used
  126. by the variables. The variables would then indicate which dimensions it
  127. uses and any attributes such as data units, along with containing the data
  128. values for the variable. It is good practice to include a
  129. variable that has the same name as a dimension to provide the values for
  130. that axis. Lastly, the attributes section would contain additional
  131. information such as the name of the file creator or the instrument used to
  132. collect the data.
  133. When writing data to a NetCDF file, there is often the need to indicate the
  134. 'record dimension'. A record dimension is the unbounded dimension for a
  135. variable. For example, a temperature variable may have dimensions of
  136. latitude, longitude and time. If one wants to add more temperature data to
  137. the NetCDF file as time progresses, then the temperature variable should
  138. have the time dimension flagged as the record dimension.
  139. In addition, the NetCDF file header contains the position of the data in
  140. the file, so access can be done in an efficient manner without loading
  141. unnecessary data into memory. It uses the ``mmap`` module to create
  142. Numpy arrays mapped to the data on disk, for the same purpose.
  143. Note that when `netcdf_file` is used to open a file with mmap=True
  144. (default for read-only), arrays returned by it refer to data
  145. directly on the disk. The file should not be closed, and cannot be cleanly
  146. closed when asked, if such arrays are alive. You may want to copy data arrays
  147. obtained from a mmapped NetCDF file if they are to be processed after the file
  148. is closed; see the example below.
  149. Examples
  150. --------
  151. To create a NetCDF file:
  152. >>> from scipy.io import netcdf_file
  153. >>> import numpy as np
  154. >>> f = netcdf_file('simple.nc', 'w')
  155. >>> f.history = 'Created for a test'
  156. >>> f.createDimension('time', 10)
  157. >>> time = f.createVariable('time', 'i', ('time',))
  158. >>> time[:] = np.arange(10)
  159. >>> time.units = 'days since 2008-01-01'
  160. >>> f.close()
  161. Note the assignment of ``arange(10)`` to ``time[:]``. Exposing the slice
  162. of the time variable allows for the data to be set in the object, rather
  163. than letting ``arange(10)`` overwrite the ``time`` variable.
  164. To read the NetCDF file we just created:
  165. >>> from scipy.io import netcdf_file
  166. >>> f = netcdf_file('simple.nc', 'r')
  167. >>> print(f.history)
  168. b'Created for a test'
  169. >>> time = f.variables['time']
  170. >>> print(time.units)
  171. b'days since 2008-01-01'
  172. >>> print(time.shape)
  173. (10,)
  174. >>> print(time[-1])
  175. 9
  176. NetCDF files, when opened read-only, return arrays that refer
  177. directly to memory-mapped data on disk:
  178. >>> data = time[:]
  179. If the data is to be processed after the file is closed, it needs
  180. to be copied to main memory:
  181. >>> data = time[:].copy()
  182. >>> del time
  183. >>> f.close()
  184. >>> data.mean()
  185. 4.5
  186. A NetCDF file can also be used as context manager:
  187. >>> from scipy.io import netcdf_file
  188. >>> with netcdf_file('simple.nc', 'r') as f:
  189. ... print(f.history)
  190. b'Created for a test'
  191. """
  192. def __init__(self, filename, mode='r', mmap=None, version=1,
  193. maskandscale=False):
  194. """Initialize netcdf_file from fileobj (str or file-like)."""
  195. if mode not in 'rwa':
  196. raise ValueError("Mode must be either 'r', 'w' or 'a'.")
  197. if hasattr(filename, 'seek'): # file-like
  198. self.fp = filename
  199. self.filename = 'None'
  200. if mmap is None:
  201. mmap = False
  202. elif mmap and not hasattr(filename, 'fileno'):
  203. raise ValueError('Cannot use file object for mmap')
  204. else: # maybe it's a string
  205. self.filename = filename
  206. omode = 'r+' if mode == 'a' else mode
  207. self.fp = open(self.filename, f'{omode}b')
  208. if mmap is None:
  209. # Mmapped files on PyPy cannot be usually closed
  210. # before the GC runs, so it's better to use mmap=False
  211. # as the default.
  212. mmap = (not IS_PYPY)
  213. if mode != 'r':
  214. # Cannot read write-only files
  215. mmap = False
  216. self.use_mmap = mmap
  217. self.mode = mode
  218. self.version_byte = version
  219. self.maskandscale = maskandscale
  220. self.dimensions = {}
  221. self.variables = {}
  222. self._dims = []
  223. self._recs = 0
  224. self._recsize = 0
  225. self._mm = None
  226. self._mm_buf = None
  227. if self.use_mmap:
  228. self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ)
  229. self._mm_buf = np.frombuffer(self._mm, dtype=np.int8)
  230. self._attributes = {}
  231. if mode in 'ra':
  232. self._read()
  def __setattr__(self, attr, value):
      """Set ``attr`` and record it as a user-defined attribute.

      Once ``self._attributes`` exists, every assignment is mirrored into
      it; that dictionary is what `_write_gatt_array` writes to the file.
      """
      # Store user defined attributes in a separate dict,
      # so we can save them to file later.
      try:
          self._attributes[attr] = value
      except AttributeError:
          # ``_attributes`` has not been created yet, so the value is only
          # stored on the instance and not recorded as a file attribute.
          pass
      self.__dict__[attr] = value
  241. def close(self):
  242. """Closes the NetCDF file."""
  243. if hasattr(self, 'fp') and not self.fp.closed:
  244. try:
  245. self.flush()
  246. finally:
  247. self.variables = {}
  248. if self._mm_buf is not None:
  249. ref = weakref.ref(self._mm_buf)
  250. self._mm_buf = None
  251. if ref() is None:
  252. # self._mm_buf is gc'd, and we can close the mmap
  253. self._mm.close()
  254. else:
  255. # we cannot close self._mm, since self._mm_buf is
  256. # alive and there may still be arrays referring to it
  257. warnings.warn(
  258. "Cannot close a netcdf_file opened with mmap=True, when "
  259. "netcdf_variables or arrays referring to its data still "
  260. "exist. All data arrays obtained from such files refer "
  261. "directly to data on disk, and must be copied before the "
  262. "file can be cleanly closed. "
  263. "(See netcdf_file docstring for more information on mmap.)",
  264. category=RuntimeWarning, stacklevel=2,
  265. )
  266. self._mm = None
  267. self.fp.close()
  268. __del__ = close
  def __enter__(self):
      """Enter a ``with`` block; the file object itself is the target."""
      return self
  def __exit__(self, type, value, traceback):
      """Leave a ``with`` block by closing the file.

      The exception details are ignored and nothing is returned, so an
      exception raised inside the block keeps propagating.
      """
      self.close()
  273. def createDimension(self, name, length):
  274. """
  275. Adds a dimension to the Dimension section of the NetCDF data structure.
  276. Note that this function merely adds a new dimension that the variables can
  277. reference. The values for the dimension, if desired, should be added as
  278. a variable using `createVariable`, referring to this dimension.
  279. Parameters
  280. ----------
  281. name : str
  282. Name of the dimension (Eg, 'lat' or 'time').
  283. length : int
  284. Length of the dimension.
  285. See Also
  286. --------
  287. createVariable
  288. """
  289. if length is None and self._dims:
  290. raise ValueError("Only first dimension may be unlimited!")
  291. self.dimensions[name] = length
  292. self._dims.append(name)
  def createVariable(self, name, type, dimensions):
      """
      Create an empty variable for the `netcdf_file` object, specifying its data
      type and the dimensions it uses.

      Parameters
      ----------
      name : str
          Name of the new variable.
      type : dtype or str
          Data type of the variable.
      dimensions : sequence of str
          List of the dimension names used by the variable, in the desired order.

      Returns
      -------
      variable : netcdf_variable
          The newly created ``netcdf_variable`` object.
          This object has also been added to the `netcdf_file` object as well.

      Raises
      ------
      ValueError
          If the (type character, item size) pair of `type` has no entry in
          ``REVERSE``.

      See Also
      --------
      createDimension

      Notes
      -----
      Any dimensions to be used by the variable should already exist in the
      NetCDF data structure or should be created by `createDimension` prior to
      creating the NetCDF variable.

      """
      lengths = tuple(self.dimensions[dim_name] for dim_name in dimensions)
      # NumPy cannot allocate along a None axis, so allocate 0 there.
      alloc_shape = tuple(n if n else 0 for n in lengths)

      type = dtype(type)
      typecode = type.char
      size = type.itemsize
      if (typecode, size) not in REVERSE:
          raise ValueError(f"NetCDF 3 does not support type {type}")

      # convert to big endian always for NetCDF 3
      storage = empty(alloc_shape, dtype=type.newbyteorder("B"))
      created = netcdf_variable(
          storage, typecode, size, lengths, dimensions,
          maskandscale=self.maskandscale)
      self.variables[name] = created
      return created
  331. def flush(self):
  332. """
  333. Perform a sync-to-disk flush if the `netcdf_file` object is in write mode.
  334. See Also
  335. --------
  336. sync : Identical function
  337. """
  338. if hasattr(self, 'mode') and self.mode in 'wa':
  339. self._write()
  340. sync = flush
  def _write(self):
      """Rewrite the whole file from the start: magic, version, header, data."""
      out = self.fp
      out.seek(0)
      out.write(b'CDF')
      out.write(array(self.version_byte, '>b').tobytes())

      # Write headers and data.
      self._write_numrecs()
      for emit_section in (self._write_dim_array,
                           self._write_gatt_array,
                           self._write_var_array):
          emit_section()
  350. def _write_numrecs(self):
  351. # Get highest record count from all record variables.
  352. for var in self.variables.values():
  353. if var.isrec and len(var.data) > self._recs:
  354. self.__dict__['_recs'] = len(var.data)
  355. self._pack_int(self._recs)
  def _write_dim_array(self):
      """Write the dimension list, or the ABSENT marker when there is none."""
      if not self.dimensions:
          self.fp.write(ABSENT)
          return

      self.fp.write(NC_DIMENSION)
      self._pack_int(len(self.dimensions))
      # ``_dims`` holds the creation order, which defines the dimension ids.
      for dim_name in self._dims:
          self._pack_string(dim_name)
          # replace None with 0 for record dimension
          self._pack_int(self.dimensions[dim_name] or 0)
  def _write_gatt_array(self):
      """Write the file-level attributes collected in ``self._attributes``."""
      self._write_att_array(self._attributes)
  def _write_att_array(self, attributes):
      """Write an attribute mapping, or the ABSENT marker when it is empty.

      Parameters
      ----------
      attributes : dict
          Attribute values keyed by name; each value is encoded by
          `_write_att_values`.
      """
      if not attributes:
          self.fp.write(ABSENT)
          return

      self.fp.write(NC_ATTRIBUTE)
      self._pack_int(len(attributes))
      for att_name, att_values in attributes.items():
          self._pack_string(att_name)
          self._write_att_values(att_values)
  def _write_var_array(self):
      """Write the variable list: all metadata first, then all data."""
      if not self.variables:
          self.fp.write(ABSENT)
          return

      self.fp.write(NC_VARIABLE)
      self._pack_int(len(self.variables))

      # Sort variable names non-recs first, then recs.
      def order_key(var_name):
          candidate = self.variables[var_name]
          return (-1,) if candidate.isrec else candidate._shape

      ordered = sorted(self.variables, key=order_key, reverse=True)

      # Set the metadata for all variables.
      for var_name in ordered:
          self._write_var_metadata(var_name)

      # Now that we have the metadata, we know the vsize of
      # each record variable, so we can calculate recsize.
      self.__dict__['_recsize'] = sum(
          candidate._vsize
          for candidate in self.variables.values()
          if candidate.isrec)

      # Set the data for all variables.
      for var_name in ordered:
          self._write_var_data(var_name)
  def _write_var_metadata(self, name):
      """Write the header entry of variable ``name``.

      Also stores ``_vsize`` on the variable, together with ``_begin``: the
      file position of the placeholder offset that `_write_var_data`
      overwrites later.
      """
      var = self.variables[name]

      self._pack_string(name)
      self._pack_int(len(var.dimensions))
      for dim_name in var.dimensions:
          self._pack_int(self._dims.index(dim_name))

      self._write_att_array(var._attributes)

      self.fp.write(REVERSE[var.typecode(), var.itemsize()])

      if var.isrec:
          # record variable: the size of one record
          try:
              vsize = var.data[0].size * var.data.itemsize
          except IndexError:
              vsize = 0
          record_var_count = sum(
              1 for other in self.variables.values() if other.isrec)
          # Records are padded to a multiple of 4 only when there is more
          # than one record variable.
          if record_var_count > 1:
              vsize += -vsize % 4
      else:
          vsize = var.data.size * var.data.itemsize
          vsize += -vsize % 4
      var.__dict__['_vsize'] = vsize
      self._pack_int(vsize)

      # Pack a bogus begin, and set the real value later.
      var.__dict__['_begin'] = self.fp.tell()
      self._pack_begin(0)
  def _write_var_data(self, name):
      """Write the data of variable ``name`` at the current file position.

      The placeholder offset stored at ``var._begin`` is first overwritten
      with the position at which the data starts.  A non-record variable is
      then written in one piece.  A record variable is written one record at
      a time, ``self._recsize`` bytes apart.
      """
      var = self.variables[name]

      # Set begin in file header.
      the_beguine = self.fp.tell()
      self.fp.seek(var._begin)
      self._pack_begin(the_beguine)
      self.fp.seek(the_beguine)

      # Write data.
      if not var.isrec:
          self.fp.write(var.data.tobytes())
          count = var.data.size * var.data.itemsize
          self._write_var_padding(var, var._vsize - count)
      else:  # record variable
          # Handle rec vars with shape[0] < nrecs.
          if self._recs > len(var.data):
              shape = (self._recs,) + var.data.shape[1:]
              # Resize in-place does not always work since
              # the array might not be single-segment
              try:
                  var.data.resize(shape)
              except ValueError:
                  dtype = var.data.dtype
                  var.__dict__['data'] = np.resize(var.data, shape).astype(dtype)

          pos0 = pos = self.fp.tell()
          for rec in var.data:
              # Apparently scalars cannot be converted to big endian. If we
              # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
              # will remain as ``=i4``.
              if not rec.shape and (rec.dtype.byteorder == '<' or
                      (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
                  rec = rec.byteswap()
              self.fp.write(rec.tobytes())
              # Padding
              count = rec.size * rec.itemsize
              self._write_var_padding(var, var._vsize - count)
              # Advance by one whole record, not just by this variable's
              # share of it.
              pos += self._recsize
              self.fp.seek(pos)
          # Finish just past this variable's first record.
          self.fp.seek(pos0 + var._vsize)
  466. def _write_var_padding(self, var, size):
  467. encoded_fill_value = var._get_encoded_fill_value()
  468. num_fills = size // len(encoded_fill_value)
  469. self.fp.write(encoded_fill_value * num_fills)
  def _write_att_values(self, values):
      """Write one attribute value: type tag, element count, data, padding.

      Parameters
      ----------
      values : object
          If it has a ``dtype`` attribute, the type is looked up from that
          dtype.  Otherwise the type is inferred from the value itself (for
          ``str``/``bytes``) or from its first element, falling back to the
          value when it cannot be indexed.
      """
      if hasattr(values, 'dtype'):
          nc_type = REVERSE[values.dtype.char, values.dtype.itemsize]
      else:
          types = [(int, NC_INT), (float, NC_FLOAT), (str, NC_CHAR)]

          # bytes index into scalars in py3k. Check for "string" types
          if isinstance(values, str | bytes):
              sample = values
          else:
              try:
                  sample = values[0]  # subscriptable?
              except TypeError:
                  sample = values     # scalar

          # When no class matches, ``nc_type`` keeps the value from the last
          # pair (NC_CHAR); ``bytes`` samples are handled that way.
          for class_, nc_type in types:
              if isinstance(sample, class_):
                  break

      typecode, size = TYPEMAP[nc_type]
      dtype_ = f'>{typecode}'
      # asarray() dies with bytes and '>c' in py3k. Change to 'S'
      dtype_ = 'S' if dtype_ == '>c' else dtype_

      values = asarray(values, dtype=dtype_)

      self.fp.write(nc_type)

      # For string data the element count is the number of bytes.
      if values.dtype.char == 'S':
          nelems = values.itemsize
      else:
          nelems = values.size
      self._pack_int(nelems)

      # Zero-dimensional values that are still little-endian are swapped
      # explicitly before being written.
      if not values.shape and (values.dtype.byteorder == '<' or
              (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
          values = values.byteswap()
      self.fp.write(values.tobytes())
      count = values.size * values.itemsize
      self.fp.write(b'\x00' * (-count % 4))  # pad
  503. def _read(self):
  504. # Check magic bytes and version
  505. magic = self.fp.read(3)
  506. if not magic == b'CDF':
  507. raise TypeError(f"Error: {self.filename} is not a valid NetCDF 3 file")
  508. self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0]
  509. # Read file headers and set data.
  510. self._read_numrecs()
  511. self._read_dim_array()
  512. self._read_gatt_array()
  513. self._read_var_array()
  def _read_numrecs(self):
      """Read the record count from the header into ``_recs``."""
      # Written straight to ``__dict__`` so that ``__setattr__`` does not
      # record it as a user attribute.
      self.__dict__['_recs'] = self._unpack_int()
  def _read_dim_array(self):
      """Read the dimension list into ``self.dimensions`` and ``self._dims``.

      Raises
      ------
      ValueError
          If the list does not start with the ZERO or NC_DIMENSION tag.
      """
      tag = self.fp.read(4)
      if tag not in (ZERO, NC_DIMENSION):
          raise ValueError("Unexpected header.")

      for _ in range(self._unpack_int()):
          dim_name = self._unpack_string().decode('latin1')
          # A stored length of 0 becomes None (record dimension).
          self.dimensions[dim_name] = self._unpack_int() or None
          self._dims.append(dim_name)  # preserve order
  def _read_gatt_array(self):
      """Read the file-level attributes and set each one on this object."""
      # Going through ``__setattr__`` both exposes the attribute on the
      # instance and records it in ``self._attributes``.
      for k, v in self._read_att_array().items():
          self.__setattr__(k, v)
  def _read_att_array(self):
      """Read an attribute list and return it as a ``{name: value}`` dict.

      Raises
      ------
      ValueError
          If the list does not start with the ZERO or NC_ATTRIBUTE tag.
      """
      tag = self.fp.read(4)
      if tag not in (ZERO, NC_ATTRIBUTE):
          raise ValueError("Unexpected header.")

      how_many = self._unpack_int()
      # Within each item the key expression (the name) is evaluated before
      # the value expression, matching the on-disk order.
      return {
          self._unpack_string().decode('latin1'): self._read_att_values()
          for _ in range(how_many)
      }
  def _read_var_array(self):
      """Read the variable list and create one `netcdf_variable` per entry.

      A non-record variable gets its data immediately, either as a view into
      the memory map or as a copy read from the file.  Record variables are
      first described as fields of one structured dtype.  After the loop
      that dtype is used to read all records at once, and each record
      variable receives its field of the resulting array.

      Raises
      ------
      ValueError
          If the list does not start with the ZERO or NC_VARIABLE tag.
      """
      header = self.fp.read(4)
      if header not in [ZERO, NC_VARIABLE]:
          raise ValueError("Unexpected header.")

      # ``begin`` becomes the offset of the first record variable; 0 means
      # "not seen yet".
      begin = 0
      dtypes = {'names': [], 'formats': []}
      rec_vars = []
      count = self._unpack_int()
      for var in range(count):
          (name, dimensions, shape, attributes,
           typecode, size, dtype_, begin_, vsize) = self._read_var()
          # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html
          # Note that vsize is the product of the dimension lengths
          # (omitting the record dimension) and the number of bytes
          # per value (determined from the type), increased to the
          # next multiple of 4, for each variable. If a record
          # variable, this is the amount of space per record. The
          # netCDF "record size" is calculated as the sum of the
          # vsize's of all the record variables.
          #
          # The vsize field is actually redundant, because its value
          # may be computed from other information in the header. The
          # 32-bit vsize field is not large enough to contain the size
          # of variables that require more than 2^32 - 4 bytes, so
          # 2^32 - 1 is used in the vsize field for such variables.
          if shape and shape[0] is None:  # record variable
              rec_vars.append(name)
              # The netCDF "record size" is calculated as the sum of
              # the vsize's of all the record variables.
              self.__dict__['_recsize'] += vsize
              if begin == 0:
                  begin = begin_
              dtypes['names'].append(name)
              dtypes['formats'].append(str(shape[1:]) + dtype_)

              # Handle padding with a virtual variable.
              if typecode in 'bch':
                  actual_size = reduce(mul, (1,) + shape[1:]) * size
                  padding = -actual_size % 4
                  if padding:
                      dtypes['names'].append(f'_padding_{var}')
                      dtypes['formats'].append(f'({padding},)>b')

              # Data will be set later.
              data = None
          else:  # not a record variable
              # Calculate size to avoid problems with vsize (above)
              a_size = reduce(mul, shape, 1) * size
              if self.use_mmap:
                  data = self._mm_buf[begin_:begin_+a_size].view(dtype=dtype_)
                  data = data.reshape(shape)
              else:
                  # Remember the header position, read the data, then
                  # return so that header parsing can continue.
                  pos = self.fp.tell()
                  self.fp.seek(begin_)
                  data = frombuffer(self.fp.read(a_size), dtype=dtype_
                                    ).copy()
                  data = data.reshape(shape)
                  self.fp.seek(pos)

          # Add variable.
          self.variables[name] = netcdf_variable(
              data, typecode, size, shape, dimensions, attributes,
              maskandscale=self.maskandscale)

      if rec_vars:
          # Remove padding when only one record variable.
          if len(rec_vars) == 1:
              dtypes['names'] = dtypes['names'][:1]
              dtypes['formats'] = dtypes['formats'][:1]

          # Build rec array.
          if self.use_mmap:
              buf = self._mm_buf[begin:begin+self._recs*self._recsize]
              rec_array = buf.view(dtype=dtypes)
              rec_array = rec_array.reshape((self._recs,))
          else:
              pos = self.fp.tell()
              self.fp.seek(begin)
              rec_array = frombuffer(self.fp.read(self._recs*self._recsize),
                                     dtype=dtypes).copy()
              rec_array = rec_array.reshape((self._recs,))
              self.fp.seek(pos)

          # Each record variable's data is its named field of the array.
          for var in rec_vars:
              self.variables[var].__dict__['data'] = rec_array[var]
  def _read_var(self):
      """Read one variable header entry.

      Returns
      -------
      tuple
          ``(name, dimensions, shape, attributes, typecode, size, dtype_,
          begin, vsize)``.  ``dimensions`` and ``shape`` are tuples,
          ``dtype_`` is the big-endian dtype string and ``begin`` is the
          data offset read from the header.
      """
      name = self._unpack_string().decode('latin1')

      dim_names = []
      dim_lengths = []
      for _ in range(self._unpack_int()):
          dim_name = self._dims[self._unpack_int()]
          dim_names.append(dim_name)
          dim_lengths.append(self.dimensions[dim_name])
      dimensions = tuple(dim_names)
      shape = tuple(dim_lengths)

      attributes = self._read_att_array()
      nc_type = self.fp.read(4)
      vsize = self._unpack_int()
      # The version byte selects the offset reader: index 0 is the 32-bit
      # reader, index 1 the 64-bit one.
      offset_readers = [self._unpack_int, self._unpack_int64]
      begin = offset_readers[self.version_byte-1]()

      typecode, size = TYPEMAP[nc_type]
      dtype_ = f'>{typecode}'

      return (name, dimensions, shape, attributes,
              typecode, size, dtype_, begin, vsize)
  def _read_att_values(self):
      """Read one attribute value.

      Returns
      -------
      bytes, numpy scalar or numpy.ndarray
          Character data is returned as ``bytes`` with trailing NULs
          removed.  Other types are returned as a copied big-endian array,
          or as its only element when there is exactly one.
      """
      nc_type = self.fp.read(4)
      n = self._unpack_int()

      typecode, size = TYPEMAP[nc_type]

      nbytes = n*size
      raw = self.fp.read(int(nbytes))
      self.fp.read(-nbytes % 4)  # read padding

      if typecode == 'c':
          return raw.rstrip(b'\x00')

      decoded = frombuffer(raw, dtype=f'>{typecode}').copy()
      if decoded.shape == (1,):
          return decoded[0]
      return decoded
  652. def _pack_begin(self, begin):
  653. if self.version_byte == 1:
  654. self._pack_int(begin)
  655. elif self.version_byte == 2:
  656. self._pack_int64(begin)
  657. def _pack_int(self, value):
  658. self.fp.write(array(value, '>i').tobytes())
  659. _pack_int32 = _pack_int
  660. def _unpack_int(self):
  661. return int(frombuffer(self.fp.read(4), '>i')[0])
  662. _unpack_int32 = _unpack_int
  663. def _pack_int64(self, value):
  664. self.fp.write(array(value, '>q').tobytes())
  665. def _unpack_int64(self):
  666. return frombuffer(self.fp.read(8), '>q')[0]
  667. def _pack_string(self, s):
  668. count = len(s)
  669. self._pack_int(count)
  670. self.fp.write(s.encode('latin1'))
  671. self.fp.write(b'\x00' * (-count % 4)) # pad
  672. def _unpack_string(self):
  673. count = self._unpack_int()
  674. s = self.fp.read(count).rstrip(b'\x00')
  675. self.fp.read(-count % 4) # read padding
  676. return s
  677. class netcdf_variable:
  678. """
  679. A data object for netcdf files.
  680. `netcdf_variable` objects are constructed by calling the method
  681. `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable`
  682. objects behave much like array objects defined in numpy, except that their
  683. data resides in a file. Data is read by indexing and written by assigning
  684. to an indexed subset; the entire array can be accessed by the index ``[:]``
  685. or (for scalars) by using the methods `getValue` and `assignValue`.
  686. `netcdf_variable` objects also have attribute `shape` with the same meaning
  687. as for arrays, but the shape cannot be modified. There is another read-only
  688. attribute `dimensions`, whose value is the tuple of dimension names.
  689. All other attributes correspond to variable attributes defined in
  690. the NetCDF file. Variable attributes are created by assigning to an
  691. attribute of the `netcdf_variable` object.
  692. Parameters
  693. ----------
  694. data : array_like
  695. The data array that holds the values for the variable.
  696. Typically, this is initialized as empty, but with the proper shape.
  697. typecode : dtype character code
  698. Desired data-type for the data array.
  699. size : int
  700. Desired element size for the data array.
  701. shape : sequence of ints
  702. The shape of the array. This should match the lengths of the
  703. variable's dimensions.
  704. dimensions : sequence of strings
  705. The names of the dimensions used by the variable. Must be in the
  706. same order of the dimension lengths given by `shape`.
  707. attributes : dict, optional
  708. Attribute values (any type) keyed by string names. These attributes
  709. become attributes for the netcdf_variable object.
  710. maskandscale : bool, optional
  711. Whether to automatically scale and/or mask data based on attributes.
  712. Default is False.
  713. Attributes
  714. ----------
  715. dimensions : list of str
  716. List of names of dimensions used by the variable object.
  717. isrec, shape
  718. Properties
  719. See also
  720. --------
  721. isrec, shape
  722. Notes
  723. -----
  724. For a more recent version of the NetCDF standard and additional features, please
  725. consider the permissively-licensed
  726. `netcdf4-python <https://unidata.github.io/netcdf4-python/>`_.
  727. """
  728. def __init__(self, data, typecode, size, shape, dimensions,
  729. attributes=None,
  730. maskandscale=False):
  731. self.data = data
  732. self._typecode = typecode
  733. self._size = size
  734. self._shape = shape
  735. self.dimensions = dimensions
  736. self.maskandscale = maskandscale
  737. self._attributes = attributes or {}
  738. for k, v in self._attributes.items():
  739. self.__dict__[k] = v
  740. def __setattr__(self, attr, value):
  741. # Store user defined attributes in a separate dict,
  742. # so we can save them to file later.
  743. try:
  744. self._attributes[attr] = value
  745. except AttributeError:
  746. pass
  747. self.__dict__[attr] = value
  748. def isrec(self):
  749. """Returns whether the variable has a record dimension or not.
  750. A record dimension is a dimension along which additional data could be
  751. easily appended in the netcdf data structure without much rewriting of
  752. the data file. This attribute is a read-only property of the
  753. `netcdf_variable`.
  754. """
  755. return bool(self.data.shape) and not self._shape[0]
  756. isrec = property(isrec)
  757. def shape(self):
  758. """Returns the shape tuple of the data variable.
  759. This is a read-only attribute and can not be modified in the
  760. same manner of other numpy arrays.
  761. """
  762. return self.data.shape
  763. shape = property(shape)
  764. def getValue(self):
  765. """
  766. Retrieve a scalar value from a `netcdf_variable` of length one.
  767. Raises
  768. ------
  769. ValueError
  770. If the netcdf variable is an array of length greater than one,
  771. this exception will be raised.
  772. """
  773. return self.data.item()
  774. def assignValue(self, value):
  775. """
  776. Assign a scalar value to a `netcdf_variable` of length one.
  777. Parameters
  778. ----------
  779. value : scalar
  780. Scalar value (of compatible type) to assign to a length-one netcdf
  781. variable. This value will be written to file.
  782. Raises
  783. ------
  784. ValueError
  785. If the input is not a scalar, or if the destination is not a length-one
  786. netcdf variable.
  787. """
  788. if not self.data.flags.writeable:
  789. # Work-around for a bug in NumPy. Calling itemset() on a read-only
  790. # memory-mapped array causes a seg. fault.
  791. # See NumPy ticket #1622, and SciPy ticket #1202.
  792. # This check for `writeable` can be removed when the oldest version
  793. # of NumPy still supported by scipy contains the fix for #1622.
  794. raise RuntimeError("variable is not writeable")
  795. self.data[:] = value
  796. def typecode(self):
  797. """
  798. Return the typecode of the variable.
  799. Returns
  800. -------
  801. typecode : char
  802. The character typecode of the variable (e.g., 'i' for int).
  803. """
  804. return self._typecode
  805. def itemsize(self):
  806. """
  807. Return the itemsize of the variable.
  808. Returns
  809. -------
  810. itemsize : int
  811. The element size of the variable (e.g., 8 for float64).
  812. """
  813. return self._size
  814. def __getitem__(self, index):
  815. if not self.maskandscale:
  816. return self.data[index]
  817. data = self.data[index].copy()
  818. missing_value = self._get_missing_value()
  819. data = self._apply_missing_value(data, missing_value)
  820. scale_factor = self._attributes.get('scale_factor')
  821. add_offset = self._attributes.get('add_offset')
  822. if add_offset is not None or scale_factor is not None:
  823. data = data.astype(np.float64)
  824. if scale_factor is not None:
  825. data = data * scale_factor
  826. if add_offset is not None:
  827. data += add_offset
  828. return data
  829. def __setitem__(self, index, data):
  830. if self.maskandscale:
  831. missing_value = (
  832. self._get_missing_value() or
  833. getattr(data, 'fill_value', 999999))
  834. self._attributes.setdefault('missing_value', missing_value)
  835. self._attributes.setdefault('_FillValue', missing_value)
  836. data = ((data - self._attributes.get('add_offset', 0.0)) /
  837. self._attributes.get('scale_factor', 1.0))
  838. data = np.ma.asarray(data).filled(missing_value)
  839. if self._typecode not in 'fd' and data.dtype.kind == 'f':
  840. data = np.round(data)
  841. # Expand data for record vars?
  842. if self.isrec:
  843. if isinstance(index, tuple):
  844. rec_index = index[0]
  845. else:
  846. rec_index = index
  847. if isinstance(rec_index, slice):
  848. recs = (rec_index.start or 0) + len(data)
  849. else:
  850. recs = rec_index + 1
  851. if recs > len(self.data):
  852. shape = (recs,) + self._shape[1:]
  853. # Resize in-place does not always work since
  854. # the array might not be single-segment
  855. try:
  856. self.data.resize(shape)
  857. except ValueError:
  858. dtype = self.data.dtype
  859. self.__dict__['data'] = np.resize(self.data, shape).astype(dtype)
  860. self.data[index] = data
  861. def _default_encoded_fill_value(self):
  862. """
  863. The default encoded fill-value for this Variable's data type.
  864. """
  865. nc_type = REVERSE[self.typecode(), self.itemsize()]
  866. return FILLMAP[nc_type]
  867. def _get_encoded_fill_value(self):
  868. """
  869. Returns the encoded fill value for this variable as bytes.
  870. This is taken from either the _FillValue attribute, or the default fill
  871. value for this variable's data type.
  872. """
  873. if '_FillValue' in self._attributes:
  874. fill_value = np.array(self._attributes['_FillValue'],
  875. dtype=self.data.dtype).tobytes()
  876. if len(fill_value) == self.itemsize():
  877. return fill_value
  878. else:
  879. return self._default_encoded_fill_value()
  880. else:
  881. return self._default_encoded_fill_value()
  882. def _get_missing_value(self):
  883. """
  884. Returns the value denoting "no data" for this variable.
  885. If this variable does not have a missing/fill value, returns None.
  886. If both _FillValue and missing_value are given, give precedence to
  887. _FillValue. The netCDF standard gives special meaning to _FillValue;
  888. missing_value is just used for compatibility with old datasets.
  889. """
  890. if '_FillValue' in self._attributes:
  891. missing_value = self._attributes['_FillValue']
  892. elif 'missing_value' in self._attributes:
  893. missing_value = self._attributes['missing_value']
  894. else:
  895. missing_value = None
  896. return missing_value
  897. @staticmethod
  898. def _apply_missing_value(data, missing_value):
  899. """
  900. Applies the given missing value to the data array.
  901. Returns a numpy.ma array, with any value equal to missing_value masked
  902. out (unless missing_value is None, in which case the original array is
  903. returned).
  904. """
  905. if missing_value is None:
  906. newdata = data
  907. else:
  908. try:
  909. missing_value_isnan = np.isnan(missing_value)
  910. except (TypeError, NotImplementedError):
  911. # some data types (e.g., characters) cannot be tested for NaN
  912. missing_value_isnan = False
  913. if missing_value_isnan:
  914. mymask = np.isnan(data)
  915. else:
  916. mymask = (data == missing_value)
  917. newdata = np.ma.masked_where(mymask, data)
  918. return newdata
# CamelCase aliases: each name is bound to the same class object as its
# lowercase counterpart (presumably kept for compatibility with older
# naming -- confirm before removing).
NetCDFFile = netcdf_file
NetCDFVariable = netcdf_variable