_fetchers.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. import sys
  2. from numpy import array, frombuffer, load
  3. from ._registry import registry, registry_urls
  4. from scipy._lib._array_api import xp_capabilities
  # pooch is an optional dependency.  When it cannot be imported the module
  # still loads, with both ``pooch`` and ``data_fetcher`` set to ``None``.
  try:
      import pooch
  except ImportError:
      pooch = None

  if pooch is None:
      data_fetcher = None
  else:
      data_fetcher = pooch.create(
          # Cache downloads in the default per-platform cache folder that
          # pooch selects for the operating system.
          path=pooch.os_cache("scipy-data"),
          # ``base_url`` is a required parameter, although the individual
          # URLs supplied through ``urls`` override it for each file.
          base_url="https://github.com/scipy/",
          registry=registry,
          urls=registry_urls,
      )
  def fetch_data(dataset_name, data_fetcher=data_fetcher):
      """
      Fetch a dataset file and return what ``data_fetcher.fetch`` returns.

      Parameters
      ----------
      dataset_name : str
          Name of the file to fetch; forwarded to ``data_fetcher.fetch``.
      data_fetcher : object, optional
          Object providing a ``fetch(name, downloader=...)`` method.
          Defaults to the module-level ``data_fetcher``, which is None when
          pooch could not be imported.

      Returns
      -------
      fname
          The result of ``data_fetcher.fetch``: the full path to the
          downloaded data file.

      Raises
      ------
      ImportError
          If `data_fetcher` is None.
      """
      if data_fetcher is None:
          message = ("Missing optional dependency 'pooch' required "
                     "for scipy.datasets module. Please use pip or "
                     "conda to install 'pooch'.")
          raise ImportError(message)

      # Send an explicit SciPy User-Agent header with the request; see
      # https://github.com/scipy/scipy/issues/21879
      downloader = pooch.HTTPDownloader(
          headers={"User-Agent": f"SciPy {sys.modules['scipy'].__version__}"},
      )
      return data_fetcher.fetch(dataset_name, downloader=downloader)
  @xp_capabilities(out_of_scope=True)
  def ascent():
      """
      Return an 8-bit grayscale, 512 x 512 derived image that is convenient
      for demos.

      The image is derived from
      https://pixnio.com/people/accent-to-the-top

      Parameters
      ----------
      None

      Returns
      -------
      ascent : ndarray
          Convenient image to use for testing and demonstration.

      Examples
      --------
      >>> import scipy.datasets
      >>> ascent = scipy.datasets.ascent()
      >>> ascent.shape
      (512, 512)
      >>> ascent.max()
      np.uint8(255)

      >>> import matplotlib.pyplot as plt
      >>> plt.gray()
      >>> plt.imshow(ascent)
      >>> plt.show()

      """
      import pickle

      # On first use the file is downloaded and its local path returned;
      # later calls find it in the local cache and skip the download.
      path = fetch_data("ascent.dat")

      # NOTE(review): unpickling runs whatever the file encodes; this
      # presumably relies on the fetcher validating the download -- confirm.
      with open(path, 'rb') as stream:
          pixels = pickle.load(stream)
      return array(pixels)
  @xp_capabilities(out_of_scope=True)
  def electrocardiogram():
      """
      Load an electrocardiogram as an example for a 1-D signal.

      The returned signal is a 5 minute long electrocardiogram (ECG), a
      medical recording of the heart's electrical activity, sampled at
      360 Hz.

      Returns
      -------
      ecg : ndarray
          The electrocardiogram in millivolt (mV) sampled at 360 Hz.

      Notes
      -----
      The provided signal is an excerpt (19:35 to 24:35) from the
      `record 208`_ (lead MLII) provided by the MIT-BIH Arrhythmia Database
      [1]_ on PhysioNet [2]_. The excerpt includes noise induced artifacts,
      typical heartbeats as well as pathological changes.

      .. _record 208: https://physionet.org/physiobank/database/html/mitdbdir/records.htm#208

      .. versionadded:: 1.1.0

      References
      ----------
      .. [1] Moody GB, Mark RG. The impact of the MIT-BIH Arrhythmia Database.
             IEEE Eng in Med and Biol 20(3):45-50 (May-June 2001).
             (PMID: 11446209); :doi:`10.13026/C2F305`
      .. [2] Goldberger AL, Amaral LAN, Glass L, Hausdorff JM, Ivanov PCh,
             Mark RG, Mietus JE, Moody GB, Peng C-K, Stanley HE. PhysioBank,
             PhysioToolkit, and PhysioNet: Components of a New Research Resource
             for Complex Physiologic Signals. Circulation 101(23):e215-e220;
             :doi:`10.1161/01.CIR.101.23.e215`

      Examples
      --------
      >>> from scipy.datasets import electrocardiogram
      >>> ecg = electrocardiogram()
      >>> ecg
      array([-0.245, -0.215, -0.185, ..., -0.405, -0.395, -0.385], shape=(108000,))
      >>> ecg.shape, ecg.mean(), ecg.std()
      ((108000,), -0.16510875, 0.5992473991177294)

      As stated the signal features several areas with a different morphology.
      E.g., the first few seconds show the electrical activity of a heart in
      normal sinus rhythm as seen below.

      >>> import numpy as np
      >>> import matplotlib.pyplot as plt
      >>> fs = 360
      >>> time = np.arange(ecg.size) / fs
      >>> plt.plot(time, ecg)
      >>> plt.xlabel("time in s")
      >>> plt.ylabel("ECG in mV")
      >>> plt.xlim(9, 10.2)
      >>> plt.ylim(-1, 1.5)
      >>> plt.show()

      After second 16, however, the first premature ventricular contractions,
      also called extrasystoles, appear. These have a different morphology
      compared to typical heartbeats. The difference can easily be observed
      in the following plot.

      >>> plt.plot(time, ecg)
      >>> plt.xlabel("time in s")
      >>> plt.ylabel("ECG in mV")
      >>> plt.xlim(46.5, 50)
      >>> plt.ylim(-2, 1.5)
      >>> plt.show()

      At several points large artifacts disturb the recording, e.g.:

      >>> plt.plot(time, ecg)
      >>> plt.xlabel("time in s")
      >>> plt.ylabel("ECG in mV")
      >>> plt.xlim(207, 215)
      >>> plt.ylim(-2, 3.5)
      >>> plt.show()

      Finally, examining the power spectrum reveals that most of the biosignal is
      made up of lower frequencies. At 60 Hz the noise induced by the mains
      electricity can be clearly observed.

      >>> from scipy.signal import welch
      >>> f, Pxx = welch(ecg, fs=fs, nperseg=2048, scaling="spectrum")
      >>> plt.semilogy(f, Pxx)
      >>> plt.xlabel("Frequency in Hz")
      >>> plt.ylabel("Power spectrum of the ECG in mV**2")
      >>> plt.xlim(f[[0, -1]])
      >>> plt.show()
      """
      path = fetch_data("ecg.dat")
      with load(path) as archive:
          raw = archive["ecg"].astype(int)  # np.uint16 -> int

      # Convert raw output of the ADC to mV: (raw - adc_zero) / adc_gain
      adc_zero = 1024
      adc_gain = 200.0
      return (raw - adc_zero) / adc_gain
  @xp_capabilities(out_of_scope=True)
  def face(gray=False):
      """
      Get a 1024 x 768, color image of a raccoon face.

      The image is derived from
      https://pixnio.com/fauna-animals/raccoons/raccoon-procyon-lotor

      Parameters
      ----------
      gray : bool, optional
          If true, return an 8-bit grey-scale image, otherwise return a
          color image. Any truthy value (e.g. ``np.True_``) selects the
          grey-scale image.

      Returns
      -------
      face : ndarray
          Image of a raccoon face: a ``uint8`` array of shape
          ``(768, 1024, 3)`` for the color image, or ``(768, 1024)`` when
          `gray` is true.

      Examples
      --------
      >>> import scipy.datasets
      >>> face = scipy.datasets.face()
      >>> face.shape
      (768, 1024, 3)
      >>> face.max()
      np.uint8(255)

      >>> import matplotlib.pyplot as plt
      >>> plt.gray()
      >>> plt.imshow(face)
      >>> plt.show()

      """
      import bz2

      fname = fetch_data("face.dat")
      with open(fname, 'rb') as f:
          rawdata = f.read()

      # The file holds the bz2-compressed raw bytes of the image.
      face_data = bz2.decompress(rawdata)
      face = frombuffer(face_data, dtype='uint8').reshape((768, 1024, 3))

      # Test truthiness rather than identity with ``True`` so that flags such
      # as ``np.True_`` or ``1`` are not silently ignored.
      if gray:
          # Weighted sum of the three channels, truncated back to uint8.
          # NOTE(review): presumably R, G, B luminance weights -- confirm.
          face = (0.21 * face[:, :, 0] + 0.71 * face[:, :, 1] +
                  0.07 * face[:, :, 2]).astype('uint8')
      return face