cloudpickle.py 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487
  1. """Pickler class to extend the standard pickle.Pickler functionality
  2. The main objective is to make it natural to perform distributed computing on
  3. clusters (such as PySpark, Dask, Ray...) with interactively defined code
  4. (functions, classes, ...) written in notebooks or console.
  5. In particular this pickler adds the following features:
  6. - serialize interactively-defined or locally-defined functions, classes,
  7. enums, typevars, lambdas and nested functions to compiled byte code;
  8. - deal with some other non-serializable objects in an ad-hoc manner where
  9. applicable.
  10. This pickler is therefore meant to be used for the communication between short
  11. lived Python processes running the same version of Python and libraries. In
  12. particular, it is not meant to be used for long term storage of Python objects.
  13. It does not include an unpickler, as standard Python unpickling suffices.
  14. This module was extracted from the `cloud` package, developed by `PiCloud, Inc.
  15. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
  16. Copyright (c) 2012-now, CloudPickle developers and contributors.
  17. Copyright (c) 2012, Regents of the University of California.
  18. Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
  19. All rights reserved.
  20. Redistribution and use in source and binary forms, with or without
  21. modification, are permitted provided that the following conditions
  22. are met:
  23. * Redistributions of source code must retain the above copyright
  24. notice, this list of conditions and the following disclaimer.
  25. * Redistributions in binary form must reproduce the above copyright
  26. notice, this list of conditions and the following disclaimer in the
  27. documentation and/or other materials provided with the distribution.
  28. * Neither the name of the University of California, Berkeley nor the
  29. names of its contributors may be used to endorse or promote
  30. products derived from this software without specific prior written
  31. permission.
  32. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  33. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  34. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  35. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  36. HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  37. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  38. TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  39. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  40. LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  41. NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  42. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. """
  44. import _collections_abc
  45. from collections import ChainMap, OrderedDict
  46. import abc
  47. import builtins
  48. import copyreg
  49. import dataclasses
  50. import dis
  51. from enum import Enum
  52. import io
  53. import itertools
  54. import logging
  55. import opcode
  56. import pickle
  57. from pickle import _getattribute
  58. import platform
  59. import struct
  60. import sys
  61. import threading
  62. import types
  63. import typing
  64. import uuid
  65. import warnings
  66. import weakref
  67. # The following import is required to be imported in the cloudpickle
  68. # namespace to be able to load pickle files generated with older versions of
  69. # cloudpickle. See: tests/test_backward_compat.py
  70. from types import CellType # noqa: F401
  # cloudpickle is meant for inter process communication: we expect all
  # communicating processes to run the same Python version hence we favor
  # communication speed over compatibility:
  DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL

  # Names of modules whose resources should be treated as dynamic.
  _PICKLE_BY_VALUE_MODULES = set()

  # Track the provenance of reconstructed dynamic classes to make it possible to
  # reconstruct instances from the matching singleton class definition when
  # appropriate and preserve the usual "isinstance" semantics of Python objects.
  # The two mappings are weak (by key and by value respectively) and are
  # accessed under _DYNAMIC_CLASS_TRACKER_LOCK by the tracker helpers.
  _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
  _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
  _DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()

  PYPY = platform.python_implementation() == "PyPy"

  builtin_code_type = None
  if PYPY:
      # builtin-code objects only exist in pypy
      builtin_code_type = type(float.__new__.__code__)

  # Weakly-keyed cache mapping a code object to the global names it references
  # (filled by _extract_code_globals).
  _extract_code_globals_cache = weakref.WeakKeyDictionary()
  def _get_or_create_tracker_id(class_def):
      """Return the tracker id associated with ``class_def``, creating it if needed.

      If ``class_def`` has no id yet, a new ``uuid4`` hex string is generated
      and recorded in both tracker mappings (class -> id and id -> class).
      The lookup and the registration happen while holding
      ``_DYNAMIC_CLASS_TRACKER_LOCK``.
      """
      with _DYNAMIC_CLASS_TRACKER_LOCK:
          class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)
          if class_tracker_id is None:
              class_tracker_id = uuid.uuid4().hex
              _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
              _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def
      return class_tracker_id
  def _lookup_class_or_track(class_tracker_id, class_def):
      """Return the class registered under ``class_tracker_id``, tracking it if new.

      When ``class_tracker_id`` is None, ``class_def`` is returned untouched.
      Otherwise, under ``_DYNAMIC_CLASS_TRACKER_LOCK``, the definition already
      registered for that id (if any) takes precedence over ``class_def``;
      if none exists, ``class_def`` is registered. The returned definition is
      also (re-)recorded in the class -> id mapping.
      """
      if class_tracker_id is not None:
          with _DYNAMIC_CLASS_TRACKER_LOCK:
              # setdefault returns the previously tracked definition when the
              # id is already known, else stores and returns class_def.
              class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault(
                  class_tracker_id, class_def
              )
              _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
      return class_def
  def register_pickle_by_value(module):
      """Register a module to make its functions and classes picklable by value.

      By default, functions and classes that are attributes of an importable
      module are to be pickled by reference, that is relying on re-importing
      the attribute from the module at load time.

      If `register_pickle_by_value(module)` is called, all its functions and
      classes are subsequently to be pickled by value, meaning that they can
      be loaded in Python processes where the module is not importable.

      This is especially useful when developing a module in a distributed
      execution environment: restarting the client Python process with the new
      source code is enough: there is no need to re-install the new version
      of the module on all the worker nodes nor to restart the workers.

      Note: this feature is considered experimental. See the cloudpickle
      README.md file for more details and limitations.

      Raises
      ------
      ValueError
          If ``module`` is not a module object, or if its name is not present
          in ``sys.modules``.
      """
      if not isinstance(module, types.ModuleType):
          raise ValueError(f"Input should be a module object, got {str(module)} instead")
      # In the future, cloudpickle may need a way to access any module registered
      # for pickling by value in order to introspect relative imports inside
      # functions pickled by value. (see
      # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633).
      # This access can be ensured by checking that module is present in
      # sys.modules at registering time and assuming that it will still be in
      # there when accessed during pickling. Another alternative would be to
      # store a weakref to the module. Even though cloudpickle does not implement
      # this introspection yet, in order to avoid a possible breaking change
      # later, we still enforce the presence of module inside sys.modules.
      if module.__name__ not in sys.modules:
          raise ValueError(
              f"{module} was not imported correctly, have you used an "
              "`import` statement to access it?"
          )
      _PICKLE_BY_VALUE_MODULES.add(module.__name__)
  def unregister_pickle_by_value(module):
      """Unregister that the input module should be pickled by value.

      Raises
      ------
      ValueError
          If ``module`` is not a module object, or if its name is not
          currently registered for pickling by value.
      """
      if not isinstance(module, types.ModuleType):
          raise ValueError(f"Input should be a module object, got {str(module)} instead")
      if module.__name__ not in _PICKLE_BY_VALUE_MODULES:
          raise ValueError(f"{module} is not registered for pickle by value")
      _PICKLE_BY_VALUE_MODULES.remove(module.__name__)
  def list_registry_pickle_by_value():
      """Return a copy of the set of module names registered for pickling by value.

      A copy is returned so that callers mutating the result do not alter the
      registry itself.
      """
      return _PICKLE_BY_VALUE_MODULES.copy()
  def _is_registered_pickle_by_value(module):
      """Tell whether ``module`` or one of its parent packages is registered.

      The dotted name of ``module`` is checked first, then each enclosing
      package name obtained by stripping the last dotted component, until a
      registered name is found or no dot is left.
      """
      module_name = module.__name__
      if module_name in _PICKLE_BY_VALUE_MODULES:
          return True
      while True:
          parent_name = module_name.rsplit(".", 1)[0]
          if parent_name == module_name:
              # No dot left: the top-level name was already checked.
              break
          if parent_name in _PICKLE_BY_VALUE_MODULES:
              return True
          module_name = parent_name
      return False
  def _whichmodule(obj, name):
      """Find the module an object belongs to.

      This function differs from ``pickle.whichmodule`` in two ways:
      - it does not mangle the cases where obj's module is __main__ and obj was
        not found in any module.
      - Errors arising during module introspection are ignored, as those errors
        are considered unwanted side effects.

      Returns the module name, or None when ``obj.__module__`` is missing or
      None and no imported module exposes ``obj`` under ``name``.
      """
      module_name = getattr(obj, "__module__", None)

      if module_name is not None:
          return module_name
      # Protect the iteration by using a copy of sys.modules against dynamic
      # modules that trigger imports of other modules upon calls to getattr or
      # other threads importing at the same time.
      for module_name, module in sys.modules.copy().items():
          # Some modules such as coverage can inject non-module objects inside
          # sys.modules
          if (
              module_name == "__main__"
              or module is None
              or not isinstance(module, types.ModuleType)
          ):
              continue
          try:
              if _getattribute(module, name)[0] is obj:
                  return module_name
          except Exception:
              # Deliberate best effort: introspection failures are ignored.
              pass
      return None
  def _should_pickle_by_reference(obj, name=None):
      """Test whether a function or a class should be pickled by reference.

      Pickling by reference means that the object (typically a function or a
      class) is an attribute of a module that is assumed to be importable in the
      target Python environment. Loading will therefore rely on importing the
      module and then calling `getattr` on it to access the function or class.

      Pickling by reference is the only option to pickle functions and classes
      in the standard library. In cloudpickle the alternative option is to
      pickle by value (for instance for interactively or locally defined
      functions and classes or for attributes of modules that have been
      explicitly registered to be pickled by value).

      Raises
      ------
      TypeError
          If ``obj`` is neither a function, a class nor a module.
      """
      if isinstance(obj, types.FunctionType) or issubclass(type(obj), type):
          module_and_name = _lookup_module_and_qualname(obj, name=name)
          if module_and_name is None:
              return False
          module, name = module_and_name
          return not _is_registered_pickle_by_value(module)

      elif isinstance(obj, types.ModuleType):
          # We assume that sys.modules is primarily used as a cache mechanism for
          # the Python import machinery. Checking if a module has been added in
          # sys.modules is therefore a cheap and simple heuristic to tell us
          # whether we can assume that a given module could be imported by name
          # in another Python process.
          if _is_registered_pickle_by_value(obj):
              return False
          return obj.__name__ in sys.modules
      else:
          raise TypeError(
              "cannot check importability of {} instances".format(type(obj).__name__)
          )
  def _lookup_module_and_qualname(obj, name=None):
      """Return ``(module, name)`` if ``obj`` can be retrieved from a module.

      ``name`` defaults to ``obj.__qualname__`` (falling back to
      ``obj.__name__``). None is returned when the object's module cannot be
      determined, is ``__main__``, is absent from ``sys.modules``, or when
      looking ``name`` up in that module does not give back ``obj`` itself.
      """
      if name is None:
          name = getattr(obj, "__qualname__", None)
      if name is None:  # pragma: no cover
          # This used to be needed for Python 2.7 support but is probably not
          # needed anymore. However we keep the __name__ introspection in case
          # users of cloudpickle rely on this old behavior for unknown reasons.
          name = getattr(obj, "__name__", None)

      module_name = _whichmodule(obj, name)

      if module_name is None:
          # In this case, obj.__module__ is None AND obj was not found in any
          # imported module. obj is thus treated as dynamic.
          return None

      if module_name == "__main__":
          return None

      # Note: if module_name is in sys.modules, the corresponding module is
      # assumed importable at unpickling time. See #357
      module = sys.modules.get(module_name, None)
      if module is None:
          # The main reason why obj's module would not be imported is that this
          # module has been dynamically created, using for example
          # types.ModuleType. The other possibility is that module was removed
          # from sys.modules after obj was created/imported. But this case is not
          # supported, as the standard pickle does not support it either.
          return None

      try:
          obj2, parent = _getattribute(module, name)
      except AttributeError:
          # obj was not found inside the module it points to
          return None
      if obj2 is not obj:
          return None
      return module, name
  def _extract_code_globals(co):
      """Find all globals names read or written to by codeblock co.

      The result is a dict whose keys are the global names (values are all
      None); it is memoized per code object in
      ``_extract_code_globals_cache``.
      """
      out_names = _extract_code_globals_cache.get(co)
      if out_names is None:
          # We use a dict with None values instead of a set to get a
          # deterministic order and avoid introducing non-deterministic pickle
          # bytes as a results.
          out_names = {name: None for name in _walk_global_ops(co)}

          # Declaring a function inside another one using the "def ..." syntax
          # generates a constant code object corresponding to the one of the
          # nested function's. As the nested function may itself need global
          # variables, we need to introspect its code, extract its globals (look
          # for code objects in its co_consts attribute) and add the result to
          # out_names.
          if co.co_consts:
              for const in co.co_consts:
                  if isinstance(const, types.CodeType):
                      out_names.update(_extract_code_globals(const))

          _extract_code_globals_cache[co] = out_names

      return out_names
  def _find_imported_submodules(code, top_level_dependencies):
      """Find currently imported submodules used by a function.

      Submodules used by a function need to be detected and referenced for the
      function to work correctly at depickling time. Because submodules can be
      referenced as attribute of their parent package (``package.submodule``), we
      need a special introspection technique that does not rely on GLOBAL-related
      opcodes to find references of them in a code object.

      Example:
      ```
      import concurrent.futures
      import cloudpickle
      def func():
          x = concurrent.futures.ThreadPoolExecutor
      if __name__ == '__main__':
          cloudpickle.dumps(func)
      ```
      The globals extracted by cloudpickle in the function's state include the
      concurrent package, but not its submodule (here, concurrent.futures), which
      is the module used by func. Find_imported_submodules will detect the usage
      of concurrent.futures. Saving this module alongside with func will ensure
      that calling func once depickled does not fail due to concurrent.futures
      not being imported.

      Parameters
      ----------
      code : code object whose ``co_names`` are inspected.
      top_level_dependencies : iterable of objects referenced by the function;
          only module objects with a non-empty ``__package__`` are considered.

      Returns
      -------
      list of the submodule objects found in ``sys.modules``.
      """
      subimports = []
      # The names referenced by the code object do not depend on the module
      # being inspected: compute the set once instead of once per candidate.
      referenced_names = set(code.co_names)
      # check if any known dependency is an imported package
      for x in top_level_dependencies:
          if not (isinstance(x, types.ModuleType) and getattr(x, "__package__", None)):
              continue
          # check if the package has any currently loaded sub-imports
          prefix = x.__name__ + "."
          # A concurrent thread could mutate sys.modules,
          # make sure we iterate over a copy to avoid exceptions
          for name in list(sys.modules):
              # Older versions of pytest will add a "None" module to
              # sys.modules.
              if name is None or not name.startswith(prefix):
                  continue
              # check whether the function can address the sub-module
              tokens = set(name[len(prefix):].split("."))
              if tokens - referenced_names:
                  continue
              # The entry may have been removed by another thread since the
              # snapshot above was taken (or be a None placeholder): use a
              # tolerant lookup and skip anything that is not there anymore.
              module = sys.modules.get(name)
              if module is not None:
                  subimports.append(module)
      return subimports
  # relevant opcodes
  STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"]
  DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"]
  LOAD_GLOBAL = opcode.opmap["LOAD_GLOBAL"]
  GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL)
  HAVE_ARGUMENT = dis.HAVE_ARGUMENT
  EXTENDED_ARG = dis.EXTENDED_ARG


  # Reverse mapping from the type objects exposed by the ``types`` module to
  # their attribute name in that module. When several names alias the same
  # type, the last one in ``types.__dict__`` wins.
  _BUILTIN_TYPE_NAMES = {}
  for k, v in types.__dict__.items():
      if type(v) is type:
          _BUILTIN_TYPE_NAMES[v] = k
  def _builtin_type(name):
      """Return the attribute of the ``types`` module called ``name``.

      The legacy name ``"ClassType"`` is mapped to ``type``. Any other unknown
      name raises AttributeError (from ``getattr``).
      """
      if name == "ClassType":  # pragma: no cover
          # Backward compat to load pickle files generated with cloudpickle
          # < 1.3 even if loading pickle files from older versions is not
          # officially supported.
          return type
      return getattr(types, name)
  def _walk_global_ops(code):
      """Yield referenced name for global-referencing instructions in code.

      An instruction is global-referencing when its opcode is one of
      ``GLOBAL_OPS``; the yielded value is the instruction's ``argval``.
      """
      for instr in dis.get_instructions(code):
          op = instr.opcode
          if op in GLOBAL_OPS:
              yield instr.argval
  def _extract_class_dict(cls):
      """Retrieve a copy of the dict of a class without the inherited method.

      An entry is dropped when the very same object (identity check) is found
      under the same name in the ``__dict__`` of the direct base class(es).
      With several bases, their dicts are merged so that earlier bases take
      precedence over later ones.
      """
      clsdict = dict(cls.__dict__)  # copy dict proxy to a dict
      if len(cls.__bases__) == 1:
          inherited_dict = cls.__bases__[0].__dict__
      else:
          inherited_dict = {}
          # Update from the last base to the first so that the first base
          # listed overrides the others.
          for base in reversed(cls.__bases__):
              inherited_dict.update(base.__dict__)
      to_remove = []
      for name, value in clsdict.items():
          try:
              base_value = inherited_dict[name]
              if value is base_value:
                  to_remove.append(name)
          except KeyError:
              pass
      # Removal is deferred so that clsdict is not mutated while iterated.
      for name in to_remove:
          clsdict.pop(name)
      return clsdict
  def is_tornado_coroutine(func):
      """Return whether `func` is a Tornado coroutine function.

      Running coroutines are not supported.

      Deprecated: emits a DeprecationWarning on every call. False is returned
      when ``tornado.gen`` has not been imported or does not provide
      ``is_coroutine_function``.
      """
      warnings.warn(
          "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be "
          "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function "
          "directly instead.",
          category=DeprecationWarning,
          # Attribute the warning to the caller rather than to this function.
          stacklevel=2,
      )
      if "tornado.gen" not in sys.modules:
          return False
      gen = sys.modules["tornado.gen"]
      if not hasattr(gen, "is_coroutine_function"):
          # Tornado version is too old
          return False
      return gen.is_coroutine_function(func)
  def subimport(name):
      """Import the module called ``name`` and return that very module.

      For a dotted name, the submodule itself is returned, not its top-level
      package.
      """
      # We cannot do simply: `return __import__(name)`: Indeed, if ``name`` is
      # the name of a submodule, __import__ will return the top-level root module
      # of this submodule. For instance, __import__('os.path') returns the `os`
      # module.
      __import__(name)
      return sys.modules[name]
  def dynamic_subimport(name, vars):
      """Create a new module object called ``name`` populated with ``vars``.

      ``vars`` is merged into the module namespace, then ``__builtins__`` is
      set to the namespace of the ``builtins`` module. The module is not
      inserted into ``sys.modules`` by this function.
      """
      mod = types.ModuleType(name)
      mod.__dict__.update(vars)
      mod.__dict__["__builtins__"] = builtins.__dict__
      return mod
  def _get_cell_contents(cell):
      """Return the content of ``cell``, or ``_empty_cell_value`` if it is empty.

      Reading ``cell_contents`` of an empty cell raises ValueError, which is
      translated here into the sentinel.
      """
      try:
          return cell.cell_contents
      except ValueError:
          # Handle empty cells explicitly with a sentinel value.
          return _empty_cell_value
  def instance(cls):
      """Create a new instance of a class.

      Parameters
      ----------
      cls : type
          The class to create an instance of. It is called without arguments.

      Returns
      -------
      instance : cls
          A new instance of ``cls``.
      """
      return cls()
  # The ``instance`` decorator replaces the class by one instance of it, so the
  # module-level name ``_empty_cell_value`` refers to that single object.
  @instance
  class _empty_cell_value:
      """Sentinel for empty closures."""

      @classmethod
      def __reduce__(cls):
          # Returning a string makes pickle treat the object as a global to be
          # looked up by that name at load time.
          return cls.__name__
  def _make_function(code, globals, name, argdefs, closure):
      """Build a function object from its code object and execution context.

      Note that ``globals`` is modified in place: its ``__builtins__`` entry is
      set before the function is created.
      """
      # Setting __builtins__ in globals is needed for nogil CPython.
      globals["__builtins__"] = __builtins__
      return types.FunctionType(code, globals, name, argdefs, closure)
  def _make_empty_cell():
      """Return a new closure cell that holds no value."""
      if False:
          # trick the compiler into creating an empty cell in our lambda
          cell = None
          raise AssertionError("this route should not be executed")

      # ``cell`` is never assigned at runtime, so the cell captured by the
      # lambda is empty.
      return (lambda: cell).__closure__[0]
  def _make_cell(value=_empty_cell_value):
      """Return a new closure cell holding ``value``.

      When ``value`` is the ``_empty_cell_value`` sentinel (the default), the
      cell is left empty.
      """
      cell = _make_empty_cell()
      if value is not _empty_cell_value:
          cell.cell_contents = value
      return cell
  def _make_skeleton_class(
      type_constructor, name, bases, type_kwargs, class_tracker_id, extra
  ):
      """Build dynamic class with an empty __dict__ to be filled once memoized

      If class_tracker_id is not None, try to lookup an existing class definition
      matching that id. If none is found, track a newly reconstructed class
      definition under that id so that other instances stemming from the same
      class id will also reuse this class definition.

      The "extra" variable is meant to be a dict (or None) that can be used for
      forward compatibility shall the need arise. It is currently unused.
      """
      # type_kwargs entries are injected into the class namespace, while
      # type_constructor is used as the metaclass.
      skeleton_class = types.new_class(
          name, bases, {"metaclass": type_constructor}, lambda ns: ns.update(type_kwargs)
      )
      return _lookup_class_or_track(class_tracker_id, skeleton_class)
  def _make_skeleton_enum(
      bases, name, qualname, members, module, class_tracker_id, extra
  ):
      """Build dynamic enum with an empty __dict__ to be filled once memoized

      The creation of the enum class is inspired by the code of
      EnumMeta._create_.

      If class_tracker_id is not None, try to lookup an existing enum definition
      matching that id. If none is found, track a newly reconstructed enum
      definition under that id so that other instances stemming from the same
      class id will also reuse this enum definition.

      The "extra" variable is meant to be a dict (or None) that can be used for
      forward compatibility shall the need arise. It is currently unused.
      """
      # enums always inherit from their base Enum class at the last position in
      # the list of base classes:
      enum_base = bases[-1]
      metacls = enum_base.__class__
      classdict = metacls.__prepare__(name, bases)

      for member_name, member_value in members.items():
          classdict[member_name] = member_value
      enum_class = metacls.__new__(metacls, name, bases, classdict)
      enum_class.__module__ = module
      enum_class.__qualname__ = qualname

      return _lookup_class_or_track(class_tracker_id, enum_class)
  def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id):
      """Rebuild a ``typing.TypeVar`` and register it with the class tracker.

      If a TypeVar is already tracked under ``class_tracker_id``, that one is
      returned instead of the freshly built one.
      """
      tv = typing.TypeVar(
          name,
          *constraints,
          bound=bound,
          covariant=covariant,
          contravariant=contravariant,
      )
      return _lookup_class_or_track(class_tracker_id, tv)
  def _decompose_typevar(obj):
      """Return the tuple of arguments that ``_make_typevar`` needs to rebuild ``obj``.

      The last element is the tracker id of ``obj`` (created on first use).
      """
      return (
          obj.__name__,
          obj.__bound__,
          obj.__constraints__,
          obj.__covariant__,
          obj.__contravariant__,
          _get_or_create_tracker_id(obj),
      )
  def _typevar_reduce(obj):
      """Return the reduce tuple used to pickle the TypeVar ``obj``.

      The TypeVar is pickled by value (through ``_make_typevar``) when it
      cannot be looked up in a module, or when that module is registered for
      pickling by value. Otherwise it is pickled as a ``getattr`` on its
      module.
      """
      # TypeVar instances require the module information hence why we
      # are not using the _should_pickle_by_reference directly
      module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__)

      if module_and_name is None:
          return (_make_typevar, _decompose_typevar(obj))
      elif _is_registered_pickle_by_value(module_and_name[0]):
          return (_make_typevar, _decompose_typevar(obj))

      return (getattr, module_and_name)
  def _get_bases(typ):
      """Return the bases of ``typ``, preferring its own ``__orig_bases__``.

      ``__orig_bases__`` is used only when it is defined in the ``__dict__`` of
      ``typ`` itself; otherwise ``__bases__`` is returned.
      """
      if "__orig_bases__" in getattr(typ, "__dict__", {}):
          # For generic types (see PEP 560)
          # Note that simply checking `hasattr(typ, '__orig_bases__')` is not
          # correct. Subclasses of a fully-parameterized generic class does not
          # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')`
          # will return True because it's defined in the base class.
          bases_attr = "__orig_bases__"
      else:
          # For regular class objects
          bases_attr = "__bases__"
      return getattr(typ, bases_attr)
  def _make_dict_keys(obj, is_ordered=False):
      """Return a keys view over the items of the iterable ``obj``.

      The view belongs to a new ``OrderedDict`` when ``is_ordered`` is true,
      to a new plain ``dict`` otherwise.
      """
      if is_ordered:
          return OrderedDict.fromkeys(obj).keys()
      else:
          return dict.fromkeys(obj).keys()
  def _make_dict_values(obj, is_ordered=False):
      """Return a values view holding the items of the iterable ``obj``.

      The items are stored under their position index in a new ``OrderedDict``
      when ``is_ordered`` is true, in a new plain ``dict`` otherwise.
      """
      if is_ordered:
          return OrderedDict((i, value) for i, value in enumerate(obj)).values()
      else:
          return {i: value for i, value in enumerate(obj)}.values()
  def _make_dict_items(obj, is_ordered=False):
      """Return an items view for the mapping ``obj``.

      When ``is_ordered`` is true the view belongs to a new ``OrderedDict``
      built from ``obj``; otherwise ``obj.items()`` is returned directly.
      """
      if is_ordered:
          return OrderedDict(obj).items()
      else:
          return obj.items()
  522. # COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS
  523. # -------------------------------------------------
  def _class_getnewargs(obj):
      """Return the argument tuple from which a skeleton of class ``obj`` is rebuilt.

      The tuple holds the metaclass, the class name, its bases, a dict of extra
      namespace entries (``__module__`` and a property-based ``__dict__`` when
      defined on the class itself), the tracker id and a trailing None.
      """
      type_kwargs = {}
      if "__module__" in obj.__dict__:
          type_kwargs["__module__"] = obj.__module__

      # Only a ``__dict__`` entry that is a property is forwarded.
      dict_attr = obj.__dict__.get("__dict__", None)
      if isinstance(dict_attr, property):
          type_kwargs["__dict__"] = dict_attr

      return (
          type(obj),
          obj.__name__,
          _get_bases(obj),
          type_kwargs,
          _get_or_create_tracker_id(obj),
          None,
      )
  def _enum_getnewargs(obj):
      """Return the argument tuple from which a skeleton of enum ``obj`` is rebuilt.

      The tuple holds the bases, name, qualified name, a name -> value mapping
      of the members, the module name, the tracker id and a trailing None.
      """
      members = {e.name: e.value for e in obj}
      return (
          obj.__bases__,
          obj.__name__,
          obj.__qualname__,
          members,
          obj.__module__,
          _get_or_create_tracker_id(obj),
          None,
      )
  550. # COLLECTION OF OBJECTS RECONSTRUCTORS
  551. # ------------------------------------
  def _file_reconstructor(retval):
      """Return ``retval`` unchanged (identity reconstructor)."""
      return retval
  554. # COLLECTION OF OBJECTS STATE GETTERS
  555. # -----------------------------------
  def _function_getstate(func):
      """Return the ``(state, slotstate)`` pair describing function ``func``.

      ``state`` is ``func.__dict__`` itself (not a copy). ``slotstate`` is a
      dict holding the function's special attributes, the subset of its globals
      that its code references, and the already-imported submodules it uses.
      """
      # - Put func's dynamic attributes (stored in func.__dict__) in state. These
      #   attributes will be restored at unpickling time using
      #   f.__dict__.update(state)
      # - Put func's members into slotstate. Such attributes will be restored at
      #   unpickling time by iterating over slotstate and calling setattr(func,
      #   slotname, slotvalue)
      slotstate = {
          "__name__": func.__name__,
          "__qualname__": func.__qualname__,
          "__annotations__": func.__annotations__,
          "__kwdefaults__": func.__kwdefaults__,
          "__defaults__": func.__defaults__,
          "__module__": func.__module__,
          "__doc__": func.__doc__,
          "__closure__": func.__closure__,
      }

      # Keep only the globals actually referenced by the code (and present).
      f_globals_ref = _extract_code_globals(func.__code__)
      f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in func.__globals__}

      if func.__closure__ is not None:
          closure_values = list(map(_get_cell_contents, func.__closure__))
      else:
          closure_values = ()

      # Extract currently-imported submodules used by func. Storing these modules
      # in a smoke _cloudpickle_submodules attribute of the object's state will
      # trigger the side effect of importing these modules at unpickling time
      # (which is necessary for func to work correctly once depickled)
      slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
          func.__code__, itertools.chain(f_globals.values(), closure_values)
      )
      slotstate["__globals__"] = f_globals

      state = func.__dict__
      return state, slotstate
  def _class_getstate(obj):
      """Return the ``(clsdict, slotstate)`` state of the dynamic class ``obj``.

      ``clsdict`` starts from ``_extract_class_dict(obj)`` and is stripped of
      entries that must not or need not be pickled: ``__weakref__``, the abc
      caches, the member descriptors generated from ``__slots__`` and
      ``__dict__``.  For classes whose metaclass derives from ``abc.ABCMeta``
      the registered subclasses are stored as a list under ``"_abc_impl"``.
      The second element of the result is always an empty dict.
      """
      clsdict = _extract_class_dict(obj)
      clsdict.pop("__weakref__", None)

      if issubclass(type(obj), abc.ABCMeta):
          # If obj is an instance of an ABCMeta subclass, don't pickle the
          # cache/negative caches populated during isinstance/issubclass
          # checks, but pickle the list of registered subclasses of obj.
          clsdict.pop("_abc_cache", None)
          clsdict.pop("_abc_negative_cache", None)
          clsdict.pop("_abc_negative_cache_version", None)
          registry = clsdict.pop("_abc_registry", None)
          if registry is None:
              # The abc caches and registered subclasses of a
              # class are bundled into the single _abc_impl attribute
              clsdict.pop("_abc_impl", None)
              (registry, _, _, _) = abc._get_dump(obj)
              # Here registry is a set of weakrefs: dereference each of them.
              clsdict["_abc_impl"] = [
                  subclass_weakref() for subclass_weakref in registry
              ]
          else:
              # In this case, registry is a WeakSet
              clsdict["_abc_impl"] = list(registry)

      if "__slots__" in clsdict:
          # pickle string length optimization: member descriptors of obj are
          # created automatically from obj's __slots__ attribute, no need to
          # save them in obj's state
          slots = obj.__slots__
          slot_names = (slots,) if isinstance(slots, str) else slots
          for slot_name in slot_names:
              # Always pop with a default: a slot such as "__weakref__" has
              # already been removed above, and popping it a second time
              # without a default used to raise KeyError when __slots__ was
              # given as a single string.
              clsdict.pop(slot_name, None)

      clsdict.pop("__dict__", None)  # unpicklable property object

      return (clsdict, {})
  def _enum_getstate(obj):
      """Return the ``(clsdict, slotstate)`` state of the dynamic enum ``obj``.

      Starts from ``_class_getstate(obj)`` and removes from ``clsdict`` the
      enum bookkeeping attributes and the members themselves.
      """
      clsdict, slotstate = _class_getstate(obj)
      members = {e.name: e.value for e in obj}

      # Cleanup the clsdict that will be passed to _make_skeleton_enum:
      # those attributes are already handled by the metaclass.
      metaclass_managed = (
          "_generate_next_value_",
          "_member_names_",
          "_member_map_",
          "_member_type_",
          "_value2member_map_",
      )
      for managed_name in metaclass_managed:
          clsdict.pop(managed_name, None)

      # Every member is expected to be present in clsdict (no default here).
      for member_name in members:
          clsdict.pop(member_name)

      return clsdict, slotstate
  638. # COLLECTIONS OF OBJECTS REDUCERS
  639. # -------------------------------
  640. # A reducer is a function taking a single argument (obj), and that returns a
  641. # tuple with all the necessary data to re-construct obj. Apart from a few
  642. # exceptions (list, dict, bytes, int, etc.), a reducer is necessary to
  643. # correctly pickle an object.
  644. # While many built-in objects (Exceptions objects, instances of the "object"
  645. # class, etc), are shipped with their own built-in reducer (invoked using
  646. # obj.__reduce__), some do not. The following methods were created to "fill
  647. # these holes".
  def _code_reduce(obj):
      """code object reducer.

      Returns ``(types.CodeType, args)`` where ``args`` are the attributes of
      ``obj`` in the order expected by the code constructor of the running
      interpreter.  If you are not sure about the order of arguments, take a
      look at ``help(types.CodeType)``.
      """
      # Attributes opening and closing the argument list on every variant.
      leading = (
          "co_argcount",
          "co_posonlyargcount",
          "co_kwonlyargcount",
          "co_nlocals",
      )
      trailing = ("co_freevars", "co_cellvars")

      if hasattr(obj, "co_exceptiontable"):
          # Python 3.11 and later: there are some new attributes
          # related to the enhanced exceptions.
          middle = (
              "co_stacksize",
              "co_flags",
              "co_code",
              "co_consts",
              "co_names",
              "co_varnames",
              "co_filename",
              "co_name",
              "co_qualname",
              "co_firstlineno",
              "co_linetable",
              "co_exceptiontable",
          )
      elif hasattr(obj, "co_linetable"):
          # Python 3.10 and later: obj.co_lnotab is deprecated and constructor
          # expects obj.co_linetable instead.
          middle = (
              "co_stacksize",
              "co_flags",
              "co_code",
              "co_consts",
              "co_names",
              "co_varnames",
              "co_filename",
              "co_name",
              "co_firstlineno",
              "co_linetable",
          )
      elif hasattr(obj, "co_nmeta"):  # pragma: no cover
          # "nogil" Python: modified attributes from 3.9
          middle = (
              "co_framesize",
              "co_ndefaultargs",
              "co_nmeta",
              "co_flags",
              "co_code",
              "co_consts",
              "co_varnames",
              "co_filename",
              "co_name",
              "co_firstlineno",
              "co_lnotab",
              "co_exc_handlers",
              "co_jump_table",
          )
          trailing = trailing + ("co_free2reg", "co_cell2reg")
      else:
          # Backward compat for 3.8 and 3.9
          middle = (
              "co_stacksize",
              "co_flags",
              "co_code",
              "co_consts",
              "co_names",
              "co_varnames",
              "co_filename",
              "co_name",
              "co_firstlineno",
              "co_lnotab",
          )

      field_names = leading + middle + trailing
      args = tuple(getattr(obj, field_name) for field_name in field_names)
      return types.CodeType, args
  def _cell_reduce(obj):
      """Cell (containing values of a function's free variables) reducer.

      An empty cell reduces to ``_make_empty_cell`` with no argument; any other
      cell reduces to ``_make_cell`` applied to its contents.
      """
      try:
          contents = obj.cell_contents
      except ValueError:  # cell is empty
          return _make_empty_cell, ()
      return _make_cell, (contents,)
  def _classmethod_reduce(obj):
      """Reduce a method wrapper to its own type applied to the wrapped function.

      ``obj`` only needs a ``__func__`` attribute.
      """
      wrapper_type = type(obj)
      return wrapper_type, (obj.__func__,)
  def _file_reduce(obj):
      """Save a file.

      ``sys.stdout`` and ``sys.stderr`` are reduced to an attribute lookup on
      ``sys``.  Any other readable file is read entirely and reduced to an
      ``io.StringIO`` holding the same contents, the same position and the same
      ``name``.  The position of ``obj`` is restored after reading.

      Raises ``pickle.PicklingError`` when ``obj`` has no ``name`` or ``mode``,
      is ``sys.stdin``, is closed, is a tty, is not opened for reading, or
      cannot be read.
      """
      import io

      if not (hasattr(obj, "name") and hasattr(obj, "mode")):
          raise pickle.PicklingError(
              "Cannot pickle files that do not map to an actual file"
          )

      # Standard output streams are looked up again on the unpickling side.
      for stream_name in ("stdout", "stderr"):
          if obj is getattr(sys, stream_name):
              return getattr, (sys, stream_name)

      if obj is sys.stdin:
          raise pickle.PicklingError("Cannot pickle standard input")
      if obj.closed:
          raise pickle.PicklingError("Cannot pickle closed files")
      if hasattr(obj, "isatty") and obj.isatty():
          raise pickle.PicklingError("Cannot pickle files that map to tty objects")

      mode = obj.mode
      if not ("r" in mode or "+" in mode):
          raise pickle.PicklingError(
              "Cannot pickle files that are not opened for reading: %s" % mode
          )

      name = obj.name
      try:
          # Read the whole file, then put the cursor back where it was.
          curloc = obj.tell()
          obj.seek(0)
          contents = obj.read()
          obj.seek(curloc)
      except OSError as e:
          raise pickle.PicklingError(
              "Cannot pickle file %s as it cannot be read" % name
          ) from e

      snapshot = io.StringIO()
      snapshot.write(contents)
      snapshot.seek(curloc)
      snapshot.name = name
      return _file_reconstructor, (snapshot,)
  def _getset_descriptor_reduce(obj):
      """Reduce a getset descriptor to a ``getattr`` on the class that owns it."""
      owner = obj.__objclass__
      attribute_name = obj.__name__
      return getattr, (owner, attribute_name)
  def _mappingproxy_reduce(obj):
      """Reduce a mapping proxy to ``types.MappingProxyType`` over a dict copy."""
      snapshot = dict(obj)
      return types.MappingProxyType, (snapshot,)
  def _memoryview_reduce(obj):
      """Reduce a memoryview to ``bytes`` built from a copy of its content."""
      payload = obj.tobytes()
      return bytes, (payload,)
  def _module_reduce(obj):
      """Reduce a module either by name or together with its namespace.

      When ``_should_pickle_by_reference(obj)`` is true only the module name is
      shipped (to ``subimport``).  Otherwise the name and a copy of the module
      ``__dict__`` are shipped to ``dynamic_subimport``.
      """
      if not _should_pickle_by_reference(obj):
          # Some external libraries can populate the "__builtins__" entry of a
          # module's `__dict__` with unpicklable objects (see #316). For that
          # reason, we do not attempt to pickle the "__builtins__" entry, and
          # restore a default value for it at unpickling time.
          state = {
              key: value
              for key, value in obj.__dict__.items()
              if key != "__builtins__"
          }
          return dynamic_subimport, (obj.__name__, state)
      return subimport, (obj.__name__,)
  def _method_reduce(obj):
      """Reduce a bound method to ``types.MethodType(function, instance)``."""
      function, instance = obj.__func__, obj.__self__
      return (types.MethodType, (function, instance))
  def _logger_reduce(obj):
      """Reduce a logger to a ``logging.getLogger`` call with the logger name."""
      logger_name = obj.name
      return logging.getLogger, (logger_name,)
  def _root_logger_reduce(obj):
      """Reduce the root logger to an argument-less ``logging.getLogger`` call.

      ``obj`` itself is not inspected.
      """
      no_arguments = ()
      return logging.getLogger, no_arguments
  def _property_reduce(obj):
      """Reduce a property to ``property(fget, fset, fdel, doc)``."""
      accessors = (obj.fget, obj.fset, obj.fdel)
      return property, accessors + (obj.__doc__,)
  def _weakset_reduce(obj):
      """Reduce a WeakSet to ``weakref.WeakSet`` over a list of its elements."""
      elements = list(obj)
      return weakref.WeakSet, (elements,)
  def _dynamic_class_reduce(obj):
      """Save a class that can't be referenced as a module attribute.

      This method is used to serialize classes that are defined inside
      functions, or that otherwise can't be serialized as attribute lookups
      from importable modules.

      Enum subclasses are rebuilt through ``_make_skeleton_enum``, every other
      class through ``_make_skeleton_class``; in both cases the state is
      applied with ``_class_setstate``.
      """
      if Enum is not None and issubclass(obj, Enum):
          constructor = _make_skeleton_enum
          newargs = _enum_getnewargs(obj)
          state = _enum_getstate(obj)
      else:
          constructor = _make_skeleton_class
          newargs = _class_getnewargs(obj)
          state = _class_getstate(obj)

      # (callable, args, state, listitems, dictitems, state_setter)
      return (constructor, newargs, state, None, None, _class_setstate)
  def _class_reduce(obj):
      """Select the reducer depending on the dynamic nature of the class obj.

      Returns ``NotImplemented`` when none of the special cases applies.
      """
      # The types of these singletons are rebuilt by calling type() on them.
      for singleton in (None, Ellipsis, NotImplemented):
          if obj is type(singleton):  # noqa
              return type, (singleton,)

      if obj in _BUILTIN_TYPE_NAMES:
          return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
      if not _should_pickle_by_reference(obj):
          return _dynamic_class_reduce(obj)
      return NotImplemented
  def _dict_keys_reduce(obj):
      """Reduce a dict keys view to ``_make_dict_keys`` over a list of the keys."""
      # Safer not to ship the full dict as sending the rest might
      # be unintended and could potentially cause leaking of
      # sensitive information
      keys = list(obj)
      return _make_dict_keys, (keys,)
  def _dict_values_reduce(obj):
      """Reduce a dict values view to ``_make_dict_values`` over a list of them."""
      # Safer not to ship the full dict as sending the rest might
      # be unintended and could potentially cause leaking of
      # sensitive information
      values = list(obj)
      return _make_dict_values, (values,)
  def _dict_items_reduce(obj):
      """Reduce a dict items view to ``_make_dict_items`` over a dict of the items."""
      items = dict(obj)
      return _make_dict_items, (items,)
  def _odict_keys_reduce(obj):
      """Reduce an OrderedDict keys view to ``_make_dict_keys(keys, True)``."""
      # Safer not to ship the full dict as sending the rest might
      # be unintended and could potentially cause leaking of
      # sensitive information
      keys = list(obj)
      return _make_dict_keys, (keys, True)
  def _odict_values_reduce(obj):
      """Reduce an OrderedDict values view to ``_make_dict_values(values, True)``."""
      # Safer not to ship the full dict as sending the rest might
      # be unintended and could potentially cause leaking of
      # sensitive information
      values = list(obj)
      return _make_dict_values, (values, True)
  def _odict_items_reduce(obj):
      """Reduce an OrderedDict items view to ``_make_dict_items(items, True)``."""
      items = dict(obj)
      return _make_dict_items, (items, True)
  def _dataclass_field_base_reduce(obj):
      """Reduce a dataclass field-type sentinel to a lookup by its ``name``."""
      sentinel_name = obj.name
      return _get_dataclass_field_type_sentinel, (sentinel_name,)
  882. # COLLECTIONS OF OBJECTS STATE SETTERS
  883. # ------------------------------------
  # State setters are called at unpickling time, once the object has been
  # created, to restore the state it had at pickling time.
  def _function_setstate(obj, state):
      """Update the state of a dynamic function.

      As __closure__ and __globals__ are readonly attributes of a function, we
      cannot rely on the native setstate routine of pickle.load_build, that calls
      setattr on items of the slotstate. Instead, we have to modify them inplace.

      ``state`` is a ``(dict_state, slotstate)`` pair.  The "__globals__",
      "__closure__" and "_cloudpickle_submodules" entries are removed from
      ``slotstate`` before the remaining entries are applied with ``setattr``.
      """
      dict_state, slotstate = state
      obj.__dict__.update(dict_state)

      saved_globals = slotstate.pop("__globals__")
      saved_closure = slotstate.pop("__closure__")
      # "_cloudpickle_submodules" is a set of submodules that must be loaded
      # for the pickled function to work correctly at unpickling time. Now that
      # these submodules are depickled (hence imported), they can be removed
      # from the object's state (the object state only served as a reference
      # holder to these submodules)
      slotstate.pop("_cloudpickle_submodules")

      function_globals = obj.__globals__
      function_globals.update(saved_globals)
      function_globals["__builtins__"] = __builtins__

      if saved_closure is not None:
          # Fill the cells of obj in place, leaving empty cells untouched.
          for index, saved_cell in enumerate(saved_closure):
              try:
                  contents = saved_cell.cell_contents
              except ValueError:  # cell is empty
                  continue
              obj.__closure__[index].cell_contents = contents

      for slot_name, slot_value in slotstate.items():
          setattr(obj, slot_name, slot_value)
  def _class_setstate(obj, state):
      """Apply a pickled class state to the class ``obj`` and return ``obj``.

      ``state`` is a ``(attributes, slotstate)`` pair; ``slotstate`` is not
      used.  Every attribute is set on ``obj`` with ``setattr`` except
      "_abc_impl", whose value is a collection of classes that are registered
      on ``obj`` with ``obj.register`` once all the attributes are set.
      """
      attributes, _unused_slotstate = state

      for attribute_name, attribute in attributes.items():
          if attribute_name != "_abc_impl":
              setattr(obj, attribute_name, attribute)

      registered = attributes.get("_abc_impl")
      if registered is not None:
          for subclass in registered:
              obj.register(subclass)

      return obj
  925. # COLLECTION OF DATACLASS UTILITIES
  926. # ---------------------------------
  927. # There are some internal sentinel values whose identity must be preserved when
  928. # unpickling dataclass fields. Each sentinel value has a unique name that we can
  929. # use to retrieve its identity at unpickling time.
  # Maps the unique name of each dataclasses sentinel to the sentinel object.
  _DATACLASSE_FIELD_TYPE_SENTINELS = {
      sentinel.name: sentinel
      for sentinel in (
          dataclasses._FIELD,
          dataclasses._FIELD_CLASSVAR,
          dataclasses._FIELD_INITVAR,
      )
  }


  def _get_dataclass_field_type_sentinel(name):
      """Return the dataclasses sentinel registered under ``name``.

      Raises ``KeyError`` when ``name`` is not one of the known sentinel names.
      """
      return _DATACLASSE_FIELD_TYPE_SENTINELS[name]
  class Pickler(pickle.Pickler):
      """Pickler that also serializes dynamic functions and classes.

      Reducers for the types listed in ``_dispatch_table`` are exposed through
      ``dispatch_table``.  Functions and classes are handled by
      ``reducer_override`` when ``PYPY`` is false, and by the ``save_global`` /
      ``save_function`` entries of ``dispatch`` otherwise.
      """

      # set of reducers defined and used by cloudpickle (private)
      _dispatch_table = {}
      _dispatch_table[classmethod] = _classmethod_reduce
      _dispatch_table[io.TextIOWrapper] = _file_reduce
      _dispatch_table[logging.Logger] = _logger_reduce
      _dispatch_table[logging.RootLogger] = _root_logger_reduce
      _dispatch_table[memoryview] = _memoryview_reduce
      _dispatch_table[property] = _property_reduce
      _dispatch_table[staticmethod] = _classmethod_reduce
      _dispatch_table[CellType] = _cell_reduce
      _dispatch_table[types.CodeType] = _code_reduce
      _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce
      _dispatch_table[types.ModuleType] = _module_reduce
      _dispatch_table[types.MethodType] = _method_reduce
      _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
      _dispatch_table[weakref.WeakSet] = _weakset_reduce
      _dispatch_table[typing.TypeVar] = _typevar_reduce
      _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
      _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
      _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
      _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce
      _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce
      _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce
      _dispatch_table[abc.abstractmethod] = _classmethod_reduce
      _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce
      _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce
      _dispatch_table[abc.abstractproperty] = _property_reduce
      _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce

      dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

      # function reducers are defined as instance methods of cloudpickle.Pickler
      # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref)
      def _dynamic_function_reduce(self, func):
          """Reduce a function that is not pickleable via attribute lookup."""
          newargs = self._function_getnewargs(func)
          state = _function_getstate(func)
          return (_make_function, newargs, state, None, None, _function_setstate)

      def _function_reduce(self, obj):
          """Reducer for function objects.

          If obj is a top-level attribute of a file-backed module, this reducer
          returns NotImplemented, making the cloudpickle.Pickler fall back to
          traditional pickle.Pickler routines to save obj. Otherwise, it reduces
          obj using a custom cloudpickle reducer designed specifically to handle
          dynamic functions.
          """
          if _should_pickle_by_reference(obj):
              return NotImplemented
          else:
              return self._dynamic_function_reduce(obj)

      def _function_getnewargs(self, func):
          """Return the arguments used to create the skeleton of ``func``.

          The result is ``(code, base_globals, None, None, closure)`` where
          ``closure`` is ``None`` or a tuple of empty cells, one per free
          variable of the code object.
          """
          code = func.__code__

          # base_globals represents the future global namespace of func at
          # unpickling time. Looking it up and storing it in
          # cloudpickle.Pickler.globals_ref allow functions sharing the same
          # globals at pickling time to also share them once unpickled, at one
          # condition: since globals_ref is an attribute of a cloudpickle.Pickler
          # instance, and that a new cloudpickle.Pickler is created each time
          # cloudpickle.dump or cloudpickle.dumps is called, functions also need
          # to be saved within the same invocation of
          # cloudpickle.dump/cloudpickle.dumps (for example:
          # cloudpickle.dumps([f1, f2])). There is no such limitation when using
          # cloudpickle.Pickler.dump, as long as the multiple invocations are
          # bound to the same cloudpickle.Pickler instance.
          base_globals = self.globals_ref.setdefault(id(func.__globals__), {})

          if not base_globals:
              # Add module attributes used to resolve relative imports
              # instructions inside func.
              for k in ["__package__", "__name__", "__path__", "__file__"]:
                  if k in func.__globals__:
                      base_globals[k] = func.__globals__[k]

          # Do not bind the free variables before the function is created to
          # avoid infinite recursion.
          if func.__closure__ is None:
              closure = None
          else:
              closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars)))

          return code, base_globals, None, None, closure

      def dump(self, obj):
          """Pickle ``obj`` to the file given to the constructor.

          A ``RuntimeError`` whose message mentions "recursion" is converted
          into a ``pickle.PicklingError``; any other ``RuntimeError`` is
          re-raised unchanged.
          """
          try:
              return super().dump(obj)
          except RuntimeError as e:
              # Only inspect the message when it is a string: a RuntimeError
              # carrying a non-string first argument must be re-raised as is
              # rather than be masked by a TypeError from the ``in`` test.
              if e.args and isinstance(e.args[0], str) and "recursion" in e.args[0]:
                  msg = "Could not pickle object as excessively deep recursion required."
                  raise pickle.PicklingError(msg) from e
              else:
                  raise

      def __init__(self, file, protocol=None, buffer_callback=None):
          """Create a pickler writing to ``file``.

          ``protocol`` defaults to ``DEFAULT_PROTOCOL`` when it is ``None``.
          """
          if protocol is None:
              protocol = DEFAULT_PROTOCOL
          super().__init__(file, protocol=protocol, buffer_callback=buffer_callback)
          # map functions __globals__ attribute ids, to ensure that functions
          # sharing the same global namespace at pickling time also share
          # their global namespace at unpickling time.
          self.globals_ref = {}
          self.proto = int(protocol)

      if not PYPY:
          # pickle.Pickler is the C implementation of the CPython pickler and
          # therefore we rely on reduce_override method to customize the pickler
          # behavior.

          # `cloudpickle.Pickler.dispatch` is only left for backward
          # compatibility - note that when using protocol 5,
          # `cloudpickle.Pickler.dispatch` is not an extension of
          # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler`
          # subclasses the C-implemented `pickle.Pickler`, which does not expose
          # a `dispatch` attribute. Earlier versions of `cloudpickle.Pickler`
          # used `cloudpickle.Pickler.dispatch` as a class-level attribute
          # storing all reducers implemented by cloudpickle, but the attribute
          # name was not a great choice because it would collide with a
          # similarly named attribute in the pure-Python `pickle._Pickler`
          # implementation in the standard library.
          dispatch = dispatch_table

          # Implementation of the reducer_override callback, in order to
          # efficiently serialize dynamic functions and classes by subclassing
          # the C-implemented `pickle.Pickler`.
          # TODO: decorrelate reducer_override (which is tied to CPython's
          # implementation - would it make sense to backport it to pypy? - and
          # pickle's protocol 5 which is implementation agnostic. Currently, the
          # availability of both notions coincide on CPython's pickle, but it may
          # not be the case anymore when pypy implements protocol 5.

          def reducer_override(self, obj):
              """Type-agnostic reducing callback for function and classes.

              For performance reasons, subclasses of the C `pickle.Pickler` class
              cannot register custom reducers for functions and classes in the
              dispatch_table attribute. Reducers for such types must instead
              be implemented via the special `reducer_override` method.

              Note that this method will be called for any object except a few
              builtin-types (int, lists, dicts etc.), which differs from reducers
              in the Pickler's dispatch_table, each of them being invoked for
              objects of a specific type only.

              This property comes in handy for classes: although most classes are
              instances of the ``type`` metaclass, some of them can be instances
              of other custom metaclasses (such as enum.EnumMeta for example). In
              particular, the metaclass will likely not be known in advance, and
              thus cannot be special-cased using an entry in the dispatch_table.
              reducer_override, among other things, allows us to register a
              reducer that will be called for any class, independently of its
              type.

              Notes:

              * reducer_override has the priority over dispatch_table-registered
                reducers.
              * reducer_override can be used to fix other limitations of
                cloudpickle for other types that suffered from type-specific
                reducers, such as Exceptions. See
                https://github.com/cloudpipe/cloudpickle/issues/248
              """
              t = type(obj)
              try:
                  is_anyclass = issubclass(t, type)
              except TypeError:  # t is not a class (old Boost; see SF #502085)
                  is_anyclass = False

              if is_anyclass:
                  return _class_reduce(obj)
              elif isinstance(obj, types.FunctionType):
                  return self._function_reduce(obj)
              else:
                  # fallback to save_global, including the Pickler's
                  # dispatch_table
                  return NotImplemented

      else:
          # When reducer_override is not available, hack the pure-Python
          # Pickler's types.FunctionType and type savers. Note: the type saver
          # must override Pickler.save_global, because pickle.py contains a
          # hard-coded call to save_global when pickling meta-classes.
          dispatch = pickle.Pickler.dispatch.copy()

          def _save_reduce_pickle5(
              self,
              func,
              args,
              state=None,
              listitems=None,
              dictitems=None,
              state_setter=None,
              obj=None,
          ):
              """Save a reduce tuple and apply ``state`` through ``state_setter``.

              The object is first saved without state, then opcodes calling
              ``state_setter(obj, state)`` are written to the stream.
              """
              save = self.save
              write = self.write
              self.save_reduce(
                  func,
                  args,
                  state=None,
                  listitems=listitems,
                  dictitems=dictitems,
                  obj=obj,
              )
              # backport of the Python 3.8 state_setter pickle operations
              save(state_setter)
              save(obj)  # simple BINGET opcode as obj is already memoized.
              save(state)
              write(pickle.TUPLE2)
              # Trigger a state_setter(obj, state) function call.
              write(pickle.REDUCE)
              # The purpose of state_setter is to carry-out an
              # inplace modification of obj. We do not care about what the
              # method might return, so its output is eventually removed from
              # the stack.
              write(pickle.POP)

          def save_global(self, obj, name=None, pack=struct.pack):
              """Main dispatch method.

              The name of this method is somewhat misleading: all types get
              dispatched here.
              """
              if obj is type(None):  # noqa
                  return self.save_reduce(type, (None,), obj=obj)
              elif obj is type(Ellipsis):
                  return self.save_reduce(type, (Ellipsis,), obj=obj)
              elif obj is type(NotImplemented):
                  return self.save_reduce(type, (NotImplemented,), obj=obj)
              elif obj in _BUILTIN_TYPE_NAMES:
                  return self.save_reduce(
                      _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj
                  )

              if name is not None:
                  super().save_global(obj, name=name)
              elif not _should_pickle_by_reference(obj, name=name):
                  self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj)
              else:
                  super().save_global(obj, name=name)

          dispatch[type] = save_global

          def save_function(self, obj, name=None):
              """Registered with the dispatch to handle all function types.

              Determines what kind of function obj is (e.g. lambda, defined at
              interactive prompt, etc) and handles the pickling appropriately.
              """
              if _should_pickle_by_reference(obj, name=name):
                  return super().save_global(obj, name=name)
              elif PYPY and isinstance(obj.__code__, builtin_code_type):
                  return self.save_pypy_builtin_func(obj)
              else:
                  return self._save_reduce_pickle5(
                      *self._dynamic_function_reduce(obj), obj=obj
                  )

          def save_pypy_builtin_func(self, obj):
              """Save pypy equivalent of builtin functions.

              PyPy does not have the concept of builtin-functions. Instead,
              builtin-functions are simple function instances, but with a
              builtin-code attribute.
              Most of the time, builtin functions should be pickled by attribute.
              But PyPy has flaky support for __qualname__, so some builtin
              functions such as float.__new__ will be classified as dynamic. For
              this reason only, we created this special routine. Because
              builtin-functions are not expected to have closure or globals,
              there is no additional hack (compared the one already implemented
              in pickle) to protect ourselves from reference cycles. A simple
              (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note
              also that PyPy improved their support for __qualname__ in v3.6, so
              this routing should be removed when cloudpickle supports only PyPy
              3.6 and later.
              """
              rv = (
                  types.FunctionType,
                  (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__),
                  obj.__dict__,
              )
              self.save_reduce(*rv, obj=obj)

          dispatch[types.FunctionType] = save_function
  1192. # Shorthands similar to pickle.dump/pickle.dumps
  def dump(obj, file, protocol=None, buffer_callback=None):
      """Serialize obj as bytes streamed into file

      protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
      pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
      speed between processes running the same Python version.

      Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
      compatibility with older versions of Python (although this is not always
      guaranteed to work because cloudpickle relies on some internal
      implementation details that can change from one Python version to the
      next).
      """
      # A fresh Pickler is built for every call.
      pickler = Pickler(file, protocol=protocol, buffer_callback=buffer_callback)
      pickler.dump(obj)
  def dumps(obj, protocol=None, buffer_callback=None):
      """Serialize obj as a string of bytes allocated in memory

      protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
      pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
      speed between processes running the same Python version.

      Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
      compatibility with older versions of Python (although this is not always
      guaranteed to work because cloudpickle relies on some internal
      implementation details that can change from one Python version to the
      next).
      """
      with io.BytesIO() as stream:
          Pickler(stream, protocol=protocol, buffer_callback=buffer_callback).dump(obj)
          # Read the bytes back before the buffer is closed.
          payload = stream.getvalue()
      return payload
  # Expose the standard unpickling functions in this namespace for convenience.
  load = pickle.load
  loads = pickle.loads

  # Backward compat alias.
  CloudPickler = Pickler