cloudpickle.py 57 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552
  1. """Pickler class to extend the standard pickle.Pickler functionality
  2. The main objective is to make it natural to perform distributed computing on
  3. clusters (such as PySpark, Dask, Ray...) with interactively defined code
  4. (functions, classes, ...) written in notebooks or console.
  5. In particular this pickler adds the following features:
  6. - serialize interactively-defined or locally-defined functions, classes,
  7. enums, typevars, lambdas and nested functions to compiled byte code;
  8. - deal with some other non-serializable objects in an ad-hoc manner where
  9. applicable.
  10. This pickler is therefore meant to be used for the communication between short
  11. lived Python processes running the same version of Python and libraries. In
  12. particular, it is not meant to be used for long term storage of Python objects.
  13. It does not include an unpickler, as standard Python unpickling suffices.
  14. This module was extracted from the `cloud` package, developed by `PiCloud, Inc.
  15. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
  16. Copyright (c) 2012-now, CloudPickle developers and contributors.
  17. Copyright (c) 2012, Regents of the University of California.
  18. Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
  19. All rights reserved.
  20. Redistribution and use in source and binary forms, with or without
  21. modification, are permitted provided that the following conditions
  22. are met:
  23. * Redistributions of source code must retain the above copyright
  24. notice, this list of conditions and the following disclaimer.
  25. * Redistributions in binary form must reproduce the above copyright
  26. notice, this list of conditions and the following disclaimer in the
  27. documentation and/or other materials provided with the distribution.
  28. * Neither the name of the University of California, Berkeley nor the
  29. names of its contributors may be used to endorse or promote
  30. products derived from this software without specific prior written
  31. permission.
  32. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  33. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  34. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  35. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  36. HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  37. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  38. TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  39. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  40. LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  41. NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  42. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  43. """
  44. import _collections_abc
  45. from collections import ChainMap, OrderedDict
  46. import abc
  47. import builtins
  48. import copyreg
  49. import dataclasses
  50. import dis
  51. from enum import Enum
  52. import io
  53. import itertools
  54. import logging
  55. import opcode
  56. import pickle
  57. from pickle import _getattribute as _pickle_getattribute
  58. import platform
  59. import struct
  60. import sys
  61. import threading
  62. import types
  63. import typing
  64. import uuid
  65. import warnings
  66. import weakref
  67. # The following import is required to be imported in the cloudpickle
  68. # namespace to be able to load pickle files generated with older versions of
  69. # cloudpickle. See: tests/test_backward_compat.py
  70. from types import CellType # noqa: F401
  71. # cloudpickle is meant for inter process communication: we expect all
  72. # communicating processes to run the same Python version hence we favor
  73. # communication speed over compatibility:
  74. DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL
  75. # Names of modules whose resources should be treated as dynamic.
  76. _PICKLE_BY_VALUE_MODULES = set()
  77. # Track the provenance of reconstructed dynamic classes to make it possible to
  78. # reconstruct instances from the matching singleton class definition when
  79. # appropriate and preserve the usual "isinstance" semantics of Python objects.
  80. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
  81. _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
  82. _DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()
  83. PYPY = platform.python_implementation() == "PyPy"
  84. builtin_code_type = None
  85. if PYPY:
  86. # builtin-code objects only exist in pypy
  87. builtin_code_type = type(float.__new__.__code__)
  88. _extract_code_globals_cache = weakref.WeakKeyDictionary()
  89. def _get_or_create_tracker_id(class_def):
  90. with _DYNAMIC_CLASS_TRACKER_LOCK:
  91. class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)
  92. if class_tracker_id is None:
  93. class_tracker_id = uuid.uuid4().hex
  94. _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
  95. _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def
  96. return class_tracker_id
  97. def _lookup_class_or_track(class_tracker_id, class_def):
  98. if class_tracker_id is not None:
  99. with _DYNAMIC_CLASS_TRACKER_LOCK:
  100. class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault(
  101. class_tracker_id, class_def
  102. )
  103. _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
  104. return class_def
  105. def register_pickle_by_value(module):
  106. """Register a module to make its functions and classes picklable by value.
  107. By default, functions and classes that are attributes of an importable
  108. module are to be pickled by reference, that is relying on re-importing
  109. the attribute from the module at load time.
  110. If `register_pickle_by_value(module)` is called, all its functions and
  111. classes are subsequently to be pickled by value, meaning that they can
  112. be loaded in Python processes where the module is not importable.
  113. This is especially useful when developing a module in a distributed
  114. execution environment: restarting the client Python process with the new
  115. source code is enough: there is no need to re-install the new version
  116. of the module on all the worker nodes nor to restart the workers.
  117. Note: this feature is considered experimental. See the cloudpickle
  118. README.md file for more details and limitations.
  119. """
  120. if not isinstance(module, types.ModuleType):
  121. raise ValueError(f"Input should be a module object, got {str(module)} instead")
  122. # In the future, cloudpickle may need a way to access any module registered
  123. # for pickling by value in order to introspect relative imports inside
  124. # functions pickled by value. (see
  125. # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633).
  126. # This access can be ensured by checking that module is present in
  127. # sys.modules at registering time and assuming that it will still be in
  128. # there when accessed during pickling. Another alternative would be to
  129. # store a weakref to the module. Even though cloudpickle does not implement
  130. # this introspection yet, in order to avoid a possible breaking change
  131. # later, we still enforce the presence of module inside sys.modules.
  132. if module.__name__ not in sys.modules:
  133. raise ValueError(
  134. f"{module} was not imported correctly, have you used an "
  135. "`import` statement to access it?"
  136. )
  137. _PICKLE_BY_VALUE_MODULES.add(module.__name__)
  138. def unregister_pickle_by_value(module):
  139. """Unregister that the input module should be pickled by value."""
  140. if not isinstance(module, types.ModuleType):
  141. raise ValueError(f"Input should be a module object, got {str(module)} instead")
  142. if module.__name__ not in _PICKLE_BY_VALUE_MODULES:
  143. raise ValueError(f"{module} is not registered for pickle by value")
  144. else:
  145. _PICKLE_BY_VALUE_MODULES.remove(module.__name__)
  146. def list_registry_pickle_by_value():
  147. return _PICKLE_BY_VALUE_MODULES.copy()
  148. def _is_registered_pickle_by_value(module):
  149. module_name = module.__name__
  150. if module_name in _PICKLE_BY_VALUE_MODULES:
  151. return True
  152. while True:
  153. parent_name = module_name.rsplit(".", 1)[0]
  154. if parent_name == module_name:
  155. break
  156. if parent_name in _PICKLE_BY_VALUE_MODULES:
  157. return True
  158. module_name = parent_name
  159. return False
  160. if sys.version_info >= (3, 14):
  161. def _getattribute(obj, name):
  162. return _pickle_getattribute(obj, name.split('.'))
  163. else:
  164. def _getattribute(obj, name):
  165. return _pickle_getattribute(obj, name)[0]
  166. def _whichmodule(obj, name):
  167. """Find the module an object belongs to.
  168. This function differs from ``pickle.whichmodule`` in two ways:
  169. - it does not mangle the cases where obj's module is __main__ and obj was
  170. not found in any module.
  171. - Errors arising during module introspection are ignored, as those errors
  172. are considered unwanted side effects.
  173. """
  174. module_name = getattr(obj, "__module__", None)
  175. if module_name is not None:
  176. return module_name
  177. # Protect the iteration by using a copy of sys.modules against dynamic
  178. # modules that trigger imports of other modules upon calls to getattr or
  179. # other threads importing at the same time.
  180. for module_name, module in sys.modules.copy().items():
  181. # Some modules such as coverage can inject non-module objects inside
  182. # sys.modules
  183. if (
  184. module_name == "__main__"
  185. or module_name == "__mp_main__"
  186. or module is None
  187. or not isinstance(module, types.ModuleType)
  188. ):
  189. continue
  190. try:
  191. if _getattribute(module, name) is obj:
  192. return module_name
  193. except Exception:
  194. pass
  195. return None
  196. def _should_pickle_by_reference(obj, name=None):
  197. """Test whether an function or a class should be pickled by reference
  198. Pickling by reference means by that the object (typically a function or a
  199. class) is an attribute of a module that is assumed to be importable in the
  200. target Python environment. Loading will therefore rely on importing the
  201. module and then calling `getattr` on it to access the function or class.
  202. Pickling by reference is the only option to pickle functions and classes
  203. in the standard library. In cloudpickle the alternative option is to
  204. pickle by value (for instance for interactively or locally defined
  205. functions and classes or for attributes of modules that have been
  206. explicitly registered to be pickled by value.
  207. """
  208. if isinstance(obj, types.FunctionType) or issubclass(type(obj), type):
  209. module_and_name = _lookup_module_and_qualname(obj, name=name)
  210. if module_and_name is None:
  211. return False
  212. module, name = module_and_name
  213. return not _is_registered_pickle_by_value(module)
  214. elif isinstance(obj, types.ModuleType):
  215. # We assume that sys.modules is primarily used as a cache mechanism for
  216. # the Python import machinery. Checking if a module has been added in
  217. # is sys.modules therefore a cheap and simple heuristic to tell us
  218. # whether we can assume that a given module could be imported by name
  219. # in another Python process.
  220. if _is_registered_pickle_by_value(obj):
  221. return False
  222. return obj.__name__ in sys.modules
  223. else:
  224. raise TypeError(
  225. "cannot check importability of {} instances".format(type(obj).__name__)
  226. )
  227. def _lookup_module_and_qualname(obj, name=None):
  228. if name is None:
  229. name = getattr(obj, "__qualname__", None)
  230. if name is None: # pragma: no cover
  231. # This used to be needed for Python 2.7 support but is probably not
  232. # needed anymore. However we keep the __name__ introspection in case
  233. # users of cloudpickle rely on this old behavior for unknown reasons.
  234. name = getattr(obj, "__name__", None)
  235. module_name = _whichmodule(obj, name)
  236. if module_name is None:
  237. # In this case, obj.__module__ is None AND obj was not found in any
  238. # imported module. obj is thus treated as dynamic.
  239. return None
  240. if module_name == "__main__":
  241. return None
  242. # Note: if module_name is in sys.modules, the corresponding module is
  243. # assumed importable at unpickling time. See #357
  244. module = sys.modules.get(module_name, None)
  245. if module is None:
  246. # The main reason why obj's module would not be imported is that this
  247. # module has been dynamically created, using for example
  248. # types.ModuleType. The other possibility is that module was removed
  249. # from sys.modules after obj was created/imported. But this case is not
  250. # supported, as the standard pickle does not support it either.
  251. return None
  252. try:
  253. obj2 = _getattribute(module, name)
  254. except AttributeError:
  255. # obj was not found inside the module it points to
  256. return None
  257. if obj2 is not obj:
  258. return None
  259. return module, name
  260. def _extract_code_globals(co):
  261. """Find all globals names read or written to by codeblock co."""
  262. out_names = _extract_code_globals_cache.get(co)
  263. if out_names is None:
  264. # We use a dict with None values instead of a set to get a
  265. # deterministic order and avoid introducing non-deterministic pickle
  266. # bytes as a results.
  267. out_names = {name: None for name in _walk_global_ops(co)}
  268. # Declaring a function inside another one using the "def ..." syntax
  269. # generates a constant code object corresponding to the one of the
  270. # nested function's As the nested function may itself need global
  271. # variables, we need to introspect its code, extract its globals, (look
  272. # for code object in it's co_consts attribute..) and add the result to
  273. # code_globals
  274. if co.co_consts:
  275. for const in co.co_consts:
  276. if isinstance(const, types.CodeType):
  277. out_names.update(_extract_code_globals(const))
  278. _extract_code_globals_cache[co] = out_names
  279. return out_names
  280. def _find_imported_submodules(code, top_level_dependencies):
  281. """Find currently imported submodules used by a function.
  282. Submodules used by a function need to be detected and referenced for the
  283. function to work correctly at depickling time. Because submodules can be
  284. referenced as attribute of their parent package (``package.submodule``), we
  285. need a special introspection technique that does not rely on GLOBAL-related
  286. opcodes to find references of them in a code object.
  287. Example:
  288. ```
  289. import concurrent.futures
  290. import cloudpickle
  291. def func():
  292. x = concurrent.futures.ThreadPoolExecutor
  293. if __name__ == '__main__':
  294. cloudpickle.dumps(func)
  295. ```
  296. The globals extracted by cloudpickle in the function's state include the
  297. concurrent package, but not its submodule (here, concurrent.futures), which
  298. is the module used by func. Find_imported_submodules will detect the usage
  299. of concurrent.futures. Saving this module alongside with func will ensure
  300. that calling func once depickled does not fail due to concurrent.futures
  301. not being imported
  302. """
  303. subimports = []
  304. # check if any known dependency is an imported package
  305. for x in top_level_dependencies:
  306. if (
  307. isinstance(x, types.ModuleType)
  308. and hasattr(x, "__package__")
  309. and x.__package__
  310. ):
  311. # check if the package has any currently loaded sub-imports
  312. prefix = x.__name__ + "."
  313. # A concurrent thread could mutate sys.modules,
  314. # make sure we iterate over a copy to avoid exceptions
  315. for name in list(sys.modules):
  316. # Older versions of pytest will add a "None" module to
  317. # sys.modules.
  318. if name is not None and name.startswith(prefix):
  319. # check whether the function can address the sub-module
  320. tokens = set(name[len(prefix) :].split("."))
  321. if not tokens - set(code.co_names):
  322. subimports.append(sys.modules[name])
  323. return subimports
  324. # relevant opcodes
  325. STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"]
  326. DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"]
  327. LOAD_GLOBAL = opcode.opmap["LOAD_GLOBAL"]
  328. GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL)
  329. HAVE_ARGUMENT = dis.HAVE_ARGUMENT
  330. EXTENDED_ARG = dis.EXTENDED_ARG
  331. _BUILTIN_TYPE_NAMES = {}
  332. for k, v in types.__dict__.items():
  333. if type(v) is type:
  334. _BUILTIN_TYPE_NAMES[v] = k
  335. def _builtin_type(name):
  336. if name == "ClassType": # pragma: no cover
  337. # Backward compat to load pickle files generated with cloudpickle
  338. # < 1.3 even if loading pickle files from older versions is not
  339. # officially supported.
  340. return type
  341. return getattr(types, name)
  342. def _walk_global_ops(code):
  343. """Yield referenced name for global-referencing instructions in code."""
  344. for instr in dis.get_instructions(code):
  345. op = instr.opcode
  346. if op in GLOBAL_OPS:
  347. yield instr.argval
  348. def _extract_class_dict(cls):
  349. """Retrieve a copy of the dict of a class without the inherited method."""
  350. # Hack to circumvent non-predictable memoization caused by string interning.
  351. # See the inline comment in _class_setstate for details.
  352. clsdict = {"".join(k): cls.__dict__[k] for k in sorted(cls.__dict__)}
  353. if len(cls.__bases__) == 1:
  354. inherited_dict = cls.__bases__[0].__dict__
  355. else:
  356. inherited_dict = {}
  357. for base in reversed(cls.__bases__):
  358. inherited_dict.update(base.__dict__)
  359. to_remove = []
  360. for name, value in clsdict.items():
  361. try:
  362. base_value = inherited_dict[name]
  363. if value is base_value:
  364. to_remove.append(name)
  365. except KeyError:
  366. pass
  367. for name in to_remove:
  368. clsdict.pop(name)
  369. return clsdict
  370. def is_tornado_coroutine(func):
  371. """Return whether `func` is a Tornado coroutine function.
  372. Running coroutines are not supported.
  373. """
  374. warnings.warn(
  375. "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be "
  376. "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function "
  377. "directly instead.",
  378. category=DeprecationWarning,
  379. )
  380. if "tornado.gen" not in sys.modules:
  381. return False
  382. gen = sys.modules["tornado.gen"]
  383. if not hasattr(gen, "is_coroutine_function"):
  384. # Tornado version is too old
  385. return False
  386. return gen.is_coroutine_function(func)
  387. def subimport(name):
  388. # We cannot do simply: `return __import__(name)`: Indeed, if ``name`` is
  389. # the name of a submodule, __import__ will return the top-level root module
  390. # of this submodule. For instance, __import__('os.path') returns the `os`
  391. # module.
  392. __import__(name)
  393. return sys.modules[name]
  394. def dynamic_subimport(name, vars):
  395. mod = types.ModuleType(name)
  396. mod.__dict__.update(vars)
  397. mod.__dict__["__builtins__"] = builtins.__dict__
  398. return mod
  399. def _get_cell_contents(cell):
  400. try:
  401. return cell.cell_contents
  402. except ValueError:
  403. # Handle empty cells explicitly with a sentinel value.
  404. return _empty_cell_value
  405. def instance(cls):
  406. """Create a new instance of a class.
  407. Parameters
  408. ----------
  409. cls : type
  410. The class to create an instance of.
  411. Returns
  412. -------
  413. instance : cls
  414. A new instance of ``cls``.
  415. """
  416. return cls()
  417. @instance
  418. class _empty_cell_value:
  419. """Sentinel for empty closures."""
  420. @classmethod
  421. def __reduce__(cls):
  422. return cls.__name__
  423. def _make_function(code, globals, name, argdefs, closure):
  424. # Setting __builtins__ in globals is needed for nogil CPython.
  425. globals["__builtins__"] = __builtins__
  426. return types.FunctionType(code, globals, name, argdefs, closure)
  427. def _make_empty_cell():
  428. if False:
  429. # trick the compiler into creating an empty cell in our lambda
  430. cell = None
  431. raise AssertionError("this route should not be executed")
  432. return (lambda: cell).__closure__[0]
  433. def _make_cell(value=_empty_cell_value):
  434. cell = _make_empty_cell()
  435. if value is not _empty_cell_value:
  436. cell.cell_contents = value
  437. return cell
  438. def _make_skeleton_class(
  439. type_constructor, name, bases, type_kwargs, class_tracker_id, extra
  440. ):
  441. """Build dynamic class with an empty __dict__ to be filled once memoized
  442. If class_tracker_id is not None, try to lookup an existing class definition
  443. matching that id. If none is found, track a newly reconstructed class
  444. definition under that id so that other instances stemming from the same
  445. class id will also reuse this class definition.
  446. The "extra" variable is meant to be a dict (or None) that can be used for
  447. forward compatibility shall the need arise.
  448. """
  449. # We need to intern the keys of the type_kwargs dict to avoid having
  450. # different pickles for the same dynamic class depending on whether it was
  451. # dynamically created or reconstructed from a pickled stream.
  452. type_kwargs = {sys.intern(k): v for k, v in type_kwargs.items()}
  453. skeleton_class = types.new_class(
  454. name, bases, {"metaclass": type_constructor}, lambda ns: ns.update(type_kwargs)
  455. )
  456. return _lookup_class_or_track(class_tracker_id, skeleton_class)
  457. def _make_skeleton_enum(
  458. bases, name, qualname, members, module, class_tracker_id, extra
  459. ):
  460. """Build dynamic enum with an empty __dict__ to be filled once memoized
  461. The creation of the enum class is inspired by the code of
  462. EnumMeta._create_.
  463. If class_tracker_id is not None, try to lookup an existing enum definition
  464. matching that id. If none is found, track a newly reconstructed enum
  465. definition under that id so that other instances stemming from the same
  466. class id will also reuse this enum definition.
  467. The "extra" variable is meant to be a dict (or None) that can be used for
  468. forward compatibility shall the need arise.
  469. """
  470. # enums always inherit from their base Enum class at the last position in
  471. # the list of base classes:
  472. enum_base = bases[-1]
  473. metacls = enum_base.__class__
  474. classdict = metacls.__prepare__(name, bases)
  475. for member_name, member_value in members.items():
  476. classdict[member_name] = member_value
  477. enum_class = metacls.__new__(metacls, name, bases, classdict)
  478. enum_class.__module__ = module
  479. enum_class.__qualname__ = qualname
  480. return _lookup_class_or_track(class_tracker_id, enum_class)
  481. def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id):
  482. tv = typing.TypeVar(
  483. name,
  484. *constraints,
  485. bound=bound,
  486. covariant=covariant,
  487. contravariant=contravariant,
  488. )
  489. return _lookup_class_or_track(class_tracker_id, tv)
  490. def _decompose_typevar(obj):
  491. return (
  492. obj.__name__,
  493. obj.__bound__,
  494. obj.__constraints__,
  495. obj.__covariant__,
  496. obj.__contravariant__,
  497. _get_or_create_tracker_id(obj),
  498. )
  499. def _typevar_reduce(obj):
  500. # TypeVar instances require the module information hence why we
  501. # are not using the _should_pickle_by_reference directly
  502. module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__)
  503. if module_and_name is None:
  504. return (_make_typevar, _decompose_typevar(obj))
  505. elif _is_registered_pickle_by_value(module_and_name[0]):
  506. return (_make_typevar, _decompose_typevar(obj))
  507. return (getattr, module_and_name)
  508. def _get_bases(typ):
  509. if "__orig_bases__" in getattr(typ, "__dict__", {}):
  510. # For generic types (see PEP 560)
  511. # Note that simply checking `hasattr(typ, '__orig_bases__')` is not
  512. # correct. Subclasses of a fully-parameterized generic class does not
  513. # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')`
  514. # will return True because it's defined in the base class.
  515. bases_attr = "__orig_bases__"
  516. else:
  517. # For regular class objects
  518. bases_attr = "__bases__"
  519. return getattr(typ, bases_attr)
  520. def _make_dict_keys(obj, is_ordered=False):
  521. if is_ordered:
  522. return OrderedDict.fromkeys(obj).keys()
  523. else:
  524. return dict.fromkeys(obj).keys()
  525. def _make_dict_values(obj, is_ordered=False):
  526. if is_ordered:
  527. return OrderedDict((i, _) for i, _ in enumerate(obj)).values()
  528. else:
  529. return {i: _ for i, _ in enumerate(obj)}.values()
  530. def _make_dict_items(obj, is_ordered=False):
  531. if is_ordered:
  532. return OrderedDict(obj).items()
  533. else:
  534. return obj.items()
  535. # COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS
  536. # -------------------------------------------------
  537. def _class_getnewargs(obj):
  538. type_kwargs = {}
  539. if "__module__" in obj.__dict__:
  540. type_kwargs["__module__"] = obj.__module__
  541. __dict__ = obj.__dict__.get("__dict__", None)
  542. if isinstance(__dict__, property):
  543. type_kwargs["__dict__"] = __dict__
  544. return (
  545. type(obj),
  546. obj.__name__,
  547. _get_bases(obj),
  548. type_kwargs,
  549. _get_or_create_tracker_id(obj),
  550. None,
  551. )
  552. def _enum_getnewargs(obj):
  553. members = {e.name: e.value for e in obj}
  554. return (
  555. obj.__bases__,
  556. obj.__name__,
  557. obj.__qualname__,
  558. members,
  559. obj.__module__,
  560. _get_or_create_tracker_id(obj),
  561. None,
  562. )
  563. # COLLECTION OF OBJECTS RECONSTRUCTORS
  564. # ------------------------------------
  565. def _file_reconstructor(retval):
  566. return retval
  567. # COLLECTION OF OBJECTS STATE GETTERS
  568. # -----------------------------------
  569. def _function_getstate(func):
  570. # - Put func's dynamic attributes (stored in func.__dict__) in state. These
  571. # attributes will be restored at unpickling time using
  572. # f.__dict__.update(state)
  573. # - Put func's members into slotstate. Such attributes will be restored at
  574. # unpickling time by iterating over slotstate and calling setattr(func,
  575. # slotname, slotvalue)
  576. slotstate = {
  577. # Hack to circumvent non-predictable memoization caused by string interning.
  578. # See the inline comment in _class_setstate for details.
  579. "__name__": "".join(func.__name__),
  580. "__qualname__": "".join(func.__qualname__),
  581. "__annotations__": func.__annotations__,
  582. "__kwdefaults__": func.__kwdefaults__,
  583. "__defaults__": func.__defaults__,
  584. "__module__": func.__module__,
  585. "__doc__": func.__doc__,
  586. "__closure__": func.__closure__,
  587. }
  588. f_globals_ref = _extract_code_globals(func.__code__)
  589. f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in func.__globals__}
  590. if func.__closure__ is not None:
  591. closure_values = list(map(_get_cell_contents, func.__closure__))
  592. else:
  593. closure_values = ()
  594. # Extract currently-imported submodules used by func. Storing these modules
  595. # in a smoke _cloudpickle_subimports attribute of the object's state will
  596. # trigger the side effect of importing these modules at unpickling time
  597. # (which is necessary for func to work correctly once depickled)
  598. slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
  599. func.__code__, itertools.chain(f_globals.values(), closure_values)
  600. )
  601. slotstate["__globals__"] = f_globals
  602. # Hack to circumvent non-predictable memoization caused by string interning.
  603. # See the inline comment in _class_setstate for details.
  604. state = {"".join(k): v for k, v in func.__dict__.items()}
  605. return state, slotstate
  606. def _class_getstate(obj):
  607. clsdict = _extract_class_dict(obj)
  608. clsdict.pop("__weakref__", None)
  609. if issubclass(type(obj), abc.ABCMeta):
  610. # If obj is an instance of an ABCMeta subclass, don't pickle the
  611. # cache/negative caches populated during isinstance/issubclass
  612. # checks, but pickle the list of registered subclasses of obj.
  613. clsdict.pop("_abc_cache", None)
  614. clsdict.pop("_abc_negative_cache", None)
  615. clsdict.pop("_abc_negative_cache_version", None)
  616. registry = clsdict.pop("_abc_registry", None)
  617. if registry is None:
  618. # The abc caches and registered subclasses of a
  619. # class are bundled into the single _abc_impl attribute
  620. clsdict.pop("_abc_impl", None)
  621. (registry, _, _, _) = abc._get_dump(obj)
  622. clsdict["_abc_impl"] = [subclass_weakref() for subclass_weakref in registry]
  623. else:
  624. # In the above if clause, registry is a set of weakrefs -- in
  625. # this case, registry is a WeakSet
  626. clsdict["_abc_impl"] = [type_ for type_ in registry]
  627. if "__slots__" in clsdict:
  628. # pickle string length optimization: member descriptors of obj are
  629. # created automatically from obj's __slots__ attribute, no need to
  630. # save them in obj's state
  631. if isinstance(obj.__slots__, str):
  632. clsdict.pop(obj.__slots__)
  633. else:
  634. for k in obj.__slots__:
  635. clsdict.pop(k, None)
  636. clsdict.pop("__dict__", None) # unpicklable property object
  637. if sys.version_info >= (3, 14):
  638. # PEP-649/749: __annotate_func__ contains a closure that references the class
  639. # dict. We need to exclude it from pickling. Python will recreate it when
  640. # __annotations__ is accessed at unpickling time.
  641. clsdict.pop("__annotate_func__", None)
  642. return (clsdict, {})
  643. def _enum_getstate(obj):
  644. clsdict, slotstate = _class_getstate(obj)
  645. members = {e.name: e.value for e in obj}
  646. # Cleanup the clsdict that will be passed to _make_skeleton_enum:
  647. # Those attributes are already handled by the metaclass.
  648. for attrname in [
  649. "_generate_next_value_",
  650. "_member_names_",
  651. "_member_map_",
  652. "_member_type_",
  653. "_value2member_map_",
  654. ]:
  655. clsdict.pop(attrname, None)
  656. for member in members:
  657. clsdict.pop(member)
  658. # Special handling of Enum subclasses
  659. return clsdict, slotstate
  660. # COLLECTIONS OF OBJECTS REDUCERS
  661. # -------------------------------
  662. # A reducer is a function taking a single argument (obj), and that returns a
  663. # tuple with all the necessary data to re-construct obj. Apart from a few
  664. # exceptions (list, dict, bytes, int, etc.), a reducer is necessary to
  665. # correctly pickle an object.
  666. # While many built-in objects (Exceptions objects, instances of the "object"
  667. # class, etc), are shipped with their own built-in reducer (invoked using
  668. # obj.__reduce__), some do not. The following methods were created to "fill
  669. # these holes".
  670. def _code_reduce(obj):
  671. """code object reducer."""
  672. # If you are not sure about the order of arguments, take a look at help
  673. # of the specific type from types, for example:
  674. # >>> from types import CodeType
  675. # >>> help(CodeType)
  676. # Hack to circumvent non-predictable memoization caused by string interning.
  677. # See the inline comment in _class_setstate for details.
  678. co_name = "".join(obj.co_name)
  679. # Create shallow copies of these tuple to make cloudpickle payload deterministic.
  680. # When creating a code object during load, copies of these four tuples are
  681. # created, while in the main process, these tuples can be shared.
  682. # By always creating copies, we make sure the resulting payload is deterministic.
  683. co_names = tuple(name for name in obj.co_names)
  684. co_varnames = tuple(name for name in obj.co_varnames)
  685. co_freevars = tuple(name for name in obj.co_freevars)
  686. co_cellvars = tuple(name for name in obj.co_cellvars)
  687. if hasattr(obj, "co_exceptiontable"):
  688. # Python 3.11 and later: there are some new attributes
  689. # related to the enhanced exceptions.
  690. args = (
  691. obj.co_argcount,
  692. obj.co_posonlyargcount,
  693. obj.co_kwonlyargcount,
  694. obj.co_nlocals,
  695. obj.co_stacksize,
  696. obj.co_flags,
  697. obj.co_code,
  698. obj.co_consts,
  699. co_names,
  700. co_varnames,
  701. obj.co_filename,
  702. co_name,
  703. obj.co_qualname,
  704. obj.co_firstlineno,
  705. obj.co_linetable,
  706. obj.co_exceptiontable,
  707. co_freevars,
  708. co_cellvars,
  709. )
  710. elif hasattr(obj, "co_linetable"):
  711. # Python 3.10 and later: obj.co_lnotab is deprecated and constructor
  712. # expects obj.co_linetable instead.
  713. args = (
  714. obj.co_argcount,
  715. obj.co_posonlyargcount,
  716. obj.co_kwonlyargcount,
  717. obj.co_nlocals,
  718. obj.co_stacksize,
  719. obj.co_flags,
  720. obj.co_code,
  721. obj.co_consts,
  722. co_names,
  723. co_varnames,
  724. obj.co_filename,
  725. co_name,
  726. obj.co_firstlineno,
  727. obj.co_linetable,
  728. co_freevars,
  729. co_cellvars,
  730. )
  731. elif hasattr(obj, "co_nmeta"): # pragma: no cover
  732. # "nogil" Python: modified attributes from 3.9
  733. args = (
  734. obj.co_argcount,
  735. obj.co_posonlyargcount,
  736. obj.co_kwonlyargcount,
  737. obj.co_nlocals,
  738. obj.co_framesize,
  739. obj.co_ndefaultargs,
  740. obj.co_nmeta,
  741. obj.co_flags,
  742. obj.co_code,
  743. obj.co_consts,
  744. co_varnames,
  745. obj.co_filename,
  746. co_name,
  747. obj.co_firstlineno,
  748. obj.co_lnotab,
  749. obj.co_exc_handlers,
  750. obj.co_jump_table,
  751. co_freevars,
  752. co_cellvars,
  753. obj.co_free2reg,
  754. obj.co_cell2reg,
  755. )
  756. else:
  757. # Backward compat for 3.8 and 3.9
  758. args = (
  759. obj.co_argcount,
  760. obj.co_posonlyargcount,
  761. obj.co_kwonlyargcount,
  762. obj.co_nlocals,
  763. obj.co_stacksize,
  764. obj.co_flags,
  765. obj.co_code,
  766. obj.co_consts,
  767. co_names,
  768. co_varnames,
  769. obj.co_filename,
  770. co_name,
  771. obj.co_firstlineno,
  772. obj.co_lnotab,
  773. co_freevars,
  774. co_cellvars,
  775. )
  776. return types.CodeType, args
  777. def _cell_reduce(obj):
  778. """Cell (containing values of a function's free variables) reducer."""
  779. try:
  780. obj.cell_contents
  781. except ValueError: # cell is empty
  782. return _make_empty_cell, ()
  783. else:
  784. return _make_cell, (obj.cell_contents,)
  785. def _classmethod_reduce(obj):
  786. orig_func = obj.__func__
  787. return type(obj), (orig_func,)
  788. def _file_reduce(obj):
  789. """Save a file."""
  790. import io
  791. if not hasattr(obj, "name") or not hasattr(obj, "mode"):
  792. raise pickle.PicklingError(
  793. "Cannot pickle files that do not map to an actual file"
  794. )
  795. if obj is sys.stdout:
  796. return getattr, (sys, "stdout")
  797. if obj is sys.stderr:
  798. return getattr, (sys, "stderr")
  799. if obj is sys.stdin:
  800. raise pickle.PicklingError("Cannot pickle standard input")
  801. if obj.closed:
  802. raise pickle.PicklingError("Cannot pickle closed files")
  803. if hasattr(obj, "isatty") and obj.isatty():
  804. raise pickle.PicklingError("Cannot pickle files that map to tty objects")
  805. if "r" not in obj.mode and "+" not in obj.mode:
  806. raise pickle.PicklingError(
  807. "Cannot pickle files that are not opened for reading: %s" % obj.mode
  808. )
  809. name = obj.name
  810. retval = io.StringIO()
  811. try:
  812. # Read the whole file
  813. curloc = obj.tell()
  814. obj.seek(0)
  815. contents = obj.read()
  816. obj.seek(curloc)
  817. except OSError as e:
  818. raise pickle.PicklingError(
  819. "Cannot pickle file %s as it cannot be read" % name
  820. ) from e
  821. retval.write(contents)
  822. retval.seek(curloc)
  823. retval.name = name
  824. return _file_reconstructor, (retval,)
  825. def _getset_descriptor_reduce(obj):
  826. return getattr, (obj.__objclass__, obj.__name__)
  827. def _mappingproxy_reduce(obj):
  828. return types.MappingProxyType, (dict(obj),)
  829. def _memoryview_reduce(obj):
  830. return bytes, (obj.tobytes(),)
  831. def _module_reduce(obj):
  832. if _should_pickle_by_reference(obj):
  833. return subimport, (obj.__name__,)
  834. else:
  835. # Some external libraries can populate the "__builtins__" entry of a
  836. # module's `__dict__` with unpicklable objects (see #316). For that
  837. # reason, we do not attempt to pickle the "__builtins__" entry, and
  838. # restore a default value for it at unpickling time.
  839. state = obj.__dict__.copy()
  840. state.pop("__builtins__", None)
  841. return dynamic_subimport, (obj.__name__, state)
  842. def _method_reduce(obj):
  843. return (types.MethodType, (obj.__func__, obj.__self__))
  844. def _logger_reduce(obj):
  845. return logging.getLogger, (obj.name,)
  846. def _root_logger_reduce(obj):
  847. return logging.getLogger, ()
  848. def _property_reduce(obj):
  849. return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__)
  850. def _weakset_reduce(obj):
  851. return weakref.WeakSet, (list(obj),)
  852. def _dynamic_class_reduce(obj):
  853. """Save a class that can't be referenced as a module attribute.
  854. This method is used to serialize classes that are defined inside
  855. functions, or that otherwise can't be serialized as attribute lookups
  856. from importable modules.
  857. """
  858. if Enum is not None and issubclass(obj, Enum):
  859. return (
  860. _make_skeleton_enum,
  861. _enum_getnewargs(obj),
  862. _enum_getstate(obj),
  863. None,
  864. None,
  865. _class_setstate,
  866. )
  867. else:
  868. return (
  869. _make_skeleton_class,
  870. _class_getnewargs(obj),
  871. _class_getstate(obj),
  872. None,
  873. None,
  874. _class_setstate,
  875. )
  876. def _class_reduce(obj):
  877. """Select the reducer depending on the dynamic nature of the class obj."""
  878. if obj is type(None): # noqa
  879. return type, (None,)
  880. elif obj is type(Ellipsis):
  881. return type, (Ellipsis,)
  882. elif obj is type(NotImplemented):
  883. return type, (NotImplemented,)
  884. elif obj in _BUILTIN_TYPE_NAMES:
  885. return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
  886. elif not _should_pickle_by_reference(obj):
  887. return _dynamic_class_reduce(obj)
  888. return NotImplemented
  889. def _dict_keys_reduce(obj):
  890. # Safer not to ship the full dict as sending the rest might
  891. # be unintended and could potentially cause leaking of
  892. # sensitive information
  893. return _make_dict_keys, (list(obj),)
  894. def _dict_values_reduce(obj):
  895. # Safer not to ship the full dict as sending the rest might
  896. # be unintended and could potentially cause leaking of
  897. # sensitive information
  898. return _make_dict_values, (list(obj),)
  899. def _dict_items_reduce(obj):
  900. return _make_dict_items, (dict(obj),)
  901. def _odict_keys_reduce(obj):
  902. # Safer not to ship the full dict as sending the rest might
  903. # be unintended and could potentially cause leaking of
  904. # sensitive information
  905. return _make_dict_keys, (list(obj), True)
  906. def _odict_values_reduce(obj):
  907. # Safer not to ship the full dict as sending the rest might
  908. # be unintended and could potentially cause leaking of
  909. # sensitive information
  910. return _make_dict_values, (list(obj), True)
  911. def _odict_items_reduce(obj):
  912. return _make_dict_items, (dict(obj), True)
  913. def _dataclass_field_base_reduce(obj):
  914. return _get_dataclass_field_type_sentinel, (obj.name,)
  915. # COLLECTIONS OF OBJECTS STATE SETTERS
  916. # ------------------------------------
# State setters are called at unpickling time, once the object has been
# created and must be updated to the state it had at pickling time.
  919. def _function_setstate(obj, state):
  920. """Update the state of a dynamic function.
  921. As __closure__ and __globals__ are readonly attributes of a function, we
  922. cannot rely on the native setstate routine of pickle.load_build, that calls
  923. setattr on items of the slotstate. Instead, we have to modify them inplace.
  924. """
  925. state, slotstate = state
  926. obj.__dict__.update(state)
  927. obj_globals = slotstate.pop("__globals__")
  928. obj_closure = slotstate.pop("__closure__")
  929. # _cloudpickle_subimports is a set of submodules that must be loaded for
  930. # the pickled function to work correctly at unpickling time. Now that these
  931. # submodules are depickled (hence imported), they can be removed from the
  932. # object's state (the object state only served as a reference holder to
  933. # these submodules)
  934. slotstate.pop("_cloudpickle_submodules")
  935. obj.__globals__.update(obj_globals)
  936. obj.__globals__["__builtins__"] = __builtins__
  937. if obj_closure is not None:
  938. for i, cell in enumerate(obj_closure):
  939. try:
  940. value = cell.cell_contents
  941. except ValueError: # cell is empty
  942. continue
  943. obj.__closure__[i].cell_contents = value
  944. for k, v in slotstate.items():
  945. setattr(obj, k, v)
  946. def _class_setstate(obj, state):
  947. state, slotstate = state
  948. registry = None
  949. for attrname, attr in state.items():
  950. if attrname == "_abc_impl":
  951. registry = attr
  952. else:
  953. # Note: setting attribute names on a class automatically triggers their
  954. # interning in CPython:
  955. # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957
  956. #
  957. # This means that to get deterministic pickling for a dynamic class that
  958. # was initially defined in a different Python process, the pickler
  959. # needs to ensure that dynamic class and function attribute names are
  960. # systematically copied into a non-interned version to avoid
  961. # unpredictable pickle payloads.
  962. #
  963. # Indeed the Pickler's memoizer relies on physical object identity to break
  964. # cycles in the reference graph of the object being serialized.
  965. setattr(obj, attrname, attr)
  966. if sys.version_info >= (3, 13) and "__firstlineno__" in state:
  967. # Set the Python 3.13+ only __firstlineno__ attribute one more time, as it
  968. # will be automatically deleted by the `setattr(obj, attrname, attr)` call
  969. # above when `attrname` is "__firstlineno__". We assume that preserving this
  970. # information might be important for some users and that it not stale in the
  971. # context of cloudpickle usage, hence legitimate to propagate. Furthermore it
  972. # is necessary to do so to keep deterministic chained pickling as tested in
  973. # test_deterministic_str_interning_for_chained_dynamic_class_pickling.
  974. obj.__firstlineno__ = state["__firstlineno__"]
  975. if registry is not None:
  976. for subclass in registry:
  977. obj.register(subclass)
  978. # PEP-649/749: During pickling, we excluded the __annotate_func__ attribute but it
  979. # will be created by Python. Subsequently, annotations will be recreated when
  980. # __annotations__ is accessed.
  981. return obj
  982. # COLLECTION OF DATACLASS UTILITIES
  983. # ---------------------------------
  984. # There are some internal sentinel values whose identity must be preserved when
  985. # unpickling dataclass fields. Each sentinel value has a unique name that we can
  986. # use to retrieve its identity at unpickling time.
  987. _DATACLASSE_FIELD_TYPE_SENTINELS = {
  988. dataclasses._FIELD.name: dataclasses._FIELD,
  989. dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR,
  990. dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR,
  991. }
  992. def _get_dataclass_field_type_sentinel(name):
  993. return _DATACLASSE_FIELD_TYPE_SENTINELS[name]
  994. class Pickler(pickle.Pickler):
  995. # set of reducers defined and used by cloudpickle (private)
  996. _dispatch_table = {}
  997. _dispatch_table[classmethod] = _classmethod_reduce
  998. _dispatch_table[io.TextIOWrapper] = _file_reduce
  999. _dispatch_table[logging.Logger] = _logger_reduce
  1000. _dispatch_table[logging.RootLogger] = _root_logger_reduce
  1001. _dispatch_table[memoryview] = _memoryview_reduce
  1002. _dispatch_table[property] = _property_reduce
  1003. _dispatch_table[staticmethod] = _classmethod_reduce
  1004. _dispatch_table[CellType] = _cell_reduce
  1005. _dispatch_table[types.CodeType] = _code_reduce
  1006. _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce
  1007. _dispatch_table[types.ModuleType] = _module_reduce
  1008. _dispatch_table[types.MethodType] = _method_reduce
  1009. _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
  1010. _dispatch_table[weakref.WeakSet] = _weakset_reduce
  1011. _dispatch_table[typing.TypeVar] = _typevar_reduce
  1012. _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
  1013. _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
  1014. _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
  1015. _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce
  1016. _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce
  1017. _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce
  1018. _dispatch_table[abc.abstractmethod] = _classmethod_reduce
  1019. _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce
  1020. _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce
  1021. _dispatch_table[abc.abstractproperty] = _property_reduce
  1022. _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce
  1023. dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)
  1024. # function reducers are defined as instance methods of cloudpickle.Pickler
  1025. # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref)
  1026. def _dynamic_function_reduce(self, func):
  1027. """Reduce a function that is not pickleable via attribute lookup."""
  1028. newargs = self._function_getnewargs(func)
  1029. state = _function_getstate(func)
  1030. return (_make_function, newargs, state, None, None, _function_setstate)
  1031. def _function_reduce(self, obj):
  1032. """Reducer for function objects.
  1033. If obj is a top-level attribute of a file-backed module, this reducer
  1034. returns NotImplemented, making the cloudpickle.Pickler fall back to
  1035. traditional pickle.Pickler routines to save obj. Otherwise, it reduces
  1036. obj using a custom cloudpickle reducer designed specifically to handle
  1037. dynamic functions.
  1038. """
  1039. if _should_pickle_by_reference(obj):
  1040. return NotImplemented
  1041. else:
  1042. return self._dynamic_function_reduce(obj)
  1043. def _function_getnewargs(self, func):
  1044. code = func.__code__
  1045. # base_globals represents the future global namespace of func at
  1046. # unpickling time. Looking it up and storing it in
  1047. # cloudpickle.Pickler.globals_ref allow functions sharing the same
  1048. # globals at pickling time to also share them once unpickled, at one
  1049. # condition: since globals_ref is an attribute of a cloudpickle.Pickler
  1050. # instance, and that a new cloudpickle.Pickler is created each time
  1051. # cloudpickle.dump or cloudpickle.dumps is called, functions also need
  1052. # to be saved within the same invocation of
  1053. # cloudpickle.dump/cloudpickle.dumps (for example:
  1054. # cloudpickle.dumps([f1, f2])). There is no such limitation when using
  1055. # cloudpickle.Pickler.dump, as long as the multiple invocations are
  1056. # bound to the same cloudpickle.Pickler instance.
  1057. base_globals = self.globals_ref.setdefault(id(func.__globals__), {})
  1058. if base_globals == {}:
  1059. # Add module attributes used to resolve relative imports
  1060. # instructions inside func.
  1061. for k in ["__package__", "__name__", "__path__", "__file__"]:
  1062. if k in func.__globals__:
  1063. base_globals[k] = func.__globals__[k]
  1064. # Do not bind the free variables before the function is created to
  1065. # avoid infinite recursion.
  1066. if func.__closure__ is None:
  1067. closure = None
  1068. else:
  1069. closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars)))
  1070. return code, base_globals, None, None, closure
  1071. def dump(self, obj):
  1072. try:
  1073. return super().dump(obj)
  1074. except RecursionError as e:
  1075. msg = "Could not pickle object as excessively deep recursion required."
  1076. raise pickle.PicklingError(msg) from e
  1077. def __init__(self, file, protocol=None, buffer_callback=None):
  1078. if protocol is None:
  1079. protocol = DEFAULT_PROTOCOL
  1080. super().__init__(file, protocol=protocol, buffer_callback=buffer_callback)
  1081. # map functions __globals__ attribute ids, to ensure that functions
  1082. # sharing the same global namespace at pickling time also share
  1083. # their global namespace at unpickling time.
  1084. self.globals_ref = {}
  1085. self.proto = int(protocol)
  1086. if not PYPY:
  1087. # pickle.Pickler is the C implementation of the CPython pickler and
  1088. # therefore we rely on reduce_override method to customize the pickler
  1089. # behavior.
  1090. # `cloudpickle.Pickler.dispatch` is only left for backward
  1091. # compatibility - note that when using protocol 5,
  1092. # `cloudpickle.Pickler.dispatch` is not an extension of
  1093. # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler`
  1094. # subclasses the C-implemented `pickle.Pickler`, which does not expose
  1095. # a `dispatch` attribute. Earlier versions of `cloudpickle.Pickler`
  1096. # used `cloudpickle.Pickler.dispatch` as a class-level attribute
  1097. # storing all reducers implemented by cloudpickle, but the attribute
  1098. # name was not a great choice given because it would collide with a
  1099. # similarly named attribute in the pure-Python `pickle._Pickler`
  1100. # implementation in the standard library.
  1101. dispatch = dispatch_table
  1102. # Implementation of the reducer_override callback, in order to
  1103. # efficiently serialize dynamic functions and classes by subclassing
  1104. # the C-implemented `pickle.Pickler`.
  1105. # TODO: decorrelate reducer_override (which is tied to CPython's
  1106. # implementation - would it make sense to backport it to pypy? - and
  1107. # pickle's protocol 5 which is implementation agnostic. Currently, the
  1108. # availability of both notions coincide on CPython's pickle, but it may
  1109. # not be the case anymore when pypy implements protocol 5.
  1110. def reducer_override(self, obj):
  1111. """Type-agnostic reducing callback for function and classes.
  1112. For performance reasons, subclasses of the C `pickle.Pickler` class
  1113. cannot register custom reducers for functions and classes in the
  1114. dispatch_table attribute. Reducers for such types must instead
  1115. implemented via the special `reducer_override` method.
  1116. Note that this method will be called for any object except a few
  1117. builtin-types (int, lists, dicts etc.), which differs from reducers
  1118. in the Pickler's dispatch_table, each of them being invoked for
  1119. objects of a specific type only.
  1120. This property comes in handy for classes: although most classes are
  1121. instances of the ``type`` metaclass, some of them can be instances
  1122. of other custom metaclasses (such as enum.EnumMeta for example). In
  1123. particular, the metaclass will likely not be known in advance, and
  1124. thus cannot be special-cased using an entry in the dispatch_table.
  1125. reducer_override, among other things, allows us to register a
  1126. reducer that will be called for any class, independently of its
  1127. type.
  1128. Notes:
  1129. * reducer_override has the priority over dispatch_table-registered
  1130. reducers.
  1131. * reducer_override can be used to fix other limitations of
  1132. cloudpickle for other types that suffered from type-specific
  1133. reducers, such as Exceptions. See
  1134. https://github.com/cloudpipe/cloudpickle/issues/248
  1135. """
  1136. t = type(obj)
  1137. try:
  1138. is_anyclass = issubclass(t, type)
  1139. except TypeError: # t is not a class (old Boost; see SF #502085)
  1140. is_anyclass = False
  1141. if is_anyclass:
  1142. return _class_reduce(obj)
  1143. elif isinstance(obj, types.FunctionType):
  1144. return self._function_reduce(obj)
  1145. else:
  1146. # fallback to save_global, including the Pickler's
  1147. # dispatch_table
  1148. return NotImplemented
  1149. else:
  1150. # When reducer_override is not available, hack the pure-Python
  1151. # Pickler's types.FunctionType and type savers. Note: the type saver
  1152. # must override Pickler.save_global, because pickle.py contains a
  1153. # hard-coded call to save_global when pickling meta-classes.
  1154. dispatch = pickle.Pickler.dispatch.copy()
  1155. def _save_reduce_pickle5(
  1156. self,
  1157. func,
  1158. args,
  1159. state=None,
  1160. listitems=None,
  1161. dictitems=None,
  1162. state_setter=None,
  1163. obj=None,
  1164. ):
  1165. save = self.save
  1166. write = self.write
  1167. self.save_reduce(
  1168. func,
  1169. args,
  1170. state=None,
  1171. listitems=listitems,
  1172. dictitems=dictitems,
  1173. obj=obj,
  1174. )
  1175. # backport of the Python 3.8 state_setter pickle operations
  1176. save(state_setter)
  1177. save(obj) # simple BINGET opcode as obj is already memoized.
  1178. save(state)
  1179. write(pickle.TUPLE2)
  1180. # Trigger a state_setter(obj, state) function call.
  1181. write(pickle.REDUCE)
  1182. # The purpose of state_setter is to carry-out an
  1183. # inplace modification of obj. We do not care about what the
  1184. # method might return, so its output is eventually removed from
  1185. # the stack.
  1186. write(pickle.POP)
  1187. def save_global(self, obj, name=None, pack=struct.pack):
  1188. """Main dispatch method.
  1189. The name of this method is somewhat misleading: all types get
  1190. dispatched here.
  1191. """
  1192. if obj is type(None): # noqa
  1193. return self.save_reduce(type, (None,), obj=obj)
  1194. elif obj is type(Ellipsis):
  1195. return self.save_reduce(type, (Ellipsis,), obj=obj)
  1196. elif obj is type(NotImplemented):
  1197. return self.save_reduce(type, (NotImplemented,), obj=obj)
  1198. elif obj in _BUILTIN_TYPE_NAMES:
  1199. return self.save_reduce(
  1200. _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj
  1201. )
  1202. if name is not None:
  1203. super().save_global(obj, name=name)
  1204. elif not _should_pickle_by_reference(obj, name=name):
  1205. self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj)
  1206. else:
  1207. super().save_global(obj, name=name)
  1208. dispatch[type] = save_global
  1209. def save_function(self, obj, name=None):
  1210. """Registered with the dispatch to handle all function types.
  1211. Determines what kind of function obj is (e.g. lambda, defined at
  1212. interactive prompt, etc) and handles the pickling appropriately.
  1213. """
  1214. if _should_pickle_by_reference(obj, name=name):
  1215. return super().save_global(obj, name=name)
  1216. elif PYPY and isinstance(obj.__code__, builtin_code_type):
  1217. return self.save_pypy_builtin_func(obj)
  1218. else:
  1219. return self._save_reduce_pickle5(
  1220. *self._dynamic_function_reduce(obj), obj=obj
  1221. )
  1222. def save_pypy_builtin_func(self, obj):
  1223. """Save pypy equivalent of builtin functions.
  1224. PyPy does not have the concept of builtin-functions. Instead,
  1225. builtin-functions are simple function instances, but with a
  1226. builtin-code attribute.
  1227. Most of the time, builtin functions should be pickled by attribute.
  1228. But PyPy has flaky support for __qualname__, so some builtin
  1229. functions such as float.__new__ will be classified as dynamic. For
  1230. this reason only, we created this special routine. Because
  1231. builtin-functions are not expected to have closure or globals,
  1232. there is no additional hack (compared the one already implemented
  1233. in pickle) to protect ourselves from reference cycles. A simple
  1234. (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note
  1235. also that PyPy improved their support for __qualname__ in v3.6, so
  1236. this routing should be removed when cloudpickle supports only PyPy
  1237. 3.6 and later.
  1238. """
  1239. rv = (
  1240. types.FunctionType,
  1241. (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__),
  1242. obj.__dict__,
  1243. )
  1244. self.save_reduce(*rv, obj=obj)
  1245. dispatch[types.FunctionType] = save_function
  1246. # Shorthands similar to pickle.dump/pickle.dumps
  1247. def dump(obj, file, protocol=None, buffer_callback=None):
  1248. """Serialize obj as bytes streamed into file
  1249. protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
  1250. pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
  1251. speed between processes running the same Python version.
  1252. Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
  1253. compatibility with older versions of Python (although this is not always
  1254. guaranteed to work because cloudpickle relies on some internal
  1255. implementation details that can change from one Python version to the
  1256. next).
  1257. """
  1258. Pickler(file, protocol=protocol, buffer_callback=buffer_callback).dump(obj)
  1259. def dumps(obj, protocol=None, buffer_callback=None):
  1260. """Serialize obj as a string of bytes allocated in memory
  1261. protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
  1262. pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
  1263. speed between processes running the same Python version.
  1264. Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
  1265. compatibility with older versions of Python (although this is not always
  1266. guaranteed to work because cloudpickle relies on some internal
  1267. implementation details that can change from one Python version to the
  1268. next).
  1269. """
  1270. with io.BytesIO() as file:
  1271. cp = Pickler(file, protocol=protocol, buffer_callback=buffer_callback)
  1272. cp.dump(obj)
  1273. return file.getvalue()
  1274. # Include pickles unloading functions in this namespace for convenience.
  1275. load, loads = pickle.load, pickle.loads
  1276. # Backward compat alias.
  1277. CloudPickler = Pickler