marsh.py 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115
  1. # (C) Copyright 2018-2025 by Rocky Bernstein
  2. #
  3. # This program is free software; you can redistribute it and/or
  4. # modify it under the terms of the GNU General Public License
  5. # as published by the Free Software Foundation; either version 2
  6. # of the License, or (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. from types import CodeType, EllipsisType
  17. from typing import Optional
  18. from xdis.unmarshal import long
  19. """Internal Python object serialization
  20. This module contains functions that can read and write Python values
  21. in a binary format. The format is specific to Python, but independent
  22. of machine architecture issues (e.g., you can write a Python value to
  23. a file on a PC, transport the file to a Sun, and read it back
  24. there). Details of the format may change between Python versions.
  25. """
  26. # NOTE: This module is used in the Python3 interpreter, but also by
  27. # the "sandboxed" process. It must work for Python2 as well.
  28. import struct
  29. import types
  30. from sys import intern
  31. from xdis.codetype import Code2, Code3
  32. from xdis.version_info import PYTHON3, PYTHON_VERSION_TRIPLE, version_tuple_to_str
  33. try:
  34. from __pypy__ import builtinify
  35. except ImportError:
  36. def builtinify(f):
  37. return f
  38. @builtinify
  39. def Ord(c):
  40. return c if PYTHON3 else ord(c)
  41. # Bit set on marshalType if we should
  42. # add obj to internObjects.
  43. # FLAG_REF is the marshal.c name
  44. FLAG_REF = 0x80
  45. TYPE_NULL = "0"
  46. TYPE_NONE = "N"
  47. TYPE_FALSE = "F"
  48. TYPE_TRUE = "T"
  49. TYPE_STOPITER = "S"
  50. TYPE_ELLIPSIS = "."
  51. TYPE_INT = "i"
  52. TYPE_INT64 = "I" # Python 3.4 removed this
  53. TYPE_FLOAT = "f" # Seems not in use after Python 2.4
  54. TYPE_BINARY_FLOAT = "g"
  55. TYPE_COMPLEX = "x"
  56. TYPE_BINARY_COMPLEX = "y" # 3.x
  57. TYPE_LONG = "l"
  58. TYPE_STRING = "s"
  59. TYPE_INTERNED = "t"
  60. TYPE_REF = "r" # Since 3.4
  61. TYPE_STRINGREF = "R" # Python 2
  62. TYPE_TUPLE = "("
  63. TYPE_LIST = "["
  64. TYPE_DICT = "{"
  65. TYPE_CODE_OLD = "C" # used in Python 1.0 - 1.2
  66. TYPE_CODE = "c"
  67. TYPE_UNICODE = "u"
  68. TYPE_UNKNOWN = "?"
  69. TYPE_SET = "<"
  70. TYPE_FROZENSET = ">"
  71. TYPE_ASCII = "a" # since 3.4
  72. TYPE_ASCII_INTERNED = "A" # since 3.4
  73. TYPE_SMALL_TUPLE = ")" # since 3.4
  74. TYPE_SHORT_ASCII = "z" # since 3.4
  75. TYPE_SHORT_ASCII_INTERNED = "Z" # since 3.4
  76. class _Marshaller:
  77. """Python marshalling routine that runs in Python 2 and Python 3.
  78. We also extend to allow for xdis Code2 and Code3 types and instances.
  79. """
  80. dispatch = {}
  81. def __init__(
  82. self, writefunc, python_version: tuple, is_pypy: Optional[bool] = None
  83. ) -> None:
  84. self._write = writefunc
  85. self.python_version = python_version
  86. self.is_pypy = is_pypy
  87. def dump(self, x) -> None:
  88. if (
  89. isinstance(x, types.CodeType)
  90. and PYTHON_VERSION_TRIPLE[:2] != self.python_version[:2]
  91. ):
  92. raise RuntimeError(
  93. "code type passed for version %s but we are running version %s"
  94. % (version_tuple_to_str(), self.python_version)
  95. )
  96. try:
  97. self.dispatch[type(x)](self, x)
  98. except KeyError:
  99. if isinstance(x, Code3):
  100. self.dispatch[Code3](self, x)
  101. return
  102. elif isinstance(x, Code2):
  103. self.dispatch[Code2](self, x)
  104. return
  105. else:
  106. for tp in type(x).mro():
  107. func = self.dispatch.get(tp)
  108. if func:
  109. break
  110. else:
  111. raise ValueError("unmarshallable object")
  112. func(self, x)
  113. def w_long64(self, x) -> None:
  114. self.w_long(x)
  115. self.w_long(x >> 32)
  116. def w_long(self, x: int) -> None:
  117. a = chr(x & 0xFF)
  118. x >>= 8
  119. b = chr(x & 0xFF)
  120. x >>= 8
  121. c = chr(x & 0xFF)
  122. x >>= 8
  123. d = chr(x & 0xFF)
  124. self._write(a + b + c + d)
  125. def w_short(self, x: int) -> None:
  126. self._write(chr(x & 0xFF))
  127. self._write(chr((x >> 8) & 0xFF))
  128. def dump_none(self, x) -> None:
  129. self._write(TYPE_NONE)
  130. dispatch[type(None)] = dump_none
  131. def dump_bool(self, x) -> None:
  132. if x:
  133. self._write(TYPE_TRUE)
  134. else:
  135. self._write(TYPE_FALSE)
  136. dispatch[bool] = dump_bool
  137. def dump_stopiter(self, x) -> None:
  138. if x is not StopIteration:
  139. raise ValueError("unmarshallable object")
  140. self._write(TYPE_STOPITER)
  141. dispatch[type(StopIteration)] = dump_stopiter
  142. def dump_ellipsis(self, x) -> None:
  143. self._write(TYPE_ELLIPSIS)
  144. try:
  145. dispatch[type(Ellipsis)] = dump_ellipsis
  146. except NameError:
  147. pass
  148. # In Python3, this function is not used; see dump_long() below.
  149. def dump_int(self, x) -> None:
  150. y = x >> 31
  151. if y and y != -1:
  152. self._write(TYPE_INT64)
  153. self.w_long64(x)
  154. else:
  155. self._write(TYPE_INT)
  156. self.w_long(x)
  157. dispatch[int] = dump_int
  158. def dump_long(self, x) -> None:
  159. self._write(TYPE_LONG)
  160. sign = 1
  161. if x < 0:
  162. sign = -1
  163. x = -x
  164. digits = []
  165. while x:
  166. digits.append(x & 0x7FFF)
  167. x = x >> 15
  168. self.w_long(len(digits) * sign)
  169. for d in digits:
  170. self.w_short(d)
  171. try:
  172. long
  173. except NameError:
  174. dispatch[int] = dump_long
  175. else:
  176. dispatch[long] = dump_long # noqa
  177. def dump_float(self, x) -> None:
  178. write = self._write
  179. write(TYPE_FLOAT)
  180. s = repr(x)
  181. write(chr(len(s)))
  182. write(s)
  183. dispatch[float] = dump_float
  184. def dump_binary_float(self, x) -> None:
  185. write = self._write
  186. write(TYPE_BINARY_FLOAT)
  187. write(struct.pack("<d", x))
  188. dispatch[TYPE_BINARY_FLOAT] = dump_float
  189. def dump_complex(self, x) -> None:
  190. write = self._write
  191. write(TYPE_COMPLEX)
  192. s = repr(x.real)
  193. write(chr(len(s)))
  194. write(s)
  195. s = repr(x.imag)
  196. write(chr(len(s)))
  197. write(s)
  198. try:
  199. dispatch[complex] = dump_complex
  200. except NameError:
  201. pass
  202. def dump_binary_complex(self, x) -> None:
  203. write = self._write
  204. write(TYPE_BINARY_COMPLEX)
  205. write(struct.pack("<d", x.real))
  206. write(struct.pack("<d", x.imag))
  207. dispatch[TYPE_BINARY_COMPLEX] = dump_binary_complex
  208. def dump_string(self, x) -> None:
  209. # Python 3.11 seems to add the object ref flag bit for strings.
  210. type_string = TYPE_STRING if self.python_version < (3, 11) else chr(ord(TYPE_STRING) | FLAG_REF)
  211. self._write(type_string)
  212. self.w_long(len(x))
  213. self._write(x)
  214. if PYTHON_VERSION_TRIPLE > (2, 5):
  215. dispatch[bytes] = dump_string
  216. dispatch[bytearray] = dump_string
  217. def dump_unicode(self, x) -> None:
  218. self._write(TYPE_UNICODE)
  219. if not PYTHON3 and self.python_version < (3, 0):
  220. s = x.encode("utf8")
  221. else:
  222. s = x
  223. self.w_long(len(s))
  224. self._write(s)
  225. try:
  226. unicode
  227. except NameError:
  228. dispatch[str] = dump_unicode
  229. else:
  230. dispatch[unicode] = dump_unicode # noqa
  231. def dump_tuple(self, x) -> None:
  232. self._write(TYPE_TUPLE)
  233. self.w_long(len(x))
  234. for item in x:
  235. self.dump(item)
  236. dispatch[tuple] = dump_tuple
  237. dispatch[TYPE_TUPLE] = dump_tuple
  238. def dump_small_tuple(self, x) -> None:
  239. self._write(TYPE_SMALL_TUPLE)
  240. self.w_short(len(x))
  241. for item in x:
  242. self.dump(item)
  243. dispatch[TYPE_SMALL_TUPLE] = dump_small_tuple
  244. def dump_list(self, x) -> None:
  245. self._write(TYPE_LIST)
  246. self.w_long(len(x))
  247. for item in x:
  248. self.dump(item)
  249. dispatch[list] = dump_list
  250. dispatch[TYPE_LIST] = dump_tuple
  251. def dump_dict(self, x) -> None:
  252. self._write(TYPE_DICT)
  253. for key, value in x.items():
  254. self.dump(key)
  255. self.dump(value)
  256. self._write(TYPE_NULL)
  257. dispatch[dict] = dump_dict
  258. def dump_code2(self, x) -> None:
  259. # Careful here: many Python 2 code objects are strings,
  260. # but Python 3 marshaling, by default, will dump strings as
  261. # unicode. Force marsaling this type as string.
  262. self._write(TYPE_CODE)
  263. self.w_long(x.co_argcount)
  264. self.w_long(x.co_nlocals)
  265. self.w_long(x.co_stacksize)
  266. self.w_long(x.co_flags)
  267. self.dump_string(x.co_code)
  268. # If running in a Python3 interpreter, some constants will get
  269. # converted from string to unicode. For now, let's see if
  270. # that's okay.
  271. self.dump(x.co_consts)
  272. # The tuple "names" in Python2 must have string entries
  273. self._write(TYPE_TUPLE)
  274. self.w_long(len(x.co_names))
  275. for name in x.co_names:
  276. self.dump_string(name)
  277. # The tuple "varnames" in Python2 also must have string entries
  278. self._write(TYPE_TUPLE)
  279. self.w_long(len(x.co_varnames))
  280. for name in x.co_varnames:
  281. self.dump_string(name)
  282. self.dump(x.co_freevars)
  283. self.dump(x.co_cellvars)
  284. self.dump_string(x.co_filename)
  285. self.dump_string(x.co_name)
  286. self.w_long(x.co_firstlineno)
  287. self.dump_string(x.co_lnotab)
  288. return
  289. dispatch[Code2] = dump_code2
  290. # FIXME: will probably have to adjust similar to how we
  291. # adjusted dump_code2
  292. def dump_code3(self, x) -> None:
  293. if self.python_version >= (3, 4) and not self.is_pypy:
  294. self._write(chr(ord(TYPE_CODE) | FLAG_REF))
  295. else:
  296. self._write(TYPE_CODE)
  297. self.w_long(x.co_argcount)
  298. if hasattr(x, "co_posonlyargcount"):
  299. self.w_long(x.co_posonlyargcount)
  300. self.w_long(x.co_kwonlyargcount)
  301. if self.python_version < (3, 11):
  302. self.w_long(x.co_nlocals)
  303. self.w_long(x.co_stacksize)
  304. self.w_long(x.co_flags)
  305. self.dump(x.co_code)
  306. self.dump(x.co_consts)
  307. self.dump(x.co_names)
  308. self.dump(x.co_varnames)
  309. self.dump(x.co_freevars)
  310. self.dump(x.co_cellvars)
  311. self.dump(x.co_filename)
  312. self.dump(x.co_name)
  313. self.w_long(x.co_firstlineno)
  314. # 3.10 and greater uses co_linetable.
  315. linetable = x.co_linetable if hasattr(x, "co_linetable") else x.co_lnotab
  316. self.dump(linetable)
  317. dispatch[Code3] = dump_code3
  318. try:
  319. if PYTHON3:
  320. dispatch[types.CodeType] = dump_code3
  321. else:
  322. dispatch[types.CodeType] = dump_code2
  323. except NameError:
  324. pass
  325. def dump_set(self, x) -> None:
  326. self._write(TYPE_SET)
  327. self.w_long(len(x))
  328. for each in x:
  329. self.dump(each)
  330. try:
  331. dispatch[set] = dump_set
  332. except NameError:
  333. pass
  334. def dump_frozenset(self, x) -> None:
  335. self._write(TYPE_FROZENSET)
  336. self.w_long(len(x))
  337. for each in x:
  338. self.dump(each)
  339. try:
  340. dispatch[frozenset] = dump_frozenset
  341. except NameError:
  342. pass
  343. # FIXME: dump_ascii, dump_short_ascii are just guesses
  344. def dump_ascii(self, x) -> None:
  345. self._write(TYPE_ASCII)
  346. self.w_long(len(x))
  347. self._write(x)
  348. dispatch[TYPE_ASCII] = dump_ascii
  349. def dump_short_ascii(self, x) -> None:
  350. self._write(TYPE_SHORT_ASCII)
  351. # FIXME: check len(x)?
  352. self.w_short(len(x))
  353. self._write(x)
  354. dispatch[TYPE_SHORT_ASCII] = dump_short_ascii
  355. # FIXME: Handle interned versions of dump_ascii, dump_short_ascii
  356. class _NULL:
  357. pass
  358. class _StringBuffer:
  359. def __init__(self, value) -> None:
  360. self.bufstr = value
  361. self.bufpos = 0
  362. def read(self, n):
  363. pos = self.bufpos
  364. newpos = pos + n
  365. ret = self.bufstr[pos:newpos]
  366. self.bufpos = newpos
  367. return ret
  368. class _Unmarshaller:
  369. dispatch = {}
  370. def __init__(self, readfunc, python_version: bool, is_pypy: bool) -> None:
  371. self._read = readfunc
  372. self._stringtable = []
  373. self.is_pypy = is_pypy
  374. self.python_version = python_version
  375. def load(self):
  376. c = self._read(1)
  377. if not c:
  378. raise EOFError
  379. try:
  380. return self.dispatch[c](self)
  381. except KeyError:
  382. raise ValueError("bad marshal code: %c (%d)" % (c, Ord(c)))
  383. def r_byte(self):
  384. return Ord(self._read(1))
  385. def r_short(self):
  386. lo = Ord(self._read(1))
  387. hi = Ord(self._read(1))
  388. x = lo | (hi << 8)
  389. if x & 0x8000:
  390. x = x - 0x10000
  391. return x
  392. def r_long(self):
  393. s = self._read(4)
  394. a = Ord(s[0])
  395. b = Ord(s[1])
  396. c = Ord(s[2])
  397. d = Ord(s[3])
  398. x = a | (b << 8) | (c << 16) | (d << 24)
  399. if d & 0x80 and x > 0:
  400. x = -((1 << 32) - x)
  401. return int(x)
  402. else:
  403. return x
  404. def r_long64(self):
  405. a = Ord(self._read(1))
  406. b = Ord(self._read(1))
  407. c = Ord(self._read(1))
  408. d = Ord(self._read(1))
  409. e = Ord(self._read(1))
  410. f = Ord(self._read(1))
  411. g = Ord(self._read(1))
  412. h = Ord(self._read(1))
  413. x = a | (b << 8) | (c << 16) | (d << 24)
  414. x = x | (e << 32) | (f << 40) | (g << 48) | (h << 56)
  415. if h & 0x80 and x > 0:
  416. x = -((1 << 64) - x)
  417. return x
  418. def load_null(self) -> type[_NULL]:
  419. return _NULL
  420. dispatch[TYPE_NULL] = load_null
  421. def load_none(self) -> None:
  422. return None
  423. dispatch[TYPE_NONE] = load_none
  424. def load_true(self) -> bool:
  425. return True
  426. dispatch[TYPE_TRUE] = load_true
  427. def load_false(self) -> bool:
  428. return False
  429. dispatch[TYPE_FALSE] = load_false
  430. def load_ascii(self):
  431. return self.r_byte()
  432. dispatch[TYPE_ASCII] = load_null
  433. def load_stopiter(self) -> type[StopIteration]:
  434. return StopIteration
  435. dispatch[TYPE_STOPITER] = load_stopiter
  436. def load_ellipsis(self) -> EllipsisType:
  437. return Ellipsis
  438. dispatch[TYPE_ELLIPSIS] = load_ellipsis
  439. dispatch[TYPE_INT] = r_long
  440. dispatch[TYPE_INT64] = r_long64
  441. def load_long(self):
  442. size = self.r_long()
  443. sign = 1
  444. if size < 0:
  445. sign = -1
  446. size = -size
  447. x = 0
  448. for i in range(size):
  449. d = self.r_short()
  450. x = x | (d << (i * 15))
  451. return x * sign
  452. dispatch[TYPE_LONG] = load_long
  453. def load_float(self) -> float:
  454. n = Ord(self._read(1))
  455. s = self._read(n)
  456. return float(s)
  457. dispatch[TYPE_FLOAT] = load_float
  458. def load_binary_float(self) -> float:
  459. f = self._read(8)
  460. return float(struct.unpack("<d", f)[0])
  461. dispatch[TYPE_BINARY_FLOAT] = load_binary_float
  462. def load_complex(self) -> complex:
  463. n = Ord(self._read(1))
  464. s = self._read(n)
  465. real = float(s)
  466. n = Ord(self._read(1))
  467. s = self._read(n)
  468. imag = float(s)
  469. return complex(real, imag)
  470. dispatch[TYPE_COMPLEX] = load_complex
  471. def load_string(self):
  472. n = self.r_long()
  473. return self._read(n)
  474. dispatch[TYPE_STRING] = load_string
  475. def load_interned(self) -> str:
  476. n = self.r_long()
  477. ret = intern(self._read(n))
  478. self._stringtable.append(ret)
  479. return ret
  480. dispatch[TYPE_INTERNED] = load_interned
  481. def load_stringref(self):
  482. n = self.r_long()
  483. return self._stringtable[n]
  484. dispatch[TYPE_STRINGREF] = load_stringref
  485. def load_unicode(self):
  486. n = self.r_long()
  487. s = self._read(n)
  488. ret = s.decode("utf8")
  489. return ret
  490. dispatch[TYPE_UNICODE] = load_unicode
  491. def load_tuple(self):
  492. return tuple(self.load_list())
  493. dispatch[TYPE_TUPLE] = load_tuple
  494. def load_list(self):
  495. n = self.r_long()
  496. list = [self.load() for i in range(n)]
  497. return list
  498. dispatch[TYPE_LIST] = load_list
  499. def load_dict(self):
  500. d = {}
  501. while 1:
  502. key = self.load()
  503. if key is _NULL:
  504. break
  505. value = self.load()
  506. d[key] = value
  507. return d
  508. dispatch[TYPE_DICT] = load_dict
  509. # FIXME: GO over fo PYPY
  510. def load_code(self) -> Code2 | Code3 | CodeType:
  511. argcount = self.r_long()
  512. if self.python_version and self.python_version >= (3, 0):
  513. is_python3 = True
  514. kwonlyargcount = self.r_long()
  515. else:
  516. is_python3 = False
  517. nlocals = self.r_long()
  518. stacksize = self.r_long()
  519. flags = self.r_long()
  520. code = self.load()
  521. consts = self.load()
  522. names = self.load()
  523. varnames = self.load()
  524. freevars = self.load()
  525. cellvars = self.load()
  526. filename = self.load()
  527. name = self.load()
  528. firstlineno = self.r_long()
  529. lnotab = self.load()
  530. if is_python3:
  531. if PYTHON3:
  532. return types.CodeType(
  533. argcount,
  534. kwonlyargcount,
  535. nlocals,
  536. stacksize,
  537. flags,
  538. code,
  539. consts,
  540. names,
  541. varnames,
  542. filename,
  543. name,
  544. firstlineno,
  545. lnotab,
  546. freevars,
  547. cellvars,
  548. )
  549. else:
  550. return Code3(
  551. argcount,
  552. kwonlyargcount,
  553. nlocals,
  554. stacksize,
  555. flags,
  556. code,
  557. consts,
  558. names,
  559. varnames,
  560. filename,
  561. name,
  562. firstlineno,
  563. lnotab,
  564. freevars,
  565. cellvars,
  566. )
  567. else:
  568. if PYTHON3:
  569. return Code2(
  570. argcount,
  571. nlocals,
  572. stacksize,
  573. flags,
  574. code,
  575. consts,
  576. names,
  577. varnames,
  578. filename,
  579. name,
  580. firstlineno,
  581. lnotab,
  582. freevars,
  583. cellvars,
  584. )
  585. else:
  586. return types.CodeType(
  587. argcount,
  588. nlocals,
  589. stacksize,
  590. flags,
  591. code,
  592. consts,
  593. names,
  594. varnames,
  595. filename,
  596. name,
  597. firstlineno,
  598. lnotab,
  599. freevars,
  600. cellvars,
  601. )
  602. dispatch[TYPE_CODE] = load_code
  603. def load_set(self):
  604. n = self.r_long()
  605. args = [self.load() for i in range(n)]
  606. return set(args)
  607. dispatch[TYPE_SET] = load_set
  608. def load_frozenset(self):
  609. n = self.r_long()
  610. args = [self.load() for i in range(n)]
  611. return frozenset(args)
  612. dispatch[TYPE_FROZENSET] = load_frozenset
  613. # ________________________________________________________________
  614. def _read(self, n):
  615. pos = self.bufpos
  616. newpos = pos + n
  617. if newpos > len(self.bufstr):
  618. raise EOFError
  619. ret = self.bufstr[pos:newpos]
  620. self.bufpos = newpos
  621. return ret
  622. def _read1(self):
  623. ret = self.bufstr[self.bufpos]
  624. self.bufpos += 1
  625. return ret
  626. def _r_short(self):
  627. lo = Ord(_read1(self))
  628. hi = Ord(_read1(self))
  629. x = lo | (hi << 8)
  630. if x & 0x8000:
  631. x = x - 0x10000
  632. return x
  633. def _r_long(self):
  634. # inlined this most common case
  635. p = self.bufpos
  636. s = self.bufstr
  637. a = Ord(s[p])
  638. b = Ord(s[p + 1])
  639. c = Ord(s[p + 2])
  640. d = Ord(s[p + 3])
  641. self.bufpos += 4
  642. x = a | (b << 8) | (c << 16) | (d << 24)
  643. if d & 0x80 and x > 0:
  644. x = -((1 << 32) - x)
  645. return int(x)
  646. else:
  647. return x
  648. def _r_long64(self):
  649. a = Ord(_read1(self))
  650. b = Ord(_read1(self))
  651. c = Ord(_read1(self))
  652. d = Ord(_read1(self))
  653. e = Ord(_read1(self))
  654. f = Ord(_read1(self))
  655. g = Ord(_read1(self))
  656. h = Ord(_read1(self))
  657. x = a | (b << 8) | (c << 16) | (d << 24)
  658. x = x | (e << 32) | (f << 40) | (g << 48) | (h << 56)
  659. if h & 0x80 and x > 0:
  660. x = -((1 << 64) - x)
  661. return x
  662. _load_dispatch = {}
  663. class _FastUnmarshaller:
  664. dispatch = {}
  665. def __init__(self, buffer, python_version=None) -> None:
  666. self.bufstr = buffer
  667. self.bufpos = 0
  668. self._stringtable = []
  669. self.python_version = python_version
  670. def load(self):
  671. # make flow space happy
  672. c = "?"
  673. try:
  674. c = self.bufstr[self.bufpos]
  675. if PYTHON3:
  676. c = chr(c)
  677. self.bufpos += 1
  678. return _load_dispatch[c](self)
  679. except KeyError:
  680. exception = ValueError(
  681. "bad marshal code at position %d: %c" % (self.bufpos - 1, c)
  682. )
  683. except IndexError:
  684. exception = EOFError
  685. raise exception
  686. def load_null(self) -> type[_NULL]:
  687. return _NULL
  688. dispatch[TYPE_NULL] = load_null
  689. def load_none(self) -> None:
  690. return None
  691. dispatch[TYPE_NONE] = load_none
  692. def load_true(self) -> bool:
  693. return True
  694. dispatch[TYPE_TRUE] = load_true
  695. def load_false(self) -> bool:
  696. return False
  697. dispatch[TYPE_FALSE] = load_false
  698. def load_stopiter(self) -> type[StopIteration]:
  699. return StopIteration
  700. dispatch[TYPE_STOPITER] = load_stopiter
  701. def load_ellipsis(self) -> EllipsisType:
  702. return Ellipsis
  703. dispatch[TYPE_ELLIPSIS] = load_ellipsis
  704. def load_int(self):
  705. return _r_long(self)
  706. dispatch[TYPE_INT] = load_int
  707. def load_int64(self):
  708. return _r_long64(self)
  709. dispatch[TYPE_INT64] = load_int64
  710. def load_long(self):
  711. size = _r_long(self)
  712. sign = 1
  713. if size < 0:
  714. sign = -1
  715. size = -size
  716. x = 0
  717. for i in range(size):
  718. d = _r_short(self)
  719. x = x | (d << (i * 15))
  720. return x * sign
  721. dispatch[TYPE_LONG] = load_long
  722. def load_float(self) -> float:
  723. n = Ord(_read1(self))
  724. s = _read(self, n)
  725. return float(s)
  726. dispatch[TYPE_FLOAT] = load_float
  727. def load_complex(self) -> complex:
  728. n = Ord(_read1(self))
  729. s = _read(self, n)
  730. real = float(s)
  731. n = Ord(_read1(self))
  732. s = _read(self, n)
  733. imag = float(s)
  734. return complex(real, imag)
  735. dispatch[TYPE_COMPLEX] = load_complex
  736. def load_string(self):
  737. n = _r_long(self)
  738. return _read(self, n)
  739. dispatch[TYPE_STRING] = load_string
  740. def load_interned(self) -> str:
  741. n = _r_long(self)
  742. s = _read(self, n)
  743. if PYTHON3:
  744. s = s.decode("utf8")
  745. ret = intern(s)
  746. self._stringtable.append(ret)
  747. return ret
  748. dispatch[TYPE_INTERNED] = load_interned
  749. def load_stringref(self):
  750. n = _r_long(self)
  751. return self._stringtable[n]
  752. dispatch[TYPE_STRINGREF] = load_stringref
  753. def load_unicode(self):
  754. n = _r_long(self)
  755. s = _read(self, n)
  756. ret = s.decode("utf8")
  757. return ret
  758. dispatch[TYPE_UNICODE] = load_unicode
  759. def load_tuple(self):
  760. return tuple(self.load_list())
  761. dispatch[TYPE_TUPLE] = load_tuple
  762. def load_list(self):
  763. n = _r_long(self)
  764. list = []
  765. for i in range(n):
  766. list.append(self.load())
  767. return list
  768. dispatch[TYPE_LIST] = load_list
  769. def load_dict(self):
  770. d = {}
  771. while 1:
  772. key = self.load()
  773. if key is _NULL:
  774. break
  775. value = self.load()
  776. d[key] = value
  777. return d
  778. dispatch[TYPE_DICT] = load_dict
  779. def load_code(self) -> Code2 | CodeType:
  780. argcount = _r_long(self)
  781. nlocals = _r_long(self)
  782. stacksize = _r_long(self)
  783. flags = _r_long(self)
  784. code = self.load()
  785. consts = self.load()
  786. names = self.load()
  787. varnames = self.load()
  788. freevars = self.load()
  789. cellvars = self.load()
  790. filename = self.load()
  791. name = self.load()
  792. firstlineno = _r_long(self)
  793. lnotab = self.load()
  794. if PYTHON3:
  795. if isinstance(name, bytes):
  796. name = name.decode()
  797. return Code2(
  798. argcount,
  799. nlocals,
  800. stacksize,
  801. flags,
  802. code,
  803. consts,
  804. names,
  805. varnames,
  806. filename.decode(),
  807. name,
  808. firstlineno,
  809. lnotab,
  810. freevars,
  811. cellvars,
  812. )
  813. else:
  814. return types.CodeType(
  815. argcount,
  816. nlocals,
  817. stacksize,
  818. flags,
  819. code,
  820. consts,
  821. names,
  822. varnames,
  823. filename,
  824. name,
  825. firstlineno,
  826. lnotab,
  827. freevars,
  828. cellvars,
  829. )
  830. dispatch[TYPE_CODE] = load_code
  831. def load_set(self):
  832. n = _r_long(self)
  833. args = [self.load() for i in range(n)]
  834. return set(args)
  835. dispatch[TYPE_SET] = load_set
  836. def load_frozenset(self):
  837. n = _r_long(self)
  838. args = [self.load() for i in range(n)]
  839. return frozenset(args)
  840. dispatch[TYPE_FROZENSET] = load_frozenset
  841. _load_dispatch = _FastUnmarshaller.dispatch
  842. # _________________________________________________________________
  843. #
  844. # user interface
  845. version = 1
  846. @builtinify
  847. def dump(x, f, version: int = version, python_version: tuple=PYTHON_VERSION_TRIPLE, is_pypy: Optional[bool]=None) -> None:
  848. # XXX 'version' is ignored, we always dump in a version-0-compatible format
  849. m = _Marshaller(f.write, python_version, is_pypy)
  850. m.dump(x)
  851. @builtinify
  852. def load(f, python_version: tuple=PYTHON_VERSION_TRIPLE, is_pypy=None):
  853. um = _Unmarshaller(f.read, python_version, is_pypy)
  854. return um.load()
  855. @builtinify
  856. def dumps(
  857. x,
  858. version: int = version,
  859. python_version: tuple[int, ...] = PYTHON_VERSION_TRIPLE,
  860. is_pypy: Optional[bool] = None,
  861. ) -> bytes | str:
  862. # XXX 'version' is ignored, we always dump in a version-0-compatible format
  863. buffer = []
  864. m = _Marshaller(buffer.append, python_version=python_version, is_pypy=is_pypy)
  865. m.dump(x)
  866. if python_version:
  867. is_python3 = python_version >= (3, 0)
  868. else:
  869. is_python3 = PYTHON3
  870. if is_python3:
  871. if PYTHON_VERSION_TRIPLE >= (3, 0):
  872. # Python 3.x handling Python 3.x
  873. buf = []
  874. for b in buffer:
  875. if isinstance(b, str) and PYTHON3:
  876. s2b = bytes(ord(b[j]) for j in range(len(b)))
  877. buf.append(s2b)
  878. elif isinstance(b, bytearray):
  879. buf.append(str(b))
  880. else:
  881. buf.append(b)
  882. return b"".join(buf)
  883. else:
  884. # Python 2.x handling Python 3.x
  885. buf = b""
  886. for b in buffer:
  887. buf += b.decode(errors="ignore")
  888. pass
  889. return buf
  890. else:
  891. # Python 2 or 3 handling Python 2.x
  892. buf = []
  893. for b in buffer:
  894. if isinstance(b, str) and PYTHON3:
  895. try:
  896. s2b = bytes(ord(b[j]) for j in range(len(b)))
  897. except ValueError:
  898. s2b = b.encode("utf-8")
  899. buf.append(s2b)
  900. elif isinstance(b, bytearray):
  901. buf.append(str(b))
  902. else:
  903. buf.append(b)
  904. return "".join(buf)
  905. @builtinify
  906. def loads(s, python_version=None):
  907. um = _FastUnmarshaller(s, python_version)
  908. return um.load()