load.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. # Copyright (c) 2015-2021, 2024-2025 by Rocky Bernstein
  2. # This program is free software; you can redistribute it and/or
  3. # modify it under the terms of the GNU General Public License
  4. # as published by the Free Software Foundation; either version 2
  5. # of the License, or (at your option) any later version.
  6. #
  7. # This program is distributed in the hope that it will be useful,
  8. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. # GNU General Public License for more details.
  11. #
  12. # You should have received a copy of the GNU General Public License
  13. # along with this program; if not, write to the Free Software
  14. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  15. import marshal
  16. import os.path as osp
  17. import py_compile
  18. import sys
  19. import tempfile
  20. import types
  21. from datetime import datetime
  22. from os import close
  23. from struct import pack, unpack
  24. from types import CodeType
  25. import xdis.marsh
  26. import xdis.unmarshal
  27. from xdis.dropbox.decrypt25 import fix_dropbox_pyc
  28. from xdis.magics import (
  29. GRAAL3_MAGICS,
  30. JYTHON_MAGICS,
  31. PYPY3_MAGICS,
  32. PYTHON_MAGIC_INT,
  33. RUSTPYTHON_MAGICS,
  34. int2magic,
  35. magic2int,
  36. magic_int2tuple,
  37. magicint2version,
  38. py_str2tuple,
  39. versions,
  40. )
  41. from xdis.version_info import PYTHON3, PYTHON_VERSION_TRIPLE
  42. def is_python_source(path) -> bool:
  43. try:
  44. data = open(path, "r").read()
  45. except UnicodeDecodeError:
  46. for encoding in ("utf-8", "utf-16", "latin-1", "iso-8859-15"):
  47. try:
  48. data = open(path, "r", encoding=encoding).read()
  49. except UnicodeDecodeError:
  50. pass
  51. else:
  52. break
  53. except Exception:
  54. return False
  55. try:
  56. compile(data, path, "exec")
  57. except Exception:
  58. return False
  59. return True
  60. def is_bytecode_extension(path: str) -> bool:
  61. """
  62. Return True if filename ``path`` is named like a bytecode file,
  63. that is, has extension ".pyc" or ".pyo"
  64. """
  65. return path.endswith(".pyc") or path.endswith(".pyo")
  66. # FIXME: the function name is weird. This checks and returns the path.
  67. def check_object_path(path: str) -> str:
  68. if not is_bytecode_extension(path) and is_python_source(path):
  69. try:
  70. import importlib
  71. bytecode_path = importlib.util.cache_from_source(path, optimization="")
  72. if osp.exists(bytecode_path):
  73. return bytecode_path
  74. except Exception:
  75. try:
  76. import imp
  77. imp.cache_from_source(path, debug_override=False)
  78. except Exception:
  79. pass
  80. pass
  81. basename = osp.basename(path)[0:-3]
  82. if PYTHON3:
  83. spath = path
  84. else:
  85. spath = path.decode("utf-8")
  86. # It would be better to use a context manager function like WithNamedTemporary.
  87. # However we are seeing write errors when this is done in Windows.
  88. # So until this is resolved, we'll use mkstemp and explicitly do a close.
  89. fd, path = tempfile.mkstemp(prefix=basename + "-", suffix=".pyc", text=False)
  90. close(fd)
  91. py_compile.compile(spath, cfile=path, doraise=True)
  92. if not is_bytecode_extension(path):
  93. raise ValueError(
  94. f"Path {path} must point to a Python source that can be compiled, or Python bytecode (.pyc, .pyo)\n"
  95. )
  96. return path
  97. def is_pypy(magic_int: int, filename) -> bool:
  98. # PyPy 3.8 starts pyston's trend of using Python's magic numbers.
  99. if magic_int in (3413, 3414) and filename.endswith("pypy38.pyc"):
  100. return True
  101. return magic_int in ((62211 + 7, 3180 + 7) + PYPY3_MAGICS)
  102. def load_file(filename: str, out=sys.stdout) -> CodeType:
  103. """
  104. load a Python source file and compile it to byte-code
  105. _load_file(filename: string): code_object
  106. filename: name of file containing Python source code
  107. (normally a .py)
  108. code_object: code_object compiled from this source code
  109. This function does NOT write any file!
  110. """
  111. fp = open(filename, "rb")
  112. try:
  113. source = fp.read()
  114. try:
  115. if PYTHON_VERSION_TRIPLE < (2, 6):
  116. co = compile(source, filename, "exec")
  117. else:
  118. co = compile(source, filename, "exec", dont_inherit=True)
  119. except SyntaxError:
  120. out.write(f">>Syntax error in {filename}\n")
  121. raise
  122. finally:
  123. fp.close()
  124. return co
  125. def load_module(
  126. filename: str, code_objects=None, fast_load: bool = False, get_code: bool = True
  127. ):
  128. """load a module without importing it.
  129. Parameters:
  130. filename: name of file containing Python byte-code object
  131. (normally a .pyc)
  132. code_objects: list of additional code_object from this
  133. file. This might be a types.CodeType or one of
  134. the portable xdis code types, e.g. Code38, Code3,
  135. Code2, etc. This can be empty.
  136. fast_load: If True, then use Python's builtin loader. This can be done only if
  137. the bytecode version matches the current bytecode interpreter.
  138. get_code: Parsing the code object takes a bit of
  139. parsing time, but sometimes all you want is the
  140. module info, time string, code size, python
  141. version, etc. For that, set `get_code` to
  142. `False`.
  143. Return values are as follows:
  144. version_tuple: a tuple version number for the given magic_int,
  145. e.g. (2, 7) or (3, 4)
  146. timestamp: int; the seconds since EPOCH of the time of the bytecode creation, or None
  147. if no timestamp was stored
  148. magic_int: int, a bytecode-specific version number. This is related to the Python version
  149. number, the two aren't quite the same thing.
  150. co : code object
  151. ispypy : True if this was a PyPy code object
  152. source_size: The size of the source code mod 2**32, if that was stored in the bytecode.
  153. None otherwise.
  154. sip_hash : the SIP Hash for the file (only in Python 3.7 or greater), if the file
  155. was created with a SIP hash or None otherwise. Note that if the sip_hash is not
  156. none, then the timestamp and source_size will be invalid.
  157. """
  158. # Some sanity checks
  159. if not osp.exists(filename):
  160. raise ImportError(f"File name: '{filename}' doesn't exist")
  161. elif not osp.isfile(filename):
  162. raise ImportError(f"File name: '{filename}' isn't a file")
  163. elif osp.getsize(filename) < 50:
  164. raise ImportError(
  165. "File name: '%s (%d bytes)' is too short to be a valid pyc file"
  166. % (filename, osp.getsize(filename))
  167. )
  168. with open(filename, "rb") as fp:
  169. return load_module_from_file_object(
  170. fp,
  171. filename=filename,
  172. code_objects=code_objects,
  173. fast_load=fast_load,
  174. get_code=get_code,
  175. )
def load_module_from_file_object(
    fp, filename="<unknown>", code_objects=None, fast_load=False, get_code=True
):
    """Load a module from a file object without importing it.

    Reads the bytecode header from ``fp``: a 4-byte magic, then either a
    timestamp (optionally followed by a source size) or the PEP 552
    fields, depending on the magic number / version.  Unless
    ``get_code`` is false, it then reads the code object that follows.

    Parameters:
        fp:           file object to read from; the first 4 bytes read
                      are taken as the magic.  ``fp`` is always closed
                      before this function returns or raises.
        filename:     name used in error messages and in the PyPy check.
        code_objects: passed to ``xdis.unmarshal.load_code``; a new dict
                      is used when this is None.
        fast_load:    consulted only when the file's magic differs from
                      the running interpreter's; selects
                      ``xdis.marsh.load`` instead of
                      ``xdis.unmarshal.load_code``.
        get_code:     when false, no code object is read and None is
                      returned in its place.

    Returns:
        The tuple (version tuple, timestamp, magic_int, code object,
        is_pypy, source_size, sip_hash).  See :func:load_module for the
        meaning of each value.  For magic 62135 the result of
        ``fix_dropbox_pyc(fp)`` is returned instead.

    Raises:
        ImportError: the magic is unknown, unsupported, an interim
            version or a dropbox-hacked 62215 file; or any exception
            other than NotImplementedError occurred while reading the
            header or the code object.
        NotImplementedError: re-raised unchanged.
    """

    if code_objects is None:
        code_objects = {}

    timestamp = 0
    try:
        magic = fp.read(4)
        magic_int = magic2int(magic)

        # For reasons I don't understand, PyPy 3.2 stores a magic
        # of '0'...  The two values below are for Python 2.x and 3.x respectively
        if magic[0:1] in ["0", b"0"]:
            magic = int2magic(3180 + 7)

        try:
            # FIXME: use the internal routine below
            tuple_version = magic_int2tuple(magic_int)
        except KeyError:
            if len(magic) >= 2:
                # Report the first two magic bytes as a little-endian integer.
                raise ImportError(
                    "Unknown magic number %s in %s"
                    % (ord(magic[0:1]) + 256 * ord(magic[1:2]), filename)
                )
            else:
                raise ImportError(f"Bad magic number: '{magic}'")

        # Magic numbers that are recognized but that this loader refuses to handle.
        if magic_int in [2657, 22138] + list(GRAAL3_MAGICS) + list(RUSTPYTHON_MAGICS) + list(JYTHON_MAGICS):
            version = magicint2version.get(magic_int, "")
            raise ImportError(f"Magic int {magic_int} ({version}) is not supported.")

        # Magic numbers rejected below as "interim" (non-final) bytecode.
        if magic_int in (
            3010,
            3020,
            3030,
            3040,
            3050,
            3060,
            3061,
            3071,
            3361,
            3091,
            3101,
            3103,
            3141,
            3270,
            3280,
            3290,
            3300,
            3320,
            3330,
            3371,
            62071,
            62071,
            62081,
            62091,
            62092,
            62111,
        ):
            raise ImportError(
                "%s is interim Python %s (%d) bytecode which is "
                "not supported.\nFinal released versions are "
                "supported." % (filename, versions[magic], magic2int(magic))
            )
        elif magic_int == 62135:
            # Rewind so that fix_dropbox_pyc() sees the file from its start.
            fp.seek(0)
            return fix_dropbox_pyc(fp)
        elif magic_int == 62215:
            raise ImportError(
                "%s is a dropbox-hacked Python %s (bytecode %d).\n"
                "See https://github.com/kholia/dedrop for how to "
                "decrypt." % (filename, versions[magic], magic2int(magic))
            )

        try:
            my_magic_int = PYTHON_MAGIC_INT
            # Recompute from ``magic``, which may have been replaced above
            # for the PyPy 3.2 '0' case.
            magic_int = magic2int(magic)
            version = magic_int2tuple(magic_int)

            timestamp = None
            source_size = None
            sip_hash = None

            # The 4 bytes after the magic: the PEP 552 flags word in the
            # first branch below, a timestamp in the other.
            ts = fp.read(4)
            if magic_int in (3439,) or version >= (3, 7):
                # PEP 552. https://www.python.org/dev/peps/pep-0552/
                pep_bits = ts[-1]
                if PYTHON_VERSION_TRIPLE <= (2, 7):
                    pep_bits = ord(pep_bits)

                if (pep_bits & 1) or magic_int == 3393:  # 3393 is 3.7.0beta3
                    # SipHash
                    sip_hash = unpack("<Q", fp.read(8))[0]
                else:
                    # Uses older-style timestamp and size
                    timestamp = unpack("<I", fp.read(4))[0]  # timestamp
                    source_size = unpack("<I", fp.read(4))[0]  # size mod 2**32
                    pass
            else:
                # Early Pyston targeting 2.7 doesn't seem to have a timestamp!
                if magic_int not in (2657,):
                    timestamp = unpack("<I", ts)[0]
                # Note: a higher magic number doesn't necessarily mean a later
                # release.  At Python 3.0 the magic number decreased
                # significantly. Hence, the range below. Also note inclusion of
                # the size info, occurred within a Python major/minor
                # release. That is why there is the test on the magic value rather than
                # PYTHON_VERSION, although PYTHON_VERSION would probably work.
                # NOTE(review): this listing lost its indentation; the test
                # below is taken to be a sibling of the timestamp test above,
                # not nested in it (it mentions 2657 itself) -- confirm.
                if (
                    (3200 <= magic_int < 20121)
                    and version >= (1, 5)
                    or magic_int in list(PYPY3_MAGICS) + [2657]
                ):
                    source_size = unpack("<I", fp.read(4))[0]  # size mod 2**32

            if get_code:
                if my_magic_int == magic_int:
                    # Same magic as the running interpreter: use the builtin
                    # marshal module on the rest of the file.
                    bytecode = fp.read()
                    co = marshal.loads(bytecode)
                    # Python 3.10 returns a tuple here?
                    if isinstance(co, tuple):
                        co = co[0]
                        # NOTE(review): nesting of this assert under the tuple
                        # check is reconstructed from a listing that lost its
                        # indentation -- confirm.
                        assert isinstance(co, types.CodeType)
                elif fast_load:
                    co = xdis.marsh.load(fp, magicint2version[magic_int])
                else:
                    co = xdis.unmarshal.load_code(fp, magic_int, code_objects)
                pass
            else:
                co = None
        except NotImplementedError:
            raise
        except Exception:
            # Print the original traceback, then report the failure as an
            # ImportError that names the file.
            kind, msg = sys.exc_info()[0:2]
            import traceback

            traceback.print_exc()
            raise ImportError(f"Ill-formed bytecode file {filename}\n{kind}; {msg}")

    finally:
        # Runs on every exit path, including the early dropbox return.
        fp.close()

    return (
        tuple_version,
        timestamp,
        magic_int,
        co,
        is_pypy(magic_int, filename),
        source_size,
        sip_hash,
    )
  318. def write_bytecode_file(
  319. bytecode_path, code_obj, magic_int, compilation_ts=None, filesize: int=0
  320. ) -> None:
  321. """Write bytecode file _bytecode_path_, with code for having Python
  322. magic_int (i.e. bytecode associated with some version of Python)
  323. """
  324. fp = open(bytecode_path, "wb")
  325. version = py_str2tuple(magicint2version[magic_int])
  326. if version >= (3, 0):
  327. fp.write(pack("<Hcc", magic_int, b"\r", b"\n"))
  328. if version >= (3, 7): # pep552 bytes
  329. fp.write(pack("<I", 0)) # pep552 bytes
  330. else:
  331. fp.write(pack("<Hcc", magic_int, b"\r", b"\n"))
  332. if compilation_ts:
  333. if isinstance(compilation_ts, datetime):
  334. fp.write(pack("<I", int(compilation_ts.timestamp())))
  335. elif isinstance(compilation_ts, int):
  336. fp.write(pack("<I", compilation_ts))
  337. else:
  338. raise TypeError("Timestamp must be a datetime, int or None")
  339. else:
  340. fp.write(pack("<I", int(datetime.now().timestamp())))
  341. if version >= (3, 3):
  342. # In Python 3.3+, these 4 bytes are the size of the source code_obj file (mod 2^32)
  343. fp.write(pack("<I", filesize))
  344. if isinstance(code_obj, types.CodeType):
  345. fp.write(marshal.dumps(code_obj))
  346. else:
  347. fp.write(xdis.marsh.dumps(code_obj))
  348. fp.close()
  349. if __name__ == "__main__":
  350. co = load_file(__file__)
  351. obj_path = check_object_path(__file__)
  352. version, timestamp, magic_int, co2, pypy, source_size, sip_hash = load_module(
  353. obj_path
  354. )
  355. print("version", version, "magic int", magic_int, "is_pypy", pypy)
  356. if timestamp is not None:
  357. print(datetime.fromtimestamp(timestamp))
  358. if source_size is not None:
  359. print("source size mod 2**32: %d" % source_size)
  360. if sip_hash is not None:
  361. print(f"Sip Hash: 0x{sip_hash:x}")
  362. assert co == co2