main.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. # Copyright (C) 2018-2025 Rocky Bernstein <rocky@gnu.org>
  2. #
  3. # This program is free software: you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation, either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. import ast
  16. import datetime
  17. import os
  18. import os.path as osp
  19. import py_compile
  20. import subprocess
  21. import sys
  22. import tempfile
  23. from typing import Any, Optional, TextIO, Tuple
  24. from xdis import iscode, load_module
  25. from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE, version_tuple_to_str
  26. from decompyle3.disas import check_object_path
  27. from decompyle3.parsers.parse_heads import ParserError
  28. from decompyle3.semantics import pysource
  29. from decompyle3.semantics.fragments import code_deparse as code_deparse_fragments
  30. from decompyle3.semantics.linemap import deparse_code_with_map
  31. from decompyle3.semantics.pysource import PARSER_DEFAULT_DEBUG, code_deparse
  32. from decompyle3.version import __version__
  33. # from decompyle3.linenumbers import line_number_mapping
  34. def _get_outstream(outfile: str) -> Any:
  35. dir = osp.dirname(outfile)
  36. failed_file = outfile + "_failed"
  37. if osp.exists(failed_file):
  38. os.remove(failed_file)
  39. try:
  40. os.makedirs(dir)
  41. except OSError:
  42. pass
  43. return open(outfile, mode="w", encoding="utf-8")
  44. def syntax_check(filename: str) -> bool:
  45. with open(filename) as f:
  46. source = f.read()
  47. valid = True
  48. try:
  49. ast.parse(source)
  50. except SyntaxError:
  51. valid = False
  52. return valid
  53. def decompile(
  54. co,
  55. bytecode_version: Tuple[int] = PYTHON_VERSION_TRIPLE,
  56. out: Optional[TextIO] = sys.stdout,
  57. showasm: Optional[str] = None,
  58. showast={},
  59. timestamp=None,
  60. showgrammar=False,
  61. source_encoding=None,
  62. code_objects={},
  63. source_size=None,
  64. is_pypy: bool = False,
  65. magic_int=None,
  66. mapstream=None,
  67. do_fragments=False,
  68. compile_mode="exec",
  69. start_offset: int = 0,
  70. stop_offset: int = -1,
  71. ) -> Any:
  72. """
  73. ingests and deparses a given code block 'co'
  74. if `bytecode_version` is None, use the current Python interpreter
  75. version.
  76. Caller is responsible for closing `out` and `mapstream`
  77. """
  78. if bytecode_version is None:
  79. bytecode_version = PYTHON_VERSION_TRIPLE
  80. # store final output stream for case of error
  81. real_out = out or sys.stdout
  82. def write(s):
  83. s += "\n"
  84. real_out.write(s)
  85. assert iscode(co), f"""{co} does not smell like code"""
  86. co_pypy_str = "PyPy " if is_pypy else ""
  87. run_pypy_str = "PyPy " if IS_PYPY else ""
  88. sys_version_lines = sys.version.split("\n")
  89. if source_encoding:
  90. write(f"# -*- coding: {source_encoding} -*-")
  91. write(
  92. "# decompyle3 version %s\n"
  93. "# %sPython bytecode version base %s%s\n# Decompiled from: %sPython %s"
  94. % (
  95. __version__,
  96. co_pypy_str,
  97. version_tuple_to_str(bytecode_version),
  98. " (%s)" % str(magic_int) if magic_int else "",
  99. run_pypy_str,
  100. "\n# ".join(sys_version_lines),
  101. )
  102. )
  103. if co.co_filename:
  104. write(f"# Embedded file name: {co.co_filename}")
  105. if timestamp:
  106. write(f"# Compiled at: {datetime.datetime.fromtimestamp(timestamp)}")
  107. if source_size:
  108. write("# Size of source mod 2**32: %d bytes" % source_size)
  109. grammar = dict(PARSER_DEFAULT_DEBUG)
  110. if showgrammar:
  111. grammar["reduce"] = True
  112. debug_opts = {"asm": showasm, "tree": showast, "grammar": grammar}
  113. try:
  114. if mapstream:
  115. if isinstance(mapstream, str):
  116. mapstream = _get_outstream(mapstream)
  117. debug_opts = {"asm": showasm, "tree": showast, "grammar": grammar}
  118. deparsed = deparse_code_with_map(
  119. co=co,
  120. out=out,
  121. version=bytecode_version,
  122. code_objects=code_objects,
  123. is_pypy=is_pypy,
  124. debug_opts=debug_opts,
  125. )
  126. header_count = 3 + len(sys_version_lines)
  127. if deparsed is not None:
  128. linemap = [
  129. (line_no, deparsed.source_linemap[line_no] + header_count)
  130. for line_no in sorted(deparsed.source_linemap.keys())
  131. ]
  132. mapstream.write("\n\n# %s\n" % linemap)
  133. else:
  134. if do_fragments:
  135. deparse_fn = code_deparse_fragments
  136. else:
  137. deparse_fn = code_deparse
  138. deparsed = deparse_fn(
  139. co,
  140. out,
  141. bytecode_version,
  142. is_pypy=is_pypy,
  143. debug_opts=debug_opts,
  144. compile_mode=compile_mode,
  145. start_offset=start_offset,
  146. stop_offset=stop_offset,
  147. )
  148. pass
  149. real_out.write("\n")
  150. return deparsed
  151. except pysource.SourceWalkerError as e:
  152. # deparsing failed
  153. raise pysource.SourceWalkerError(str(e))
  154. def compile_file(source_path: str) -> str:
  155. if source_path.endswith(".py"):
  156. basename = source_path[:-3]
  157. else:
  158. basename = source_path
  159. if hasattr(sys, "pypy_version_info"):
  160. bytecode_path = f"{basename}-pypy{version_tuple_to_str()}.pyc"
  161. else:
  162. bytecode_path = f"{basename}-{version_tuple_to_str()}.pyc"
  163. print(f"compiling {source_path} to {bytecode_path}")
  164. py_compile.compile(source_path, bytecode_path, "exec")
  165. return bytecode_path
  166. def decompile_file(
  167. filename: str,
  168. outstream: Optional[TextIO] = None,
  169. showasm: Optional[str] = None,
  170. showast={},
  171. showgrammar=dict(PARSER_DEFAULT_DEBUG),
  172. source_encoding=None,
  173. mapstream=None,
  174. do_fragments=False,
  175. start_offset=0,
  176. stop_offset=-1,
  177. ) -> Any:
  178. """
  179. decompile Python byte-code file (.pyc). Return objects to
  180. all of the deparsed objects found in `filename`.
  181. """
  182. filename = check_object_path(filename)
  183. code_objects = {}
  184. version, timestamp, magic_int, co, is_pypy, source_size, _ = load_module(
  185. filename, code_objects
  186. )
  187. if isinstance(co, list):
  188. deparsed = []
  189. for bytecode in co:
  190. deparsed.append(
  191. decompile(
  192. bytecode,
  193. version,
  194. outstream,
  195. showasm,
  196. showast,
  197. timestamp,
  198. showgrammar,
  199. source_encoding,
  200. code_objects=code_objects,
  201. is_pypy=is_pypy,
  202. magic_int=magic_int,
  203. mapstream=mapstream,
  204. start_offset=start_offset,
  205. stop_offset=stop_offset,
  206. ),
  207. )
  208. else:
  209. deparsed = [
  210. decompile(
  211. co,
  212. version,
  213. outstream,
  214. showasm,
  215. showast,
  216. timestamp,
  217. showgrammar,
  218. source_encoding,
  219. code_objects=code_objects,
  220. source_size=source_size,
  221. is_pypy=is_pypy,
  222. magic_int=magic_int,
  223. mapstream=mapstream,
  224. do_fragments=do_fragments,
  225. compile_mode="exec",
  226. start_offset=start_offset,
  227. stop_offset=stop_offset,
  228. )
  229. ]
  230. return deparsed
  231. # FIXME: combine into an options parameter
  232. def main(
  233. in_base: str,
  234. out_base: Optional[str],
  235. compiled_files: list,
  236. source_files: list,
  237. outfile: Optional[str] = None,
  238. showasm: Optional[str] = None,
  239. showast={},
  240. do_verify: Optional[str] = None,
  241. showgrammar: bool = False,
  242. source_encoding=None,
  243. do_linemaps=False,
  244. do_fragments=False,
  245. start_offset: int = 0,
  246. stop_offset: int = -1,
  247. ) -> Tuple[int, int, int, int]:
  248. """
  249. in_base base directory for input files
  250. out_base base directory for output files (ignored when
  251. files list of filenames to be uncompyled (relative to in_base)
  252. outfile write output to this filename (overwrites out_base)
  253. For redirecting output to
  254. - <filename> outfile=<filename> (out_base is ignored)
  255. - files below out_base out_base=...
  256. - stdout out_base=None, outfile=None
  257. """
  258. tot_files = okay_files = failed_files = 0
  259. verify_failed_files = 0 if do_verify else 0
  260. current_outfile = outfile
  261. linemap_stream = None
  262. for source_path in source_files:
  263. compiled_files.append(compile_file(source_path))
  264. for filename in compiled_files:
  265. infile = osp.join(in_base, filename)
  266. # print("XXX", infile)
  267. if not osp.exists(infile):
  268. sys.stderr.write(f"File '{infile}' doesn't exist. Skipped\n")
  269. continue
  270. if do_linemaps:
  271. linemap_stream = infile + ".pymap"
  272. pass
  273. # print (infile, file=sys.stderr)
  274. if outfile: # outfile was given as parameter
  275. outstream = _get_outstream(outfile)
  276. elif out_base is None:
  277. out_base = tempfile.mkdtemp(prefix="py-dis-")
  278. if do_verify and filename.endswith(".pyc"):
  279. current_outfile = osp.join(out_base, filename[0:-1])
  280. outstream = open(current_outfile, "w")
  281. else:
  282. outstream = sys.stdout
  283. if do_linemaps:
  284. linemap_stream = sys.stdout
  285. else:
  286. if filename.endswith(".pyc"):
  287. current_outfile = osp.join(out_base, filename[0:-1])
  288. else:
  289. current_outfile = osp.join(out_base, filename) + "_dis"
  290. pass
  291. pass
  292. outstream = _get_outstream(current_outfile)
  293. # print(current_outfile, file=sys.stderr)
  294. # Try to decompile the input file.
  295. try:
  296. deparsed_objects = decompile_file(
  297. infile,
  298. outstream,
  299. showasm,
  300. showast,
  301. showgrammar,
  302. source_encoding,
  303. linemap_stream,
  304. do_fragments,
  305. start_offset,
  306. stop_offset,
  307. )
  308. if do_fragments:
  309. for deparsed_object in deparsed_objects:
  310. last_mod = None
  311. offsets = deparsed_object.offsets
  312. for e in sorted(
  313. [k for k in offsets.keys() if isinstance(k[1], int)]
  314. ):
  315. if e[0] != last_mod:
  316. line = "=" * len(e[0])
  317. outstream.write(f"{line}\n{e[0]}\n{line}\n")
  318. last_mod = e[0]
  319. info = offsets[e]
  320. extract_info = deparse_object.extract_node_info(info)
  321. outstream.write(f"{info.node.format().strip()}" + "\n")
  322. outstream.write(extract_info.selectedLine + "\n")
  323. outstream.write(extract_info.markerLine + "\n\n")
  324. pass
  325. pass
  326. if do_verify:
  327. for deparsed_object in deparsed_objects:
  328. deparsed_object.f.close()
  329. if PYTHON_VERSION_TRIPLE[:2] != deparsed_object.version[:2]:
  330. sys.stdout.write(
  331. f"\n# skipping running {deparsed_object.f.name}; it is "
  332. f"{version_tuple_to_str(deparsed_object.version, end=2)}, "
  333. "and we are "
  334. f"{version_tuple_to_str(PYTHON_VERSION_TRIPLE, end=2)}\n"
  335. )
  336. else:
  337. check_type = "syntax check"
  338. if do_verify == "run":
  339. check_type = "run"
  340. result = subprocess.run(
  341. [sys.executable, deparsed_object.f.name],
  342. capture_output=True,
  343. )
  344. valid = result.returncode == 0
  345. output = result.stdout.decode()
  346. if output:
  347. print(output)
  348. pass
  349. if not valid:
  350. print(result.stderr.decode())
  351. else:
  352. valid = syntax_check(deparsed_object.f.name)
  353. if not valid:
  354. verify_failed_files += 1
  355. sys.stderr.write(
  356. f"\n# {check_type} failed on file {deparsed_object.f.name}\n"
  357. )
  358. # sys.stderr.write(f"Ran {deparsed_object.f.name}\n")
  359. pass
  360. tot_files += 1
  361. except (ValueError, SyntaxError, ParserError, pysource.SourceWalkerError) as e:
  362. sys.stdout.write("\n")
  363. sys.stderr.write(f"\n# file {infile}\n# {e}\n")
  364. failed_files += 1
  365. tot_files += 1
  366. except KeyboardInterrupt:
  367. if outfile:
  368. outstream.close()
  369. os.remove(outfile)
  370. sys.stdout.write("\n")
  371. sys.stderr.write(f"\nLast file: {infile} ")
  372. raise
  373. except RuntimeError as e:
  374. sys.stdout.write(f"\n{str(e)}\n")
  375. if str(e).startswith("Unsupported Python"):
  376. sys.stdout.write("\n")
  377. sys.stderr.write(f"\n# Unsupported bytecode in file {infile}\n# {e}\n")
  378. else:
  379. if outfile:
  380. outstream.close()
  381. os.remove(outfile)
  382. sys.stdout.write("\n")
  383. sys.stderr.write(f"\nLast file: {infile} ")
  384. raise
  385. # except:
  386. # failed_files += 1
  387. # if current_outfile:
  388. # outstream.close()
  389. # os.rename(current_outfile, current_outfile + "_failed")
  390. # else:
  391. # sys.stderr.write("\n# %s" % sys.exc_info()[1])
  392. # sys.stderr.write("\n# Can't uncompile %s\n" % infile)
  393. else: # uncompile successful
  394. if current_outfile:
  395. outstream.close()
  396. okay_files += 1
  397. pass
  398. else:
  399. okay_files += 1
  400. if not current_outfile:
  401. mess = "\n# okay decompiling"
  402. print(mess, infile)
  403. if current_outfile:
  404. sys.stdout.write(
  405. "%s -- %s\r"
  406. % (
  407. infile,
  408. status_msg(
  409. do_verify,
  410. tot_files,
  411. okay_files,
  412. failed_files,
  413. verify_failed_files,
  414. ),
  415. )
  416. )
  417. try:
  418. # FIXME: Something is weird with Pypy here
  419. sys.stdout.flush()
  420. except Exception:
  421. pass
  422. if current_outfile:
  423. sys.stdout.write("\n")
  424. try:
  425. # FIXME: Something is weird with Pypy here
  426. sys.stdout.flush()
  427. except Exception:
  428. pass
  429. pass
  430. return tot_files, okay_files, failed_files, verify_failed_files
  431. # ---- main ----
  432. def status_msg(
  433. do_verify: Optional[str],
  434. tot_files: int,
  435. okay_files: int,
  436. failed_files: int,
  437. verify_failed_files: Optional[int],
  438. ):
  439. if tot_files == 1:
  440. if failed_files:
  441. return "\n# decompile failed"
  442. elif verify_failed_files:
  443. return "\n# decompile run verification failed"
  444. elif do_verify:
  445. return "\n# Successfully decompiled and ran or syntax-checked all files"
  446. else:
  447. return "\n# Successfully decompiled all files"
  448. pass
  449. pass
  450. mess = f"decompiled {tot_files} files: {okay_files} okay, {failed_files} failed"
  451. if do_verify:
  452. mess += f", {verify_failed_files} failed verification"
  453. return mess