main.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. # Copyright (C) 2018-2025 Rocky Bernstein <rocky@gnu.org>
  2. #
  3. # This program is free software: you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation, either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. import ast
  16. import datetime
  17. import os
  18. import os.path as osp
  19. import py_compile
  20. import subprocess
  21. import sys
  22. import tempfile
  23. from typing import Any, Optional, TextIO, Tuple
  24. from xdis import iscode
  25. from xdis.load import load_module
  26. from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE, version_tuple_to_str
  27. from uncompyle6.code_fns import check_object_path
  28. from uncompyle6.parser import ParserError
  29. from uncompyle6.semantics.fragments import code_deparse as code_deparse_fragments
  30. from uncompyle6.semantics.linemap import deparse_code_with_map
  31. from uncompyle6.semantics.pysource import (
  32. PARSER_DEFAULT_DEBUG,
  33. SourceWalkerError,
  34. code_deparse,
  35. )
  36. from uncompyle6.version import __version__
  37. # from uncompyle6.linenumbers import line_number_mapping
  38. def _get_outstream(outfile: str) -> Any:
  39. """
  40. Return an opened output file descriptor for ``outfile``.
  41. """
  42. dir_name = osp.dirname(outfile)
  43. failed_file = outfile + "_failed"
  44. if osp.exists(failed_file):
  45. os.remove(failed_file)
  46. try:
  47. os.makedirs(dir_name)
  48. except OSError:
  49. pass
  50. return open(outfile, mode="w", encoding="utf-8")
  51. def syntax_check(filename: str) -> bool:
  52. with open(filename) as f:
  53. source = f.read()
  54. valid = True
  55. try:
  56. ast.parse(source)
  57. except SyntaxError:
  58. valid = False
  59. return valid
  60. def decompile(
  61. co,
  62. bytecode_version: Tuple[int] = PYTHON_VERSION_TRIPLE,
  63. out: Optional[TextIO] = sys.stdout,
  64. showasm: Optional[str] = None,
  65. showast={},
  66. timestamp=None,
  67. showgrammar=False,
  68. source_encoding=None,
  69. code_objects={},
  70. source_size=None,
  71. is_pypy: bool = False,
  72. magic_int=None,
  73. mapstream=None,
  74. do_fragments=False,
  75. compile_mode="exec",
  76. start_offset: int = 0,
  77. stop_offset: int = -1,
  78. ) -> Any:
  79. """
  80. ingests and deparses a given code block 'co'
  81. if `bytecode_version` is None, use the current Python interpreter
  82. version.
  83. Caller is responsible for closing `out` and `mapstream`
  84. """
  85. if bytecode_version is None:
  86. bytecode_version = PYTHON_VERSION_TRIPLE
  87. # store final output stream for case of error
  88. real_out = out or sys.stdout
  89. def write(s):
  90. s += "\n"
  91. real_out.write(s)
  92. assert iscode(co), f"""{co} does not smell like code"""
  93. co_pypy_str = "PyPy " if is_pypy else ""
  94. run_pypy_str = "PyPy " if IS_PYPY else ""
  95. sys_version_lines = sys.version.split("\n")
  96. if source_encoding:
  97. write(f"# -*- coding: {source_encoding} -*-")
  98. write(
  99. "# uncompyle6 version %s\n"
  100. "# %sPython bytecode version base %s%s\n# Decompiled from: %sPython %s"
  101. % (
  102. __version__,
  103. co_pypy_str,
  104. version_tuple_to_str(bytecode_version),
  105. " (%s)" % str(magic_int) if magic_int else "",
  106. run_pypy_str,
  107. "\n# ".join(sys_version_lines),
  108. )
  109. )
  110. if co.co_filename:
  111. write(f"# Embedded file name: {co.co_filename}")
  112. if timestamp:
  113. write(f"# Compiled at: {datetime.datetime.fromtimestamp(timestamp)}")
  114. if source_size:
  115. write("# Size of source mod 2**32: %d bytes" % source_size)
  116. grammar = dict(PARSER_DEFAULT_DEBUG)
  117. if showgrammar:
  118. grammar["reduce"] = True
  119. debug_opts = {"asm": showasm, "tree": showast, "grammar": grammar}
  120. try:
  121. if mapstream:
  122. if isinstance(mapstream, str):
  123. mapstream = _get_outstream(mapstream)
  124. debug_opts = {"asm": showasm, "tree": showast, "grammar": grammar}
  125. deparsed = deparse_code_with_map(
  126. co=co,
  127. out=out,
  128. version=bytecode_version,
  129. code_objects=code_objects,
  130. is_pypy=is_pypy,
  131. debug_opts=debug_opts,
  132. )
  133. header_count = 3 + len(sys_version_lines)
  134. if deparsed is not None:
  135. linemap = [
  136. (line_no, deparsed.source_linemap[line_no] + header_count)
  137. for line_no in sorted(deparsed.source_linemap.keys())
  138. ]
  139. mapstream.write(f"\n\n# {linemap}\n")
  140. else:
  141. if do_fragments:
  142. deparse_fn = code_deparse_fragments
  143. else:
  144. deparse_fn = code_deparse
  145. deparsed = deparse_fn(
  146. co,
  147. out,
  148. bytecode_version,
  149. is_pypy=is_pypy,
  150. debug_opts=debug_opts,
  151. compile_mode=compile_mode,
  152. start_offset=start_offset,
  153. stop_offset=stop_offset,
  154. )
  155. pass
  156. real_out.write("\n")
  157. return deparsed
  158. except SourceWalkerError as e:
  159. # deparsing failed
  160. raise SourceWalkerError(str(e))
  161. def compile_file(source_path: str) -> str:
  162. if source_path.endswith(".py"):
  163. basename = source_path[:-3]
  164. else:
  165. basename = source_path
  166. if hasattr(sys, "pypy_version_info"):
  167. bytecode_path = f"{basename}-pypy{version_tuple_to_str()}.pyc"
  168. else:
  169. bytecode_path = f"{basename}-{version_tuple_to_str()}.pyc"
  170. print(f"compiling {source_path} to {bytecode_path}")
  171. py_compile.compile(source_path, bytecode_path, "exec")
  172. return bytecode_path
  173. def decompile_file(
  174. filename: str,
  175. outstream: Optional[TextIO] = None,
  176. showasm: Optional[str] = None,
  177. showast={},
  178. showgrammar=False,
  179. source_encoding=None,
  180. mapstream=None,
  181. do_fragments=False,
  182. start_offset=0,
  183. stop_offset=-1,
  184. ) -> Any:
  185. """
  186. decompile Python byte-code file (.pyc). Return objects to
  187. all of the deparsed objects found in `filename`.
  188. """
  189. filename = check_object_path(filename)
  190. code_objects = {}
  191. version, timestamp, magic_int, co, is_pypy, source_size, _ = load_module(
  192. filename, code_objects
  193. )
  194. if isinstance(co, list):
  195. deparsed = []
  196. for bytecode in co:
  197. deparsed.append(
  198. decompile(
  199. bytecode,
  200. version,
  201. outstream,
  202. showasm,
  203. showast,
  204. timestamp,
  205. showgrammar,
  206. source_encoding,
  207. code_objects=code_objects,
  208. is_pypy=is_pypy,
  209. magic_int=magic_int,
  210. mapstream=mapstream,
  211. start_offset=start_offset,
  212. stop_offset=stop_offset,
  213. ),
  214. )
  215. else:
  216. deparsed = [
  217. decompile(
  218. co,
  219. version,
  220. outstream,
  221. showasm,
  222. showast,
  223. timestamp,
  224. showgrammar,
  225. source_encoding,
  226. code_objects=code_objects,
  227. source_size=source_size,
  228. is_pypy=is_pypy,
  229. magic_int=magic_int,
  230. mapstream=mapstream,
  231. do_fragments=do_fragments,
  232. compile_mode="exec",
  233. start_offset=start_offset,
  234. stop_offset=stop_offset,
  235. )
  236. ]
  237. return deparsed
  238. # FIXME: combine into an options parameter
  239. def main(
  240. in_base: str,
  241. out_base: Optional[str],
  242. compiled_files: list,
  243. source_files: list,
  244. outfile: Optional[str] = None,
  245. showasm: Optional[str] = None,
  246. showast={},
  247. do_verify: Optional[str] = None,
  248. showgrammar: bool = False,
  249. source_encoding=None,
  250. do_linemaps=False,
  251. do_fragments=False,
  252. start_offset: int = 0,
  253. stop_offset: int = -1,
  254. ) -> Tuple[int, int, int, int]:
  255. """
  256. in_base base directory for input files
  257. out_base base directory for output files (ignored when
  258. files list of filenames to be uncompyled (relative to in_base)
  259. outfile write output to this filename (overwrites out_base)
  260. For redirecting output to
  261. - <filename> outfile=<filename> (out_base is ignored)
  262. - files below out_base out_base=...
  263. - stdout out_base=None, outfile=None
  264. """
  265. tot_files = okay_files = failed_files = 0
  266. verify_failed_files = 0 if do_verify else 0
  267. current_outfile = outfile
  268. linemap_stream = None
  269. for source_path in source_files:
  270. compiled_files.append(compile_file(source_path))
  271. if len(compiled_files) == 0:
  272. return 0, 0, 0, 0
  273. for filename in compiled_files:
  274. infile = osp.join(in_base, filename)
  275. # print("XXX", infile)
  276. if not osp.exists(infile):
  277. sys.stderr.write(f"File '{infile}' doesn't exist. Skipped\n")
  278. continue
  279. if do_linemaps:
  280. linemap_stream = infile + ".pymap"
  281. pass
  282. # print (infile, file=sys.stderr)
  283. if outfile: # outfile was given as parameter
  284. outstream = _get_outstream(outfile)
  285. elif out_base is None:
  286. out_base = tempfile.mkdtemp(prefix="py-dis-")
  287. if do_verify and filename.endswith(".pyc"):
  288. current_outfile = osp.join(out_base, filename[0:-1])
  289. outstream = open(current_outfile, "w")
  290. else:
  291. outstream = sys.stdout
  292. if do_linemaps:
  293. linemap_stream = sys.stdout
  294. else:
  295. if filename.endswith(".pyc"):
  296. current_outfile = osp.join(out_base, filename[0:-1])
  297. else:
  298. current_outfile = osp.join(out_base, filename) + "_dis"
  299. pass
  300. pass
  301. outstream = _get_outstream(current_outfile)
  302. # print(current_outfile, file=sys.stderr)
  303. # Try to decompile the input file.
  304. try:
  305. deparsed_objects = decompile_file(
  306. infile,
  307. outstream,
  308. showasm,
  309. showast,
  310. showgrammar,
  311. source_encoding,
  312. linemap_stream,
  313. do_fragments,
  314. start_offset,
  315. stop_offset,
  316. )
  317. if do_fragments:
  318. for deparsed_object in deparsed_objects:
  319. last_mod = None
  320. offsets = deparsed_object.offsets
  321. for e in sorted(
  322. [k for k in offsets.keys() if isinstance(k[1], int)]
  323. ):
  324. if e[0] != last_mod:
  325. line = "=" * len(e[0])
  326. outstream.write(f"{line}\n{e[0]}\n{line}\n")
  327. last_mod = e[0]
  328. info = offsets[e]
  329. extract_info = deparsed_object.extract_node_info(info)
  330. outstream.write(f"{info.node.format().strip()}" + "\n")
  331. outstream.write(extract_info.selectedLine + "\n")
  332. outstream.write(extract_info.markerLine + "\n\n")
  333. pass
  334. if do_verify:
  335. for deparsed_object in deparsed_objects:
  336. deparsed_object.f.close()
  337. if PYTHON_VERSION_TRIPLE[:2] != deparsed_object.version[:2]:
  338. sys.stdout.write(
  339. f"\n# skipping running {deparsed_object.f.name}; it is "
  340. f"{version_tuple_to_str(deparsed_object.version, end=2)}, "
  341. "and we are "
  342. f"{version_tuple_to_str(PYTHON_VERSION_TRIPLE, end=2)}\n"
  343. )
  344. else:
  345. check_type = "syntax check"
  346. if do_verify == "run":
  347. check_type = "run"
  348. if PYTHON_VERSION_TRIPLE >= (3, 7):
  349. result = subprocess.run(
  350. [sys.executable, deparsed_object.f.name],
  351. capture_output=True,
  352. )
  353. valid = result.returncode == 0
  354. output = result.stdout.decode()
  355. if output:
  356. print(output)
  357. pass
  358. else:
  359. result = subprocess.run(
  360. [sys.executable, deparsed_object.f.name],
  361. )
  362. valid = result.returncode == 0
  363. pass
  364. if not valid:
  365. print(result.stderr.decode())
  366. else:
  367. valid = syntax_check(deparsed_object.f.name)
  368. if not valid:
  369. verify_failed_files += 1
  370. sys.stderr.write(
  371. f"\n# {check_type} failed on file {deparsed_object.f.name}\n"
  372. )
  373. # sys.stderr.write(f"Ran {deparsed_object.f.name}\n")
  374. pass
  375. tot_files += 1
  376. except (
  377. ValueError,
  378. SyntaxError,
  379. ParserError,
  380. SourceWalkerError,
  381. ImportError,
  382. ) as e:
  383. sys.stdout.write("\n")
  384. sys.stderr.write(f"\n# file {infile}\n# {e}\n")
  385. failed_files += 1
  386. tot_files += 1
  387. except KeyboardInterrupt:
  388. if outfile:
  389. outstream.close()
  390. os.remove(outfile)
  391. sys.stdout.write("\n")
  392. sys.stderr.write(f"\nLast file: {infile} ")
  393. raise
  394. except RuntimeError as e:
  395. sys.stdout.write(f"\n{str(e)}\n")
  396. if str(e).startswith("Unsupported Python"):
  397. sys.stdout.write("\n")
  398. sys.stderr.write(f"\n# Unsupported bytecode in file {infile}\n# {e}\n")
  399. failed_files += 1
  400. if current_outfile:
  401. outstream.close()
  402. os.rename(current_outfile, current_outfile + "_failed")
  403. else:
  404. sys.stderr.write("\n# %s" % sys.exc_info()[1])
  405. sys.stderr.write("\n# Can't uncompile %s\n" % infile)
  406. else:
  407. if outfile:
  408. outstream.close()
  409. os.remove(outfile)
  410. sys.stdout.write("\n")
  411. sys.stderr.write(f"\nLast file: {infile} ")
  412. raise
  413. # except:
  414. # failed_files += 1
  415. # if current_outfile:
  416. # outstream.close()
  417. # os.rename(current_outfile, current_outfile + "_failed")
  418. # else:
  419. # sys.stderr.write("\n# %s" % sys.exc_info()[1])
  420. # sys.stderr.write("\n# Can't uncompile %s\n" % infile)
  421. else: # uncompile successful
  422. if current_outfile:
  423. outstream.close()
  424. okay_files += 1
  425. pass
  426. else:
  427. okay_files += 1
  428. if not current_outfile:
  429. mess = "\n# okay decompiling"
  430. # mem_usage = __mem_usage()
  431. print(mess, infile)
  432. if current_outfile:
  433. sys.stdout.write(
  434. "%s -- %s\r"
  435. % (
  436. infile,
  437. status_msg(
  438. tot_files,
  439. okay_files,
  440. failed_files,
  441. verify_failed_files,
  442. ),
  443. )
  444. )
  445. try:
  446. # FIXME: Something is weird with Pypy here
  447. sys.stdout.flush()
  448. except Exception:
  449. pass
  450. if current_outfile:
  451. sys.stdout.write("\n")
  452. try:
  453. # FIXME: Something is weird with Pypy here
  454. sys.stdout.flush()
  455. except Exception:
  456. pass
  457. pass
  458. return tot_files, okay_files, failed_files, verify_failed_files
  459. # ---- main ----
  460. if sys.platform.startswith("linux") and os.uname()[2][:2] in ["2.", "3.", "4."]:
  461. def __mem_sage():
  462. mi = open("/proc/self/stat", "r")
  463. mu = mi.readline().split()[22]
  464. mi.close()
  465. return int(mu) / 1000000
  466. else:
  467. def __mem_usage():
  468. return ""
  469. def status_msg(tot_files, okay_files, failed_files, verify_failed_files):
  470. if tot_files == 1:
  471. if failed_files:
  472. return "\n# decompile failed"
  473. elif verify_failed_files:
  474. return "\n# decompile run verification failed"
  475. else:
  476. return "\n# Successfully decompiled file"
  477. pass
  478. pass
  479. mess = f"decompiled {tot_files} files: {okay_files} okay, {failed_files} failed"
  480. return mess