verify.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. #
  2. # (C) Copyright 2015-2018, 2020-2021, 2023 by Rocky Bernstein
  3. # (C) Copyright 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. """
  18. byte-code verification
  19. """
  20. from __future__ import print_function
  21. import operator, sys
  22. import xdis.std as dis
  23. from subprocess import call
  24. import uncompyle6
  25. from uncompyle6.scanner import Token as ScannerToken, get_scanner
  26. from xdis import iscode, load_file, load_module, pretty_code_flags, PYTHON_MAGIC_INT
  27. truediv = operator.truediv
  28. from functools import reduce
  29. def code_equal(a, b):
  30. return a.co_code == b.co_code
  31. BIN_OP_FUNCS = {
  32. "BINARY_POWER": operator.pow,
  33. "BINARY_MULTIPLY": operator.mul,
  34. "BINARY_DIVIDE": truediv,
  35. "BINARY_FLOOR_DIVIDE": operator.floordiv,
  36. "BINARY_TRUE_DIVIDE": operator.truediv,
  37. "BINARY_MODULO": operator.mod,
  38. "BINARY_ADD": operator.add,
  39. "BINARY_SUBRACT": operator.sub,
  40. "BINARY_LSHIFT": operator.lshift,
  41. "BINARY_RSHIFT": operator.rshift,
  42. "BINARY_AND": operator.and_,
  43. "BINARY_XOR": operator.xor,
  44. "BINARY_OR": operator.or_,
  45. }
  46. JUMP_OPS = None
  47. # --- exceptions ---
  48. class VerifyCmpError(Exception):
  49. pass
  50. class CmpErrorConsts(VerifyCmpError):
  51. """Exception to be raised when consts differ."""
  52. def __init__(self, name, index):
  53. self.name = name
  54. self.index = index
  55. def __str__(self):
  56. return "Compare Error within Consts of %s at index %i" % (
  57. repr(self.name),
  58. self.index,
  59. )
  60. class CmpErrorConstsType(VerifyCmpError):
  61. """Exception to be raised when consts differ."""
  62. def __init__(self, name, index):
  63. self.name = name
  64. self.index = index
  65. def __str__(self):
  66. return "Consts type differ in %s at index %i" % (repr(self.name), self.index)
  67. class CmpErrorConstsLen(VerifyCmpError):
  68. """Exception to be raised when length of co_consts differs."""
  69. def __init__(self, name, consts1, consts2):
  70. self.name = name
  71. self.consts = (consts1, consts2)
  72. def __str__(self):
  73. return "Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n" % (
  74. repr(self.name),
  75. len(self.consts[0]),
  76. repr(self.consts[0]),
  77. len(self.consts[1]),
  78. repr(self.consts[1]),
  79. )
  80. class CmpErrorCode(VerifyCmpError):
  81. """Exception to be raised when code differs."""
  82. def __init__(self, name, index, token1, token2, tokens1, tokens2):
  83. self.name = name
  84. self.index = index
  85. self.token1 = token1
  86. self.token2 = token2
  87. self.tokens = [tokens1, tokens2]
  88. def __str__(self):
  89. s = reduce(
  90. lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
  91. list(map(lambda a, b: (a, b), self.tokens[0], self.tokens[1])),
  92. "Code differs in %s\n" % str(self.name),
  93. )
  94. return (
  95. "Code differs in %s at offset %s [%s] != [%s]\n\n"
  96. % (repr(self.name), self.index, repr(self.token1), repr(self.token2))
  97. ) + s
  98. class CmpErrorCodeLen(VerifyCmpError):
  99. """Exception to be raised when code length differs."""
  100. def __init__(self, name, tokens1, tokens2):
  101. self.name = name
  102. self.tokens = [tokens1, tokens2]
  103. def __str__(self):
  104. return reduce(
  105. lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
  106. list(map(lambda a, b: (a, b), self.tokens[0], self.tokens[1])),
  107. "Code len differs in %s\n" % str(self.name),
  108. )
  109. class CmpErrorMember(VerifyCmpError):
  110. """Exception to be raised when other members differ."""
  111. def __init__(self, name, member, data1, data2):
  112. self.name = name
  113. self.member = member
  114. self.data = (data1, data2)
  115. def __str__(self):
  116. return "Member %s differs in %s:\n\t%s\n\t%s\n" % (
  117. repr(self.member),
  118. repr(self.name),
  119. repr(self.data[0]),
  120. repr(self.data[1]),
  121. )
  122. # --- compare ---
# Names of code-object attributes that cmp_code_objects() skips instead of
# comparing.
__IGNORE_CODE_MEMBERS__ = [
    "co_filename",
    "co_firstlineno",
    "co_lnotab",
    "co_stacksize",
    "co_names",
]
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
    """
    Compare two code-objects.

    This is the main part of this module.

    The ``co_*`` attributes of code_obj1 are visited in sorted order and
    each one is compared with the same attribute of code_obj2.  Nothing is
    returned; a difference is reported by raising an exception.

    Parameters:
      version    passed to get_scanner() and to the recursive calls.
      is_pypy    passed to get_scanner(); when true, bit 0x0100 of
                 code_obj2.co_flags is ignored.
      code_obj1  first code object; must satisfy iscode().
      code_obj2  second code object; must satisfy iscode().
      verify     mode string.  Attributes are compared only when this is
                 exactly "verify"; with any other value every attribute
                 is skipped.
      name       name of the enclosing scope; combined with
                 code_obj1.co_name to form the name used in error messages.

    Raises:
      AssertionError   an argument is not a code object, or dir() of the
                       two objects differs.
      CmpErrorMember   co_flags (after masking) or any attribute without
                       special handling differs.
      CmpErrorCode, CmpErrorCodeLen
                       raised by the token-by-token co_code comparison.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(
        code_obj1
    ), "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(
        code_obj2
    ), "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    # NOTE(review): every Python 3 value is an instance of object, so the
    # "old style" branches in this function look unreachable there.
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build the dotted name used in error messages.
    if name == "__main__":
        name = code_obj1.co_name
    else:
        name = "%s.%s" % (name, code_obj1.co_name)
        # an empty parent name combined with a co_name of "?" gives ".?"
        if name == ".?":
            name = "__main__"

    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is not equal
        # if this compare succeeds, simply return
        # return
        # The early return is commented out, so the result of code_equal()
        # is currently unused and the full comparison below always runs.
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith("co_")]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        # Skip ignored attributes, and skip everything unless the mode is
        # exactly "verify".
        if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
            pass
        elif member == "co_code":
            # NOTE(review): this branch is only reached when
            # verify == "verify" (see the test above), so the condition
            # below is always true and the token comparison that follows
            # never runs as written -- confirm whether that is intended.
            if verify != "strong":
                continue
            scanner = get_scanner(version, is_pypy, show_asm=False)

            global JUMP_OPS
            # NOTE(review): JUMP_OPS starts out as None at module level and
            # list(None) raises TypeError; each call would also append one
            # more "JUMP_BACK".  Confirm where the jump-opcode list is
            # supposed to come from.
            JUMP_OPS = list(JUMP_OPS) + ["JUMP_BACK"]

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            targets1 = dis.findlabels(code_obj1.co_code)
            # COME_FROM tokens are dropped from both streams before comparing
            tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
            tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]

            # i1 / i2 are the current positions in tokens1 / tokens2.
            i1 = 0
            i2 = 0
            # offset in tokens1 -> offset of the token it was paired with
            offset_map = {}
            # tokens1 jump destination -> [(i1, i2, expected tokens2 offset)]
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # tokens2 is exhausted.  That is accepted only when
                    # tokens1 is exactly two tokens longer and ends with
                    # RETURN_VALUE, LOAD_CONST None, RETURN_VALUE.
                    if (
                        len(tokens1) == len(tokens2) + 2
                        and tokens1[-1].kind == "RETURN_VALUE"
                        and tokens1[-2].kind == "LOAD_CONST"
                        and tokens1[-2].pattr is None
                        and tokens1[-3].kind == "RETURN_VALUE"
                    ):
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Check the jumps recorded earlier whose destination is the
                # current tokens1 offset.
                for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(
                            name,
                            tokens1[idx1].offset,
                            tokens1[idx1],
                            tokens2[idx2],
                            tokens1,
                            tokens2,
                        )

                if tokens1[i1].kind != tokens2[i2].kind:
                    # The kinds differ; the branches below list the
                    # differences that are tolerated.  A branch that ends in
                    # "continue" has already advanced i1/i2 itself.
                    # NOTE(review): the next test requires both kinds to be
                    # "LOAD_CONST", which contradicts the enclosing
                    # "kinds differ" test, so this sub-tree looks
                    # unreachable -- confirm.
                    if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
                        # count the consecutive LOAD_CONSTs in tokens1
                        i = 1
                        while tokens1[i1 + i].kind == "LOAD_CONST":
                            i += 1
                        if tokens1[i1 + i].kind.startswith(
                            ("BUILD_TUPLE", "BUILD_LIST")
                        ) and i == int(tokens1[i1 + i].kind.split("_")[-1]):
                            # i constants followed by BUILD_TUPLE_i or
                            # BUILD_LIST_i, compared with one constant in
                            # tokens2
                            t = tuple([elem.pattr for elem in tokens1[i1 : i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(
                                    name,
                                    tokens1[i1].offset,
                                    tokens1[i1],
                                    tokens2[i2],
                                    tokens1,
                                    tokens2,
                                )
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif (
                            i == 2
                            and tokens1[i1 + i].kind == "ROT_TWO"
                            and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"
                        ):
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            # two constants and a binary operator, compared
                            # with the single constant in tokens2
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if (
                                f(tokens1[i1].pattr, tokens1[i1 + 1].pattr)
                                == tokens2[i2].pattr
                            ):
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == "UNARY_NOT":
                        # UNARY_NOT followed by a conditional jump is
                        # accepted against the opposite conditional jump
                        if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
                                i1 += 2
                                i2 += 1
                                continue
                    elif (
                        tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
                        and tokens1[i1 - 1].kind == "RETURN_VALUE"
                        and tokens2[i2 - 1].kind in ("RETURN_VALUE", "RETURN_END_IF")
                        and int(tokens1[i1].offset) not in targets1
                    ):
                        # a jump right after a return that is not itself a
                        # jump target: skip it in tokens1 only
                        i1 += 1
                        continue
                    elif (
                        tokens1[i1].kind == "JUMP_BACK"
                        and tokens2[i2].kind == "CONTINUE"
                    ):
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif (
                        tokens1[i1].kind == "JUMP_FORWARD"
                        and tokens2[i2].kind == "JUMP_BACK"
                        and tokens1[i1 + 1].kind == "JUMP_BACK"
                        and tokens2[i2 + 1].kind == "JUMP_BACK"
                        and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3
                    ):
                        if int(tokens1[i1].pattr) == int(tokens1[i1 + 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    elif (
                        tokens1[i1].kind == "LOAD_NAME"
                        and tokens2[i2].kind == "LOAD_CONST"
                        and tokens1[i1].pattr == "None"
                        and tokens2[i2].pattr is None
                    ):
                        pass
                    elif (
                        tokens1[i1].kind == "LOAD_GLOBAL"
                        and tokens2[i2].kind == "LOAD_NAME"
                        and tokens1[i1].pattr == tokens2[i2].pattr
                    ):
                        pass
                    elif (
                        tokens1[i1].kind == "LOAD_ASSERT"
                        and tokens2[i2].kind == "LOAD_NAME"
                        and tokens1[i1].pattr == tokens2[i2].pattr
                    ):
                        pass
                    elif (
                        tokens1[i1].kind == "RETURN_VALUE"
                        and tokens2[i2].kind == "RETURN_END_IF"
                    ):
                        pass
                    elif (
                        tokens1[i1].kind == "BUILD_TUPLE_0" and tokens2[i2].pattr == ()
                    ):
                        pass
                    else:
                        raise CmpErrorCode(
                            name,
                            tokens1[i1].offset,
                            tokens1[i1],
                            tokens2[i2],
                            tokens1,
                            tokens2,
                        )
                elif (
                    tokens1[i1].kind in JUMP_OPS
                    and tokens1[i1].pattr != tokens2[i2].pattr
                ):
                    # Same jump opcode on both sides, different destination.
                    if tokens1[i1].kind == "JUMP_BACK":
                        # the destination has already been visited, so it
                        # can be checked against offset_map right away
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(
                                name,
                                tokens1[i1].offset,
                                tokens1[i1],
                                tokens2[i2],
                                tokens1,
                                tokens2,
                            )
                    else:
                        # import pdb; pdb.set_trace()
                        # Record the jump so it is checked once the loop
                        # reaches its destination in tokens1.
                        # NOTE(review): dest2 is only assigned in the
                        # JUMP_BACK branch above; here it is either unbound
                        # (the NameError is swallowed below) or left over
                        # from an earlier iteration -- confirm.
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]
                        except Exception:
                            pass

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == "co_consts":
            # partial optimization can make the co_consts look different,
            # so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts if hasattr(c, "co_consts"))
            codes2 = (c for c in code_obj2.co_consts if hasattr(c, "co_consts"))

            # zip() pairs the nested code objects in order and stops at the
            # shorter sequence; surplus ones are not compared.
            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
        elif member == "co_flags":
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000A0
            flags2 &= ~0x000000A0
            if flags1 != flags2:
                raise CmpErrorMember(
                    name,
                    "co_flags",
                    pretty_code_flags(flags1),
                    pretty_code_flags(flags2),
                )
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(
                    name, member, getattr(code_obj1, member), getattr(code_obj2, member)
                )
  395. class Token(ScannerToken):
  396. """Token class with changed semantics for 'cmp()'."""
  397. def __cmp__(self, o):
  398. t = self.kind # shortcut
  399. if t == "BUILD_TUPLE_0" and o.kind == "LOAD_CONST" and o.pattr == ():
  400. return 0
  401. if t == "COME_FROM" == o.kind:
  402. return 0
  403. if t == "PRINT_ITEM_CONT" and o.kind == "PRINT_ITEM":
  404. return 0
  405. if t == "RETURN_VALUE" and o.kind == "RETURN_END_IF":
  406. return 0
  407. if t == "JUMP_IF_FALSE_OR_POP" and o.kind == "POP_JUMP_IF_FALSE":
  408. return 0
  409. if JUMP_OPS and t in JUMP_OPS:
  410. # ignore offset
  411. return t == o.kind
  412. return (t == o.kind) or self.pattr == o.pattr
  413. def __repr__(self):
  414. return "%s %s (%s)" % (str(self.kind), str(self.attr), repr(self.pattr))
  415. def __str__(self):
  416. return "%s\t%-17s %r" % (self.offset, self.kind, self.pattr)
  417. def compare_code_with_srcfile(pyc_filename, src_filename, verify):
  418. """Compare a .pyc with a source code file. If everything is okay, None
  419. is returned. Otherwise a string message describing the mismatch is returned.
  420. """
  421. (
  422. version,
  423. timestamp,
  424. magic_int,
  425. code_obj1,
  426. is_pypy,
  427. source_size,
  428. sip_hash,
  429. ) = load_module(pyc_filename)
  430. if magic_int != PYTHON_MAGIC_INT:
  431. msg = (
  432. "Can't compare code - Python is running with magic %s, but code is magic %s "
  433. % (PYTHON_MAGIC_INT, magic_int)
  434. )
  435. return msg
  436. try:
  437. code_obj2 = load_file(src_filename)
  438. except SyntaxError as e:
  439. # src_filename can be the first of a group sometimes
  440. return str(e).replace(src_filename, pyc_filename)
  441. cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify)
  442. if verify == "verify-run":
  443. try:
  444. retcode = call("%s %s" % (sys.executable, src_filename), shell=True)
  445. if retcode != 0:
  446. return "Child was terminated by signal %d" % retcode
  447. pass
  448. except OSError as e:
  449. return "Execution failed: %s" % e
  450. pass
  451. return None
  452. def compare_files(pyc_filename1, pyc_filename2, verify):
  453. """Compare two .pyc files."""
  454. (
  455. version1,
  456. timestamp,
  457. magic_int1,
  458. code_obj1,
  459. is_pypy,
  460. source_size,
  461. sip_hash,
  462. ) = uncompyle6.load_module(pyc_filename1)
  463. (
  464. version2,
  465. timestamp,
  466. magic_int2,
  467. code_obj2,
  468. is_pypy,
  469. source_size,
  470. sip_hash,
  471. ) = uncompyle6.load_module(pyc_filename2)
  472. if (magic_int1 != magic_int2) and verify == "verify":
  473. verify = "weak_verify"
  474. cmp_code_objects(version1, is_pypy, code_obj1, code_obj2, verify)
  475. if __name__ == "__main__":
  476. t1 = Token("LOAD_CONST", None, "code_object _expandLang", 52)
  477. t2 = Token("LOAD_CONST", -421, "code_object _expandLang", 55)
  478. print(repr(t1))
  479. print(repr(t2))
  480. print(t1.kind == t2.kind, t1.attr == t2.attr)