| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546 |
- #
- # (C) Copyright 2015-2018, 2020-2021, 2023 by Rocky Bernstein
- # (C) Copyright 2000-2002 by hartmut Goebel <h.goebel@crazy-compilers.com>
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
- """
- byte-code verification
- """
- from __future__ import print_function
- import operator, sys
- import xdis.std as dis
- from subprocess import call
- import uncompyle6
- from uncompyle6.scanner import Token as ScannerToken, get_scanner
- from xdis import iscode, load_file, load_module, pretty_code_flags, PYTHON_MAGIC_INT
- truediv = operator.truediv
- from functools import reduce
- def code_equal(a, b):
- return a.co_code == b.co_code
- BIN_OP_FUNCS = {
- "BINARY_POWER": operator.pow,
- "BINARY_MULTIPLY": operator.mul,
- "BINARY_DIVIDE": truediv,
- "BINARY_FLOOR_DIVIDE": operator.floordiv,
- "BINARY_TRUE_DIVIDE": operator.truediv,
- "BINARY_MODULO": operator.mod,
- "BINARY_ADD": operator.add,
- "BINARY_SUBRACT": operator.sub,
- "BINARY_LSHIFT": operator.lshift,
- "BINARY_RSHIFT": operator.rshift,
- "BINARY_AND": operator.and_,
- "BINARY_XOR": operator.xor,
- "BINARY_OR": operator.or_,
- }
- JUMP_OPS = None
- # --- exceptions ---
- class VerifyCmpError(Exception):
- pass
- class CmpErrorConsts(VerifyCmpError):
- """Exception to be raised when consts differ."""
- def __init__(self, name, index):
- self.name = name
- self.index = index
- def __str__(self):
- return "Compare Error within Consts of %s at index %i" % (
- repr(self.name),
- self.index,
- )
- class CmpErrorConstsType(VerifyCmpError):
- """Exception to be raised when consts differ."""
- def __init__(self, name, index):
- self.name = name
- self.index = index
- def __str__(self):
- return "Consts type differ in %s at index %i" % (repr(self.name), self.index)
- class CmpErrorConstsLen(VerifyCmpError):
- """Exception to be raised when length of co_consts differs."""
- def __init__(self, name, consts1, consts2):
- self.name = name
- self.consts = (consts1, consts2)
- def __str__(self):
- return "Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n" % (
- repr(self.name),
- len(self.consts[0]),
- repr(self.consts[0]),
- len(self.consts[1]),
- repr(self.consts[1]),
- )
- class CmpErrorCode(VerifyCmpError):
- """Exception to be raised when code differs."""
- def __init__(self, name, index, token1, token2, tokens1, tokens2):
- self.name = name
- self.index = index
- self.token1 = token1
- self.token2 = token2
- self.tokens = [tokens1, tokens2]
- def __str__(self):
- s = reduce(
- lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
- list(map(lambda a, b: (a, b), self.tokens[0], self.tokens[1])),
- "Code differs in %s\n" % str(self.name),
- )
- return (
- "Code differs in %s at offset %s [%s] != [%s]\n\n"
- % (repr(self.name), self.index, repr(self.token1), repr(self.token2))
- ) + s
- class CmpErrorCodeLen(VerifyCmpError):
- """Exception to be raised when code length differs."""
- def __init__(self, name, tokens1, tokens2):
- self.name = name
- self.tokens = [tokens1, tokens2]
- def __str__(self):
- return reduce(
- lambda s, t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
- list(map(lambda a, b: (a, b), self.tokens[0], self.tokens[1])),
- "Code len differs in %s\n" % str(self.name),
- )
- class CmpErrorMember(VerifyCmpError):
- """Exception to be raised when other members differ."""
- def __init__(self, name, member, data1, data2):
- self.name = name
- self.member = member
- self.data = (data1, data2)
- def __str__(self):
- return "Member %s differs in %s:\n\t%s\n\t%s\n" % (
- repr(self.member),
- repr(self.name),
- repr(self.data[0]),
- repr(self.data[1]),
- )
- # --- compare ---
- # these members are ignored
- __IGNORE_CODE_MEMBERS__ = [
- "co_filename",
- "co_firstlineno",
- "co_lnotab",
- "co_stacksize",
- "co_names",
- ]
- def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
- """
- Compare two code-objects.
- This is the main part of this module.
- """
- # print code_obj1, type(code_obj2)
- assert iscode(
- code_obj1
- ), "cmp_code_object first object type is %s, not code" % type(code_obj1)
- assert iscode(
- code_obj2
- ), "cmp_code_object second object type is %s, not code" % type(code_obj2)
- # print dir(code_obj1)
- if isinstance(code_obj1, object):
- # new style classes (Python 2.2)
- # assume _both_ code objects to be new style classes
- assert dir(code_obj1) == dir(code_obj2)
- else:
- # old style classes
- assert dir(code_obj1) == code_obj1.__members__
- assert dir(code_obj2) == code_obj2.__members__
- assert code_obj1.__members__ == code_obj2.__members__
- if name == "__main__":
- name = code_obj1.co_name
- else:
- name = "%s.%s" % (name, code_obj1.co_name)
- if name == ".?":
- name = "__main__"
- if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
- # use the new style code-classes' __cmp__ method, which
- # should be faster and more sophisticated
- # if this compare fails, we use the old routine to
- # find out, what exactly is nor equal
- # if this compare succeeds, simply return
- # return
- pass
- if isinstance(code_obj1, object):
- members = [x for x in dir(code_obj1) if x.startswith("co_")]
- else:
- members = dir(code_obj1)
- members.sort() # ; members.reverse()
- tokens1 = None
- for member in members:
- if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
- pass
- elif member == "co_code":
- if verify != "strong":
- continue
- scanner = get_scanner(version, is_pypy, show_asm=False)
- global JUMP_OPS
- JUMP_OPS = list(JUMP_OPS) + ["JUMP_BACK"]
- # use changed Token class
- # We (re)set this here to save exception handling,
- # which would get confusing.
- scanner.setTokenClass(Token)
- try:
- # ingest both code-objects
- tokens1, customize = scanner.ingest(code_obj1)
- del customize # save memory
- tokens2, customize = scanner.ingest(code_obj2)
- del customize # save memory
- finally:
- scanner.resetTokenClass() # restore Token class
- targets1 = dis.findlabels(code_obj1.co_code)
- tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
- tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]
- i1 = 0
- i2 = 0
- offset_map = {}
- check_jumps = {}
- while i1 < len(tokens1):
- if i2 >= len(tokens2):
- if (
- len(tokens1) == len(tokens2) + 2
- and tokens1[-1].kind == "RETURN_VALUE"
- and tokens1[-2].kind == "LOAD_CONST"
- and tokens1[-2].pattr is None
- and tokens1[-3].kind == "RETURN_VALUE"
- ):
- break
- else:
- raise CmpErrorCodeLen(name, tokens1, tokens2)
- offset_map[tokens1[i1].offset] = tokens2[i2].offset
- for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
- if offset2 != tokens2[i2].offset:
- raise CmpErrorCode(
- name,
- tokens1[idx1].offset,
- tokens1[idx1],
- tokens2[idx2],
- tokens1,
- tokens2,
- )
- if tokens1[i1].kind != tokens2[i2].kind:
- if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
- i = 1
- while tokens1[i1 + i].kind == "LOAD_CONST":
- i += 1
- if tokens1[i1 + i].kind.startswith(
- ("BUILD_TUPLE", "BUILD_LIST")
- ) and i == int(tokens1[i1 + i].kind.split("_")[-1]):
- t = tuple([elem.pattr for elem in tokens1[i1 : i1 + i]])
- if t != tokens2[i2].pattr:
- raise CmpErrorCode(
- name,
- tokens1[i1].offset,
- tokens1[i1],
- tokens2[i2],
- tokens1,
- tokens2,
- )
- i1 += i + 1
- i2 += 1
- continue
- elif (
- i == 2
- and tokens1[i1 + i].kind == "ROT_TWO"
- and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"
- ):
- i1 += 3
- i2 += 2
- continue
- elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
- f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
- if (
- f(tokens1[i1].pattr, tokens1[i1 + 1].pattr)
- == tokens2[i2].pattr
- ):
- i1 += 3
- i2 += 1
- continue
- elif tokens1[i1].kind == "UNARY_NOT":
- if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
- if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
- i1 += 2
- i2 += 1
- continue
- elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
- if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
- i1 += 2
- i2 += 1
- continue
- elif (
- tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
- and tokens1[i1 - 1].kind == "RETURN_VALUE"
- and tokens2[i2 - 1].kind in ("RETURN_VALUE", "RETURN_END_IF")
- and int(tokens1[i1].offset) not in targets1
- ):
- i1 += 1
- continue
- elif (
- tokens1[i1].kind == "JUMP_BACK"
- and tokens2[i2].kind == "CONTINUE"
- ):
- # FIXME: should make sure that offset is inside loop, not outside of it
- i1 += 2
- i2 += 2
- continue
- elif (
- tokens1[i1].kind == "JUMP_FORWARD"
- and tokens2[i2].kind == "JUMP_BACK"
- and tokens1[i1 + 1].kind == "JUMP_BACK"
- and tokens2[i2 + 1].kind == "JUMP_BACK"
- and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3
- ):
- if int(tokens1[i1].pattr) == int(tokens1[i1 + 1].offset):
- i1 += 2
- i2 += 2
- continue
- elif (
- tokens1[i1].kind == "LOAD_NAME"
- and tokens2[i2].kind == "LOAD_CONST"
- and tokens1[i1].pattr == "None"
- and tokens2[i2].pattr is None
- ):
- pass
- elif (
- tokens1[i1].kind == "LOAD_GLOBAL"
- and tokens2[i2].kind == "LOAD_NAME"
- and tokens1[i1].pattr == tokens2[i2].pattr
- ):
- pass
- elif (
- tokens1[i1].kind == "LOAD_ASSERT"
- and tokens2[i2].kind == "LOAD_NAME"
- and tokens1[i1].pattr == tokens2[i2].pattr
- ):
- pass
- elif (
- tokens1[i1].kind == "RETURN_VALUE"
- and tokens2[i2].kind == "RETURN_END_IF"
- ):
- pass
- elif (
- tokens1[i1].kind == "BUILD_TUPLE_0" and tokens2[i2].pattr == ()
- ):
- pass
- else:
- raise CmpErrorCode(
- name,
- tokens1[i1].offset,
- tokens1[i1],
- tokens2[i2],
- tokens1,
- tokens2,
- )
- elif (
- tokens1[i1].kind in JUMP_OPS
- and tokens1[i1].pattr != tokens2[i2].pattr
- ):
- if tokens1[i1].kind == "JUMP_BACK":
- dest1 = int(tokens1[i1].pattr)
- dest2 = int(tokens2[i2].pattr)
- if offset_map[dest1] != dest2:
- raise CmpErrorCode(
- name,
- tokens1[i1].offset,
- tokens1[i1],
- tokens2[i2],
- tokens1,
- tokens2,
- )
- else:
- # import pdb; pdb.set_trace()
- try:
- dest1 = int(tokens1[i1].pattr)
- if dest1 in check_jumps:
- check_jumps[dest1].append((i1, i2, dest2))
- else:
- check_jumps[dest1] = [(i1, i2, dest2)]
- except Exception:
- pass
- i1 += 1
- i2 += 1
- del tokens1, tokens2 # save memory
- elif member == "co_consts":
- # partial optimization can make the co_consts look different,
- # so we'll just compare the code consts
- codes1 = (c for c in code_obj1.co_consts if hasattr(c, "co_consts"))
- codes2 = (c for c in code_obj2.co_consts if hasattr(c, "co_consts"))
- for c1, c2 in zip(codes1, codes2):
- cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
- elif member == "co_flags":
- flags1 = code_obj1.co_flags
- flags2 = code_obj2.co_flags
- if is_pypy:
- # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
- flags2 &= ~0x0100 # PYPY_SOURCE_IS_UTF8
- # We also don't care about COROUTINE or GENERATOR for now
- flags1 &= ~0x000000A0
- flags2 &= ~0x000000A0
- if flags1 != flags2:
- raise CmpErrorMember(
- name,
- "co_flags",
- pretty_code_flags(flags1),
- pretty_code_flags(flags2),
- )
- else:
- # all other members must be equal
- if getattr(code_obj1, member) != getattr(code_obj2, member):
- raise CmpErrorMember(
- name, member, getattr(code_obj1, member), getattr(code_obj2, member)
- )
- class Token(ScannerToken):
- """Token class with changed semantics for 'cmp()'."""
- def __cmp__(self, o):
- t = self.kind # shortcut
- if t == "BUILD_TUPLE_0" and o.kind == "LOAD_CONST" and o.pattr == ():
- return 0
- if t == "COME_FROM" == o.kind:
- return 0
- if t == "PRINT_ITEM_CONT" and o.kind == "PRINT_ITEM":
- return 0
- if t == "RETURN_VALUE" and o.kind == "RETURN_END_IF":
- return 0
- if t == "JUMP_IF_FALSE_OR_POP" and o.kind == "POP_JUMP_IF_FALSE":
- return 0
- if JUMP_OPS and t in JUMP_OPS:
- # ignore offset
- return t == o.kind
- return (t == o.kind) or self.pattr == o.pattr
- def __repr__(self):
- return "%s %s (%s)" % (str(self.kind), str(self.attr), repr(self.pattr))
- def __str__(self):
- return "%s\t%-17s %r" % (self.offset, self.kind, self.pattr)
- def compare_code_with_srcfile(pyc_filename, src_filename, verify):
- """Compare a .pyc with a source code file. If everything is okay, None
- is returned. Otherwise a string message describing the mismatch is returned.
- """
- (
- version,
- timestamp,
- magic_int,
- code_obj1,
- is_pypy,
- source_size,
- sip_hash,
- ) = load_module(pyc_filename)
- if magic_int != PYTHON_MAGIC_INT:
- msg = (
- "Can't compare code - Python is running with magic %s, but code is magic %s "
- % (PYTHON_MAGIC_INT, magic_int)
- )
- return msg
- try:
- code_obj2 = load_file(src_filename)
- except SyntaxError as e:
- # src_filename can be the first of a group sometimes
- return str(e).replace(src_filename, pyc_filename)
- cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify)
- if verify == "verify-run":
- try:
- retcode = call("%s %s" % (sys.executable, src_filename), shell=True)
- if retcode != 0:
- return "Child was terminated by signal %d" % retcode
- pass
- except OSError as e:
- return "Execution failed: %s" % e
- pass
- return None
- def compare_files(pyc_filename1, pyc_filename2, verify):
- """Compare two .pyc files."""
- (
- version1,
- timestamp,
- magic_int1,
- code_obj1,
- is_pypy,
- source_size,
- sip_hash,
- ) = uncompyle6.load_module(pyc_filename1)
- (
- version2,
- timestamp,
- magic_int2,
- code_obj2,
- is_pypy,
- source_size,
- sip_hash,
- ) = uncompyle6.load_module(pyc_filename2)
- if (magic_int1 != magic_int2) and verify == "verify":
- verify = "weak_verify"
- cmp_code_objects(version1, is_pypy, code_obj1, code_obj2, verify)
- if __name__ == "__main__":
- t1 = Token("LOAD_CONST", None, "code_object _expandLang", 52)
- t2 = Token("LOAD_CONST", -421, "code_object _expandLang", 55)
- print(repr(t1))
- print(repr(t2))
- print(t1.kind == t2.kind, t1.attr == t2.attr)
|