base.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. # (C) Copyright 2017, 2019-2024 by Rocky Bernstein
  2. #
  3. # This program is free software; you can redistribute it and/or
  4. # modify it under the terms of the GNU General Public License
  5. # as published by the Free Software Foundation; either version 2
  6. # of the License, or (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. """
  17. Common routines for entering and classifying opcodes. Inspired by,
  18. limited by, and somewhat compatible with the corresponding
  19. Python opcode.py structures
  20. """
  21. from copy import deepcopy
  22. from typing import Dict, List, Set
  23. from xdis import wordcode
  24. from xdis.cross_dis import findlabels, findlinestarts, get_jump_target_maps
  25. from xdis.version_info import IS_PYPY, PYTHON_VERSION_TRIPLE
  26. cmp_op = (
  27. "<", # 0
  28. "<=", # 1
  29. "==", # 2
  30. "!=", # 3
  31. ">", # 4
  32. ">=", # 5
  33. "in", # 6
  34. "not-in", # 7
  35. "is", # 8
  36. "is-not", # 9
  37. "exception-match", # 10
  38. "BAD",
  39. )
  40. # opcodes that perform a binary operation on the top two stack entries
  41. binaryop: Set[int] = set([])
  42. # opcodes that perform some sort of call
  43. callop: Set[int] = set([])
  44. # opcodes that have some encoding of its argument
  45. encoded_arg: Set[int] = set([])
  46. hascompare: List[int] = []
  47. hascondition: List[int] = [] # conditional operator; has jump offset
  48. hasconst: List[int] = []
  49. hasfree: List[int] = []
  50. hasjabs: List[int] = []
  51. hasjrel: List[int] = []
  52. haslocal: List[int] = []
  53. hasname: List[int] = []
  54. hasnargs: List[int] = [] # For function-like calls
  55. hasstore: List[int] = [] # Some sort of store operation
  56. hasvargs: List[int] = [] # Similar but for operators BUILD_xxx
  57. nofollow: List[int] = [] # Instruction doesn't fall to the next opcode
  58. nullaryop: Set[int] = set([]) # Instruction do not consume a stack entry
  59. # Nullary instruction that loads a value. LOAD_CONST is like this but
  60. # LOAD_ATTR is not since it requires an operand
  61. nullaryloadop: Set[int] = set([])
  62. # opmap[opcode_name] => opcode_number
  63. opmap: Dict[str, int] = {}
  64. # opcode[i] => opcode name
  65. opname: List[str] = [""] * 256
  66. # oppush[op] => number of stack entries pushed
  67. oppush: List[int] = [0] * 256
  68. # oppop[op] => number of stack entries popped
  69. oppop: List[int] = [0] * 256
  70. ternaryop: Set[int] = set([])
  71. # opcodes that perform a unary operation of the top stack entry
  72. unaryop: Set[int] = set()
  73. # Opcodes greater than 90 take an instruction operand or "argument"
  74. # as opcode.py likes to call it.
  75. HAVE_ARGUMENT = 90
  76. fields2copy = """
  77. binaryop
  78. callop
  79. encoded_arg
  80. hascompare hascondition
  81. hasconst hasfree hasjabs hasjrel haslocal
  82. hasname hasnargs hasstore hasvargs oppop oppush
  83. nofollow nullaryop nullaryloadop ternaryop unaryop
  84. """.split()
  85. # additional fields needed to copy in versions >= 3.13
  86. fields2copy_313 = "hasarg hasexc".split() # added in 3.12
  87. fields2copy_314 = "hasjump".split() # added in 3.13
  88. def init_opdata(loc, from_mod, version_tuple=None, is_pypy: bool=False) -> None:
  89. """Sets up a number of the structures found in Python's
  90. opcode.py. Python opcode.py routines assign attributes to modules.
  91. In order to do this in a modular way here, the local dictionary
  92. for the module is passed.
  93. """
  94. if version_tuple is not None:
  95. loc["python_version"] = version_tuple
  96. loc["is_pypy"] = is_pypy
  97. loc["cmp_op"] = cmp_op
  98. loc["HAVE_ARGUMENT"] = HAVE_ARGUMENT
  99. loc["findlinestarts"] = findlinestarts
  100. if version_tuple is None or version_tuple <= (3, 5):
  101. loc["findlabels"] = findlabels
  102. loc["get_jump_targets"] = findlabels
  103. loc["get_jump_target_maps"] = get_jump_target_maps
  104. else:
  105. loc["findlabels"] = wordcode.findlabels
  106. loc["get_jump_targets"] = wordcode.findlabels
  107. loc["get_jump_target_maps"] = wordcode.get_jump_target_maps
  108. if from_mod is not None:
  109. loc["opmap"] = deepcopy(from_mod.opmap)
  110. loc["opname"] = deepcopy(from_mod.opname)
  111. if version_tuple is not None:
  112. if version_tuple >= (3,13):
  113. fields2copy.extend(fields2copy_313)
  114. if version_tuple >= (3,14):
  115. fields2copy.extend(fields2copy_314)
  116. for field in fields2copy:
  117. loc[field] = getattr(from_mod, field).copy()
  118. pass
  119. else:
  120. # FIXME: DRY with above
  121. loc["binaryop"] = set([])
  122. loc["callop"] = set([])
  123. loc["encoded_arg"] = set([])
  124. loc["hascompare"] = []
  125. loc["hascondition"] = []
  126. loc["hasconst"] = []
  127. loc["hasfree"] = []
  128. loc["hasjabs"] = []
  129. loc["hasjrel"] = []
  130. loc["haslocal"] = []
  131. loc["hasname"] = []
  132. loc["hasnargs"] = []
  133. loc["hasstore"] = []
  134. loc["hasvargs"] = []
  135. loc["nofollow"] = []
  136. loc["nullaryop"] = set([])
  137. loc["nullaryloadop"] = set([])
  138. loc["opmap"] = {}
  139. loc["opname"] = [""] * 256
  140. for op in range(256):
  141. loc["opname"][op] = "<%r>" % (op,)
  142. loc["oppop"] = [0] * 256
  143. loc["oppush"] = [0] * 256
  144. loc["ternaryop"] = set([])
  145. loc["unaryop"] = set([])
  146. def binary_op(loc: dict, name: str, opcode: int, pop: int = 2, push: int = 1) -> None:
  147. """
  148. Put opcode in the class of instructions that are binary operations.
  149. """
  150. loc["binaryop"].add(opcode)
  151. def_op(loc, name, opcode, pop, push)
  152. def call_op(
  153. loc: dict, name: str, opcode: int, pop: int = -2, push: int = 1, fallthrough: bool=True
  154. ) -> None:
  155. """
  156. Put opcode in the class of instructions that perform calls.
  157. """
  158. loc["callop"].add(opcode)
  159. nargs_op(loc, name, opcode, pop, push, fallthrough)
  160. def compare_op(loc: dict, name: str, opcode: int, pop: int = 2, push: int = 1) -> None:
  161. def_op(loc, name, opcode, pop, push)
  162. loc["hascompare"].append(opcode)
  163. loc["binaryop"].add(opcode)
  164. def conditional_op(loc: dict, name: str, opcode: int) -> None:
  165. loc["hascompare"].append(opcode)
  166. def const_op(loc: dict, name: str, opcode: int, pop: int = 0, push: int = 1) -> None:
  167. def_op(loc, name, opcode, pop, push)
  168. loc["hasconst"].append(opcode)
  169. loc["nullaryop"].add(opcode)
  170. def def_op(
  171. loc: dict,
  172. op_name: str,
  173. opcode: int,
  174. pop: int = -2,
  175. push: int = -2,
  176. fallthrough: bool = True,
  177. ) -> None:
  178. loc["opname"][opcode] = op_name
  179. loc["opmap"][op_name] = opcode
  180. loc["oppush"][opcode] = push
  181. loc["oppop"][opcode] = pop
  182. if not fallthrough:
  183. loc["nofollow"].append(opcode)
  184. def free_op(loc: dict, name: str, opcode: int, pop: int = 0, push: int = 1) -> None:
  185. def_op(loc, name, opcode, pop, push)
  186. loc["hasfree"].append(opcode)
  187. def jabs_op(
  188. loc: dict,
  189. name: str,
  190. opcode: int,
  191. pop: int = 0,
  192. push: int = 0,
  193. conditional: bool = False,
  194. fallthrough: bool = True,
  195. ) -> None:
  196. """
  197. Put opcode in the class of instructions that can perform an absolute jump.
  198. """
  199. def_op(loc, name, opcode, pop, push, fallthrough=fallthrough)
  200. loc["hasjabs"].append(opcode)
  201. if conditional:
  202. loc["hascondition"].append(opcode)
  203. def jrel_op(loc, name: str, opcode: int, pop: int=0, push: int=0, conditional=False, fallthrough=True) -> None:
  204. """
  205. Put opcode in the class of instructions that can perform a relative jump.
  206. """
  207. def_op(loc, name, opcode, pop, push, fallthrough)
  208. loc["hasjrel"].append(opcode)
  209. if conditional:
  210. loc["hascondition"].append(opcode)
  211. def local_op(loc, name, opcode: int, pop=0, push=1) -> None:
  212. def_op(loc, name, opcode, pop, push)
  213. loc["haslocal"].append(opcode)
  214. loc["nullaryop"].add(opcode)
  215. def name_op(loc: dict, op_name, opcode: int, pop=-2, push=-2) -> None:
  216. """
  217. Put opcode in the class of instructions that index into the "name" table.
  218. """
  219. def_op(loc, op_name, opcode, pop, push)
  220. loc["hasname"].append(opcode)
  221. loc["nullaryop"].add(opcode)
  222. def nargs_op(
  223. loc, name: str, opcode: int, pop: int = -2, push: int = -1, fallthrough=True
  224. ) -> None:
  225. """
  226. Put opcode in the class of instructions that have a variable number of (or *n*) arguments
  227. """
  228. def_op(loc, name, opcode, pop, push, fallthrough=fallthrough)
  229. loc["hasnargs"].append(opcode)
  230. def opcode_check(loc) -> None:
  231. """When the version of Python we are running happens
  232. to have the same opcode set as the opcode we are
  233. importing, we perform checks to make sure our opcode
  234. set matches exactly.
  235. """
  236. if (PYTHON_VERSION_TRIPLE[:2] == loc["python_version"][:2]) and IS_PYPY == loc[
  237. "is_pypy"
  238. ]:
  239. try:
  240. import dis
  241. opmap = fix_opcode_names(dis.opmap)
  242. # print(set(opmap.items()) - set(loc['opmap'].items()))
  243. # print(set(loc['opmap'].items()) - set(opmap.items()))
  244. assert all(item in opmap.items() for item in loc["opmap"].items())
  245. assert all(item in loc["opmap"].items() for item in opmap.items())
  246. except Exception:
  247. pass
  248. def rm_op(loc, name, op) -> None:
  249. """Remove an opcode. This is used when basing a new Python release off
  250. of another one, and there is an opcode that is in the old release
  251. that was removed in the new release.
  252. We are pretty aggressive about removing traces of the op.
  253. """
  254. # opname is an array, so we need to keep the position in there.
  255. loc["opname"][op] = "<%s>" % op
  256. if op in loc["hascompare"]:
  257. loc["hascompare"].remove(op)
  258. if op in loc["hascondition"]:
  259. loc["hascondition"].remove(op)
  260. if op in loc["hasconst"]:
  261. loc["hasconst"].remove(op)
  262. if op in loc["hasfree"]:
  263. loc["hasfree"].remove(op)
  264. if op in loc["hasjabs"]:
  265. loc["hasjabs"].remove(op)
  266. if op in loc["hasjrel"]:
  267. loc["hasjrel"].remove(op)
  268. if op in loc["haslocal"]:
  269. loc["haslocal"].remove(op)
  270. if op in loc["hasname"]:
  271. loc["hasname"].remove(op)
  272. if op in loc["hasnargs"]:
  273. loc["hasnargs"].remove(op)
  274. if op in loc["hasstore"]:
  275. loc["hasstore"].remove(op)
  276. if op in loc["hasvargs"]:
  277. loc["hasvargs"].remove(op)
  278. if op in loc["nofollow"]:
  279. loc["nofollow"].remove(op)
  280. if op in loc["nullaryloadop"]:
  281. loc["nullaryloadop"].remove(op)
  282. if op in loc["nullaryop"]:
  283. loc["nullaryop"].remove(op)
  284. if loc["opmap"][name] != op:
  285. print(name, loc["opmap"][name], op)
  286. assert loc["opmap"][name] == op
  287. del loc["opmap"][name]
  288. def store_op(loc: dict, name: str, op, pop=0, push=1, is_type="def") -> None:
  289. if is_type == "name":
  290. name_op(loc, name, op, pop, push)
  291. loc["nullaryop"].remove(op)
  292. elif is_type == "local":
  293. local_op(loc, name, op, pop, push)
  294. loc["nullaryop"].remove(op)
  295. elif is_type == "free":
  296. free_op(loc, name, op, pop, push)
  297. else:
  298. assert is_type == "def"
  299. def_op(loc, name, op, pop, push)
  300. loc["hasstore"].append(op)
  301. def ternary_op(loc: dict, name: str, opcode: int, pop: int = 3, push: int = 1) -> None:
  302. """
  303. Put opcode in the class of instructions that are ternary operations.
  304. """
  305. loc["ternaryop"].add(opcode)
  306. def_op(loc, name, opcode, pop, push)
  307. def unary_op(loc, name: str, op, pop: int=1, push: int=1) -> None:
  308. loc["unaryop"].add(op)
  309. def_op(loc, name, op, pop, push)
  310. # This is not in Python. The operand indicates how
  311. # items on the pop from the stack. BUILD_TUPLE_UNPACK
  312. # is line this.
  313. def varargs_op(loc, op_name, op_code, pop: int=-1, push: int=1) -> None:
  314. def_op(loc, op_name, op_code, pop, push)
  315. loc["hasvargs"].append(op_code)
  316. # Some of the convoluted code below reflects some of the
  317. # many Python idiocies over the years.
  318. def finalize_opcodes(loc: list[str]) -> None:
  319. """
  320. Things done to Python codes after all opcode have been defined.
  321. """
  322. # Not sure why, but opcode.py address has opcode.EXTENDED_ARG
  323. # as well as opmap['EXTENDED_ARG']
  324. loc["EXTENDED_ARG"] = loc["opmap"]["EXTENDED_ARG"]
  325. if loc["version_tuple"] < (3, 6):
  326. loc["EXTENDED_ARG_SHIFT"] = 16
  327. else:
  328. loc["EXTENDED_ARG_SHIFT"] = 8
  329. loc["ARG_MAX_VALUE"] = (1 << loc["EXTENDED_ARG_SHIFT"]) - 1
  330. loc["EXTENDED_ARG"] = loc["opmap"]["EXTENDED_ARG"]
  331. loc["opmap"] = fix_opcode_names(loc["opmap"])
  332. # Now add in the attributes into the module
  333. for op in loc["opmap"]:
  334. loc[op] = loc["opmap"][op]
  335. loc["JUMP_OPs"] = frozenset(loc["hasjrel"] + loc["hasjabs"])
  336. loc["NOFOLLOW"] = frozenset(loc["nofollow"])
  337. loc["operator_set"] = frozenset(
  338. loc["nullaryop"]
  339. | loc["unaryop"]
  340. | loc["binaryop"]
  341. | loc["ternaryop"]
  342. | set([op for op in loc["hasnargs"] if op not in loc["nofollow"]])
  343. | set([op for op in loc["hasvargs"]])
  344. )
  345. opcode_check(loc)
  346. return
  347. def fix_opcode_names(opmap: dict[str, int]):
  348. """
  349. Python stupidly named some OPCODES with a + which prevents using opcode name
  350. directly as an attribute, e.g. SLICE+3. So we turn that into SLICE_3, so we
  351. can then use opcode_23.SLICE_3. Later Python's fix this.
  352. """
  353. return dict([(k.replace("+", "_"), v) for (k, v) in opmap.items()])
  354. def update_pj3(g, loc, is_pypy: bool=False) -> None:
  355. if loc["version_tuple"] < (3, 11):
  356. g.update({"PJIF": loc["opmap"]["POP_JUMP_IF_FALSE"]})
  357. g.update({"PJIT": loc["opmap"]["POP_JUMP_IF_TRUE"]})
  358. update_sets(loc, is_pypy)
  359. def update_pj2(g, loc, is_pypy: bool=False) -> None:
  360. g.update({"PJIF": loc["opmap"]["JUMP_IF_FALSE"]})
  361. g.update({"PJIT": loc["opmap"]["JUMP_IF_TRUE"]})
  362. update_sets(loc, is_pypy)
  363. def update_sets(loc, is_pypy) -> None:
  364. """
  365. Updates various category sets all opcode have been defined.
  366. """
  367. loc["COMPARE_OPS"] = frozenset(loc["hascompare"])
  368. loc["CONDITION_OPS"] = frozenset(loc["hascondition"])
  369. loc["CONST_OPS"] = frozenset(loc["hasconst"])
  370. loc["ENCODED_ARG_OPS"] = frozenset(loc["encoded_arg"])
  371. loc["FREE_OPS"] = frozenset(loc["hasfree"])
  372. loc["JREL_OPS"] = frozenset(loc["hasjrel"])
  373. loc["JABS_OPS"] = frozenset(loc["hasjabs"])
  374. python_version = loc.get("python_version")
  375. if python_version and python_version < (3, 11) or (is_pypy and python_version == (3, 11)):
  376. loc["JUMP_UNCONDITIONAL"] = frozenset(
  377. [loc["opmap"]["JUMP_ABSOLUTE"], loc["opmap"]["JUMP_FORWARD"]]
  378. )
  379. elif python_version:
  380. if not is_pypy:
  381. loc["JUMP_UNCONDITIONAL"] = frozenset(
  382. [
  383. loc["opmap"]["JUMP_FORWARD"],
  384. loc["opmap"]["JUMP_BACKWARD"],
  385. loc["opmap"]["JUMP_BACKWARD_NO_INTERRUPT"],
  386. ]
  387. )
  388. else:
  389. loc["JUMP_UNCONDITIONAL"] = frozenset([loc["opmap"]["JUMP_FORWARD"]])
  390. if PYTHON_VERSION_TRIPLE < (3, 8, 0) and python_version and python_version < (3, 8):
  391. loc["LOOP_OPS"] = frozenset([loc["opmap"]["SETUP_LOOP"]])
  392. else:
  393. loc["LOOP_OPS"] = frozenset()
  394. loc["LOCAL_OPS"] = frozenset(loc["haslocal"])
  395. loc["JUMP_OPS"] = (
  396. loc["JABS_OPS"] | loc["JREL_OPS"] | loc["LOOP_OPS"] | loc["JUMP_UNCONDITIONAL"]
  397. )
  398. loc["NAME_OPS"] = frozenset(loc["hasname"])
  399. loc["NARGS_OPS"] = frozenset(loc["hasnargs"])
  400. loc["VARGS_OPS"] = frozenset(loc["hasvargs"])
  401. loc["STORE_OPS"] = frozenset(loc["hasstore"])
  402. if python_version and python_version >= (3,12):
  403. loc["ARG_OPS"] = frozenset(loc["hasarg"])
  404. loc["EXC_OPS"] = frozenset(loc["hasarg"])
  405. if python_version and python_version >= (3,13):
  406. loc["JUMP_OPS"] = frozenset(loc["hasjump"])
  407. def dump_opcodes(opmap) -> None:
  408. """Utility for dumping opcodes"""
  409. op2name = {}
  410. for k in opmap.keys():
  411. op2name[opmap[k]] = k
  412. for i in sorted(op2name.keys()):
  413. print("%-3s %s" % (str(i), op2name[i]))