ifstmt.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. # Copyright (c) 2020, 2022-2024 Rocky Bernstein
  2. #
  3. # This program is free software: you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation, either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# Example A: an example where we have weird COME_FROMs
#
# if a:
#     if b:  # false jumps around outer else
#         raise
# elif c:
#     a = 2
# # The end is jumped to by the "if b" test above, when b is false.
  23. def ifstmt(
  24. self, lhs: str, n: int, rule, tree, tokens: list, first: int, last: int
  25. ) -> bool:
  26. # print("XXX", tokens[first].offset , tokens[last].offset, rule)
  27. # for t in range(first, last):
  28. # print(tokens[t])
  29. # print("=" * 40)
  30. if rule == ("ifstmt", ("bool_op", "stmts", "\\e__come_froms")):
  31. return False
  32. ltm1_index = last - 1
  33. while tokens[ltm1_index] == "COME_FROM":
  34. ltm1_index -= 1
  35. ltm1 = tokens[ltm1_index]
  36. first_offset = tokens[first].off2int(prefer_last=False)
  37. # The below doesn't work for Example A above
  38. # # Test that the outermost COME_FROM, if it exists, must be *somewhere*
  39. # # in the range of the if stmt.
  40. # if ltm1 == "COME_FROM" and ltm1.attr < first_offset:
  41. # return True
  42. if not tree:
  43. return False
  44. ifstmts_jump = tree[1]
  45. if ifstmts_jump.kind.startswith("ifstmts_jump"):
  46. come_from = ifstmts_jump[0]
  47. if come_from == "COME_FROM" and come_from.attr < first_offset:
  48. return True
  49. testexpr = tree[0]
  50. test = testexpr[0]
  51. # We have two grammar rules: ifstmtc and if_not_stmtc
  52. # which are the same:
  53. # xxx ::= testexprc ifstmts_jumpc _come_froms
  54. # and these need to be disambiguated
  55. # When ifstmts_jumpc goes back to to a loop
  56. # and testexprc is testtruec, then we have if_not_stmtc.
  57. if lhs == "ifstmtc" and test == "testtruec" and ifstmts_jump == "ifstmts_jumpc":
  58. if len(test) > 1:
  59. return test[1] != "POP_JUMP_IF_FALSE_LOOP"
  60. if lhs == "if_not_stmtc" and ifstmts_jump == "ifstmts_jumpc":
  61. if test == "testexpr":
  62. test = test[0]
  63. if test in ("testfalsec", "testfalse"):
  64. return True
  65. if test in ("testtruec", "testtrue") and ifstmts_jump == "ifstmts_jumpc":
  66. if test[0] == "expr_pjit":
  67. test = test[0]
  68. if len(test) > 1:
  69. return test[1] == "POP_JUMP_IF_FALSE_LOOP"
  70. pop_jump_if = None
  71. if test in ("testexpr", "testexprc"):
  72. test = test[0]
  73. pop_jump_if = None
  74. if test in ("testtrue", "testtruec", "testfalse"):
  75. if len(test) == 1 and test[0].kind.startswith("expr_pji"):
  76. pop_jump_if = test[0][1]
  77. elif len(test) > 1 and test[1].kind.startswith("POP_JUMP_IF_"):
  78. pop_jump_if = test[1]
  79. if pop_jump_if:
  80. jump_target = pop_jump_if.attr
  81. if last == n:
  82. last -= 1
  83. # Get reasonable offset "end if" offset
  84. endif_offset = ltm1.off2int(prefer_last=True)
  85. if endif_offset == -1:
  86. endif_offset = tokens[last - 2].off2int(prefer_last=True)
  87. if first_offset <= jump_target < endif_offset:
  88. if rule[1] == ("testexpr", "stmts", "come_froms"):
  89. come_froms = tree[2]
  90. if hasattr(come_froms, "first_child"):
  91. come_from_offset = come_froms.first_child()
  92. else:
  93. assert come_froms.kind.startswith("COME_FROM")
  94. come_from_offset = come_froms.off2int()
  95. return jump_target != come_from_offset
  96. # FIXME: investigate why this happens for "if"s with EXTENDED_ARG POP_JUMP_IF_FALSE.
  97. # An example is decompyle3/semantics/transform.py n_ifelsestmt.py
  98. elif rule[1][-1] == "\\e__come_froms":
  99. return True
  100. pass
  101. endif_inst_index = self.offset2inst_index[ltm1.off2int(prefer_last=False)]
  102. # FIXME: RAISE_VARARGS is an instance of a no-follow instruction.
  103. # Should this be generalized? For example for RETURN ?
  104. if ltm1.kind.startswith("RAISE_VARARGS"):
  105. endif_inst_index += 2
  106. if endif_inst_index + 1 == len(self.insts):
  107. return False
  108. endif_next_inst = self.insts[endif_inst_index + 1]
  109. # jump_target equal tokens[last] is also okay: normal non-optimized non-loop jump
  110. if jump_target > endif_next_inst.offset:
  111. # test for Example A where "if b" jumps around the outer "else"
  112. if jump_target == tokens[last - 1].attr:
  113. return False
  114. if last < n and tokens[last].kind.startswith("JUMP"):
  115. # Distinguish code like:
  116. #
  117. # if a and not b: # there are two jumps to "else" here
  118. # real = 2 # there is a jump around the else here
  119. # else:
  120. # real = 3
  121. #
  122. # and don't confuse with:
  123. #
  124. # if a:
  125. # if not b: # the test below excludes this inner "if"
  126. # real = 2
  127. # real = 3
  128. # which is wrong
  129. if (
  130. first > 0
  131. and tokens[first - 1].kind.startswith("POP_JUMP_IF_")
  132. and tokens[first - 1].attr == jump_target
  133. ):
  134. return True
  135. return False
  136. return True
  137. elif jump_target < first_offset:
  138. # jump_target less than tokens[first] is okay - is to a loop
  139. assert test == "testtruec" # and lhs == "ifsmtc"
  140. # Since the "if" test is backwards, there shouldn't
  141. # be a "COME_FROM", but should be some sort of
  142. # instruction that does "not' fall through, like a jump
  143. # return, or raise.
  144. if ltm1 == "COME_FROM":
  145. before_come_from = self.insts[
  146. self.offset2inst_index[endif_offset] - 1
  147. ]
  148. # FIXME: When xdis next changes, this will be a field in the instruction
  149. no_follow = before_come_from.opcode in self.opc.nofollow
  150. return not (before_come_from.is_jump() or no_follow)
  151. elif pop_jump_if == "POP_JUMP_IF_TRUE":
  152. # Make sure pop_jump_if doesn't jump inside the "then" part of the "if"
  153. # print("WOOT", pop_jump_if.attr - endif_offset)
  154. # We leave some slop for endif_offset being one instruction behind.
  155. return not ((pop_jump_if.attr - endif_offset) in (0, 2))
  156. pass
  157. # If there is a final COME_FROM and that test jumps to that, this is a strong
  158. # indication that this is ok, so we'll skip jumps jumping too far test.
  159. if (
  160. pop_jump_if is not None
  161. and ltm1 == "COME_FROM"
  162. and ltm1.attr == pop_jump_if.off2int()
  163. ):
  164. return False
  165. # Make sure jumps don't extend beyond the end of the if statement.
  166. # This is done after the weird stuff above. There is a problem with the
  167. # below is that it suffers from example A the "if b" jumping around
  168. # the outer else. So we do this after all of the above and
  169. # rely on the above COME_FROM test.
  170. last_offset = tokens[last].off2int()
  171. for i in range(first, last):
  172. t = tokens[i]
  173. # instead of POP_JUMP_IF, should we use op attributes?
  174. if t.kind.startswith("POP_JUMP_IF_"):
  175. pjif_target = t.attr
  176. if pjif_target > last_offset:
  177. # In some cases, where we have long bytecode, a
  178. # "POP_JUMP_IF_TRUE/FALSE" offset might be too
  179. # large for the instruction; so instead it
  180. # jumps to a JUMP_FORWARD. Allow that here.
  181. if tokens[last] == "JUMP_FORWARD":
  182. return tokens[last].attr != pjif_target
  183. return True
  184. # elif lhs == "ifstmtc" and tokens[first].off2int() > pjif_target:
  185. # # A conditional JUMP to the loop is expected for "ifstmtc"
  186. # return True
  187. pass
  188. pass
  189. # If the "if_stmt" includes a COME_FROM from before the beginning of the "if", then
  190. # no good. If the "if stmt" covers the non-COME_FROM instructions, there will have
  191. # been a prior reduction that doesn't include the last COME_FROM.
  192. if ltm1 == "COME_FROM":
  193. return ltm1.attr < first_offset
  194. elif tokens[last] == "COME_FROM":
  195. return tokens[last].attr < first_offset
  196. return False