lineoffsets.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. """
  2. For a given source or bytecode file or code object, retrieve
  3. * linenumbers,
  4. * bytecode offsets, and
  5. * nested functions (code objects)
  6. This is useful for example in debuggers that want to set breakpoints only
  7. at valid locations.
  8. """
  9. from collections import namedtuple
  10. from xdis.bytecode import get_instructions_bytes
  11. from xdis.codetype.base import iscode
  12. from xdis.load import check_object_path, load_module
  13. from xdis.op_imports import get_opcode_module
  14. # Information about a single line in a particular piece of code
  15. # Note that a code can have several lines with the same value but
  16. # different code.
  17. # For example:
  18. # x = 1; y = 2
  19. # will have two lines with the same line number each of the two statements.
  20. # We will have a LineInCode object for each
  21. LineOffsets = namedtuple("LineOffsets", ["line_number", "offsets", "code"])
  22. LineOffsetsCompact = namedtuple("LineOffsetsCompact", ["name", "offsets"])
  23. class LineOffsetInfo(object):
  24. def __init__(self, opc, code, include_children: bool=False) -> None:
  25. if not iscode(code):
  26. raise TypeError(
  27. "code parameter %s needs to be a code type; is %s" % (code, type(code))
  28. )
  29. self.code = code
  30. self.name = code.co_name
  31. self.opc = opc
  32. self.children = {}
  33. self.lines = []
  34. self.offsets = []
  35. self.linestarts = dict(opc.findlinestarts(code, dup_lines=True))
  36. self.instructions = []
  37. self.include_children = include_children
  38. self._populate_lines()
  39. return
  40. def _populate_lines(self) -> None:
  41. code = self.code
  42. code_map = {code.co_name: code}
  43. last_line_info = None
  44. for instr in get_instructions_bytes(
  45. bytecode=code.co_code,
  46. opc=self.opc,
  47. varnames=code.co_varnames,
  48. names=code.co_names,
  49. constants=code.co_consts,
  50. cells=code.co_cellvars + code.co_freevars,
  51. linestarts=self.linestarts,
  52. ):
  53. offset = instr.offset
  54. self.offsets.append(offset)
  55. self.instructions.append(instr)
  56. if instr.starts_line:
  57. if last_line_info:
  58. self.lines.append(last_line_info)
  59. pass
  60. last_line_info = LineOffsets(instr.starts_line, [offset], code)
  61. elif last_line_info is not None:
  62. last_line_info.offsets.append(offset)
  63. pass
  64. pass
  65. self.lines.append(last_line_info)
  66. if self.include_children:
  67. for c in code.co_consts:
  68. if iscode(c):
  69. code_map[c.co_name] = c
  70. code_info = LineOffsetInfo(self.opc, c, True)
  71. code_map.update(code_info.code_map)
  72. self.children[code_info.name] = code_info
  73. self.lines += code_info.lines
  74. pass
  75. pass
  76. pass
  77. self.code_map = code_map
  78. def __str__(self) -> str:
  79. return str(self.line_numbers())
  80. def line_numbers(self, include_dups: bool=True, include_offsets: bool=False):
  81. """Return all of the valid lines for a given piece of code"""
  82. if include_offsets:
  83. lines = {}
  84. for li in self.lines:
  85. number = li.line_number
  86. lines[number] = lines.get(number, [])
  87. lines[number].append(LineOffsetsCompact(li.code.co_name, li.offsets))
  88. pass
  89. pass
  90. else:
  91. lines = list(self.linestarts.values())
  92. if not include_dups:
  93. return sorted(list(set(lines)))
  94. if isinstance(lines, list):
  95. return sorted(lines)
  96. return lines
  97. pass
  98. def lineoffsets_in_file(filename: str, toplevel_only=False) -> LineOffsetInfo | None:
  99. obj_path = check_object_path(filename)
  100. version, timestamp, magic_int, code, pypy, source_size, sip_hash = load_module(
  101. obj_path
  102. )
  103. if pypy:
  104. variant = "pypy"
  105. else:
  106. variant = None
  107. opc = get_opcode_module(version, variant)
  108. return LineOffsetInfo(opc, code, not toplevel_only)
  109. pass
  110. def lineoffsets_in_module(module, toplevel_only: bool=False) -> LineOffsetInfo | None:
  111. return lineoffsets_in_file(module.__file__, toplevel_only)
  112. if __name__ == "__main__":
  113. def multi_line() -> tuple[int, int]:
  114. # We have two statements on the same line
  115. x = 1
  116. y = 2
  117. return x, y
  118. def foo() -> int:
  119. def bar() -> int:
  120. return 5
  121. return bar()
  122. def print_code_info(code_info: LineOffsetInfo | None) -> None:
  123. children = code_info.children.keys()
  124. if len(children):
  125. print("%s has %d children" % (code_info.name, len(children)))
  126. for child in code_info.children.keys():
  127. print("\t%s" % child)
  128. pass
  129. print("\n")
  130. else:
  131. print("%s has no children" % (code_info.name))
  132. print(
  133. "\tlines with children and dups:\n\t%s"
  134. % code_info.line_numbers(include_dups=True)
  135. )
  136. print(
  137. "\tlines without children and without dups:\n\t%s"
  138. % code_info.line_numbers(include_dups=False)
  139. )
  140. print("Offsets in %s" % code_info.name, code_info.offsets)
  141. lines = code_info.line_numbers(include_offsets=True)
  142. for line_num, li in lines.items():
  143. print(
  144. "\tline: %4d: %s" % (line_num, ", ".join([str(i.offsets) for i in li]))
  145. )
  146. print("=" * 30)
  147. for mod, code in code_info.code_map.items():
  148. print(mod, ":", code)
  149. print("=" * 30)
  150. for li in code_info.lines:
  151. print(li)
  152. pass
  153. return
  154. opc = get_opcode_module()
  155. print_code_info(lineoffsets_in_file(__file__))
  156. # print_code_info(LineOffsetInfo(opc, multi_line.__code__, include_children=True))