disasm.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
# MIT License
# Copyright (c) 2020 Da Yan @ HKUST
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
  18. import functools
  19. import os
  20. import re
  21. import subprocess
  22. import tempfile
  23. FLINE_RE = re.compile(r'\s*/\*\w{4}\*/\s*([^;]*;)\s*/\* 0x(\w{16}) \*/\s*')
  24. SLINE_RE = re.compile(r'\s*/\* 0x(\w{16}) \*/\s*')
  25. FNAME_RE = re.compile(r'\s*Function : (\w+)\s*')
  26. BRA_RE = re.compile(r'(.*BRA(?:\.U)? )(0x\w+);')
  27. def parseCtrl(sline):
  28. enc = int(SLINE_RE.match(sline).group(1), 16)
  29. stall = (enc >> 41) & 0xf
  30. yld = (enc >> 45) & 0x1
  31. wrtdb = (enc >> 46) & 0x7
  32. readb = (enc >> 49) & 0x7
  33. watdb = (enc >> 52) & 0x3f
  34. yld_str = 'Y' if yld == 0 else '-'
  35. wrtdb_str = '-' if wrtdb == 7 else str(wrtdb)
  36. readb_str = '-' if readb == 7 else str(readb)
  37. watdb_str = '--' if watdb == 0 else f'{watdb:02d}'
  38. return f'{watdb_str}:{readb_str}:{wrtdb_str}:{yld_str}:{stall:x}'
  39. def processSassLines(fline, sline, labels):
  40. asm = FLINE_RE.match(fline).group(1)
  41. # Remove tailing space
  42. if asm.endswith(" ;"):
  43. asm = asm[:-2] + ";"
  44. ctrl = parseCtrl(sline)
  45. # BRA target address
  46. if BRA_RE.match(asm) is not None:
  47. target = int(BRA_RE.match(asm).group(2), 16)
  48. if target in labels:
  49. pass
  50. else:
  51. labels[target] = len(labels)
  52. return (f'{ctrl}', f'{asm}')
  53. @functools.lru_cache()
  54. def get_sass(cubin_asm, fun=None):
  55. fd, path = tempfile.mkstemp()
  56. try:
  57. with open(fd, 'wb') as cubin:
  58. cubin.write(cubin_asm)
  59. sass = extract(path, fun)
  60. finally:
  61. os.remove(path)
  62. return sass
  63. def path_to_cuobjdump():
  64. from triton import knobs
  65. return knobs.nvidia.cuobjdump.path
  66. def extract(file_path, fun):
  67. cuobjdump = path_to_cuobjdump()
  68. if fun is None:
  69. sass_str = subprocess.check_output([cuobjdump, "-sass", file_path])
  70. else:
  71. sass_str = subprocess.check_output([cuobjdump, "-fun", fun, "-sass", file_path])
  72. sass_lines = sass_str.splitlines()
  73. line_idx = 0
  74. while line_idx < len(sass_lines):
  75. line = sass_lines[line_idx].decode()
  76. # format:
  77. # function : <function_name>
  78. # .headerflags: ...
  79. # /*0000*/ asmstr /*0x...*/
  80. # /*0x...*/
  81. # Looking for new function header (function: <name>)
  82. while FNAME_RE.match(line) is None:
  83. line_idx += 1
  84. if line_idx < len(sass_lines):
  85. line = sass_lines[line_idx].decode()
  86. else:
  87. return
  88. fname = FNAME_RE.match(line).group(1)
  89. ret = ''
  90. ret += f'Function:{fname}\n'
  91. line_idx += 2 # bypass .headerflags
  92. line = sass_lines[line_idx].decode()
  93. # Remapping address to label
  94. labels = {} # address -> label_idx
  95. # store sass asm in buffer and them print them (for labels)
  96. # (ctrl, asm)
  97. asm_buffer = []
  98. while FLINE_RE.match(line) is not None:
  99. # First line (Offset ASM Encoding)
  100. fline = sass_lines[line_idx].decode()
  101. line_idx += 1
  102. # Second line (Encoding)
  103. sline = sass_lines[line_idx].decode()
  104. line_idx += 1
  105. asm_buffer.append(processSassLines(fline, sline, labels))
  106. # peek the next line
  107. line = sass_lines[line_idx].decode()
  108. # Print sass
  109. # label naming convention: LBB#i
  110. for idx, (ctrl, asm) in enumerate(asm_buffer):
  111. # Print label if this is BRA target
  112. offset = idx * 16
  113. if offset in labels:
  114. label_name = f'LBB{labels[offset]}'
  115. ret += f'{label_name}:\n'
  116. ret += ctrl + '\t'
  117. # if this is BRA, remap offset to label
  118. if BRA_RE.match(asm):
  119. target = int(BRA_RE.match(asm).group(2), 16)
  120. target_name = f'LBB{labels[target]}'
  121. asm = BRA_RE.sub(rf'\1{target_name};', asm)
  122. ret += asm + '\n'
  123. ret += '\n'
  124. return ret