trace_profiler.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468
  1. #!/usr/bin/env python
  2. """ Parse OpenCV trace logs and present summarized statistics in a table
  3. To collect trace logs use OpenCV built with tracing support (enabled by default), set
  4. `OPENCV_TRACE=1` environment variable and run your application. `OpenCVTrace.txt` file will be
  5. created in the current folder.
  6. See https://github.com/opencv/opencv/wiki/Profiling-OpenCV-Applications for more details.
  7. ### Options
  8. ./trace_profiler.py <TraceLogFile> <num>
  9. <TraceLogFile> - usually OpenCVTrace.txt
  10. <num> - number of functions to show (depth)
  11. ### Example
  12. ./trace_profiler.py OpenCVTrace.txt 2
  13. ID name count thr min ...
  14. t-min ...
  15. 1 main#test_main.cpp:6 1 1 88.484 ...
  16. 200.210 ...
  17. 2 UMatBasicTests_copyTo#test_umat.cpp:176|main 40 1 0.125 ...
  18. 0.173 ...
  19. """
  20. from __future__ import print_function
  21. import os
  22. import sys
  23. import csv
  24. from pprint import pprint
  25. from collections import deque
  26. try:
  27. long # Python 2
  28. except NameError:
  29. long = int # Python 3
  30. # trace.hpp
  31. REGION_FLAG_IMPL_MASK = 15 << 16
  32. REGION_FLAG_IMPL_IPP = 1 << 16
  33. REGION_FLAG_IMPL_OPENCL = 2 << 16
  34. DEBUG = False
  35. if DEBUG:
  36. dprint = print
  37. dpprint = pprint
  38. else:
  39. def dprint(args, **kwargs):
  40. pass
  41. def dpprint(args, **kwargs):
  42. pass
  43. def tryNum(s):
  44. if s.startswith('0x'):
  45. try:
  46. return int(s, 16)
  47. except ValueError:
  48. pass
  49. try:
  50. return int(s)
  51. except ValueError:
  52. pass
  53. if sys.version_info[0] < 3:
  54. try:
  55. return long(s)
  56. except ValueError:
  57. pass
  58. return s
  59. def formatTimestamp(t):
  60. return "%.3f" % (t * 1e-6)
  61. try:
  62. from statistics import median
  63. except ImportError:
  64. def median(lst):
  65. sortedLst = sorted(lst)
  66. lstLen = len(lst)
  67. index = (lstLen - 1) // 2
  68. if (lstLen % 2):
  69. return sortedLst[index]
  70. else:
  71. return (sortedLst[index] + sortedLst[index + 1]) * 0.5
  72. def getCXXFunctionName(spec):
  73. def dropParams(spec):
  74. pos = len(spec) - 1
  75. depth = 0
  76. while pos >= 0:
  77. if spec[pos] == ')':
  78. depth = depth + 1
  79. elif spec[pos] == '(':
  80. depth = depth - 1
  81. if depth == 0:
  82. if pos == 0 or spec[pos - 1] in ['#', ':']:
  83. res = dropParams(spec[pos+1:-1])
  84. return (spec[:pos] + res[0], res[1])
  85. return (spec[:pos], spec[pos:])
  86. pos = pos - 1
  87. return (spec, '')
  88. def extractName(spec):
  89. pos = len(spec) - 1
  90. inName = False
  91. while pos >= 0:
  92. if spec[pos] == ' ':
  93. if inName:
  94. return spec[pos+1:]
  95. elif spec[pos].isalnum():
  96. inName = True
  97. pos = pos - 1
  98. return spec
  99. if spec.startswith('IPP') or spec.startswith('OpenCL'):
  100. prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL')
  101. prefix = spec[:prefix_size]
  102. if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']:
  103. prefix = prefix + spec[prefix_size]
  104. prefix_size = prefix_size + 1
  105. begin = prefix_size
  106. while begin < len(spec):
  107. if spec[begin].isalnum() or spec[begin] in ['_', ':']:
  108. break
  109. begin = begin + 1
  110. if begin == len(spec):
  111. return spec
  112. end = begin
  113. while end < len(spec):
  114. if not (spec[end].isalnum() or spec[end] in ['_', ':']):
  115. break
  116. end = end + 1
  117. return prefix + spec[begin:end]
  118. spec = spec.replace(') const', ')') # const methods
  119. (ret_type_name, params) = dropParams(spec)
  120. name = extractName(ret_type_name)
  121. if 'operator' in name:
  122. return name + params
  123. if name.startswith('&'):
  124. return name[1:]
  125. return name
  126. stack_size = 10
  127. class Trace:
  128. def __init__(self, filename=None):
  129. self.tasks = {}
  130. self.tasks_list = []
  131. self.locations = {}
  132. self.threads_stack = {}
  133. self.pending_files = deque()
  134. if filename:
  135. self.load(filename)
  136. class TraceTask:
  137. def __init__(self, threadID, taskID, locationID, beginTimestamp):
  138. self.threadID = threadID
  139. self.taskID = taskID
  140. self.locationID = locationID
  141. self.beginTimestamp = beginTimestamp
  142. self.endTimestamp = None
  143. self.parentTaskID = None
  144. self.parentThreadID = None
  145. self.childTask = []
  146. self.selfTimeIPP = 0
  147. self.selfTimeOpenCL = 0
  148. self.totalTimeIPP = 0
  149. self.totalTimeOpenCL = 0
  150. def __repr__(self):
  151. return "TID={} ID={} loc={} parent={}:{} begin={} end={} IPP={}/{} OpenCL={}/{}".format(
  152. self.threadID, self.taskID, self.locationID, self.parentThreadID, self.parentTaskID,
  153. self.beginTimestamp, self.endTimestamp, self.totalTimeIPP, self.selfTimeIPP, self.totalTimeOpenCL, self.selfTimeOpenCL)
  154. class TraceLocation:
  155. def __init__(self, locationID, filename, line, name, flags):
  156. self.locationID = locationID
  157. self.filename = os.path.split(filename)[1]
  158. self.line = line
  159. self.name = getCXXFunctionName(name)
  160. self.flags = flags
  161. def __str__(self):
  162. return "{}#{}:{}".format(self.name, self.filename, self.line)
  163. def __repr__(self):
  164. return "ID={} {}:{}:{}".format(self.locationID, self.filename, self.line, self.name)
  165. def parse_file(self, filename):
  166. dprint("Process file: '{}'".format(filename))
  167. with open(filename) as infile:
  168. for line in infile:
  169. line = str(line).strip()
  170. if line[0] == "#":
  171. if line.startswith("#thread file:"):
  172. name = str(line.split(':', 1)[1]).strip()
  173. self.pending_files.append(os.path.join(os.path.split(filename)[0], name))
  174. continue
  175. self.parse_line(line)
  176. def parse_line(self, line):
  177. opts = line.split(',')
  178. dpprint(opts)
  179. if opts[0] == 'l':
  180. opts = list(csv.reader([line]))[0] # process quote more
  181. locationID = int(opts[1])
  182. filename = str(opts[2])
  183. line = int(opts[3])
  184. name = opts[4]
  185. flags = tryNum(opts[5])
  186. self.locations[locationID] = self.TraceLocation(locationID, filename, line, name, flags)
  187. return
  188. extra_opts = {}
  189. for e in opts[5:]:
  190. if not '=' in e:
  191. continue
  192. (k, v) = e.split('=')
  193. extra_opts[k] = tryNum(v)
  194. if extra_opts:
  195. dpprint(extra_opts)
  196. threadID = None
  197. taskID = None
  198. locationID = None
  199. ts = None
  200. if opts[0] in ['b', 'e']:
  201. threadID = int(opts[1])
  202. taskID = int(opts[4])
  203. locationID = int(opts[3])
  204. ts = tryNum(opts[2])
  205. thread_stack = None
  206. currentTask = (None, None)
  207. if threadID is not None:
  208. if not threadID in self.threads_stack:
  209. thread_stack = deque()
  210. self.threads_stack[threadID] = thread_stack
  211. else:
  212. thread_stack = self.threads_stack[threadID]
  213. currentTask = None if not thread_stack else thread_stack[-1]
  214. t = (threadID, taskID)
  215. if opts[0] == 'b':
  216. assert not t in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t])
  217. task = self.TraceTask(threadID, taskID, locationID, ts)
  218. self.tasks[t] = task
  219. self.tasks_list.append(task)
  220. thread_stack.append((threadID, taskID))
  221. if currentTask:
  222. task.parentThreadID = currentTask[0]
  223. task.parentTaskID = currentTask[1]
  224. if 'parentThread' in extra_opts:
  225. task.parentThreadID = extra_opts['parentThread']
  226. if 'parent' in extra_opts:
  227. task.parentTaskID = extra_opts['parent']
  228. if opts[0] == 'e':
  229. task = self.tasks[t]
  230. task.endTimestamp = ts
  231. if 'tIPP' in extra_opts:
  232. task.selfTimeIPP = extra_opts['tIPP']
  233. if 'tOCL' in extra_opts:
  234. task.selfTimeOpenCL = extra_opts['tOCL']
  235. thread_stack.pop()
  236. def load(self, filename):
  237. self.pending_files.append(filename)
  238. if DEBUG:
  239. with open(filename, 'r') as f:
  240. print(f.read(), end='')
  241. while self.pending_files:
  242. self.parse_file(self.pending_files.pop())
  243. def getParentTask(self, task):
  244. return self.tasks.get((task.parentThreadID, task.parentTaskID), None)
  245. def process(self):
  246. self.tasks_list.sort(key=lambda x: x.beginTimestamp)
  247. parallel_for_location = None
  248. for (id, l) in self.locations.items():
  249. if l.name == 'parallel_for':
  250. parallel_for_location = l.locationID
  251. break
  252. for task in self.tasks_list:
  253. try:
  254. task.duration = task.endTimestamp - task.beginTimestamp
  255. task.selfDuration = task.duration
  256. except:
  257. task.duration = None
  258. task.selfDuration = None
  259. task.totalTimeIPP = task.selfTimeIPP
  260. task.totalTimeOpenCL = task.selfTimeOpenCL
  261. dpprint(self.tasks)
  262. dprint("Calculate total times")
  263. for task in self.tasks_list:
  264. parentTask = self.getParentTask(task)
  265. if parentTask:
  266. parentTask.selfDuration = parentTask.selfDuration - task.duration
  267. parentTask.childTask.append(task)
  268. timeIPP = task.selfTimeIPP
  269. timeOpenCL = task.selfTimeOpenCL
  270. while parentTask:
  271. if parentTask.locationID == parallel_for_location: # TODO parallel_for
  272. break
  273. parentLocation = self.locations[parentTask.locationID]
  274. if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
  275. parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
  276. timeIPP = 0
  277. else:
  278. parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
  279. if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
  280. parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
  281. timeOpenCL = 0
  282. else:
  283. parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
  284. parentTask = self.getParentTask(parentTask)
  285. dpprint(self.tasks)
  286. dprint("Calculate total times (parallel_for)")
  287. for task in self.tasks_list:
  288. if task.locationID == parallel_for_location:
  289. task.selfDuration = 0
  290. childDuration = sum([t.duration for t in task.childTask])
  291. if task.duration == 0 or childDuration == 0:
  292. continue
  293. timeCoef = task.duration / float(childDuration)
  294. childTimeIPP = sum([t.totalTimeIPP for t in task.childTask])
  295. childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask])
  296. if childTimeIPP == 0 and childTimeOpenCL == 0:
  297. continue
  298. timeIPP = childTimeIPP * timeCoef
  299. timeOpenCL = childTimeOpenCL * timeCoef
  300. parentTask = task
  301. while parentTask:
  302. parentLocation = self.locations[parentTask.locationID]
  303. if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
  304. parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
  305. timeIPP = 0
  306. else:
  307. parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
  308. if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
  309. parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
  310. timeOpenCL = 0
  311. else:
  312. parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
  313. parentTask = self.getParentTask(parentTask)
  314. dpprint(self.tasks)
  315. dprint("Done")
  316. def dump(self, max_entries):
  317. assert isinstance(max_entries, int)
  318. class CallInfo():
  319. def __init__(self, callID):
  320. self.callID = callID
  321. self.totalTimes = []
  322. self.selfTimes = []
  323. self.threads = set()
  324. self.selfTimesIPP = []
  325. self.selfTimesOpenCL = []
  326. self.totalTimesIPP = []
  327. self.totalTimesOpenCL = []
  328. calls = {}
  329. for currentTask in self.tasks_list:
  330. task = currentTask
  331. callID = []
  332. for i in range(stack_size):
  333. callID.append(task.locationID)
  334. task = self.getParentTask(task)
  335. if not task:
  336. break
  337. callID = tuple(callID)
  338. if not callID in calls:
  339. call = CallInfo(callID)
  340. calls[callID] = call
  341. else:
  342. call = calls[callID]
  343. call.totalTimes.append(currentTask.duration)
  344. call.selfTimes.append(currentTask.selfDuration)
  345. call.threads.add(currentTask.threadID)
  346. call.selfTimesIPP.append(currentTask.selfTimeIPP)
  347. call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL)
  348. call.totalTimesIPP.append(currentTask.totalTimeIPP)
  349. call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL)
  350. dpprint(self.tasks)
  351. dpprint(self.locations)
  352. dpprint(calls)
  353. calls_self_sum = {k: sum(v.selfTimes) for (k, v) in calls.items()}
  354. calls_total_sum = {k: sum(v.totalTimes) for (k, v) in calls.items()}
  355. calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()}
  356. calls_sorted = sorted(calls.keys(), key=lambda x: calls_self_sum[x], reverse=True)
  357. calls_self_sum_IPP = {k: sum(v.selfTimesIPP) for (k, v) in calls.items()}
  358. calls_total_sum_IPP = {k: sum(v.totalTimesIPP) for (k, v) in calls.items()}
  359. calls_self_sum_OpenCL = {k: sum(v.selfTimesOpenCL) for (k, v) in calls.items()}
  360. calls_total_sum_OpenCL = {k: sum(v.totalTimesOpenCL) for (k, v) in calls.items()}
  361. if max_entries > 0 and len(calls_sorted) > max_entries:
  362. calls_sorted = calls_sorted[:max_entries]
  363. def formatPercents(p):
  364. if p is not None:
  365. return "{:>3d}".format(int(p*100))
  366. return ''
  367. name_width = 70
  368. timestamp_width = 12
  369. def fmtTS():
  370. return '{:>' + str(timestamp_width) + '}'
  371. fmt = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
  372. fmt2 = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
  373. print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%"))
  374. print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%"))
  375. for (index, callID) in enumerate(calls_sorted):
  376. call_self_times = calls[callID].selfTimes
  377. loc0 = self.locations[callID[0]]
  378. loc_array = [] # [str(callID)]
  379. for (i, l) in enumerate(callID):
  380. loc = self.locations[l]
  381. loc_array.append(loc.name if i > 0 else str(loc))
  382. loc_str = '|'.join(loc_array)
  383. if len(loc_str) > name_width: loc_str = loc_str[:name_width-3]+'...'
  384. print(fmt.format(index + 1, loc_str, len(call_self_times),
  385. len(calls[callID].threads),
  386. formatTimestamp(min(call_self_times)),
  387. formatTimestamp(max(call_self_times)),
  388. formatTimestamp(calls_median[callID]),
  389. formatTimestamp(sum(call_self_times)/float(len(call_self_times))),
  390. formatTimestamp(sum(call_self_times)),
  391. formatTimestamp(calls_self_sum_IPP[callID]),
  392. formatPercents(calls_self_sum_IPP[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
  393. formatTimestamp(calls_self_sum_OpenCL[callID]),
  394. formatPercents(calls_self_sum_OpenCL[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
  395. ))
  396. call_total_times = calls[callID].totalTimes
  397. print(fmt2.format("", "", "", "",
  398. formatTimestamp(min(call_total_times)),
  399. formatTimestamp(max(call_total_times)),
  400. formatTimestamp(median(call_total_times)),
  401. formatTimestamp(sum(call_total_times)/float(len(call_total_times))),
  402. formatTimestamp(sum(call_total_times)),
  403. formatTimestamp(calls_total_sum_IPP[callID]),
  404. formatPercents(calls_total_sum_IPP[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
  405. formatTimestamp(calls_total_sum_OpenCL[callID]),
  406. formatPercents(calls_total_sum_OpenCL[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
  407. ))
  408. print()
  409. if __name__ == "__main__":
  410. tracefile = sys.argv[1] if len(sys.argv) > 1 else 'OpenCVTrace.txt'
  411. count = int(sys.argv[2]) if len(sys.argv) > 2 else 10
  412. trace = Trace(tracefile)
  413. trace.process()
  414. trace.dump(max_entries = count)
  415. print("OK")