# usability_checker.py
  1. # Copyright (c) Microsoft Corporation. All rights reserved.
  2. # Licensed under the MIT License.
  3. from __future__ import annotations
  4. import argparse
  5. import logging
  6. import os
  7. import pathlib
  8. import tempfile
  9. from collections import deque
  10. from enum import IntEnum
  11. import onnx
  12. from ..onnx_model_utils import ModelProtoWithShapeInfo, get_producer_consumer_maps, is_fixed_size_tensor, optimize_model
  13. class _SupportedOpsChecker:
  14. """
  15. Class to process the md file with list of supported ops and caveats for an execution provider.
  16. e.g. /tools/ci_build/github/android/nnapi_supported_ops.md
  17. /tools/ci_build/github/apple/coreml_supported_mlprogram_ops.md
  18. /tools/ci_build/github/apple/coreml_supported_neuralnetwork_ops.md
  19. """
  20. def __init__(self, filename):
  21. self._filename = filename
  22. self._ops = {} # op to caveats
  23. self._ops_seen = set()
  24. with open(filename) as f:
  25. for line in f:
  26. # we're looking for a markdown table with 2 columns. first is op name. second is caveats
  27. # op name is domain:op
  28. if line.startswith("|"):
  29. pieces = line.strip().split("|")
  30. if len(pieces) == 4: # pre-first '|'. op, caveat, post-last '|'
  31. domain_op = pieces[1]
  32. caveat = pieces[2]
  33. caveat = caveat.replace("<br/>", " ") # remove some HTML tags
  34. # skip lines that don't have the ':' which separates the domain and op
  35. # e.g. the table header will fail this check
  36. if ":" in domain_op:
  37. self._ops[domain_op] = caveat
  38. def is_op_supported(self, node):
  39. domain = node.domain if node.domain else "ai.onnx"
  40. domain_op = domain + ":" + node.op_type
  41. is_supported = domain_op in self._ops
  42. if is_supported:
  43. self._ops_seen.add(domain_op)
  44. return is_supported
  45. def get_caveats(self):
  46. caveats = []
  47. for op in sorted(self._ops_seen):
  48. caveat = self._ops[op]
  49. if caveat:
  50. caveats.append(f"{op}:{caveat}")
  51. return caveats
  52. class PartitioningInfo:
  53. class TryWithEP(IntEnum):
  54. NO = (0,)
  55. MAYBE = (1,)
  56. YES = 2
  57. def __init__(
  58. self,
  59. num_nodes: int,
  60. num_supported_nodes: int,
  61. num_partitions: int,
  62. supported_ops_checker: _SupportedOpsChecker,
  63. supported_groups: list[onnx.NodeProto],
  64. unsupported_ops: set[str],
  65. nodes_unsupported_due_to_op: int,
  66. nodes_unsupported_due_to_dynamic_input: int,
  67. num_unsupported_nodes_due_to_rank: int,
  68. ops_with_unsupported_rank: set[str],
  69. ):
  70. self.num_nodes = num_nodes
  71. self.num_supported_nodes = num_supported_nodes
  72. self.num_partitions = num_partitions
  73. self.supported_ops_checker = supported_ops_checker
  74. self.supported_groups = supported_groups
  75. self.unsupported_ops = unsupported_ops
  76. self.nodes_unsupported_due_to_op = nodes_unsupported_due_to_op
  77. self.nodes_unsupported_due_to_dynamic_input = nodes_unsupported_due_to_dynamic_input
  78. self.num_unsupported_nodes_due_to_rank = num_unsupported_nodes_due_to_rank
  79. self.ops_with_unsupported_rank = ops_with_unsupported_rank
  80. self.num_subgraphs = 0
  81. self.num_nodes_in_subgraphs = 0
  82. def merge(self, other: PartitioningInfo):
  83. """
  84. Merge the information from another PartitioningInfo instance into this one.
  85. """
  86. self.num_nodes += other.num_nodes
  87. self.num_supported_nodes += other.num_supported_nodes
  88. self.num_partitions += other.num_partitions
  89. self.supported_groups.extend(other.supported_groups)
  90. self.unsupported_ops.update(other.unsupported_ops)
  91. self.nodes_unsupported_due_to_op += other.nodes_unsupported_due_to_op
  92. self.nodes_unsupported_due_to_dynamic_input += other.nodes_unsupported_due_to_dynamic_input
  93. self.num_unsupported_nodes_due_to_rank += other.num_unsupported_nodes_due_to_rank
  94. self.ops_with_unsupported_rank.update(other.ops_with_unsupported_rank)
  95. # hard assumption that we merge into the main graph partitioning info
  96. self.num_subgraphs += 1
  97. self.num_nodes_in_subgraphs += other.num_nodes
  98. def suitability(self):
  99. # semi-arbitrary choices that err on the side of MAYBE.
  100. # having 1 partition is always preferred, but if that is small it may not be useful.
  101. # having 2 partitions may be okay if they cover most nodes
  102. # more than 2 partitions and the device copy cost is almost guaranteed to outweigh the benefit of using the NPU
  103. # NOTE: This assumes the EP is not CPU based and there is device copy overhead to consider
  104. pct_supported = self.num_supported_nodes / self.num_nodes * 100
  105. if self.num_partitions == 1:
  106. if pct_supported > 75:
  107. return PartitioningInfo.TryWithEP.YES
  108. elif pct_supported > 50:
  109. return PartitioningInfo.TryWithEP.MAYBE
  110. else:
  111. return PartitioningInfo.TryWithEP.NO
  112. if self.num_partitions == 2:
  113. if pct_supported > 75:
  114. return PartitioningInfo.TryWithEP.MAYBE
  115. else:
  116. return PartitioningInfo.TryWithEP.NO
  117. return PartitioningInfo.TryWithEP.NO
  118. def print_analysis(self, logger: logging.Logger, ep_name: str):
  119. """
  120. Analyze the partitioning information and log the analysis
  121. :param logger: Logger to use
  122. :param ep_name: Execution provider name to use in the log messages
  123. """
  124. logger.info(
  125. f"{self.num_partitions} partitions with a total of {self.num_supported_nodes}/{self.num_nodes} "
  126. f"nodes can be handled by the {ep_name} EP."
  127. )
  128. if self.supported_groups:
  129. logger.info(
  130. f"\tPartition sizes: [{', '.join([str(len(partition)) for partition in self.supported_groups])}]"
  131. )
  132. # dump full groups if debug output is enabled
  133. for group in self.supported_groups:
  134. logger.debug(f"Nodes in group: {','.join([f'{node.op_type}:{node.name}' for node in group])}")
  135. logger.info(f"Unsupported nodes due to operator={self.nodes_unsupported_due_to_op}")
  136. if self.unsupported_ops:
  137. logger.info(f"\tUnsupported ops: {','.join(sorted(self.unsupported_ops))}")
  138. caveats = self.supported_ops_checker.get_caveats()
  139. if caveats:
  140. indent = " " * 5
  141. logger.info(
  142. "\tCaveats that have not been checked and may result in a node not actually being supported: "
  143. f"{''.join([os.linesep + indent + caveat for caveat in caveats])}"
  144. )
  145. if self.nodes_unsupported_due_to_dynamic_input:
  146. logger.info(
  147. "Unsupported nodes due to input having a dynamic shape=%d",
  148. self.nodes_unsupported_due_to_dynamic_input,
  149. )
  150. if self.num_unsupported_nodes_due_to_rank:
  151. logger.info(f"Unsupported nodes due to rank of input data={self.num_unsupported_nodes_due_to_rank}")
  152. logger.info(f"\tOps with unsupported rank: {','.join(sorted(self.ops_with_unsupported_rank))}")
  153. if self.num_subgraphs > 0:
  154. # TODO: CoreML has a flag. NNAPI doesn't. Either should be able to support a subgraph when treated as a
  155. # separate graph (only extra detail would be making sure implicit inputs are handled).
  156. # Merging the subgraph into the parent graph would be more complex.
  157. # e.g. for CoreML we could potentially convert Loop to while_loop and If to cond if the subgraphs in the
  158. # control flow node are fully supported.
  159. # NNAPI also has While and If.
  160. # It most likely will be necessary to support merging in If nodes with fully supported subgraphs,
  161. # as the subgraphs in those are often very simple, so the performance cost of going to the CPU EP and back
  162. # is high.
  163. logger.info(
  164. f"{self.num_nodes_in_subgraphs} nodes are in {self.num_subgraphs} subgraphs. "
  165. "Check EP as to whether subgraphs are supported."
  166. )
  167. pct_nodes_using_ep = self.num_supported_nodes / self.num_nodes * 100
  168. if self.num_partitions == 0:
  169. logger.info(f"{ep_name} cannot run any nodes in this model.")
  170. elif self.num_partitions == 1:
  171. if pct_nodes_using_ep > 75:
  172. logger.info(
  173. f"{ep_name} should work well for this model as there is one partition "
  174. f"covering {pct_nodes_using_ep:.1f}% of the nodes in the model."
  175. )
  176. elif pct_nodes_using_ep > 50:
  177. logger.info(
  178. f"{ep_name} may work well for this model, however only {pct_nodes_using_ep:.1f}% of nodes "
  179. "will use it. Performance testing is required to validate."
  180. )
  181. else:
  182. logger.info(
  183. f"{ep_name} will probably not work will for this model as only {pct_nodes_using_ep:.2f}% "
  184. "of nodes will use it."
  185. )
  186. elif self.num_partitions == 2 and pct_nodes_using_ep > 75:
  187. logger.info(
  188. f"{ep_name} can be considered for this model as there are two partitions "
  189. f"covering {pct_nodes_using_ep:.1f}% of the nodes. "
  190. "Performance testing is required to validate."
  191. )
  192. else:
  193. logger.info(
  194. f"{ep_name} is not recommended with this model as there are {self.num_partitions} partitions "
  195. f"covering {pct_nodes_using_ep:.1f}% of the nodes in the model. "
  196. "This will most likely result in worse performance than just using the CPU EP."
  197. )
  198. def _check_partitioning_for_graph(
  199. graph: onnx.GraphProto,
  200. node_to_producers: dict[onnx.NodeProto, set[onnx.NodeProto]],
  201. node_to_consumers: dict[onnx.NodeProto, set[onnx.NodeProto]],
  202. supported_ops_checker: _SupportedOpsChecker,
  203. outer_scope_initializers: set[str],
  204. require_fixed_input_sizes: bool,
  205. value_info: dict[str, onnx.ValueInfoProto],
  206. max_rank: int = 999, # max rank if EP has a limitation
  207. ):
  208. # initializers have fixed sizes.
  209. initializers = [i.name for i in graph.initializer]
  210. def _is_fixed_shape_value(value):
  211. if value in value_info:
  212. return is_fixed_size_tensor(value_info[value])
  213. if value in initializers or value in outer_scope_initializers:
  214. return True
  215. # if something has an unknown shape (e.g. something downstream of a Reshape with dynamic input for the shape)
  216. # it won't have an entry in value_info
  217. return False
  218. #
  219. # Replicate logic from /onnxruntime/core/providers/partitioning_utils.cc:CreateSupportedPartitionNodeGroups
  220. # to roughly estimate number of partitions for nodes that is_node_supported_fn returns true for.
  221. #
  222. # We keep the structure and variable names as close as possible to the C++ implementation to simplify keeping them
  223. # in sync if future updates are needed.
  224. #
  225. # NOTE: CreateSupportedPartitionNodeGroups was recently updated to be QDQ aware so that partitions did not split
  226. # QDQ node groups. This code does not need to be QDQ aware as splitting a QDQ node group does not affect the total
  227. # number of partitions or supported nodes.
  228. #
  229. # we don't currently support a callback for additional group closure checks in the python implementation
  230. on_group_closed_fn = None
  231. supported_groups = []
  232. # number of inputs from unprocessed nodes (in-degree) per node
  233. in_degree = {}
  234. # nodes that are ready to process
  235. nodes_to_process = deque() # deque of Node instances
  236. # nodes that will be processed when considering the next partition node group
  237. nodes_to_process_with_next_group = deque()
  238. # initialize in-degrees and find root nodes
  239. for node in graph.node:
  240. node_input_edge_count = len(node_to_producers[node]) if node in node_to_producers else 0
  241. in_degree[node] = node_input_edge_count
  242. if node_input_edge_count == 0:
  243. # node is only dependent on graph input or initializers
  244. nodes_to_process.append(node)
  245. supported_group = []
  246. # the partition node group's border is the aggregate of its nodes' output nodes
  247. supported_group_border = set()
  248. num_supported_nodes = 0
  249. num_unsupported_nodes_due_to_op = 0
  250. num_unsupported_nodes_due_to_dynamic_input = 0
  251. num_unsupported_nodes_due_to_rank = 0
  252. unsupported_ops = set()
  253. ops_with_unsupported_rank = set()
  254. def close_group():
  255. if supported_group:
  256. keep_partition = not on_group_closed_fn or on_group_closed_fn(supported_group)
  257. if keep_partition:
  258. supported_groups.append(supported_group.copy())
  259. supported_group.clear()
  260. supported_group_border.clear()
  261. while nodes_to_process or nodes_to_process_with_next_group:
  262. if not nodes_to_process:
  263. close_group()
  264. nodes_to_process = nodes_to_process_with_next_group
  265. nodes_to_process_with_next_group = deque()
  266. continue
  267. node = nodes_to_process.popleft()
  268. is_op_supported = supported_ops_checker.is_op_supported(node)
  269. is_input_shape_supported = not require_fixed_input_sizes or all(_is_fixed_shape_value(i) for i in node.input)
  270. is_rank_supported = True
  271. if value_info:
  272. for node_input in node.input:
  273. if node_input and node_input in value_info and value_info[node_input].type.HasField("tensor_type"):
  274. input_rank = len(value_info[node_input].type.tensor_type.shape.dim)
  275. if input_rank > max_rank:
  276. is_rank_supported = False
  277. break
  278. # special-case if we can infer the rank from the length of the 'perms' Transpose attribute
  279. # e.g. this works with SegmentAnything where dynamic Reshape operators result in no shape info.
  280. if node.op_type == "Transpose" and len(node.attribute[0].ints) > max_rank:
  281. is_rank_supported = False
  282. is_node_supported = is_op_supported and is_input_shape_supported and is_rank_supported
  283. if not is_node_supported:
  284. if node in supported_group_border:
  285. # an unsupported node on the border will be processed after the current partition node group
  286. # so skip any additional processing/counting here
  287. nodes_to_process_with_next_group.append(node)
  288. continue
  289. if not is_op_supported:
  290. unsupported_ops.add(f"{node.domain if node.domain else 'ai.onnx'}:{node.op_type}")
  291. num_unsupported_nodes_due_to_op += 1
  292. if not is_input_shape_supported:
  293. num_unsupported_nodes_due_to_dynamic_input += 1
  294. if not is_rank_supported:
  295. num_unsupported_nodes_due_to_rank += 1
  296. ops_with_unsupported_rank.add(f"{node.domain if node.domain else 'ai.onnx'}:{node.op_type}")
  297. if is_node_supported:
  298. num_supported_nodes += 1
  299. # add node to the partition node group
  300. supported_group.append(node)
  301. # remove node from the border and add its outputs to the border
  302. if node in supported_group_border: # noqa: FURB132
  303. supported_group_border.remove(node)
  304. # for each consumer node add to supported_group_border
  305. if node in node_to_consumers:
  306. for consumer in node_to_consumers[node]:
  307. supported_group_border.add(consumer)
  308. # adjust in-degrees of the node outputs and add any new nodes to process
  309. if node in node_to_consumers:
  310. for consumer in node_to_consumers[node]:
  311. consumer_node_in_degree = in_degree[consumer]
  312. consumer_node_in_degree -= 1
  313. if consumer_node_in_degree == 0:
  314. nodes_to_process.append(consumer)
  315. in_degree[consumer] = consumer_node_in_degree
  316. close_group()
  317. num_nodes = len(graph.node)
  318. num_partitions = len(supported_groups)
  319. info = PartitioningInfo(
  320. num_nodes,
  321. num_supported_nodes,
  322. num_partitions,
  323. supported_ops_checker,
  324. supported_groups,
  325. unsupported_ops,
  326. num_unsupported_nodes_due_to_op,
  327. num_unsupported_nodes_due_to_dynamic_input,
  328. num_unsupported_nodes_due_to_rank,
  329. ops_with_unsupported_rank,
  330. )
  331. return info
  332. def check_partitioning(
  333. main_graph: onnx.GraphProto,
  334. supported_ops_checker: _SupportedOpsChecker,
  335. require_fixed_input_sizes: bool,
  336. max_rank: int = 999,
  337. ) -> PartitioningInfo:
  338. """
  339. Estimate the partitions the graph will be split into for nodes that is_node_supported_fn returns true for.
  340. The check on whether a node is supported is purely based on the operator type. Additional limitations
  341. (e.g. NNAPI EP only supports 2D Conv) are not checked, so partitions may not be 100% accurate. The limitations
  342. for operators in the partitions are printed so the user can manually check.
  343. :param main_graph: Graph to process
  344. :param supported_ops_checker: Checker with info on supported ops.
  345. :param require_fixed_input_sizes: If True, require that the inputs to a potentially supported node are fixed size
  346. tensors for it to be considered as supported. This requires
  347. onnx.shape_inference.infer_shapes to have been run on the model to populate the
  348. shape information.
  349. If False, shapes are ignored during the check.
  350. :param max_rank: Set if EP has a limitation on the rank of tensors it supports.
  351. :return PartitioningInfo instance with details
  352. """
  353. if require_fixed_input_sizes and len(main_graph.value_info) == 0 and len(main_graph.node) > 1:
  354. raise ValueError("Run onnx.shape_inference.infer_shapes on the model to populate the shape information.")
  355. # create lookup map from ValueInfo for efficiency
  356. def _update_value_info(graph: onnx.GraphProto, value_to_shape: dict[str, onnx.ValueInfoProto]):
  357. for v in graph.input:
  358. value_to_shape[v.name] = v
  359. for v in graph.output:
  360. value_to_shape[v.name] = v
  361. for v in graph.value_info:
  362. value_to_shape[v.name] = v
  363. # the producer/consumer maps are for the entire model
  364. node_to_producers, node_to_consumers = get_producer_consumer_maps(main_graph)
  365. def _check_graph(
  366. graph: onnx.GraphProto,
  367. outer_scope_value_info: dict[str, onnx.ValueInfoProto] | None,
  368. outer_scope_initializers: set[str] | None = None,
  369. partitioning_info: PartitioningInfo | None = None,
  370. ) -> PartitioningInfo:
  371. if outer_scope_value_info is not None:
  372. # extend value info if we're using it. we replace any value shadowed with a local one
  373. value_info = outer_scope_value_info.copy()
  374. _update_value_info(graph, value_info)
  375. else:
  376. value_info = {}
  377. if outer_scope_initializers is None:
  378. outer_scope_initializers = set()
  379. info = _check_partitioning_for_graph(
  380. graph,
  381. node_to_producers,
  382. node_to_consumers,
  383. supported_ops_checker,
  384. outer_scope_initializers,
  385. require_fixed_input_sizes,
  386. value_info,
  387. max_rank,
  388. )
  389. if partitioning_info:
  390. # merge in subgraph info
  391. partitioning_info.merge(info)
  392. else:
  393. # main graph info
  394. partitioning_info = info
  395. # setup outer scope initializers. we copy the input set as a model may have multiple subgraphs
  396. # on multiple levels, so we need to keep the set for each descent separate
  397. subgraph_outer_scope_initializers = set(outer_scope_initializers)
  398. for initializer in graph.initializer:
  399. subgraph_outer_scope_initializers.add(initializer.name)
  400. for node in graph.node:
  401. # recurse into nodes with subgraphs
  402. for attr in node.attribute:
  403. if attr.HasField("g"):
  404. subgraph = attr.g
  405. partitioning_info = _check_graph(
  406. subgraph, value_info, subgraph_outer_scope_initializers, partitioning_info
  407. )
  408. return partitioning_info
  409. aggregated_partitioning_info = _check_graph(main_graph, {} if require_fixed_input_sizes else None)
  410. return aggregated_partitioning_info
  411. def _check_ep_partitioning(
  412. model: onnx.ModelProto, supported_ops_config: pathlib.Path, require_fixed_input_sizes: bool, max_rank: int = 999
  413. ):
  414. supported_ops = _SupportedOpsChecker(supported_ops_config)
  415. partition_info = check_partitioning(model.graph, supported_ops, require_fixed_input_sizes, max_rank)
  416. return partition_info
  417. def check_nnapi_partitions(model, require_fixed_input_sizes: bool):
  418. # if we're running in the ORT python package the file should be local. otherwise assume we're running from the
  419. # ORT repo
  420. script_dir = pathlib.Path(__file__).parent
  421. local_config = script_dir / "nnapi_supported_ops.md"
  422. if local_config.exists():
  423. config_path = local_config
  424. else:
  425. ort_root = script_dir.parents[3]
  426. config_path = ort_root / "tools" / "ci_build" / "github" / "android" / "nnapi_supported_ops.md"
  427. return _check_ep_partitioning(model, config_path, require_fixed_input_sizes)
  428. def check_coreml_partitions(model: onnx.ModelProto, require_fixed_input_sizes: bool, config_filename: str):
  429. # if we're running in the ORT python package the file should be local. otherwise assume we're running from the
  430. # ORT repo
  431. script_dir = pathlib.Path(__file__).parent
  432. local_config = script_dir / config_filename
  433. if local_config.exists():
  434. config_path = local_config
  435. else:
  436. ort_root = script_dir.parents[3]
  437. config_path = ort_root / "tools" / "ci_build" / "github" / "apple" / config_filename
  438. max_rank = 5
  439. return _check_ep_partitioning(model, config_path, require_fixed_input_sizes, max_rank)
  440. def check_shapes(graph: onnx.GraphProto, logger: logging.Logger | None = None):
  441. """
  442. Check the shapes of graph inputs, values and graph outputs to determine if they have static or dynamic sizes.
  443. NNAPI does not support dynamically sized values. CoreML does, but it will most likely cost performance.
  444. :param graph: Graph to check. If shape inferencing has been run the checks on values will be meaningful.
  445. :param logger: Optional logger for diagnostic information.
  446. :return: Tuple of List of inputs with dynamic shapes, Number of dynamic values found
  447. """
  448. # it's OK if the input is dynamically sized and we do a Resize early to a fixed size.
  449. # it's not good if lots of ops have dynamic inputs
  450. num_fixed_values = 0
  451. num_dynamic_values = 0
  452. dynamic_inputs = []
  453. for i in graph.input:
  454. if not is_fixed_size_tensor(i):
  455. dynamic_inputs.append(i)
  456. # split/join to remove repeated whitespace and newlines from str(i)
  457. if logger:
  458. logger.info(f"Input is not a fixed size tensor: {' '.join(str(i).split())}")
  459. num_dynamic_values += 1
  460. else:
  461. num_fixed_values += 1
  462. dynamic_outputs = []
  463. for o in graph.output:
  464. if not is_fixed_size_tensor(o):
  465. dynamic_outputs.append(o)
  466. if logger:
  467. logger.info(f"Output is not a fixed size tensor: {' '.join(str(o).split())}")
  468. num_dynamic_values += 1
  469. else:
  470. num_fixed_values += 1
  471. # check we have value info.
  472. # special case some test graphs with a single node which only have graph input and output values, and
  473. # a model where all inputs are dynamic (results in no value_info)
  474. if not graph.value_info and not (len(graph.node) == 1 or len(dynamic_inputs) == len(graph.input)):
  475. logger.warning(
  476. "Unable to check shapes within model. ONNX shape inferencing should be run on the model prior to checking."
  477. )
  478. for vi in graph.value_info:
  479. if is_fixed_size_tensor(vi):
  480. num_fixed_values += 1
  481. else:
  482. num_dynamic_values += 1
  483. if logger:
  484. logger.info(
  485. f"Num values with fixed shape={num_fixed_values}. Num values with dynamic shape={num_dynamic_values}"
  486. )
  487. if dynamic_inputs:
  488. if dynamic_outputs:
  489. logger.info(
  490. "Model has dynamic inputs and outputs. Consider re-exporting model with fixed sizes "
  491. "if NNAPI or CoreML can be used with this model."
  492. )
  493. else:
  494. logger.info(
  495. """Model has dynamically sized inputs but fixed sized outputs.
  496. If the sizes become fixed early in the model (e.g. pre-processing of a dynamic input size
  497. results in a fixed input size for the majority of the model) performance with NNAPI and CoreML,
  498. if applicable, should not be significantly impacted."""
  499. )
  500. return dynamic_inputs, num_dynamic_values
  501. def checker(model_path: pathlib.Path, logger: logging.Logger):
  502. model_with_shape_info_wrapper = ModelProtoWithShapeInfo(model_path)
  503. model_with_shape_info = model_with_shape_info_wrapper.model_with_shape_info
  504. dynamic_inputs, num_dynamic_values = check_shapes(model_with_shape_info.graph)
  505. def check_ep(ep_name, checker_func):
  506. logger.info(f"Checking {ep_name}")
  507. # check with shape info first so supported nodes takes into account values with dynamic shapes
  508. require_fixed_input_sizes = True
  509. partition_info = checker_func(model_with_shape_info, require_fixed_input_sizes)
  510. if logger.getEffectiveLevel() <= logging.INFO:
  511. partition_info.print_analysis(logger, ep_name)
  512. suitability = partition_info.suitability()
  513. logger.info(f"Model should perform well with {ep_name} as is: {suitability.name}")
  514. if suitability != PartitioningInfo.TryWithEP.YES and dynamic_inputs:
  515. logger.info("--------")
  516. logger.info("Checking if model will perform better if the dynamic shapes are fixed...")
  517. require_fixed_input_sizes = False
  518. partition_info_with_fixed_shapes = checker_func(model_with_shape_info, require_fixed_input_sizes)
  519. if logger.getEffectiveLevel() <= logging.INFO:
  520. # analyze and log detailed info
  521. logger.info("Partition information if the model was updated to make the shapes fixed:")
  522. partition_info_with_fixed_shapes.print_analysis(logger, ep_name)
  523. fixed_shape_suitability = partition_info_with_fixed_shapes.suitability()
  524. logger.info(
  525. f"Model should perform well with {ep_name} if modified to have fixed input shapes: "
  526. f"{fixed_shape_suitability.name}"
  527. )
  528. if fixed_shape_suitability != PartitioningInfo.TryWithEP.NO:
  529. logger.info("Shapes can be altered using python -m onnxruntime.tools.make_dynamic_shape_fixed")
  530. if fixed_shape_suitability.value > suitability.value:
  531. suitability = fixed_shape_suitability
  532. logger.info("================")
  533. logger.info("")
  534. return suitability
  535. nnapi_suitability = check_ep("NNAPI", check_nnapi_partitions)
  536. # Check for NeuralNetwork CoreML model
  537. def check_nn_coreml(model: onnx.ModelProto, require_fixed_input_sizes):
  538. return check_coreml_partitions(model, require_fixed_input_sizes, "coreml_supported_neuralnetwork_ops.md")
  539. # Check for MLProgram CoreML model
  540. def check_mlprogram_coreml(model: onnx.ModelProto, require_fixed_input_sizes):
  541. return check_coreml_partitions(model, require_fixed_input_sizes, "coreml_supported_mlprogram_ops.md")
  542. coreml_nn_suitability = check_ep("CoreML NeuralNetwork", check_nn_coreml)
  543. coreml_mlprogram_suitability = check_ep("CoreML MLProgram", check_mlprogram_coreml)
  544. if (
  545. nnapi_suitability != PartitioningInfo.TryWithEP.YES
  546. or coreml_nn_suitability != PartitioningInfo.TryWithEP.YES
  547. or coreml_mlprogram_suitability != PartitioningInfo.TryWithEP.YES
  548. ) and logger.getEffectiveLevel() > logging.INFO:
  549. logger.info("Re-run with log level of INFO for more details on the NNAPI/CoreML issues.")
  550. return (
  551. nnapi_suitability != PartitioningInfo.TryWithEP.NO
  552. or coreml_nn_suitability != PartitioningInfo.TryWithEP.NO
  553. or coreml_mlprogram_suitability != PartitioningInfo.TryWithEP.NO
  554. )
  555. def analyze_model(model_path: pathlib.Path, skip_optimize: bool = False, logger: logging.Logger | None = None):
  556. """
  557. Analyze the provided model to determine if it's likely to work well with the NNAPI or CoreML Execution Providers
  558. :param model_path: Model to analyze.
  559. :param skip_optimize: Skip optimizing to BASIC level before checking. When exporting to ORT format we will do this
  560. optimization..
  561. :param logger: Logger for output
  562. :return: True if either the NNAPI or CoreML Execution Providers may work well with this model.
  563. """
  564. if not logger:
  565. logger = logging.getLogger("usability_checker")
  566. logger.setLevel(logging.INFO)
  567. logger.info(f"Checking {model_path} for usability with ORT Mobile.")
  568. with tempfile.TemporaryDirectory() as tmp:
  569. if not skip_optimize:
  570. tmp_path = pathlib.Path(tmp) / model_path.name
  571. optimize_model(model_path, tmp_path, use_external_initializers=True)
  572. model_path = tmp_path
  573. try_eps = checker(model_path.resolve(strict=True), logger)
  574. return try_eps
  575. def parse_args():
  576. parser = argparse.ArgumentParser(
  577. os.path.basename(__file__), description="""Analyze an ONNX model for usage with the ORT mobile"""
  578. )
  579. parser.add_argument("--log_level", choices=["debug", "info"], default="info", help="Logging level")
  580. parser.add_argument(
  581. "--skip_optimize",
  582. action="store_true",
  583. help="Don't optimize the model to BASIC level prior to analyzing. "
  584. "Optimization will occur when exporting the model to ORT format, so in general "
  585. "should not be skipped unless you have a specific reason to do so.",
  586. )
  587. parser.add_argument("model_path", type=pathlib.Path, help="Provide path to ONNX model")
  588. return parser.parse_args()
  589. def run_analyze_model():
  590. args = parse_args()
  591. logger = logging.getLogger("default")
  592. if args.log_level == "debug":
  593. logger.setLevel(logging.DEBUG)
  594. elif args.log_level == "info":
  595. logger.setLevel(logging.INFO)
  596. elif args.log_level == "warning":
  597. logger.setLevel(logging.WARNING)
  598. else:
  599. logger.setLevel(logging.ERROR)
  600. model_path = args.model_path.resolve()
  601. analyze_model(model_path, args.skip_optimize, logger)
# run the analysis when this file is executed as a script. nothing is run on import
if __name__ == "__main__":
    run_analyze_model()