reduced_build_config_parser.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. # Copyright (c) Microsoft Corporation. All rights reserved.
  2. # Licensed under the MIT License.
  3. from __future__ import annotations
  4. import os
  5. # Check if the flatbuffers module is available. If not we cannot handle type reduction information in the config.
  6. try:
  7. import flatbuffers # noqa: F401
  8. have_flatbuffers = True
  9. from .ort_format_model import GloballyAllowedTypesOpTypeImplFilter, OperatorTypeUsageManager
  10. except ImportError:
  11. have_flatbuffers = False
  12. def parse_config(config_file: str, enable_type_reduction: bool = False):
  13. """
  14. Parse the configuration file and return the required operators dictionary and an
  15. OpTypeImplFilterInterface instance.
  16. Configuration file lines can do the following:
  17. 1. specify required operators
  18. 2. specify globally allowed types for all operators
  19. 3. specify what it means for no required operators to be specified
  20. 1. Specifying required operators
  21. The basic format for specifying required operators is `domain;opset1,opset2;op1,op2...`
  22. e.g. `ai.onnx;11;Add,Cast,Clip,... for a single opset
  23. `ai.onnx;11,12;Add,Cast,Clip,... for multiple opsets
  24. note: Configuration information is accrued as the file is parsed. If an operator requires support from multiple
  25. opsets that can be done with one entry for each opset, or one entry with multiple opsets in it.
  26. If the configuration file is generated from ORT format models it may optionally contain JSON for per-operator
  27. type reduction. The required types are generally listed per input and/or output of the operator.
  28. The type information is in a map, with 'inputs' and 'outputs' keys. The value for 'inputs' or 'outputs' is a map
  29. between the index number of the input/output and the required list of types.
  30. For example, both the input and output types are relevant to ai.onnx:Cast.
  31. Type information for input 0 and output 0 could look like this:
  32. `{"inputs": {"0": ["float", "int32_t"]}, "outputs": {"0": ["float", "int64_t"]}}`
  33. which is added directly after the operator name in the configuration file.
  34. e.g.
  35. `ai.onnx;12;Add,Cast{"inputs": {"0": ["float", "int32_t"]}, "outputs": {"0": ["float", "int64_t"]}},Concat`
  36. If for example the types of inputs 0 and 1 were important, the entry may look like this (e.g. ai.onnx:Gather):
  37. `{"inputs": {"0": ["float", "int32_t"], "1": ["int32_t"]}}`
  38. Finally some operators do non-standard things and store their type information under a 'custom' key.
  39. ai.onnx.OneHot is an example of this, where the three input types are combined into a triple.
  40. `{"custom": [["float", "int64_t", "int64_t"], ["int64_t", "std::string", "int64_t"]]}`
  41. 2. Specifying globally allowed types for all operators
  42. The format for specifying globally allowed types for all operators is:
  43. `!globally_allowed_types;T0,T1,...`
  44. Ti should be a C++ scalar type supported by ONNX and ORT.
  45. At most one globally allowed types specification is allowed.
  46. Specifying per-operator type information and specifying globally allowed types are mutually exclusive - it is an
  47. error to specify both.
  48. 3. Specify what it means for no required operators to be specified
  49. By default, if no required operators are specified, NO operators are required.
  50. With the following line, if no required operators are specified, ALL operators are required:
  51. `!no_ops_specified_means_all_ops_are_required`
  52. :param config_file: Configuration file to parse
  53. :param enable_type_reduction: Set to True to use the type information in the config.
  54. If False the type information will be ignored.
  55. If the flatbuffers module is unavailable type information will be ignored as the
  56. type-based filtering has a dependency on the ORT flatbuffers schema.
  57. :return: required_ops: Dictionary of domain:opset:[ops] for required operators. If None, all operators are
  58. required.
  59. op_type_impl_filter: OpTypeImplFilterInterface instance if type reduction is enabled, the flatbuffers
  60. module is available, and type reduction information is present. None otherwise.
  61. """
  62. if not os.path.isfile(config_file):
  63. raise ValueError(f"Configuration file {config_file} does not exist")
  64. # only enable type reduction when flatbuffers is available
  65. enable_type_reduction = enable_type_reduction and have_flatbuffers
  66. required_ops = {}
  67. no_ops_specified_means_all_ops_are_required = False
  68. op_type_usage_manager = OperatorTypeUsageManager() if enable_type_reduction else None
  69. has_op_type_reduction_info = False
  70. globally_allowed_types = None
  71. def process_non_op_line(line):
  72. if not line or line.startswith("#"): # skip empty lines and comments
  73. return True
  74. if line.startswith("!globally_allowed_types;"): # handle globally allowed types
  75. if enable_type_reduction:
  76. nonlocal globally_allowed_types
  77. if globally_allowed_types is not None:
  78. raise RuntimeError("Globally allowed types were already specified.")
  79. globally_allowed_types = {segment.strip() for segment in line.split(";")[1].split(",")}
  80. return True
  81. if line == "!no_ops_specified_means_all_ops_are_required": # handle all ops required line
  82. nonlocal no_ops_specified_means_all_ops_are_required
  83. no_ops_specified_means_all_ops_are_required = True
  84. return True
  85. return False
  86. with open(config_file) as config:
  87. for line in [orig_line.strip() for orig_line in config]:
  88. if process_non_op_line(line):
  89. continue
  90. domain, opset_str, operators_str = (segment.strip() for segment in line.split(";"))
  91. opsets = [int(s) for s in opset_str.split(",")]
  92. # any type reduction information is serialized json that starts/ends with { and }.
  93. # type info is optional for each operator.
  94. if "{" in operators_str:
  95. has_op_type_reduction_info = True
  96. # parse the entries in the json dictionary with type info
  97. operators = set()
  98. cur = 0
  99. end = len(operators_str)
  100. while cur < end:
  101. next_comma = operators_str.find(",", cur)
  102. next_open_brace = operators_str.find("{", cur)
  103. if next_comma == -1:
  104. next_comma = end
  105. # the json string starts with '{', so if that is found (next_open_brace != -1)
  106. # before the next comma (which would be the start of the next operator if there is no type info
  107. # for the current operator), we have type info to parse.
  108. # e.g. need to handle extracting the operator name and type info for OpB and OpD,
  109. # and just the operator names for OpA and OpC from this example string
  110. # OpA,OpB{"inputs": {"0": ["float", "int32_t"]}},OpC,OpD{"outputs": {"0": ["int32_t"]}}
  111. if 0 < next_open_brace < next_comma:
  112. operator = operators_str[cur:next_open_brace].strip()
  113. operators.add(operator)
  114. # parse out the json dictionary with the type info by finding the closing brace that matches
  115. # the opening brace
  116. i = next_open_brace + 1
  117. num_open_braces = 1
  118. while num_open_braces > 0 and i < end:
  119. if operators_str[i] == "{":
  120. num_open_braces += 1
  121. elif operators_str[i] == "}":
  122. num_open_braces -= 1
  123. i += 1
  124. if num_open_braces != 0:
  125. raise RuntimeError("Mismatched { and } in type string: " + operators_str[next_open_brace:])
  126. if op_type_usage_manager:
  127. type_str = operators_str[next_open_brace:i]
  128. op_type_usage_manager.restore_from_config_entry(domain, operator, type_str)
  129. cur = i + 1
  130. else:
  131. # comma or end of line is next
  132. end_str = next_comma if next_comma != -1 else end
  133. operators.add(operators_str[cur:end_str].strip())
  134. cur = end_str + 1
  135. else:
  136. operators = {op.strip() for op in operators_str.split(",")}
  137. for opset in opsets:
  138. if domain not in required_ops:
  139. required_ops[domain] = {opset: operators}
  140. elif opset not in required_ops[domain]:
  141. required_ops[domain][opset] = operators
  142. else:
  143. required_ops[domain][opset].update(operators)
  144. if len(required_ops) == 0 and no_ops_specified_means_all_ops_are_required:
  145. required_ops = None
  146. op_type_impl_filter = None
  147. if enable_type_reduction:
  148. if not has_op_type_reduction_info:
  149. op_type_usage_manager = None
  150. if globally_allowed_types is not None and op_type_usage_manager is not None:
  151. raise RuntimeError(
  152. "Specifying globally allowed types and per-op type reduction info together is unsupported."
  153. )
  154. if globally_allowed_types is not None:
  155. op_type_impl_filter = GloballyAllowedTypesOpTypeImplFilter(globally_allowed_types)
  156. elif op_type_usage_manager is not None:
  157. op_type_impl_filter = op_type_usage_manager.make_op_type_impl_filter()
  158. return required_ops, op_type_impl_filter