| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353 |
- # -------------------------------------------------------------------------
- # Copyright (c) Microsoft Corporation. All rights reserved.
- # Licensed under the MIT License. See License.txt in the project root for
- # license information.
- # --------------------------------------------------------------------------
- from __future__ import annotations
- import logging
- import tempfile
- from pathlib import Path
- import onnx
- from ....tools.onnx_model_utils import fix_output_shapes, make_input_shape_fixed, optimize_model
- from ....tools.remove_initializer_from_input import remove_initializer_from_input
- from ...fusions import FusionGelu, FusionLayerNormalization
- from ...onnx_model import ONNXModel
- from .fusion_lpnorm import FusionLpNormalization
- from .fusion_spacetodepth import FusionSpaceToDepth
def qnn_preprocess_model(
    model_input: str | Path | onnx.ModelProto,
    model_output: str | Path,
    exclude_initializer_from_input: bool = False,
    fuse_layernorm: bool = False,
    save_as_external_data: bool = False,
    all_tensors_to_one_file: bool = False,
    external_data_location: str | None = None,
    external_data_size_threshold: int = 1024,
    external_data_convert_attribute: bool = False,
    inputs_to_make_channel_last: list[str] | None = None,
    outputs_to_make_channel_last: list[str] | None = None,
    dynamic_input_shapes: list[tuple[str, str]] | None = None,
) -> bool:
    """
    If necessary, this method creates a new "pre-processed" model in preparation for
    quantization of a model to be used in QNN EP. Returns true if a new model was created.

    The input model is first passed through save_and_reload_optimize_model() (with shape
    inference enabled). This method then performs the following operations:
    - (Optional) Fix dynamic input shapes to the given static shapes.
    - (Optional) Remove initializers from the graph inputs.
    - Fuse Erf sequence into a single Gelu node.
    - Fuse ReduceL2 sequence into a single LpNormalization node (p == 2).
    - Fuse Reshape/Transpose sequence into a single SpaceToDepth node.
    - (Optional) Fuse ReduceMean sequence into a single LayerNormalization node.
    - (Optional) Transpose graph inputs and/or outputs to make them "channel-last".
    - Give a name to every non-Constant node that does not have one.

    Args:
        model_input: Path to the input model file or ModelProto.
        model_output: Path the output model file, which is only created if this method returns True.
        exclude_initializer_from_input: A bool specifying whether to exclude initializer from input.
            Defaults to False.
        fuse_layernorm: True if ReduceMean sequences should be fused into LayerNormalization nodes.
            The fusion is skipped (with a warning) if the model's ONNX opset is < 17.
            Defaults to False.
        save_as_external_data: True if output model should be saved with external data. Defaults to false.
        all_tensors_to_one_file: Effective only if save_as_external_data is true. Defaults to false.
            If true, save all tensors to one external file specified by external_data_location.
            If false, save each tensor to a file named with the tensor name.
        external_data_location: Effective only if save_as_external_data is true. Defaults to None.
            Specify the external file to which all tensors are saved. Path is relative
            to the model path. If not specified, the model's name is used.
        external_data_size_threshold: Effective only if save_as_external_data is true. Defaults to 1024.
            Tensors with a data size >= external_data_size_threshold are converted to external data.
            To convert every tensor with raw data to external data, set to 0.
        external_data_convert_attribute: Effective only if save_as_external_data is true. Defaults to false.
            If true, convert all tensors to external data.
            If false, convert only non-attribute tensors to external data.
        inputs_to_make_channel_last: List of graph input names to transpose to be "channel-last". For example,
            if "input0" originally has the shape (N, C, D1, D2, ..., Dn), the resulting model will change input0's
            shape to (N, D1, D2, ..., Dn, C) and add a transpose node after it.

            Original:
                input0 (N, C, D1, D2, ..., Dn) --> <Nodes>

            Updated:
                input0 (N, D1, D2, ..., Dn, C) --> Transpose --> input0_chanfirst (N, C, D1, D2, ..., Dn) --> <Nodes>

            This can potentially improve inference latency for QDQ models running on QNN EP because the
            additional transpose node may allow other transpose nodes inserted during ORT layout transformation
            to cancel out.
        outputs_to_make_channel_last: List of graph output names to transpose to be "channel-last". For example,
            if "output0" originally has the shape (N, C, D1, D2, ..., Dn), the resulting model will change output0's
            shape to (N, D1, D2, ..., Dn, C) and add a transpose node before it.

            Original:
                <Nodes> --> output0 (N, C, D1, D2, ..., Dn)

            Updated:
                <Nodes> --> output0_chanfirst (N, C, D1, D2, ..., Dn) --> Transpose --> output0 (N, D1, D2, ..., Dn, C)

            This can potentially improve inference latency for QDQ models running on QNN EP because the
            additional transpose node may allow other transpose nodes inserted during ORT layout transformation
            to cancel out.
        dynamic_input_shapes: A list of tuples, each pairing a model input name with its static shape in
            comma-separated format, for example: [('input', '1,3,256,256')]. Defaults to None.

    Returns:
        True if any of the steps above modified the model, in which case the result has been
        written to model_output. False otherwise (model_output is not written).

    Raises:
        ValueError: If fuse_layernorm is True and the model does not import the default ONNX
            operator set, if a dynamic_input_shapes entry cannot be parsed as integers, or if
            a requested channel-last input/output is invalid.
    """
    modified = False
    model = model_input if isinstance(model_input, onnx.ModelProto) else onnx.load_model(model_input)
    model = save_and_reload_optimize_model(model, shape_infer=True)
    onnx_model = ONNXModel(model)

    # Optionally, fix the dynamic input shapes.
    if dynamic_input_shapes:
        for input_name, input_shape_str in dynamic_input_shapes:
            input_shape = [int(dim) for dim in input_shape_str.split(",")]
            make_input_shape_fixed(onnx_model.graph(), input_name, input_shape)
        fix_output_shapes(onnx_model.model)
        modified = True

    # Exclude initializer from input if model.ir_version >= 4
    if exclude_initializer_from_input:
        modified |= remove_initializer_from_input(onnx_model.model)

    # Unconditional fusions, applied in this order:
    #   - Erf sequence                -> Gelu
    #   - ReduceL2 sequence           -> LpNormalization (p == 2)
    #   - Reshape/Transpose sequence  -> SpaceToDepth
    # Each fusion is always applied (no short-circuiting), even if an earlier one already modified the model.
    for fusion_cls in (FusionGelu, FusionLpNormalization, FusionSpaceToDepth):
        if fusion_cls(onnx_model).apply():
            modified = True

    # Optionally, fuse ReduceMean sequence into a single LayerNormalization node.
    if fuse_layernorm:
        # Use a default so that a missing default-domain opset surfaces as a descriptive error
        # instead of a bare StopIteration escaping from next().
        onnx_opset = next((x for x in model.opset_import if x.domain in ("", "ai.onnx")), None)
        if onnx_opset is None:
            raise ValueError(
                "Unable to fuse ReduceMean sequence into a LayerNormalization node. "
                "ONNX model does not import the default ONNX operator set (domain '' or 'ai.onnx')."
            )

        # Need opset >= 17 to use LayerNormalization.
        if onnx_opset.version < 17:
            logging.warning(
                "Unable to fuse ReduceMean sequence into a LayerNormalization node. "
                "ONNX model must use an opset >= 17 in order to use LayerNormalization, "
                f"but found version {onnx_opset.version}. Please use onnx.version_converter to update your model."
            )
        else:
            fusion_layernorm = FusionLayerNormalization(onnx_model)
            if fusion_layernorm.apply():
                modified = True

    # Optionally, transpose inputs and/or outputs to make them "channel-last".
    if inputs_to_make_channel_last or outputs_to_make_channel_last:
        transpose_node_prefix = "Transpose_channel_"
        # Start numbering after any existing node that already uses this prefix.
        transpose_node_suffix: int = onnx_model.get_largest_node_name_suffix(transpose_node_prefix) + 1
        update_io_to_channel_last(
            onnx_model.model,
            inputs_to_make_channel_last,
            outputs_to_make_channel_last,
            transpose_node_name_prefix=transpose_node_prefix,
            transpose_node_name_start_suffix=transpose_node_suffix,
        )
        modified = True

    # Make sure all nodes have a name.
    unnamed_node_prefix = "qnn_preproc_node_"
    available_suffix = onnx_model.get_largest_node_name_suffix(unnamed_node_prefix) + 1
    for node in onnx_model.model.graph.node:
        if node.op_type != "Constant" and not node.name:
            new_node_name = f"{unnamed_node_prefix}{available_suffix!s}"
            available_suffix += 1
            node.name = new_node_name
            modified = True
            logging.warning(f"Node of type {node.op_type} does not have a name. Renamed to {new_node_name}.")

    # Only write the output file when something actually changed.
    if modified:
        onnx_model.topological_sort()
        onnx.save_model(
            model,
            model_output,
            save_as_external_data=save_as_external_data,
            all_tensors_to_one_file=all_tensors_to_one_file,
            location=external_data_location,
            size_threshold=external_data_size_threshold,
            convert_attribute=external_data_convert_attribute,
        )

    return modified
def save_and_reload_optimize_model(model: onnx.ModelProto, shape_infer: bool) -> onnx.ModelProto:
    """
    Round-trips a model through a temporary directory to (optionally) run ONNX shape inference
    and then optimize_model() on it, and returns the reloaded result.

    The model is written to disk with external data so that the path-based shape inference
    API (onnx.shape_inference.infer_shapes_path) can be used. The returned model is tagged with
    the metadata property "onnx.infer" = "onnxruntime.tools.qnn.preprocess"; metadata properties
    already present on the optimized model are preserved (and win on a key conflict).

    Args:
        model: The model to process.
        shape_infer: True to run ONNX shape inference before optimization.

    Returns:
        The optimized model loaded back from the temporary directory.
    """
    with tempfile.TemporaryDirectory(prefix="ort.qnn_preproc.") as qnn_preproc_tmp_dir:
        tmp_dir = Path(qnn_preproc_tmp_dir)

        # NOTE(review): onnx.save_model(..., save_as_external_data=True) appears to convert the
        # tensors of `model` to external-data references in place - confirm whether callers that
        # pass in a ModelProto expect to keep using it afterwards.
        model_in_path = tmp_dir.joinpath("qnn_proc_input.onnx")
        onnx.save_model(model, model_in_path, save_as_external_data=True)

        if shape_infer:
            model_infer_path = tmp_dir.joinpath("qnn_proc_infer.onnx")
            onnx.shape_inference.infer_shapes_path(str(model_in_path), str(model_infer_path))
            # Optimize the shape-inferred model rather than the raw input.
            model_in_path = model_infer_path

        model_out_path = tmp_dir.joinpath("qnn_proc_output.onnx")
        optimize_model(model_in_path, model_out_path)

        # Load inside the `with` block: the files disappear once the temporary directory is removed.
        ret_model = onnx.load_model(model_out_path)

        ret_metaprops = {"onnx.infer": "onnxruntime.tools.qnn.preprocess"}
        # metadata_props is a repeated field of key/value messages, not a mapping, so it cannot be
        # handed to dict.update() directly; copy the entries explicitly.
        for prop in ret_model.metadata_props:
            ret_metaprops[prop.key] = prop.value
        onnx.helper.set_model_props(ret_model, ret_metaprops)

        return ret_model
class InputOutputNameMap:
    """
    Hands out (and remembers) replacement tensor names for graph inputs/outputs.

    For every original name requested through get_new_name(), a name of the form
    "<orig_name>_channel_first_<N>" is generated once, cached in `updated_io_names`, and a copy
    of the original value info carrying the new name is appended to `new_value_infos`.
    """

    def __init__(
        self,
        orig_tensor_names: set[str],
        orig_graph_inputs: dict[str, onnx.ValueInfoProto],
        orig_graph_outputs: dict[str, onnx.ValueInfoProto],
    ):
        # Results accumulated by get_new_name().
        self.updated_io_names = {}  # original name -> generated name
        self.new_value_infos = []  # value infos created for the generated names

        # Lookup tables describing the graph as supplied by the caller.
        self.orig_graph_outputs = orig_graph_outputs
        self.orig_graph_inputs = orig_graph_inputs
        self.orig_tensor_names = orig_tensor_names

    def get_new_name(self, orig_name: str):
        """
        Returns the replacement name for `orig_name`, creating it on first use.

        A KeyError is raised on first use if `orig_name` is found in neither the original
        graph inputs nor the original graph outputs.
        """
        try:
            return self.updated_io_names[orig_name]
        except KeyError:
            pass  # First request for this name: generate it below.

        # Pick the smallest numeric suffix above every "<prefix><digits>" name already known.
        name_prefix: str = f"{orig_name}_channel_first_"
        prefix_len = len(name_prefix)
        taken_indices = [
            int(known_name[prefix_len:])
            for known_name in self.orig_tensor_names
            if known_name.startswith(name_prefix) and known_name[prefix_len:].isdigit()
        ]
        first_free: int = max(taken_indices, default=-1) + 1
        generated_name = f"{name_prefix}{first_free!s}"

        # Clone the original value info under the generated name.
        source_info = self.orig_graph_inputs.get(orig_name) or self.orig_graph_outputs[orig_name]
        cloned_info = onnx.ValueInfoProto()
        cloned_info.CopyFrom(source_info)
        cloned_info.name = generated_name

        self.new_value_infos.append(cloned_info)
        self.updated_io_names[orig_name] = generated_name
        return generated_name
def _validated_io_rank(value_info: onnx.ValueInfoProto, io_kind: str) -> int:
    """Returns the rank of a graph input/output, raising ValueError if it has no tensor shape or rank < 3."""
    if not value_info.type.HasField("tensor_type") or not value_info.type.tensor_type.HasField("shape"):
        raise ValueError(f"Expected {io_kind} {value_info.name} to have a tensor_type with a shape")

    rank = len(value_info.type.tensor_type.shape.dim)
    if rank < 3:
        raise ValueError(f"Expected {io_kind} {value_info.name} to be of rank >= 3")

    return rank


def _move_channel_dim_to_last(shape: onnx.TensorShapeProto) -> None:
    """Rewrites `shape` in place from (N, C, D1, ..., Dn) to (N, D1, ..., Dn, C)."""
    last = len(shape.dim) - 1
    channel_dim = onnx.TensorShapeProto.Dimension()
    channel_dim.CopyFrom(shape.dim[1])

    # Shift the spatial dims one slot to the left, then put the channel dim at the end.
    for i in range(1, last):
        shape.dim[i].CopyFrom(shape.dim[i + 1])
    shape.dim[last].CopyFrom(channel_dim)


def update_io_to_channel_last(
    model: onnx.ModelProto,
    inputs_to_update: list[str] | None,
    outputs_to_update: list[str] | None,
    transpose_node_name_prefix: str = "Transpose_channel_",
    transpose_node_name_start_suffix: int = 0,
) -> None:
    """
    Changes the given graph inputs/outputs to a "channel-last" layout, in place.

    For each listed input, the graph input's shape is changed from (N, C, D1, ..., Dn) to
    (N, D1, ..., Dn, C) and a Transpose node producing the original channel-first tensor is
    appended; nodes that consumed the input are rewired to the Transpose output.
    For each listed output, the producing node is rewired to a new channel-first tensor and a
    Transpose node producing the channel-last graph output is appended.

    Nodes are appended to the end of graph.node; this function does not re-sort the graph.
    Names are processed in the order given (duplicates ignored), so that the generated Transpose
    node names are reproducible from run to run.

    Args:
        model: The model to update in place.
        inputs_to_update: Names of graph inputs to make channel-last. May be None or empty.
        outputs_to_update: Names of graph outputs to make channel-last. May be None or empty.
        transpose_node_name_prefix: Prefix for the names of the added Transpose nodes.
        transpose_node_name_start_suffix: Numeric suffix given to the first added Transpose node;
            it is incremented for every subsequent one.

    Raises:
        ValueError: If a name is not a graph input/output, or if the corresponding value info
            has no tensor shape or has rank < 3.
    """
    # De-duplicate while keeping the caller's order. Iterating a set of strings would make the
    # Transpose node numbering depend on string hash randomization.
    input_names = list(dict.fromkeys(inputs_to_update or []))
    output_names = list(dict.fromkeys(outputs_to_update or []))

    if not input_names and not output_names:
        return

    graph = model.graph
    orig_graph_inputs = {ginput.name: ginput for ginput in graph.input}
    orig_graph_outputs = {goutput.name: goutput for goutput in graph.output}

    # Check that the user passed in actual input and output names.
    for input_name in input_names:
        if input_name not in orig_graph_inputs:
            raise ValueError(f"{input_name} is not a graph input")

    for output_name in output_names:
        if output_name not in orig_graph_outputs:
            raise ValueError(f"{output_name} is not a graph output")

    # Collect every tensor name in the graph so that newly generated names cannot collide with
    # an existing one (including node outputs and initializers that nothing consumes).
    orig_tensor_names = set()
    orig_tensor_names.update(orig_graph_inputs)
    orig_tensor_names.update(orig_graph_outputs)
    orig_tensor_names.update(initializer.name for initializer in graph.initializer)
    for node in graph.node:
        orig_tensor_names.update(name for name in node.input if name)
        orig_tensor_names.update(name for name in node.output if name)

    # Maps original input (or output) name to its updated name used within the graph.
    io_map = InputOutputNameMap(orig_tensor_names, orig_graph_inputs, orig_graph_outputs)

    # Update each node's inputs/outputs to use the transposed versions.
    output_name_set = set(output_names)
    rewired_names = output_name_set.union(input_names)
    for node in graph.node:
        # Iterate over snapshots because the repeated fields are assigned to inside the loops.
        for i, name in enumerate(tuple(node.input)):
            if name and name in rewired_names:
                node.input[i] = io_map.get_new_name(name)

        for i, name in enumerate(tuple(node.output)):
            if name in output_name_set:
                node.output[i] = io_map.get_new_name(name)

    next_suffix = transpose_node_name_start_suffix

    # Update graph inputs to channel-last and a Transpose (to channel-first) after each.
    for g_input_name in input_names:
        g_input = orig_graph_inputs[g_input_name]
        input_rank = _validated_io_rank(g_input, "input")

        # Resolve the channel-first name BEFORE permuting the shape: on first use the map
        # snapshots the value info, which must still describe the channel-first layout.
        channel_first_name = io_map.get_new_name(g_input.name)
        _move_channel_dim_to_last(g_input.type.tensor_type.shape)

        # (N, D1, ..., Dn, C) -> (N, C, D1, ..., Dn)
        transpose_node = onnx.helper.make_node(
            "Transpose",
            name=f"{transpose_node_name_prefix}{next_suffix!s}",
            inputs=[g_input.name],
            outputs=[channel_first_name],
            perm=[0, input_rank - 1, *range(1, input_rank - 1)],
        )
        next_suffix += 1

        graph.node.extend([transpose_node])

    # Update graph outputs to channel-last and a Transpose (from channel-first) before each.
    for g_output_name in output_names:
        g_output = orig_graph_outputs[g_output_name]
        output_rank = _validated_io_rank(g_output, "output")

        # Same ordering constraint as for inputs: snapshot before permuting the shape.
        channel_first_name = io_map.get_new_name(g_output.name)
        _move_channel_dim_to_last(g_output.type.tensor_type.shape)

        # (N, C, D1, ..., Dn) -> (N, D1, ..., Dn, C)
        transpose_node = onnx.helper.make_node(
            "Transpose",
            name=f"{transpose_node_name_prefix}{next_suffix!s}",
            inputs=[channel_first_name],
            outputs=[g_output.name],
            perm=[0, *range(2, output_rank), 1],
        )
        next_suffix += 1

        graph.node.extend([transpose_node])

    # Register value infos for the new channel-first tensors.
    graph.value_info.extend(io_map.new_value_infos)
|