symbolic_shape_infer.py 139 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094
  1. # Copyright (c) Microsoft Corporation. All rights reserved.
  2. # Licensed under the MIT License.
  3. # -*- coding: UTF-8 -*-
  4. import argparse
  5. import logging
  6. import numpy as np
  7. import onnx
  8. import sympy
  9. from onnx import helper, numpy_helper, shape_inference
  10. from packaging import version
  11. assert version.parse(onnx.__version__) >= version.parse("1.8.0")
  12. logger = logging.getLogger(__name__)
  13. def get_attribute(node, attr_name, default_value=None):
  14. found = [attr for attr in node.attribute if attr.name == attr_name]
  15. if found:
  16. return helper.get_attribute_value(found[0])
  17. return default_value
  18. def get_dim_from_proto(dim):
  19. return getattr(dim, dim.WhichOneof("value")) if type(dim.WhichOneof("value")) is str else None
  20. def is_sequence(type_proto):
  21. cls_type = type_proto.WhichOneof("value")
  22. assert cls_type in ["tensor_type", "sequence_type"]
  23. return cls_type == "sequence_type"
  24. def get_shape_from_type_proto(type_proto):
  25. assert not is_sequence(type_proto)
  26. if type_proto.tensor_type.HasField("shape"):
  27. return [get_dim_from_proto(d) for d in type_proto.tensor_type.shape.dim]
  28. else:
  29. return None # note no shape is different from shape without dim (scalar)
  30. def get_elem_type_from_type_proto(type_proto):
  31. if is_sequence(type_proto):
  32. return type_proto.sequence_type.elem_type.tensor_type.elem_type
  33. else:
  34. return type_proto.tensor_type.elem_type
  35. def get_shape_from_value_info(vi):
  36. cls_type = vi.type.WhichOneof("value")
  37. if cls_type is None:
  38. return None
  39. if is_sequence(vi.type):
  40. if vi.type.sequence_type.elem_type.WhichOneof("value") == "tensor_type":
  41. return get_shape_from_type_proto(vi.type.sequence_type.elem_type)
  42. else:
  43. return None
  44. else:
  45. return get_shape_from_type_proto(vi.type)
  46. def make_named_value_info(name):
  47. vi = onnx.ValueInfoProto()
  48. vi.name = name
  49. return vi
  50. def get_shape_from_sympy_shape(sympy_shape):
  51. return [None if i is None else (int(i) if is_literal(i) else str(i)) for i in sympy_shape]
  52. def is_literal(dim):
  53. return type(dim) in [int, np.int64, np.int32, sympy.Integer] or (hasattr(dim, "is_number") and dim.is_number)
  54. def handle_negative_axis(axis, rank):
  55. assert axis < rank and axis >= -rank
  56. return axis if axis >= 0 else rank + axis
  57. def get_opset(mp, domain=None):
  58. domain = domain or ["", "onnx", "ai.onnx"]
  59. if type(domain) != list: # noqa: E721
  60. domain = [domain]
  61. for opset in mp.opset_import:
  62. if opset.domain in domain:
  63. return opset.version
  64. return None
  65. def as_scalar(x):
  66. if type(x) is list:
  67. assert len(x) == 1
  68. return x[0]
  69. elif type(x) is np.ndarray:
  70. return x.item()
  71. else:
  72. return x
  73. def as_list(x, keep_none):
  74. if type(x) is list:
  75. return x
  76. elif type(x) is np.ndarray:
  77. return list(x)
  78. elif keep_none and x is None:
  79. return None
  80. else:
  81. return [x]
  82. def sympy_reduce_product(x):
  83. if type(x) is list:
  84. value = sympy.Integer(1)
  85. for v in x:
  86. value = value * v
  87. else:
  88. value = x
  89. return value
  90. class SymbolicShapeInference:
    def __init__(self, int_max, auto_merge, guess_output_rank, verbose, prefix=""):
        """Set up the handler tables and the bookkeeping state.

        Args:
            int_max: Stored as ``int_max_``; not used in this part of the file
                (presumably the value treated as "unbounded" — TODO confirm).
            auto_merge: Stored as ``auto_merge_``; gates automatic merging of
                conflicting dims in ``_merge_symbols``/``_broadcast_shapes``.
            guess_output_rank: Stored as ``guess_output_rank_``.
            verbose: Stored as ``verbose_``; compared against integer levels
                before emitting log messages.
            prefix: Stored as ``prefix_``; sub-graph inference instances get the
                parent's prefix extended with ``"_<subgraph id>"``.
        """
        # Maps an op name to the bound method that infers its output shape.
        self.dispatcher_ = {
            "Add": self._infer_symbolic_compute_ops,
            "AllReduce": self._pass_on_shape_and_type,
            "ArrayFeatureExtractor": self._infer_ArrayFeatureExtractor,
            "AveragePool": self._infer_Pool,
            "BatchNormalization": self._infer_BatchNormalization,
            "Cast": self._infer_Cast,
            "CategoryMapper": self._infer_CategoryMapper,
            "Compress": self._infer_Compress,
            "Concat": self._infer_Concat,
            "ConcatFromSequence": self._infer_ConcatFromSequence,
            "Constant": self._infer_Constant,
            "ConstantOfShape": self._infer_ConstantOfShape,
            "Conv": self._infer_Conv,
            "CumSum": self._pass_on_shape_and_type,
            "Div": self._infer_symbolic_compute_ops,
            "Einsum": self._infer_Einsum,
            "Expand": self._infer_Expand,
            "Equal": self._infer_symbolic_compute_ops,
            "Floor": self._infer_symbolic_compute_ops,
            "Gather": self._infer_Gather,
            "GatherElements": self._infer_GatherElements,
            "GatherND": self._infer_GatherND,
            "Identity": self._pass_on_shape_and_type,
            "If": self._infer_If,
            "Loop": self._infer_Loop,
            "MatMul": self._infer_MatMul,
            "MatMulInteger16": self._infer_MatMulInteger,
            "MaxPool": self._infer_Pool,
            "Max": self._infer_symbolic_compute_ops,
            "MemcpyFromHost": self._pass_on_shape_and_type,
            "MemcpyToHost": self._pass_on_shape_and_type,
            "Min": self._infer_symbolic_compute_ops,
            "MoE": self._pass_on_shape_and_type,
            "Mul": self._infer_symbolic_compute_ops,
            "NonMaxSuppression": self._infer_NonMaxSuppression,
            "NonZero": self._infer_NonZero,
            "OneHot": self._infer_OneHot,
            "Pad": self._infer_Pad,
            "Range": self._infer_Range,
            "Reciprocal": self._pass_on_shape_and_type,
            "ReduceSum": self._infer_ReduceSum,
            "ReduceMean": self._infer_ReduceMean,
            "ReduceProd": self._infer_ReduceProd,
            "Reshape": self._infer_Reshape,
            "Resize": self._infer_Resize,
            "Round": self._pass_on_shape_and_type,
            "Scan": self._infer_Scan,
            "ScatterElements": self._infer_ScatterElements,
            "SequenceAt": self._infer_SequenceAt,
            "SequenceInsert": self._infer_SequenceInsert,
            "Shape": self._infer_Shape,
            "Size": self._infer_Size,
            "Slice": self._infer_Slice,
            "SoftmaxCrossEntropyLoss": self._infer_SoftmaxCrossEntropyLoss,
            "SoftmaxCrossEntropyLossInternal": self._infer_SoftmaxCrossEntropyLoss,
            "NegativeLogLikelihoodLossInternal": self._infer_SoftmaxCrossEntropyLoss,
            "Split": self._infer_Split,
            "SplitToSequence": self._infer_SplitToSequence,
            "Squeeze": self._infer_Squeeze,
            "Sub": self._infer_symbolic_compute_ops,
            "Tile": self._infer_Tile,
            "TopK": self._infer_TopK,
            "Transpose": self._infer_Transpose,
            "Unsqueeze": self._infer_Unsqueeze,
            "Where": self._infer_symbolic_compute_ops,
            "ZipMap": self._infer_ZipMap,
            "Neg": self._infer_symbolic_compute_ops,
            # contrib ops:
            "Attention": self._infer_Attention,
            "BiasAdd": self._infer_BiasAdd,
            "BiasGelu": self._infer_BiasGelu,
            "BiasSplitGelu": self._infer_BiasSplitGelu,
            "DecoderMaskedMultiHeadAttention": self._infer_DecoderMaskedMultiHeadAttention,
            "DequantizeLinear": self._infer_DequantizeLinear,
            "DynamicTimeWarping": self._infer_DynamicTimeWarping,
            "EmbedLayerNormalization": self._infer_EmbedLayerNormalization,
            "FastGelu": self._infer_FastGelu,
            "GatedRelativePositionBias": self._infer_GatedRelativePositionBias,
            "GatherBlockQuantized": self._infer_Gather,
            "Gelu": self._infer_Gelu,
            "GemmFastGelu": self._infer_GemmFastGelu,
            "GemmFloat8": self._infer_GemmFloat8,
            "GroupNorm": self._infer_GroupNorm,
            "GroupNormalization": self._infer_GroupNorm,
            "GroupQueryAttention": self._infer_GroupQueryAttention,
            "LayerNormalization": self._infer_LayerNormalization,
            "LongformerAttention": self._infer_LongformerAttention,
            "MatMulNBits": self._infer_MatMulNBits,
            "MultiHeadAttention": self._infer_MultiHeadAttention,
            "NhwcConv": self._infer_NhwcConv,
            "PackedAttention": self._infer_PackedAttention,
            "PackedMultiHeadAttention": self._infer_PackedMultiHeadAttention,
            "PagedAttention": self._infer_PagedAttention,
            "PythonOp": self._infer_PythonOp,
            "QLinearAdd": self._infer_QLinearBinary,
            "QLinearMul": self._infer_QLinearBinary,
            "QuantizeLinear": self._infer_QuantizeLinear,
            "QuickGelu": self._infer_FastGelu,
            "RelativePositionBias": self._infer_RelativePositionBias,
            "RemovePadding": self._infer_RemovePadding,
            "RestorePadding": self._infer_RestorePadding,
            "RotaryEmbedding": self._infer_RotaryEmbedding,
            "SimplifiedLayerNormalization": self._infer_LayerNormalization,
            "SkipGroupNorm": self._infer_SkipGroupNorm,
            "SkipLayerNormalization": self._infer_SkipLayerNormalization,
            "SkipSimplifiedLayerNormalization": self._infer_SkipLayerNormalization,
            "SparseAttention": self._infer_SparseAttention,
            "UnfoldTensor": self._infer_UnfoldTensor,
        }
        # Second handler table with lowercase keys (presumably ATen operator
        # names, going by the attribute name — confirm against the lookup site).
        self.aten_op_dispatcher_ = {
            "embedding": self._infer_Gather,
            "bitwise_or": self._infer_aten_bitwise_or,
            "diagonal": self._infer_aten_diagonal,
            "max_pool2d_with_indices": self._infer_aten_pool2d,
            "max": self._infer_aten_minmax,
            "min": self._infer_aten_minmax,
            "multinomial": self._infer_aten_multinomial,
            "unfold": self._infer_aten_unfold,
            "argmax": self._infer_aten_argmax,
            "avg_pool2d": self._infer_aten_pool2d,
            "_adaptive_avg_pool2d": self._infer_aten_pool2d,
            "numpy_T": self._infer_Transpose,
            "native_group_norm": self._infer_aten_group_norm,
            "upsample_nearest1d": self._infer_aten_upsample,
            "upsample_nearest2d": self._infer_aten_upsample,
            "upsample_nearest3d": self._infer_aten_upsample,
            "upsample_bicubic2d": self._infer_aten_upsample,
        }
        # Sub-graph inference keeps calling _infer_impl while this flag is truthy.
        self.run_ = True
        # Maps a dim name to the literal or dim name it should be replaced with.
        self.suggested_merge_ = {}
        # Maps the string form of a dim to its sympy object.
        self.symbolic_dims_ = {}
        # Membership here gives a dim priority as a merge target
        # (see _add_suggested_merge); it is populated outside this section.
        self.input_symbols_ = {}
        self.auto_merge_ = auto_merge
        self.guess_output_rank_ = guess_output_rank
        self.verbose_ = verbose
        self.int_max_ = int_max
        # Counter used to build a distinct prefix for each sub-graph inference.
        self.subgraph_id_ = 0
        self.prefix_ = prefix
    def _add_suggested_merge(self, symbols, apply=False):
        """Record that all of *symbols* denote the same dim.

        One of the symbols is chosen as the merge target and every other symbol
        is mapped to it in ``self.suggested_merge_``. Existing entries that
        pointed at a now-merged symbol are redirected to the new target.

        Args:
            symbols: Iterable of dims; each must be a literal or a string that
                is a key of ``self.symbolic_dims_`` (asserted).
            apply: When true and ``auto_merge_`` is enabled, the merge table is
                applied to the model right away via ``_apply_suggested_merge``.
        """
        assert all((type(s) is str and s in self.symbolic_dims_) or is_literal(s) for s in symbols)
        symbols = set(symbols)
        # Replace symbols that already have a merge target with that target.
        for k, v in self.suggested_merge_.items():
            if k in symbols:
                symbols.remove(k)
                symbols.add(v)
        map_to = None
        # if there is literal, map to it first
        for s in symbols:
            if is_literal(s):
                map_to = s
                break
        # when no literals, map to input symbolic dims, then existing symbolic dims
        if map_to is None:
            for s in symbols:
                if s in self.input_symbols_:
                    map_to = s
                    break
        if map_to is None:
            for s in symbols:
                # a plain sympy.Symbol (not a computed expression) is preferred
                if type(self.symbolic_dims_[s]) is sympy.Symbol:
                    map_to = s
                    break
        # when nothing to map to, use the shorter one
        if map_to is None:
            if self.verbose_ > 0:
                logger.warning("Potential unsafe merge between symbolic expressions: (%s)", ",".join(symbols))
            symbols_list = list(symbols)
            lens = [len(s) for s in symbols_list]
            map_to = symbols_list[lens.index(min(lens))]
            symbols.remove(map_to)
        for s in symbols:
            if s == map_to:
                continue
            # two different literals can never be merged
            if is_literal(map_to) and is_literal(s):
                assert int(map_to) == int(s)
            self.suggested_merge_[s] = int(map_to) if is_literal(map_to) else map_to
            # Redirect earlier merges that targeted s. Only existing keys are
            # reassigned, so the dict size does not change during iteration.
            for k, v in self.suggested_merge_.items():
                if v == s:
                    self.suggested_merge_[k] = map_to
        if apply and self.auto_merge_:
            self._apply_suggested_merge()
  274. def _apply_suggested_merge(self, graph_input_only=False):
  275. if not self.suggested_merge_:
  276. return
  277. for i in list(self.out_mp_.graph.input) + ([] if graph_input_only else list(self.out_mp_.graph.value_info)):
  278. for d in i.type.tensor_type.shape.dim:
  279. if d.dim_param in self.suggested_merge_:
  280. v = self.suggested_merge_[d.dim_param]
  281. if is_literal(v):
  282. d.dim_value = int(v)
  283. else:
  284. d.dim_param = v
  285. def _preprocess(self, in_mp):
  286. self.out_mp_ = onnx.ModelProto()
  287. self.out_mp_.CopyFrom(in_mp)
  288. self.graph_inputs_ = {i.name: i for i in list(self.out_mp_.graph.input)}
  289. self.initializers_ = {i.name: i for i in self.out_mp_.graph.initializer}
  290. self.known_vi_ = {i.name: i for i in list(self.out_mp_.graph.input)}
  291. self.known_vi_.update(
  292. {
  293. i.name: helper.make_tensor_value_info(i.name, i.data_type, list(i.dims))
  294. for i in self.out_mp_.graph.initializer
  295. }
  296. )
  297. def _merge_symbols(self, dims):
  298. if not all(type(d) is str for d in dims):
  299. if self.auto_merge_:
  300. unique_dims = list(set(dims))
  301. is_int = [is_literal(d) for d in unique_dims]
  302. assert sum(is_int) <= 1 # if there are more than 1 unique ints, something is wrong
  303. if sum(is_int) == 1:
  304. int_dim = is_int.index(1)
  305. if self.verbose_ > 0:
  306. logger.debug(
  307. f"dim {unique_dims[:int_dim] + unique_dims[int_dim + 1 :]} has been merged with value {unique_dims[int_dim]}"
  308. )
  309. self._check_merged_dims(unique_dims, allow_broadcast=False)
  310. return unique_dims[int_dim]
  311. else:
  312. if self.verbose_ > 0:
  313. logger.debug(f"dim {unique_dims[1:]} has been merged with dim {unique_dims[0]}")
  314. return dims[0]
  315. else:
  316. return None
  317. if all(d == dims[0] for d in dims):
  318. return dims[0]
  319. merged = [self.suggested_merge_.get(d, d) for d in dims]
  320. if all(d == merged[0] for d in merged):
  321. assert merged[0] in self.symbolic_dims_
  322. return merged[0]
  323. else:
  324. return None
  325. # broadcast from right to left, and merge symbolic dims if needed
  326. def _broadcast_shapes(self, shape1, shape2):
  327. new_shape = []
  328. rank1 = len(shape1)
  329. rank2 = len(shape2)
  330. new_rank = max(rank1, rank2)
  331. for i in range(new_rank):
  332. dim1 = shape1[rank1 - 1 - i] if i < rank1 else 1
  333. dim2 = shape2[rank2 - 1 - i] if i < rank2 else 1
  334. if dim1 == 1 or dim1 == dim2:
  335. new_dim = dim2
  336. elif dim2 == 1:
  337. new_dim = dim1
  338. else:
  339. new_dim = self._merge_symbols([dim1, dim2])
  340. if not new_dim:
  341. # warning about unsupported broadcast when not auto merge
  342. # note that auto merge has the risk of incorrectly merge symbols while one of them being 1
  343. # for example, 'a' = 1, 'b' = 5 at runtime is valid broadcasting, but with auto merge 'a' == 'b'
  344. if self.auto_merge_:
  345. self._add_suggested_merge([dim1, dim2], apply=True)
  346. else:
  347. logger.warning("unsupported broadcast between " + str(dim1) + " " + str(dim2)) # noqa: G003
  348. new_shape = [new_dim, *new_shape]
  349. return new_shape
  350. def _get_shape(self, node, idx):
  351. name = node.input[idx]
  352. if name in self.known_vi_:
  353. vi = self.known_vi_[name]
  354. return get_shape_from_value_info(vi)
  355. else:
  356. assert name in self.initializers_
  357. return list(self.initializers_[name].dims)
  358. def _try_get_shape(self, node, idx):
  359. if idx > len(node.input) - 1:
  360. return None
  361. name = node.input[idx]
  362. if name in self.known_vi_:
  363. vi = self.known_vi_[name]
  364. return get_shape_from_value_info(vi)
  365. if name in self.initializers_:
  366. return list(self.initializers_[name].dims)
  367. return None
  368. def _get_shape_rank(self, node, idx):
  369. return len(self._get_shape(node, idx))
  370. def _get_sympy_shape(self, node, idx):
  371. sympy_shape = []
  372. for d in self._get_shape(node, idx):
  373. if type(d) is str:
  374. sympy_shape.append(
  375. self.symbolic_dims_[d]
  376. if d in self.symbolic_dims_
  377. else sympy.Symbol(d, integer=True, nonnegative=True)
  378. )
  379. else:
  380. assert None is not d
  381. sympy_shape.append(d)
  382. return sympy_shape
  383. def _get_value(self, node, idx):
  384. name = node.input[idx]
  385. assert name in self.sympy_data_ or name in self.initializers_
  386. return self.sympy_data_[name] if name in self.sympy_data_ else numpy_helper.to_array(self.initializers_[name])
  387. def _try_get_value(self, node, idx):
  388. if idx >= len(node.input):
  389. return None
  390. name = node.input[idx]
  391. if name in self.sympy_data_ or name in self.initializers_:
  392. return self._get_value(node, idx)
  393. return None
  394. def _update_computed_dims(self, new_sympy_shape):
  395. for i, new_dim in enumerate(new_sympy_shape):
  396. if not is_literal(new_dim) and type(new_dim) != str: # noqa: E721
  397. str_dim = str(new_dim)
  398. if str_dim in self.suggested_merge_:
  399. if is_literal(self.suggested_merge_[str_dim]):
  400. continue # no need to create dim for literals
  401. new_sympy_shape[i] = self.symbolic_dims_[self.suggested_merge_[str_dim]]
  402. else:
  403. # add new_dim if it's a computational expression
  404. if str(new_dim) not in self.symbolic_dims_:
  405. self.symbolic_dims_[str(new_dim)] = new_dim
    def _onnx_infer_single_node(self, node):
        """Register value infos for the outputs of *node*.

        Unless the op type is in the skip list below, the node is wrapped into
        a one-node temporary graph and passed through ONNX's
        ``shape_inference.infer_shapes``; the inferred outputs are then copied
        into ``out_mp_.graph.value_info``. For skipped ops only a named, empty
        value info is created. In both cases each non-empty output name is
        recorded in ``known_vi_``.

        Relies on ``self.tmp_mp_`` and ``self._check_merged_dims``, which are
        set up or defined outside this part of the file.
        """
        # skip onnx shape inference for some ops, as they are handled in _infer_*
        skip_infer = node.op_type in [
            "If",
            "Loop",
            "Scan",
            "SplitToSequence",
            "ZipMap",  # contrib ops
            "Attention",
            "BiasAdd",
            "BiasGelu",
            "BiasSplitGelu",
            "DequantizeLinear",
            "DynamicTimeWarping",
            "EmbedLayerNormalization",
            "FastGelu",
            "GatherBlockQuantized",
            "Gelu",
            "GemmFastGelu",
            "GroupNorm",
            "GroupNormalization",
            "GroupQueryAttention",
            "LayerNormalization",
            "LongformerAttention",
            "MultiHeadAttention",
            "NhwcConv",
            "PackedAttention",
            "PagedAttention",
            "PythonOp",
            "QuantizeLinear",
            "QuickGelu",
            "RelativePositionBias",
            "RemovePadding",
            "RestorePadding",
            "RotaryEmbedding",
            "SimplifiedLayerNormalization",
            "SkipLayerNormalization",
            "SkipSimplifiedLayerNormalization",
            "SparseAttention",
            "SkipGroupNorm",
            "QLinearAdd",
            "QLinearMul",
        ]
        if not skip_infer:
            # Only pass initializers that satisfy the following condition:
            # (1) Operator need value of some input for shape inference.
            # For example, Unsqueeze in opset 13 uses the axes input to calculate shape of output.
            # (2) opset version >= 9. In older version, initializer is required in graph input by onnx spec.
            # (3) The initializer is not in graph input. This means the node input is "constant" in inference.
            initializers = []
            if (get_opset(self.out_mp_) >= 9) and node.op_type in ["Unsqueeze"]:
                initializers = [
                    self.initializers_[name]
                    for name in node.input
                    if (name in self.initializers_ and name not in self.graph_inputs_)
                ]
            if node.op_type in [
                "Add",
                "Sub",
                "Mul",
                "Div",
                "MatMul",
                "MatMulInteger",
                "MatMulInteger16",
                "Where",
                "Sum",
            ]:
                # When the output already has a value info, check the input
                # dims that line up with each output dim for consistency.
                if node.output[0] in self.known_vi_:
                    vi = self.known_vi_[node.output[0]]
                    out_rank = len(get_shape_from_type_proto(vi.type))
                    in_shapes = [self._get_shape(node, i) for i in range(len(node.input))]
                    # for the MatMul family the last two output dims are left out of the check
                    for d in range(
                        out_rank - (2 if node.op_type in ["MatMul", "MatMulInteger", "MatMulInteger16"] else 0)
                    ):
                        # inputs are right-aligned with the output; shorter inputs contribute no dim here
                        in_dims = [s[len(s) - out_rank + d] for s in in_shapes if len(s) + d >= out_rank]
                        if len(in_dims) > 1:
                            self._check_merged_dims(in_dims, allow_broadcast=True)
            # run single node inference with self.known_vi_ shapes
            tmp_graph = helper.make_graph(
                [node],
                "tmp",
                [self.known_vi_[i] for i in node.input if i],
                [make_named_value_info(i) for i in node.output],
                initializers,
            )
            self.tmp_mp_.graph.CopyFrom(tmp_graph)
            self.tmp_mp_ = shape_inference.infer_shapes(self.tmp_mp_)
        for i_o in range(len(node.output)):
            o = node.output[i_o]
            if o:  # skip optional output
                vi = self.out_mp_.graph.value_info.add()
                if not skip_infer:
                    vi.CopyFrom(self.tmp_mp_.graph.output[i_o])
                else:
                    # shape and type are left for the op-specific handler to fill in
                    vi.name = o
                self.known_vi_[o] = vi
  def _onnx_infer_subgraph(self, node, subgraph, use_node_input=True, inc_subgraph_id=True):
      """Run a nested SymbolicShapeInference over ``subgraph`` and write the results back into it.

      A temporary graph is assembled from the subgraph nodes, the subgraph's own
      inputs, and every value already known to this instance that the subgraph
      does not declare itself (its implicit inputs). After inference the
      subgraph's output/value_info/node fields (and its input field when
      ``use_node_input`` is true) are replaced with the inferred ones.

      Args:
          node: the node owning the subgraph; used for logging and for
              ``len(node.input)`` when trimming the inferred inputs.
          subgraph: the GraphProto to infer; modified in place.
          use_node_input: when true, the subgraph inputs are replaced by the
              first ``len(node.input)`` inferred inputs.
          inc_subgraph_id: when true, ``self.subgraph_id_`` is incremented after
              it has been used to build the nested prefix.

      Returns:
          The nested SymbolicShapeInference instance.
      """
      if self.verbose_ > 2:
          logger.debug(f"Inferencing subgraph of node {node.name} with output({node.output[0]}...): {node.op_type}")

      # The node's inputs are not handed to the subgraph here; each dispatcher
      # prepares the subgraph inputs itself (Scan/Loop trim the node input
      # shape, for instance). Names declared by the subgraph may also shadow
      # names from the enclosing graph.
      declared_names = {entry.name for entry in [*subgraph.initializer, *subgraph.input]}
      implicit_names = {known for known in self.known_vi_ if known not in declared_names}

      graph_inputs = list(subgraph.input) + [self.known_vi_[name] for name in implicit_names]
      graph_outputs = [make_named_value_info(out.name) for out in subgraph.output]
      tmp_graph = helper.make_graph(list(subgraph.node), "tmp", graph_inputs, graph_outputs)
      outer_initializers = [init for init in self.out_mp_.graph.initializer if init.name in implicit_names]
      tmp_graph.initializer.extend(outer_initializers)
      tmp_graph.initializer.extend(subgraph.initializer)
      self.tmp_mp_.graph.CopyFrom(tmp_graph)

      nested = SymbolicShapeInference(
          self.int_max_,
          self.auto_merge_,
          self.guess_output_rank_,
          self.verbose_,
          prefix=self.prefix_ + "_" + str(self.subgraph_id_),
      )
      if inc_subgraph_id:
          self.subgraph_id_ += 1

      nested._preprocess(self.tmp_mp_)
      nested.suggested_merge_ = self.suggested_merge_.copy()
      while nested.run_:
          nested._infer_impl(self.sympy_data_.copy())
      nested._update_output_from_vi()

      inferred_graph = nested.out_mp_.graph
      if use_node_input:
          # the subgraph consumes the node inputs, so they must pick up merged dims
          subgraph.ClearField("input")
          subgraph.input.extend(inferred_graph.input[: len(node.input)])
      for field_name in ("output", "value_info", "node"):
          subgraph.ClearField(field_name)
          getattr(subgraph, field_name).extend(getattr(inferred_graph, field_name))

      # Symbolic dims that first appear in the subgraph outputs are registered
      # on this (outer) instance as well.
      output_shapes = [get_shape_from_value_info(out) for out in inferred_graph.output]
      unseen_dims = {
          dim for shape in output_shapes if shape for dim in shape if type(dim) is str and dim not in self.symbolic_dims_
      }
      collected = {}
      for dim in unseen_dims:
          assert dim in nested.symbolic_dims_
          collected[dim] = nested.symbolic_dims_[dim]
      self.symbolic_dims_.update(collected)
      return nested
  555. def _get_int_or_float_values(self, node, broadcast=False, allow_float_values=False):
  556. def int_or_float(value, allow_float_values):
  557. # If casting into int has precision loss: keep float output
  558. if allow_float_values and value % 1 != 0:
  559. return value
  560. return int(value)
  561. values = [self._try_get_value(node, i) for i in range(len(node.input))]
  562. if all(v is not None for v in values):
  563. # some shape compute is in floating point, cast to int for sympy
  564. for i, v in enumerate(values):
  565. if type(v) is not np.ndarray:
  566. continue
  567. if len(v.shape) > 1:
  568. new_v = None # ignore value for rank > 1
  569. elif len(v.shape) == 0:
  570. new_v = int_or_float(v.item(), allow_float_values)
  571. else:
  572. assert len(v.shape) == 1
  573. new_v = [int_or_float(vv, allow_float_values) for vv in v]
  574. values[i] = new_v
  575. values_len = [len(v) if isinstance(v, list) else 0 for v in values]
  576. max_len = max(values_len)
  577. if max_len >= 1 and broadcast:
  578. # broadcast
  579. for i, v in enumerate(values):
  580. if v is None:
  581. continue # don't broadcast if value is unknown
  582. if isinstance(v, list):
  583. if len(v) < max_len:
  584. values[i] = v * max_len
  585. else:
  586. assert len(v) == max_len
  587. else:
  588. values[i] = [v] * max_len
  589. return values
  590. def _compute_on_sympy_data(self, node, op_func):
  591. assert len(node.output) == 1
  592. # Before mul & div operations
  593. # cast inputs into interger might lose decimal part and reduce precision
  594. # keep them as float, finish the operation, then cast the result into integer
  595. if node.op_type in ["Mul", "Div"]:
  596. values = self._get_int_or_float_values(node, broadcast=True, allow_float_values=True)
  597. else:
  598. values = self._get_int_or_float_values(node, broadcast=True)
  599. if all(v is not None for v in values):
  600. is_list = [isinstance(v, list) for v in values]
  601. as_list = any(is_list)
  602. if as_list:
  603. self.sympy_data_[node.output[0]] = [op_func(vs) for vs in zip(*values, strict=False)]
  604. else:
  605. self.sympy_data_[node.output[0]] = op_func(values)
  606. def _pass_on_sympy_data(self, node):
  607. assert len(node.input) == 1 or node.op_type in [
  608. "Reshape",
  609. "Unsqueeze",
  610. "Squeeze",
  611. ]
  612. self._compute_on_sympy_data(node, lambda x: x[0])
  def _pass_on_shape_and_type(self, node):
      """Overwrite the first output's value info with the first input's element type and shape."""
      output_name = node.output[0]
      output_vi = self.known_vi_[output_name]
      input_elem_type = get_elem_type_from_type_proto(self.known_vi_[node.input[0]].type)
      input_shape = self._get_shape(node, 0)
      output_vi.CopyFrom(helper.make_tensor_value_info(output_name, input_elem_type, input_shape))
  def _new_symbolic_dim(self, prefix, dim):
      """Return the symbolic dimension named ``{prefix}_d{dim}``.

      If a merge has already been suggested for that name, the suggested
      value is returned instead (wrapped in ``sympy.Integer`` when it is a
      literal) and nothing is registered. Otherwise a fresh non-negative
      integer symbol is created and recorded in ``self.symbolic_dims_``.
      """
      dim_name = f"{prefix}_d{dim}"
      if dim_name in self.suggested_merge_:
          suggestion = self.suggested_merge_[dim_name]
          if is_literal(suggestion):
              return sympy.Integer(int(suggestion))
          return suggestion

      symbol = sympy.Symbol(dim_name, integer=True, nonnegative=True)
      self.symbolic_dims_[dim_name] = symbol
      return symbol
  631. def _new_symbolic_dim_from_output(self, node, out_idx=0, dim=0):
  632. return self._new_symbolic_dim(
  633. f"{node.op_type}{self.prefix_}_{list(self.out_mp_.graph.node).index(node)}_o{out_idx}_",
  634. dim,
  635. )
  636. def _new_symbolic_shape(self, rank, node, out_idx=0):
  637. return [self._new_symbolic_dim_from_output(node, out_idx, i) for i in range(rank)]
  def _compute_conv_pool_shape(self, node, channels_last=False):
      """Compute the sympy output shape of a convolution or pooling node.

      Args:
          node: Conv/Pool-like node. When it has a second input, that input
              is treated as the weight and supplies the kernel shape and the
              output channel count; otherwise the ``kernel_shape`` attribute
              is used.
          channels_last: when true the channel axis is the last axis and the
              spatial axes are the ``rank`` axes before it; otherwise the
              spatial axes are the trailing ``rank`` axes.

      Returns:
          The input sympy shape with channel (if a weight is given) and
          spatial dims replaced by the output dims.
      """
      sympy_shape = self._get_sympy_shape(node, 0)
      if len(node.input) > 1:
          weight_shape = self._get_sympy_shape(node, 1)
          rank = len(weight_shape) - 2  # number of spatial axes
          kernel_shape = weight_shape[-rank - 1 : -1] if channels_last else weight_shape[-rank:]
          # The channel axis is the last one in channels-last layout. Indexing it
          # as -1 instead of the literal 3 is identical for 4-D input and also
          # correct for other ranks.
          sympy_shape[-1 if channels_last else 1] = weight_shape[0]
      else:
          kernel_shape = get_attribute(node, "kernel_shape")
          rank = len(kernel_shape)

      assert len(sympy_shape) == rank + 2

      # only need to symbolic shape inference if input has symbolic dims in spatial axes
      spatial_shape = sympy_shape[-rank - 1 : -1] if channels_last else sympy_shape[-rank:]
      is_symbolic_dims = [not is_literal(i) for i in spatial_shape]

      if not any(is_symbolic_dims):
          # all spatial dims are literal: reuse the spatial dims already recorded for the output
          shape = get_shape_from_value_info(self.known_vi_[node.output[0]])
          if len(shape) > 0:
              assert len(sympy_shape) == len(shape)
              if channels_last:
                  sympy_shape[-rank - 1 : -1] = [sympy.Integer(d) for d in shape[-rank - 1 : -1]]
              else:
                  sympy_shape[-rank:] = [sympy.Integer(d) for d in shape[-rank:]]
              return sympy_shape

      dilations = get_attribute(node, "dilations", [1] * rank)
      strides = get_attribute(node, "strides", [1] * rank)
      effective_kernel_shape = [(k - 1) * d + 1 for k, d in zip(kernel_shape, dilations, strict=False)]
      pads = get_attribute(node, "pads")
      if pads is None:
          pads = [0] * (2 * rank)
          auto_pad = get_attribute(node, "auto_pad", b"NOTSET").decode("utf-8")
          if auto_pad != "VALID" and auto_pad != "NOTSET":
              try:
                  # Use the spatial dims of the input. For channels-last layout
                  # sympy_shape[-rank:] would wrongly include the channel axis and
                  # skip the first spatial axis.
                  residual = [sympy.Mod(d, s) for d, s in zip(spatial_shape, strides, strict=False)]
                  total_pads = [
                      max(0, (k - s) if r == 0 else (k - r))
                      for k, s, r in zip(effective_kernel_shape, strides, residual, strict=False)
                  ]
              except TypeError:  # sympy may throw TypeError: cannot determine truth value of Relational
                  total_pads = [
                      max(0, (k - s)) for k, s in zip(effective_kernel_shape, strides, strict=False)
                  ]  # assuming no residual if sympy throws error
          elif auto_pad == "VALID":
              total_pads = []
          else:
              total_pads = [0] * rank
      else:
          assert len(pads) == 2 * rank
          total_pads = [p1 + p2 for p1, p2 in zip(pads[:rank], pads[rank:], strict=False)]

      ceil_mode = get_attribute(node, "ceil_mode", 0)
      # index shift that skips the trailing channel axis in channels-last layout
      axis_shift = -1 if channels_last else 0
      for i in range(rank):
          axis = -rank + i + axis_shift
          effective_input_size = sympy_shape[axis]
          if len(total_pads) > 0:
              effective_input_size = effective_input_size + total_pads[i]
          if ceil_mode:
              strided_kernel_positions = sympy.ceiling(
                  (effective_input_size - effective_kernel_shape[i]) / strides[i]
              )
          else:
              strided_kernel_positions = (effective_input_size - effective_kernel_shape[i]) // strides[i]
          sympy_shape[axis] = strided_kernel_positions + 1
      return sympy_shape
  def _check_merged_dims(self, dims, allow_broadcast=True):
      """Suggest (and apply) a merge when the given dims are not all equal.

      With ``allow_broadcast`` set, literal dims whose value is at most 1 are
      dropped before the comparison.
      """
      if allow_broadcast:
          candidates = [dim for dim in dims if not is_literal(dim) or int(dim) > 1]
      else:
          candidates = dims
      if all(dim == candidates[0] for dim in candidates):
          return
      self._add_suggested_merge(candidates, apply=True)
  def _compute_matmul_shape(self, node, output_dtype=None):
      """Infer the output value info of a MatMul-like node from its two input shapes.

      The contracted dims of both operands are passed to
      ``_check_merged_dims`` without broadcasting. When ``output_dtype`` is
      None the element type of the first input is used.
      """
      lhs_shape = self._get_shape(node, 0)
      rhs_shape = self._get_shape(node, 1)
      lhs_rank, rhs_rank = len(lhs_shape), len(rhs_shape)
      assert lhs_rank > 0 and rhs_rank > 0

      # index of the contracted axis in each operand; stays 0 for a 1-D operand
      lhs_reduce_dim = 0 if lhs_rank == 1 else -1
      rhs_reduce_dim = 0 if rhs_rank == 1 else -2

      if lhs_rank == 1 and rhs_rank == 1:
          new_shape = []
      elif lhs_rank == 1:
          new_shape = [*rhs_shape[:rhs_reduce_dim], rhs_shape[-1]]
      elif rhs_rank == 1:
          new_shape = lhs_shape[:lhs_reduce_dim]
      else:
          batch_dims = self._broadcast_shapes(lhs_shape[:-2], rhs_shape[:-2])
          new_shape = [*batch_dims, lhs_shape[-2], rhs_shape[-1]]

      # merge reduce dim
      reduce_dims = [lhs_shape[lhs_reduce_dim], rhs_shape[rhs_reduce_dim]]
      self._check_merged_dims(reduce_dims, allow_broadcast=False)

      if output_dtype is None:
          # infer output_dtype from input type when not specified
          output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      output_vi = self.known_vi_[node.output[0]]
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, new_shape))
  def _fuse_tensor_type(self, node, out_idx, dst_type, src_type):
      """
      Make the tensor type inside dst_type compatible with the one inside src_type.

      Element types must match, otherwise ValueError is raised. When dst has a
      shape, every dimension that differs from src is replaced: by a new
      symbolic dimension for tensor types, by an empty dimension for sequence
      types. When dst has no shape, src's tensor type is copied over.
      """

      def unwrap(type_proto):
          # sequences carry their tensor type one level deeper
          if is_sequence(type_proto):
              return type_proto.sequence_type.elem_type.tensor_type
          return type_proto.tensor_type

      dst_tensor_type = unwrap(dst_type)
      src_tensor_type = unwrap(src_type)

      if dst_tensor_type.elem_type != src_tensor_type.elem_type:
          node_id = node.name if node.name else node.op_type
          raise ValueError(
              f"For node {node_id}, dst_tensor_type.elem_type != src_tensor_type.elem_type: "
              f"{onnx.onnx_pb.TensorProto.DataType.Name(dst_tensor_type.elem_type)} vs "
              f"{onnx.onnx_pb.TensorProto.DataType.Name(src_tensor_type.elem_type)}"
          )

      if not dst_tensor_type.HasField("shape"):
          dst_tensor_type.CopyFrom(src_tensor_type)
          return

      dim_pairs = zip(dst_tensor_type.shape.dim, src_tensor_type.shape.dim, strict=False)
      for dim_idx, (dst_dim, src_dim) in enumerate(dim_pairs):
          if dst_dim != src_dim:
              # mismatch: tensor types get a fresh symbolic dim keyed by
              # node/out_idx/dim index, sequence types get a cleared dim
              replacement = onnx.TensorShapeProto.Dimension()
              if not is_sequence(dst_type):
                  replacement.dim_param = str(self._new_symbolic_dim_from_output(node, out_idx, dim_idx))
              dst_tensor_type.shape.dim[dim_idx].CopyFrom(replacement)
  def _infer_ArrayFeatureExtractor(self, node):  # noqa: N802
      """Output shape is the data shape without its last dim, followed by the indices shape."""
      data_shape = self._get_shape(node, 0)
      indices_shape = self._get_shape(node, 1)
      output_vi = self.known_vi_[node.output[0]]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      output_shape = data_shape[:-1] + indices_shape
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, output_shape))
  774. def _infer_symbolic_compute_ops(self, node):
  775. funcs = {
  776. "Add": lambda l: l[0] + l[1], # noqa: E741
  777. "Div": lambda l: ( # noqa: E741
  778. int(l[0] // l[1]) if isinstance(l[0] // l[1], float) else l[0] // l[1]
  779. ), # integer div in sympy
  780. "Equal": lambda l: l[0] == l[1], # noqa: E741
  781. "Floor": lambda l: sympy.floor(l[0]), # noqa: E741
  782. "Max": lambda l: ( # noqa: E741
  783. l[1]
  784. if is_literal(l[0]) and int(l[0]) < -self.int_max_
  785. else (l[0] if is_literal(l[1]) and int(l[1]) < -self.int_max_ else sympy.Max(l[0], l[1]))
  786. ),
  787. "Min": lambda l: ( # noqa: E741
  788. l[1]
  789. if is_literal(l[0]) and int(l[0]) > self.int_max_
  790. else (l[0] if is_literal(l[1]) and int(l[1]) > self.int_max_ else sympy.Min(l[0], l[1]))
  791. ),
  792. "Mul": lambda l: int(l[0] * l[1]) if isinstance(l[0] * l[1], float) else l[0] * l[1], # noqa: E741
  793. "Sub": lambda l: l[0] - l[1], # noqa: E741
  794. "Where": lambda l: l[1] if l[0] else l[2], # noqa: E741
  795. "Neg": lambda l: -l[0], # noqa: E741
  796. }
  797. assert node.op_type in funcs
  798. self._compute_on_sympy_data(node, funcs[node.op_type])
  799. def _infer_Cast(self, node): # noqa: N802
  800. self._pass_on_sympy_data(node)
  def _infer_CategoryMapper(self, node):  # noqa: N802
      """Output keeps the input shape; STRING input maps to INT64, anything else maps to STRING."""
      input_elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      is_string_input = input_elem_type == onnx.TensorProto.STRING
      output_elem_type = onnx.TensorProto.INT64 if is_string_input else onnx.TensorProto.STRING
      output_vi = self.known_vi_[node.output[0]]
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_elem_type, self._get_shape(node, 0)))
  def _infer_Compress(self, node):  # noqa: N802
      """Infer the Compress output: the compressed axis gets a new symbolic dim.

      Without an ``axis`` attribute the output is 1-D (the input is flattened
      before compression); otherwise only the selected axis of the input
      shape is replaced.
      """
      input_shape = self._get_shape(node, 0)
      # data-dependent length, so it needs a new symbolic dimension
      compress_len = str(self._new_symbolic_dim_from_output(node))
      axis = get_attribute(node, "axis")
      if axis is not None:
          output_shape = input_shape
          output_shape[handle_negative_axis(axis, len(input_shape))] = compress_len
      else:
          output_shape = [compress_len]
      output_vi = self.known_vi_[node.output[0]]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, output_shape))
  def _infer_Concat(self, node):  # noqa: N802
      """Infer Concat: propagate known values, sum the concat axis, merge the other axes.

      When any input has known data (sympy data or an initializer) and all
      input values are known, the concatenated values are stored in
      ``self.sympy_data_`` (only axis 0 is accepted there). The output shape
      takes the sum of the inputs along the concat axis; for every other axis
      differing dims are merged through ``_merge_symbols``.
      """
      output_name = node.output[0]
      has_known_data = any(name in self.sympy_data_ or name in self.initializers_ for name in node.input)
      if has_known_data:
          values = self._get_int_or_float_values(node)
          if all(value is not None for value in values):
              assert get_attribute(node, "axis") == 0
              self.sympy_data_[output_name] = []
              for value in values:
                  if isinstance(value, list):
                      self.sympy_data_[output_name].extend(value)
                  else:
                      self.sympy_data_[output_name].append(value)

      sympy_shape = self._get_sympy_shape(node, 0)
      axis = handle_negative_axis(get_attribute(node, "axis"), len(sympy_shape))
      input_count = len(node.input)
      for input_idx in range(1, input_count):
          other_shape = self._get_sympy_shape(node, input_idx)
          if other_shape:
              sympy_shape[axis] = sympy_shape[axis] + other_shape[axis]
      self._update_computed_dims(sympy_shape)

      # merge symbolic dims for non-concat axes
      for dim_idx in range(len(sympy_shape)):
          if dim_idx == axis:
              continue
          dims = [
              self._get_shape(node, input_idx)[dim_idx]
              for input_idx in range(input_count)
              if self._get_shape(node, input_idx)
          ]
          if all(dim == dims[0] for dim in dims):
              continue
          merged = self._merge_symbols(dims)
          if type(merged) is not str:
              sympy_shape[dim_idx] = merged
          elif merged:
              sympy_shape[dim_idx] = self.symbolic_dims_[merged]
          else:
              sympy_shape[dim_idx] = None

      output_vi = self.known_vi_[output_name]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      output_vi.CopyFrom(
          helper.make_tensor_value_info(output_name, elem_type, get_shape_from_sympy_shape(sympy_shape))
      )
  def _infer_ConcatFromSequence(self, node):  # noqa: N802
      """Infer ConcatFromSequence: the concat axis gets a new symbolic dim.

      With ``new_axis`` set the symbolic dim is inserted at ``axis``;
      otherwise it replaces the existing dim at ``axis``.
      """
      seq_shape = self._get_shape(node, 0)
      inserts_axis = 1 if get_attribute(node, "new_axis") else 0
      axis = handle_negative_axis(get_attribute(node, "axis"), len(seq_shape) + inserts_axis)
      concat_dim = str(self._new_symbolic_dim_from_output(node, 0, axis))
      if inserts_axis:
          new_shape = [*seq_shape[:axis], concat_dim, *seq_shape[axis:]]
      else:
          new_shape = seq_shape
          new_shape[axis] = concat_dim
      output_vi = self.known_vi_[node.output[0]]
      elem_type = self.known_vi_[node.input[0]].type.sequence_type.elem_type.tensor_type.elem_type
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, new_shape))
  def _infer_Constant(self, node):  # noqa: N802
      """Record the ``value`` attribute of a Constant node as a numpy array in ``self.sympy_data_``."""
      value_tensor = get_attribute(node, "value")
      output_name = node.output[0]
      self.sympy_data_[output_name] = numpy_helper.to_array(value_tensor)
  def _infer_ConstantOfShape(self, node):  # noqa: N802
      """Infer ConstantOfShape from the value of its shape input.

      When the shape value is known it becomes the output shape, and for an
      INT64 output with a fully literal shape the filled tensor is also stored
      in ``self.sympy_data_``. When the shape value is unknown, new symbolic
      dims are created for the output.
      """
      output_name = node.output[0]
      sympy_shape = self._get_int_or_float_values(node)[0]
      output_vi = self.known_vi_[output_name]
      if sympy_shape is None:
          # create new dynamic shape
          # note input0 is a 1D vector of shape, the new symbolic shape has the rank of the shape vector length
          sympy_shape = self._new_symbolic_shape(self._get_shape(node, 0)[0], node)
      else:
          if type(sympy_shape) != list:  # noqa: E721
              sympy_shape = [sympy_shape]
          self._update_computed_dims(sympy_shape)
          # update sympy data if output type is int, and shape is known
          is_int64 = output_vi.type.tensor_type.elem_type == onnx.TensorProto.INT64
          if is_int64 and all(is_literal(dim) for dim in sympy_shape):
              ones = np.ones([int(dim) for dim in sympy_shape], dtype=np.int64)
              self.sympy_data_[output_name] = ones * numpy_helper.to_array(get_attribute(node, "value", 0))
      output_vi.CopyFrom(
          helper.make_tensor_value_info(
              output_name,
              output_vi.type.tensor_type.elem_type,
              get_shape_from_sympy_shape(sympy_shape),
          )
      )
  def _infer_Conv(self, node):  # noqa: N802
      """Infer the Conv output shape; the output keeps the element type already recorded for it."""
      conv_shape = self._compute_conv_pool_shape(node)
      self._update_computed_dims(conv_shape)
      output_vi = self.known_vi_[node.output[0]]
      elem_type = output_vi.type.tensor_type.elem_type
      output_vi.CopyFrom(
          helper.make_tensor_value_info(node.output[0], elem_type, get_shape_from_sympy_shape(conv_shape))
      )
  def _infer_NhwcConv(self, node):  # noqa: N802
      """Infer the channels-last convolution output; element type is taken from the first input."""
      conv_shape = self._compute_conv_pool_shape(node, channels_last=True)
      self._update_computed_dims(conv_shape)
      output_vi = self.known_vi_[node.output[0]]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      output_vi.CopyFrom(
          helper.make_tensor_value_info(node.output[0], elem_type, get_shape_from_sympy_shape(conv_shape))
      )
  def _infer_DequantizeLinear(self, node):  # noqa: N802
      """Output has the shape of input 0 and the element type of the scale (input 1, required)."""
      scale_vi = self.known_vi_[node.input[1]]
      output_dtype = scale_vi.type.tensor_type.elem_type
      output_shape = self._get_shape(node, 0)
      output_vi = self.known_vi_[node.output[0]]
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, output_shape))
  def _infer_QuantizeLinear(self, node):  # noqa: N802
      """Output has the shape of input 0; its type comes from the zero-point input, else uint8."""
      # The zero-point (index 2) is optional; it may be absent or an empty name.
      has_zero_point = len(node.input) > 2 and node.input[2]
      if has_zero_point:
          output_dtype = self.known_vi_[node.input[2]].type.tensor_type.elem_type
      else:
          output_dtype = onnx.TensorProto.UINT8
      output_shape = self._get_shape(node, 0)
      output_vi = self.known_vi_[node.output[0]]
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, output_shape))
  def _infer_QLinearBinary(self, node):  # noqa: N802
      """Infer QLinearAdd / QLinearMul: broadcast inputs 0 and 3, keep the type of input 0."""
      output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      # the two data operands sit at positions 0 and 3 of the input list
      operand_shapes = [self._get_shape(node, position) for position in (0, 3)]
      broadcast_shape = self._broadcast_shapes(operand_shapes[0], operand_shapes[1])
      output_vi = self.known_vi_[node.output[0]]
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, broadcast_shape))
  def _infer_Einsum(self, node):  # noqa: N802
      """Infer the Einsum output shape from the ``equation`` attribute and the operand shapes.

      Letters are matched to operand dims from the right-hand end of each
      term. With an explicit ``->`` the output dims follow the right-hand
      side; otherwise they are the letters that occur exactly once on the
      left, in order of first appearance. Ellipsis dims are emitted first.

      NOTE(review): ellipsis dims are read from the leading dims of the last
      operand's shape, and each term is indexed from the right for ``rank``
      positions, so a letter placed before ``...`` or an ellipsis spanning
      more than three dims does not look handled -- confirm before relying on it.
      """
      # ref:https://github.com/onnx/onnx/blob/623dfaa0151b2e4ce49779c3ec31cbd78c592b80/onnx/defs/math/defs.cc#L3275
      dot, comma = 46, 44  # byte values of b'.' and b','
      equation = get_attribute(node, "equation").replace(b" ", b"")
      arrow_pos = equation.find(b"->")
      has_output_spec = arrow_pos != -1
      left_equation = equation[:arrow_pos] if has_output_spec else equation

      seen_ellipsis = False
      num_ellipsis_indices = 0
      letter_to_dim = {}

      for operand_idx, term in enumerate(left_equation.split(b",")):
          shape = self._get_shape(node, operand_idx)
          rank = len(shape)
          if term.find(b"...") != -1 and not seen_ellipsis:
              # number of dims covered by the first ellipsis encountered
              num_ellipsis_indices = rank - len(term) + 3
              seen_ellipsis = True
          for offset in range(1, rank + 1):
              letter = term[-offset]
              if letter == dot:
                  continue
              dim = shape[-offset]
              if letter not in letter_to_dim or type(dim) is not sympy.Symbol:
                  letter_to_dim[letter] = dim

      new_sympy_shape = []
      from collections import OrderedDict  # noqa: PLC0415

      if has_output_spec:
          right_equation = equation[arrow_pos + 2 :]
          if right_equation.find(b"...") != -1:
              new_sympy_shape.extend(shape[i] for i in range(num_ellipsis_indices))
          new_sympy_shape.extend(letter_to_dim[c] for c in right_equation if c != dot)
      else:
          new_sympy_shape.extend(shape[i] for i in range(num_ellipsis_indices))
          occurrences = OrderedDict()
          for c in left_equation:
              if c != comma and c != dot:
                  occurrences[c] = occurrences.get(c, 0) + 1
          new_sympy_shape.extend(letter_to_dim[letter] for letter, count in occurrences.items() if count == 1)

      output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      output_vi = self.known_vi_[node.output[0]]
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, new_sympy_shape))
  def _infer_Expand(self, node):  # noqa: N802
      """Infer Expand by broadcasting the input shape with the known target shape.

      Nothing is updated when the value of the shape input is unknown.
      """
      target_shape = as_list(self._try_get_value(node, 1), keep_none=True)
      if target_shape is None:
          return
      # new_shape's dim can come from shape value
      self._update_computed_dims(target_shape)
      input_shape = self._get_shape(node, 0)
      new_shape = self._broadcast_shapes(input_shape, get_shape_from_sympy_shape(target_shape))
      output_vi = self.known_vi_[node.output[0]]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, new_shape))
  def _infer_Gather(self, node):  # noqa: N802
      """Infer Gather / GatherBlockQuantized output shape and, for 1-D data, its values.

      The output shape is the data shape with the gathered axis replaced by
      the indices shape. The element type comes from input 0 for Gather and
      from input 2 (scales) for GatherBlockQuantized.

      Raises:
          ValueError: for any other op_type.
      """
      data_shape = self._get_shape(node, 0)
      axis = handle_negative_axis(get_attribute(node, "axis", 0), len(data_shape))
      indices_shape = self._get_shape(node, 1)
      output_name = node.output[0]
      output_vi = self.known_vi_[output_name]

      # which input provides the output element type
      elem_type_source = {"Gather": 0, "GatherBlockQuantized": 2}
      if node.op_type not in elem_type_source:
          raise ValueError(f"Unsupported Gather op_type: {node.op_type}")
      elem_type = self.known_vi_[node.input[elem_type_source[node.op_type]]].type.tensor_type.elem_type

      gathered_shape = data_shape[:axis] + indices_shape + data_shape[axis + 1 :]
      output_vi.CopyFrom(helper.make_tensor_value_info(output_name, elem_type, gathered_shape))

      # for 1D input, do some sympy compute
      data_is_known = node.input[0] in self.sympy_data_
      if not (data_is_known and len(data_shape) == 1 and get_attribute(node, "axis", 0) == 0):
          return
      idx = self._try_get_value(node, 1)
      if idx is None:
          return
      data = self.sympy_data_[node.input[0]]
      if type(data) is not list:
          # scalar data: only its single element can be selected
          assert idx == 0 or idx == -1
          self.sympy_data_[output_name] = data
      elif type(idx) is np.ndarray and len(idx.shape) == 1:
          self.sympy_data_[output_name] = [data[int(i)] for i in idx]
      else:
          self.sympy_data_[output_name] = data[int(idx)]
  def _infer_GatherElements(self, node):  # noqa: N802
      """Output has the shape of the indices (input 1) and the element type of the data (input 0)."""
      output_shape = self._get_shape(node, 1)
      output_vi = self.known_vi_[node.output[0]]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, output_shape))
  def _infer_GatherND(self, node):  # noqa: N802
      """Infer the GatherND output shape.

      The output shape is ``indices_shape[:-1]`` followed by the data dims
      that are not addressed by an index tuple, i.e.
      ``data_shape[batch_dims + indices_shape[-1]:]``. The ``batch_dims``
      attribute defaults to 0, which reproduces the shape computed when the
      attribute is not taken into account.

      The last indices dim must be a literal and, together with
      ``batch_dims``, must not exceed the data rank.
      """
      data_shape = self._get_shape(node, 0)
      data_rank = len(data_shape)
      indices_shape = self._get_shape(node, 1)
      # leading dims shared by data and indices are not consumed by the index tuples
      batch_dims = get_attribute(node, "batch_dims", 0)
      last_index_dimension = indices_shape[-1]
      assert is_literal(last_index_dimension) and batch_dims + last_index_dimension <= data_rank
      new_shape = indices_shape[:-1] + data_shape[batch_dims + last_index_dimension :]
      vi = self.known_vi_[node.output[0]]
      vi.CopyFrom(
          helper.make_tensor_value_info(
              node.output[0],
              self.known_vi_[node.input[0]].type.tensor_type.elem_type,
              new_shape,
          )
      )
  def _infer_If(self, node):  # noqa: N802
      """Infer an If node by inferring both branches and fusing their output types.

      When the condition value is known, the branch that is not taken is
      overwritten with the taken one (so mismatching shapes from the
      non-executed branch cannot interfere), and sympy data of the taken
      branch's outputs is passed on to the node outputs.
      """
      then_branch = get_attribute(node, "then_branch")
      else_branch = get_attribute(node, "else_branch")
      cond = self._try_get_value(node, 0)
      if cond is not None:
          if as_scalar(cond) > 0:
              else_branch.CopyFrom(then_branch)
          else:
              then_branch.CopyFrom(else_branch)

      for branch_idx, branch in enumerate([then_branch, else_branch]):
          branch_infer = self._onnx_infer_subgraph(node, branch, use_node_input=False)
          for out_idx, out_name in enumerate(node.output):
              vi = self.known_vi_[out_name]
              branch_output = branch.output[out_idx]
              if branch_idx == 0:
                  vi.CopyFrom(branch_output)
                  vi.name = out_name
              else:
                  self._fuse_tensor_type(node, out_idx, vi.type, branch_output.type)

              # pass on sympy data from subgraph, if cond is constant
              if cond is None:
                  continue
              taken_branch_idx = 0 if as_scalar(cond) > 0 else 1
              if branch_idx == taken_branch_idx and branch_output.name in branch_infer.sympy_data_:
                  self.sympy_data_[vi.name] = branch_infer.sympy_data_[branch_output.name]
  def _infer_Loop(self, node):  # noqa: N802
      """Infer a Loop node by running inference on its body subgraph.

      The body inputs take the value info of the node inputs (keeping their
      own names). After a first pass, loop-carried variables whose shape
      changes between body input and body output get a new symbolic dim
      (tensors) or have the output element type copied to the input
      (sequences containing unknown dims); either case triggers a second
      pass. Outputs beyond the loop-carried ones get an extra leading
      iteration dim.
      """
      body = get_attribute(node, "body")
      assert len(body.input) == len(node.input)
      num_loop_carried = len(node.input) - 2  # minus the length and initial loop condition

      # when sequence_type is used as loop carried input
      # needs to run subgraph infer twice if the tensor shape in sequence contains None
      for input_name, body_input in zip(node.input, body.input):
          kept_name = body_input.name
          body_input.CopyFrom(self.known_vi_[input_name])
          body_input.name = kept_name

      self._onnx_infer_subgraph(node, body)

      # check subgraph input/output for shape changes in loop carried variables
      # for tensor_type, create new symbolic dim when changing, i.e., output = Concat(input, a)
      # for sequence_type, propagate from output to input
      need_second_infer = False
      for out_idx in range(1, num_loop_carried + 1):
          body_output = body.output[out_idx]
          output_shape = get_shape_from_value_info(body_output)
          # note that loop input is [loop_len, cond, input_0, input_1, ...]
          # while loop output is [cond, output_0, output_1, ...]
          carried_input_idx = out_idx + 1
          if is_sequence(body_output.type):
              if output_shape and None in output_shape:
                  # copy shape from output to input
                  body.input[carried_input_idx].type.sequence_type.elem_type.CopyFrom(
                      body_output.type.sequence_type.elem_type
                  )
                  need_second_infer = True
              continue
          carried_input = body.input[carried_input_idx]
          input_shape = get_shape_from_value_info(carried_input)
          for dim_idx, (in_dim, out_dim) in enumerate(zip(input_shape, output_shape, strict=False)):
              if in_dim != out_dim:
                  changed_dim = onnx.TensorShapeProto.Dimension()
                  changed_dim.dim_param = str(self._new_symbolic_dim_from_output(node, out_idx, dim_idx))
                  carried_input.type.tensor_type.shape.dim[dim_idx].CopyFrom(changed_dim)
                  body_output.type.tensor_type.shape.dim[dim_idx].CopyFrom(changed_dim)
                  need_second_infer = True

      if need_second_infer:
          if self.verbose_ > 2:
              logger.debug(
                  f"Rerun Loop: {node.name}({node.output[0]}...), because of sequence in loop carried variables"
              )
          self._onnx_infer_subgraph(node, body, inc_subgraph_id=False)

      # create a new symbolic dimension for iteration dependent dimension
      loop_iter_dim = str(self._new_symbolic_dim_from_output(node))
      for out_idx, out_name in enumerate(node.output):
          vi = self.known_vi_[out_name]
          body_output = body.output[out_idx + 1]  # first subgraph output is condition, not in node output
          vi.CopyFrom(body_output)
          if out_idx >= num_loop_carried:
              assert not is_sequence(vi.type)  # TODO: handle loop accumulation in sequence_type
              per_iteration_dims = body_output.type.tensor_type.shape.dim
              vi.type.tensor_type.shape.ClearField("dim")
              stacked_dims = vi.type.tensor_type.shape.dim
              stacked_dims.add().dim_param = loop_iter_dim
              stacked_dims.extend(list(per_iteration_dims))
          vi.name = out_name
  1167. def _infer_MatMul(self, node): # noqa: N802
  1168. self._compute_matmul_shape(node)
  def _infer_MatMulInteger(self, node):  # noqa: N802
      """Infer MatMulInteger; the output element type is always INT32."""
      int32_type = onnx.TensorProto.INT32
      self._compute_matmul_shape(node, int32_type)
  def _infer_MatMulNBits(self, node):  # noqa: N802
      """Infer MatMulNBits: the right-hand side shape is ``[K, N]`` taken from the node attributes.

      The output is the left-hand shape with its last dim replaced by ``N``
      (just ``[N]`` for a 1-D left-hand side). The last left-hand dim and
      ``K`` are checked for merging without broadcasting.
      """
      lhs_shape = self._get_shape(node, 0)
      reduce_size = get_attribute(node, "K")
      output_size = get_attribute(node, "N")
      assert len(lhs_shape) > 0
      if len(lhs_shape) == 1:
          new_shape = [output_size]
      else:
          new_shape = lhs_shape[:-1] + [output_size]
      # merge reduce dim
      self._check_merged_dims([lhs_shape[-1], reduce_size], allow_broadcast=False)
      # infer output_dtype from input type when not specified
      output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      output_vi = self.known_vi_[node.output[0]]
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, new_shape))
  def _infer_NonMaxSuppression(self, node):  # noqa: N802
      """Output is INT64 with shape ``[<new symbolic dim>, 3]``."""
      num_selected = str(self._new_symbolic_dim_from_output(node))
      output_vi = self.known_vi_[node.output[0]]
      output_shape = [num_selected, 3]
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, output_shape))
  def _infer_NonZero(self, node):  # noqa: N802
      """Output shape is ``[input rank, <new symbolic dim>]``; the recorded element type is kept."""
      input_rank = self._get_shape_rank(node, 0)
      # the number of non-zero entries is data dependent, hence a new symbolic dimension
      nonzero_count = str(self._new_symbolic_dim_from_output(node, 0, 1))
      output_vi = self.known_vi_[node.output[0]]
      elem_type = output_vi.type.tensor_type.elem_type
      output_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, [input_rank, nonzero_count]))
  def _infer_OneHot(self, node):  # noqa: N802
      """Insert the depth dimension into input 0's shape at the "axis" attribute (default -1).

      A literal depth (input 1) is used directly; otherwise a new symbolic
      dimension stands in for it. The element type is copied from input 2.
      """
      in_dims = self._get_sympy_shape(node, 0)
      depth = self._try_get_value(node, 1)
      # the output has one more dimension than the input, hence the "+ 1"
      axis = handle_negative_axis(get_attribute(node, "axis", -1), len(in_dims) + 1)
      if is_literal(depth):
          depth_dim = depth
      else:
          depth_dim = self._new_symbolic_dim_from_output(node)
      out_shape = get_shape_from_sympy_shape([*in_dims[:axis], depth_dim, *in_dims[axis:]])
      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      values_elem_type = self.known_vi_[node.input[2]].type.tensor_type.elem_type
      out_vi.CopyFrom(helper.make_tensor_value_info(out_name, values_elem_type, out_shape))
  def _infer_Pad(self, node):  # noqa: N802
      """Compute the padded output shape.

      Pads come from the "pads" attribute for opset <= 10 and from input 1
      otherwise. When they are unknown, every output dimension becomes a new
      symbolic dimension.
      """
      uses_attribute = get_opset(self.out_mp_) <= 10
      pads = get_attribute(node, "pads") if uses_attribute else self._try_get_value(node, 1)

      in_dims = self._get_sympy_shape(node, 0)
      rank = len(in_dims)
      if pads is None:
          # dynamic pads, create new symbolic dimensions
          out_dims = self._new_symbolic_shape(rank, node)
      else:
          assert len(pads) == 2 * rank
          # the first `rank` entries and the last `rank` entries are both added to each dimension
          leading, trailing = pads[:rank], pads[rank:]
          out_dims = [dim + before + after for dim, before, after in zip(in_dims, leading, trailing, strict=False)]
          self._update_computed_dims(out_dims)

      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, get_shape_from_sympy_shape(out_dims)))
  def _infer_Pool(self, node):  # noqa: N802
      """Apply the shape from ``_compute_conv_pool_shape`` to every non-empty output, keeping each element type."""
      pooled_dims = self._compute_conv_pool_shape(node)
      self._update_computed_dims(pooled_dims)
      for out_name in node.output:
          if out_name:
              out_vi = self.known_vi_[out_name]
              elem_type = out_vi.type.tensor_type.elem_type
              out_shape = get_shape_from_sympy_shape(pooled_dims)
              out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, out_shape))
  def _infer_aten_bitwise_or(self, node):
      """Set output 0 to the broadcast of the two input shapes, with the element type of input 0."""
      lhs_shape = self._get_shape(node, 0)
      rhs_shape = self._get_shape(node, 1)
      out_shape = self._broadcast_shapes(lhs_shape, rhs_shape)
      lhs_vi = self.known_vi_[node.input[0]]
      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      out_vi.CopyFrom(helper.make_tensor_value_info(out_name, lhs_vi.type.tensor_type.elem_type, out_shape))
  def _infer_aten_diagonal(self, node):
      """Compute the output shape of an ATen diagonal node.

      Inputs 1, 2 and 3 (offset, dim1, dim2) must all be resolvable. The
      output keeps every input dimension except dim1 and dim2 and appends the
      diagonal length as its last dimension.
      """
      in_dims = self._get_sympy_shape(node, 0)
      rank = len(in_dims)
      offset = self._try_get_value(node, 1)
      dim1 = self._try_get_value(node, 2)
      dim2 = self._try_get_value(node, 3)
      assert offset is not None and dim1 is not None and dim2 is not None
      dim1 = handle_negative_axis(dim1, rank)
      dim2 = handle_negative_axis(dim2, rank)

      # keep every dimension except the two the diagonal is taken over
      out_dims = [size for axis, size in enumerate(in_dims) if axis != dim1 and axis != dim2]

      len1 = in_dims[dim1]
      len2 = in_dims[dim2]
      # a non-negative offset shortens the dim2 extent, a negative one the dim1 extent
      shortest = sympy.Min(len1, len2 - offset) if offset >= 0 else sympy.Min(len1 + offset, len2)
      out_dims.append(sympy.Max(0, shortest))

      out_name = node.output[0]
      if out_name:
          out_vi = self.known_vi_[out_name]
          elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, get_shape_from_sympy_shape(out_dims)))
  def _infer_aten_multinomial(self, node):
      """Set output 0 to INT64 with input 0's shape, its last dimension replaced by the sample count.

      Input 0 must have rank 1 or 2. When input 1 does not resolve to a truthy
      value, a new symbolic dimension is used for the last dimension.
      """
      in_dims = self._get_sympy_shape(node, 0)
      rank = len(in_dims)
      assert rank in [1, 2]
      num_samples = self._try_get_value(node, 1)
      last_axis = rank - 1
      if num_samples:
          last_dim = num_samples
      else:
          last_dim = str(self._new_symbolic_dim_from_output(node, 0, last_axis))
      out_dims = [*in_dims[:-1], last_dim]
      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      out_vi.CopyFrom(
          helper.make_tensor_value_info(out_name, onnx.TensorProto.INT64, get_shape_from_sympy_shape(out_dims))
      )
  def _infer_aten_pool2d(self, node):
      """Replace the last two dimensions of the rank-4 input shape with new symbolic dimensions.

      Every non-empty output receives that shape. Output 1 is typed INT64;
      the other outputs take the element type of input 0.
      """
      dims = self._get_sympy_shape(node, 0)
      assert len(dims) == 4
      dims[-2:] = [self._new_symbolic_dim_from_output(node, 0, axis) for axis in (2, 3)]
      self._update_computed_dims(dims)
      for out_idx, out_name in enumerate(node.output):
          if out_name:
              out_vi = self.known_vi_[out_name]
              if out_idx == 1:
                  elem_type = onnx.TensorProto.INT64
              else:
                  elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
              out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, get_shape_from_sympy_shape(dims)))
  def _infer_aten_minmax(self, node):
      """Infer the outputs of an ATen min/max node.

      With one input, output 0 is a scalar of the input element type. With
      three inputs (data, dim, keepdim), keepdim must be known: the reduced
      dimension is dropped or set to 1, and output 1 gets the same shape with
      INT64 elements. If dim is unknown the output uses new symbolic dimensions.
      """
      out_vi = self.known_vi_[node.output[0]]
      if len(node.input) == 1:
          # single-input form produces a scalar
          scalar_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
          out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], scalar_type, []))
          return

      assert len(node.input) == 3
      keepdim = self._try_get_value(node, 2)
      assert keepdim is not None  # can only handle known keepdim case.
      dim = self._try_get_value(node, 1)
      if dim is not None:
          in_dims = self._get_sympy_shape(node, 0)
          dim = handle_negative_axis(dim, len(in_dims))
          kept = [1] if keepdim else []
          out_dims = [*in_dims[:dim], *kept, *in_dims[dim + 1 :]]
      else:
          rank = self._get_shape_rank(node, 0)
          out_dims = self._new_symbolic_shape(rank if keepdim else rank - 1, node)
      out_shape = get_shape_from_sympy_shape(out_dims)

      data_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], data_type, out_shape))
      index_vi = self.known_vi_[node.output[1]]
      index_vi.CopyFrom(helper.make_tensor_value_info(node.output[1], onnx.TensorProto.INT64, out_shape))
  def _infer_aten_unfold(self, node):
      """Compute the output shape of an ATen unfold node.

      When dimension, size and step (inputs 1-3) are all known, the unfolded
      dimension becomes ``(d - size) // step + 1`` and ``size`` is appended as
      a new last dimension. Otherwise the output has rank + 1 new symbolic
      dimensions.
      """
      dims = self._get_sympy_shape(node, 0)
      dimension = self._try_get_value(node, 1)
      size = self._try_get_value(node, 2)
      step = self._try_get_value(node, 3)
      if any(value is None for value in (dimension, size, step)):
          dims = self._new_symbolic_shape(len(dims) + 1, node)
      else:
          assert dimension < len(dims)
          dims[dimension] = (dims[dimension] - size) // step + 1
          dims.append(size)
      self._update_computed_dims(dims)

      out_name = node.output[0]
      if out_name:
          out_vi = self.known_vi_[out_name]
          elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, get_shape_from_sympy_shape(dims)))
  def _infer_aten_argmax(self, node):
      """Infer the INT64 output shape of an ATen argmax node.

      An empty input 1 yields a scalar. Otherwise keepdim (input 2) must be
      known for anything to be written: the reduced dimension is set to 1 or
      removed, or, when dim is unknown, new symbolic dimensions are used.
      """
      out_shape = None
      if node.input[1]:
          dim = self._try_get_value(node, 1)
          keepdim = self._try_get_value(node, 2)
          if keepdim is not None:
              dims = self._get_sympy_shape(node, 0)
              if dim is None:
                  rank = len(dims)
                  dims = self._new_symbolic_shape(rank if keepdim else rank - 1, node)
              else:
                  dim = handle_negative_axis(dim, len(dims))
                  if keepdim:
                      dims[dim] = 1
                  else:
                      del dims[dim]
              self._update_computed_dims(dims)
              out_shape = get_shape_from_sympy_shape(dims)
      else:
          # The argmax of the flattened input is returned.
          out_shape = []

      out_name = node.output[0]
      if out_name and out_shape is not None:
          out_vi = self.known_vi_[out_name]
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, onnx.TensorProto.INT64, out_shape))
  def _infer_aten_group_norm(self, node):
      """Propagate input 0 to output 0 and give outputs 1 and 2 a two-dimensional shape.

      The first dimension is input 0's leading dimension and the second is the
      scalar value of input 6; each falls back to a new symbolic dimension
      when it is not available. Element types follow input 0.
      """
      self._propagate_shape_and_type(node)
      in_shape = self._get_shape(node, 0)
      has_leading_dim = in_shape is not None and len(in_shape) != 0
      batch = in_shape[0] if has_leading_dim else None
      group = self._try_get_value(node, 6)
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      for out_idx in (1, 2):
          out_name = node.output[out_idx]
          if not out_name:
              continue
          out_vi = self.known_vi_[out_name]
          if batch is not None:
              first_dim = batch
          else:
              first_dim = str(self._new_symbolic_dim_from_output(node, out_idx, 0))
          if group is not None:
              second_dim = as_scalar(group)
          else:
              second_dim = str(self._new_symbolic_dim_from_output(node, out_idx, 1))
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, [first_dim, second_dim]))
  def _infer_aten_upsample(self, node):
      """Keep the first two input dimensions and take the rest from input 1 (output size).

      When the output size is unknown, the remaining dimensions become new
      symbolic dimensions. Nothing is written if input 0 has no shape.
      """
      out_shape = None
      in_shape = self._get_shape(node, 0)
      if in_shape is not None:
          out_shape = in_shape[:2]
          output_size = self._try_get_value(node, 1)
          if output_size is None:
              out_shape.extend(
                  str(self._new_symbolic_dim_from_output(node, 0, axis)) for axis in range(2, len(in_shape))
              )
          else:
              # numpy int64 entries are converted to plain Python ints
              out_shape.extend(size.item() if type(size) is np.int64 else size for size in output_size)

      out_name = node.output[0]
      if out_name and out_shape is not None:
          elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
          out_vi = self.known_vi_[out_name]
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, out_shape))
  1433. def _infer_BatchNormalization(self, node): # noqa: N802
  1434. self._propagate_shape_and_type(node)
  1435. # this works for opsets < 14 and 14 since we check i < len(node.output) in the loop
  1436. for i in [1, 2, 3, 4]:
  1437. if i < len(node.output) and node.output[i]:
  1438. # all of these parameters have the same shape as the 1st input
  1439. self._propagate_shape_and_type(node, input_index=1, output_index=i)
  def _infer_Range(self, node):  # noqa: N802
      """Give output 0 a single dimension of ``max(ceil((limit - start) / delta), 0)``.

      If any of the inputs cannot be resolved, a new symbolic dimension is
      used instead. The element type is copied from input 0.
      """
      out_vi = self.known_vi_[node.output[0]]
      values = self._get_int_or_float_values(node)
      if any(value is None for value in values):
          dims = [self._new_symbolic_dim_from_output(node)]
      else:
          start, limit, delta = (as_scalar(values[idx]) for idx in range(3))
          dims = [sympy.Max(sympy.ceiling((limit - start) / delta), 0)]
      self._update_computed_dims(dims)
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, get_shape_from_sympy_shape(dims)))
  def _infer_ReduceSum(self, node):  # noqa: N802
      """Infer the output shape when the reduction axes arrive as input 1 (opset >= 13).

      Nodes on older opsets, or with a single input, are left untouched here.
      Unknown axes require keepdims and yield new symbolic dimensions of the
      input rank. Known axes are set to 1 (keepdims) or removed.
      """
      keep_dims = get_attribute(node, "keepdims", 1)
      if get_opset(self.out_mp_) < 13 or len(node.input) <= 1:
          return

      # ReduceSum changes axes to input[1] in opset 13
      axes = self._try_get_value(node, 1)
      out_vi = self.known_vi_[node.output[0]]
      if axes is None:
          assert keep_dims  # can only handle keep_dims==True when axes is unknown, by generating new ranks
          out_shape = get_shape_from_sympy_shape(self._new_symbolic_shape(self._get_shape_rank(node, 0), node))
      else:
          in_shape = self._get_shape(node, 0)
          out_shape = []
          axes = [handle_negative_axis(axis, len(in_shape)) for axis in axes]
          for pos, dim in enumerate(in_shape):
              if pos not in axes:
                  out_shape.append(dim)
              elif keep_dims:
                  out_shape.append(1)
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, out_shape))
  def _infer_ReduceMean(self, node):  # noqa: N802
      """Reuse the ReduceSum inference for opset 18 and later; do nothing otherwise."""
      if get_opset(self.out_mp_) < 18:
          return
      # reduce mean spec 18+ is same as reduce sum spec 13+
      self._infer_ReduceSum(node)
  def _infer_ReduceProd(self, node):  # noqa: N802
      """Record the product of input 0's known values as sympy data for output 0.

      Only applies when the "keepdims" attribute is 0 and the "axes"
      attribute is ``[0]``.
      """
      axes = get_attribute(node, "axes")
      keep_dims = get_attribute(node, "keepdims", 1)
      if not (keep_dims == 0 and axes == [0]):
          return
      values = self._get_int_or_float_values(node)[0]
      if values is None:
          return
      self.sympy_data_[node.output[0]] = sympy_reduce_product(values)
  def _infer_RelativePositionBias(self, node):  # noqa: N802
      """Set output 0 to ``[1, <dim 1 of input 0>, str(input 1), str(input 2)]``.

      Nothing is written when either input 1 or input 2 cannot be resolved.
      """
      seq_len = self._try_get_value(node, 1)
      real_seq_len = self._try_get_value(node, 2)
      if None in (seq_len, real_seq_len) and (seq_len is None or real_seq_len is None):
          return
      num_heads = self._get_sympy_shape(node, 0)[1]
      out_shape = [1, num_heads, str(seq_len), str(real_seq_len)]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, out_shape))
  def _infer_Reshape(self, node):  # noqa: N802
      """Infer the output shape of a Reshape node from its shape input (input 1).

      When the target shape values are known, a 0 entry copies the input
      dimension at the same position and a single -1 entry is resolved as the
      total input size divided by the product of the other entries. When they
      are unknown, the literal length of input 1 gives the output rank and
      every dimension becomes a new symbolic dimension.
      """
      target = self._try_get_value(node, 1)
      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      if target is not None:
          in_dims = self._get_sympy_shape(node, 0)
          total = 1
          for dim in in_dims:
              total = total * dim

          out_dims = []
          deferred_at = -1
          known_product = 1
          for pos, dim in enumerate(target):
              if type(dim) is sympy.Symbol:
                  out_dims.append(dim)
              elif dim == 0:
                  # 0 means "copy the input dimension at this position"
                  out_dims.append(in_dims[pos])
                  known_product = known_product * in_dims[pos]
              else:
                  out_dims.append(dim)
              if dim == -1:
                  deferred_at = pos
              elif dim != 0:
                  known_product = known_product * dim

          assert out_dims.count(-1) < 2
          if -1 in out_dims:
              out_dims[deferred_at] = total // known_product

          self._update_computed_dims(out_dims)
          out_shape = get_shape_from_sympy_shape(out_dims)
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, out_vi.type.tensor_type.elem_type, out_shape))
      else:
          shape_of_shape = self._get_shape(node, 1)
          assert len(shape_of_shape) == 1
          out_rank = shape_of_shape[0]
          assert is_literal(out_rank)
          out_shape = get_shape_from_sympy_shape(self._new_symbolic_shape(out_rank, node))
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, out_vi.type.tensor_type.elem_type, out_shape))

      self._pass_on_sympy_data(node)
  def _infer_Resize(self, node):  # noqa: N802
      """Infer the output shape of a Resize node.

      For opset <= 10 the scales are input 1 and nothing is written when they
      are unknown. For later opsets, known sizes (input 3) win over known
      scales (input 2); with neither available the output gets new symbolic
      dimensions. The element type always follows input 0.
      """
      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      in_dims = self._get_sympy_shape(node, 0)

      if get_opset(self.out_mp_) <= 10:
          scales = self._try_get_value(node, 1)
          if scales is None:
              return
          out_dims = [sympy.simplify(sympy.floor(dim * scale)) for dim, scale in zip(in_dims, scales, strict=False)]
          self._update_computed_dims(out_dims)
          elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, get_shape_from_sympy_shape(out_dims)))
          return

      roi = self._try_get_value(node, 1)
      scales = self._try_get_value(node, 2)
      sizes = self._try_get_value(node, 3)
      if sizes is not None:
          out_dims = [sympy.simplify(sympy.floor(size)) for size in sizes]
          self._update_computed_dims(out_dims)
      elif scales is not None:
          rank = len(scales)
          if get_attribute(node, "coordinate_transformation_mode") == "tf_crop_and_resize":
              assert len(roi) == 2 * rank
              roi_values = list(roi)
              roi_start = roi_values[:rank]
              roi_end = roi_values[rank:]
          else:
              # without a region of interest the full [0, 1] extent is used
              roi_start = [0] * rank
              roi_end = [1] * rank
          scales = list(scales)
          out_dims = [
              sympy.simplify(sympy.floor(dim * (end - start) * scale))
              for dim, start, end, scale in zip(in_dims, roi_start, roi_end, scales, strict=False)
          ]
          self._update_computed_dims(out_dims)
      else:
          out_dims = self._new_symbolic_shape(self._get_shape_rank(node, 0), node)

      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, get_shape_from_sympy_shape(out_dims)))
  def _infer_Scan(self, node):  # noqa: N802
      """Infer the outputs of a Scan node by running inference on its "body" subgraph.

      The subgraph inputs are overwritten with the node's input value infos
      (scan inputs lose their scan axis), the subgraph is inferred, and each
      scan output gets the scan dimension re-inserted at its output axis.
      State outputs copy the matching subgraph output.
      """
      body = get_attribute(node, "body")
      num_scan_inputs = get_attribute(node, "num_scan_inputs")
      raw_input_axes = get_attribute(node, "scan_input_axes", [0] * num_scan_inputs)
      num_states = len(node.input) - num_scan_inputs
      input_axes = [
          handle_negative_axis(axis, self._get_shape_rank(node, pos + num_states))
          for pos, axis in enumerate(raw_input_axes)
      ]

      # We may have cases where the subgraph has optional inputs that appear in both subgraph's input and initializer,
      # but not in the node's input. In such cases, the input model might be invalid, but let's skip those optional inputs.
      num_node_inputs = len(node.input)
      assert len(body.input) >= num_node_inputs
      for pos, body_input in enumerate(body.input[:num_node_inputs]):
          # CopyFrom overwrites the name, so remember it and restore it afterwards
          original_name = body_input.name
          body_input.CopyFrom(self.known_vi_[node.input[pos]])
          if pos >= num_states:
              dims = body_input.type.tensor_type.shape.dim
              dims.remove(dims[input_axes[pos - num_states]])
          body_input.name = original_name
      self._onnx_infer_subgraph(node, body)

      num_scan_outputs = len(node.output) - num_states
      output_axes = get_attribute(node, "scan_output_axes", [0] * num_scan_outputs)
      # the scan dimension is read from the last node input at the last scan input axis
      last_input_shape = get_shape_from_type_proto(self.known_vi_[node.input[-1]].type)
      scan_dim = last_input_shape[input_axes[-1]]
      for pos, out_name in enumerate(node.output):
          out_vi = self.known_vi_[out_name]
          body_output = body.output[pos]
          if pos < num_states:
              out_vi.CopyFrom(body_output)
          else:
              body_shape = get_shape_from_type_proto(body_output.type)
              insert_at = handle_negative_axis(output_axes[pos - num_states], len(body_shape) + 1)
              full_shape = [*body_shape[:insert_at], scan_dim, *body_shape[insert_at:]]
              out_vi.CopyFrom(
                  helper.make_tensor_value_info(out_name, body_output.type.tensor_type.elem_type, full_shape)
              )
          out_vi.name = out_name
  def _infer_ScatterElements(self, node):  # noqa: N802
      """Give output 0 the shape and element type of input 0."""
      in_shape = self._get_shape(node, 0)
      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, in_shape))
  def _infer_SequenceAt(self, node):  # noqa: N802
      """Replace each unknown (None) dimension of the sequence shape with a new symbolic dimension on output 0."""
      # need to create new symbolic dimension if sequence shape has None:
      seq_shape = self._get_shape(node, 0)
      out_vi = self.known_vi_[node.output[0]]
      if seq_shape is None:
          return
      for axis, dim in enumerate(seq_shape):
          if dim is None:
              fresh_dim = onnx.TensorShapeProto.Dimension()
              fresh_dim.dim_param = str(self._new_symbolic_dim_from_output(node, 0, axis))
              out_vi.type.tensor_type.shape.dim[axis].CopyFrom(fresh_dim)
  def _infer_SequenceInsert(self, node):  # noqa: N802
      """Copy the input sequence's value info to output 0, then fuse in the inserted tensor's type."""
      # workaround bug in onnx's shape inference
      seq_vi = self.known_vi_[node.input[0]]
      tensor_vi = self.known_vi_[node.input[1]]
      out_name = node.output[0]
      out_seq_vi = self.known_vi_[out_name]
      out_seq_vi.CopyFrom(seq_vi)
      # CopyFrom also copied the input's name, so restore the output name
      out_seq_vi.name = out_name
      self._fuse_tensor_type(node, 0, out_seq_vi.type, tensor_vi.type)
  1670. def _infer_Shape(self, node): # noqa: N802
  1671. self.sympy_data_[node.output[0]] = self._get_sympy_shape(node, 0)
  def _infer_Size(self, node):  # noqa: N802
      """Record the product of input 0's dimensions as sympy data and type output 0 as an INT64 scalar."""
      in_dims = self._get_sympy_shape(node, 0)
      out_name = node.output[0]
      self.sympy_data_[out_name] = sympy_reduce_product(in_dims)
      scalar_vi = helper.make_tensor_value_info(out_name, onnx.TensorProto.INT64, [])
      self.known_vi_[out_name].CopyFrom(scalar_vi)
  def _infer_Slice(self, node):  # noqa: N802
      """Infer the output shape of a Slice node and propagate sliced sympy data.

      For opset <= 9 the axes/starts/ends come from attributes and every step
      is 1; for later opsets they come from inputs 1-4. When starts or ends
      are unknown, the affected dimensions (all of them if axes is unknown
      too) become new symbolic dimensions. Otherwise each sliced dimension is
      computed as ``(end - start + step -/+ 1) // step`` after normalizing
      negative and out-of-range indices.

      If input 0 has sympy data and the slice is a single range on axis 0,
      the same slice is applied to that data and stored for output 0.
      """

      def flatten_min(expr):
          """Split a sum containing exactly one ``sympy.Min(a, b)`` term into two sums.

          SymPy fails to prove that `x_0 + ... + x_n >= 0` if one of `x_i` is a `sympy.Min(a, b)`,
          even when the relation holds for both `a` and `b`.

          When given `expr` of form `min(a, b) + ...`, this function returns `[a + ..., b + ...]`,
          so that we can prove inequalities for both expressions separately.

          If the number of `min(...)` subexpressions is not exactly one, this function just returns `[expr]`.
          """
          assert isinstance(expr, sympy.Add), f"Expected a sum of two arguments, got {expr}"
          min_positions = [idx for idx in range(len(expr.args)) if isinstance(expr.args[idx], sympy.Min)]
          if len(min_positions) == 1:
              min_pos = min_positions[0]

              def replace_min_with_arg(arg_idx):
                  replaced = list(expr.args)
                  assert isinstance(replaced[min_pos], sympy.Min), (
                      f"Expected a sympy.Min() at position {min_pos}, got {replaced[min_pos]}"
                  )
                  assert len(replaced[min_pos].args) == 2, (
                      f"Expected a sympy.Min() with exactly 2 arguments, got {replaced[min_pos]}"
                  )
                  replaced[min_pos] = replaced[min_pos].args[arg_idx]
                  return sympy.Add(*replaced)

              return [
                  replace_min_with_arg(0),
                  replace_min_with_arg(1),
              ]
          return [expr]

      def less_equal(x, y):
          """Return whether ``x <= y``, trying several equivalent formulations.

          Each formulation is attempted in turn while the previous one raises
          TypeError; the final attempt may itself raise TypeError.
          """
          try:
              return bool(x <= y)
          except TypeError:
              pass
          try:
              return bool(y >= x)
          except TypeError:
              pass
          try:
              return bool(-x >= -y)
          except TypeError:
              pass
          try:
              return bool(-y <= -x)
          except TypeError:
              pass
          try:
              return bool(y - x >= 0)
          except TypeError:
              # the last attempt; this may raise TypeError
              return all(bool(d >= 0) for d in flatten_min(y - x))

      def handle_negative_index(index, bound):
          """normalizes a negative index to be in [0, bound)"""
          try:
              if not less_equal(0, index):
                  if is_literal(index) and index <= -self.int_max_:
                      # this case is handled separately
                      return index
                  return bound + index
          except TypeError:
              logger.warning(f"Cannot determine if {index} < 0")
          return index

      if get_opset(self.out_mp_) <= 9:
          axes = get_attribute(node, "axes")
          starts = get_attribute(node, "starts")
          ends = get_attribute(node, "ends")
          if not axes:
              axes = list(range(len(starts)))
          steps = [1] * len(axes)
      else:
          starts = as_list(self._try_get_value(node, 1), keep_none=True)
          ends = as_list(self._try_get_value(node, 2), keep_none=True)
          axes = self._try_get_value(node, 3)
          steps = self._try_get_value(node, 4)
          # default axes/steps can only be derived when at least one of starts/ends is known
          if axes is None and not (starts is None and ends is None):
              axes = list(range(len(starts if starts is not None else ends)))
          if steps is None and not (starts is None and ends is None):
              steps = [1] * len(starts if starts is not None else ends)
          axes = as_list(axes, keep_none=True)
          steps = as_list(steps, keep_none=True)

      new_sympy_shape = self._get_sympy_shape(node, 0)
      if starts is None or ends is None:
          if axes is None:
              for i in range(len(new_sympy_shape)):
                  new_sympy_shape[i] = self._new_symbolic_dim_from_output(node, 0, i)
          else:
              new_sympy_shape = get_shape_from_sympy_shape(new_sympy_shape)
              for i in axes:
                  new_sympy_shape[i] = self._new_symbolic_dim_from_output(node, 0, i)
      else:
          for axis, raw_start, raw_end, step in zip(axes, starts, ends, steps, strict=False):
              dim = new_sympy_shape[axis]
              end = handle_negative_index(raw_end, dim)
              if is_literal(end):
                  if end >= self.int_max_:
                      end = dim
                  elif end <= -self.int_max_:
                      end = 0 if raw_start > 0 else -1
                  elif is_literal(dim):
                      if end < 0:
                          end = max(0, end + dim)
                      end = min(end, dim)
                  elif end > 0:
                      # special case for slicing first to make computation easier
                      end = sympy.Min(end, dim) if end > 1 else end
              elif is_literal(dim):
                  end = sympy.Min(end, dim)
              else:
                  try:
                      if not less_equal(end, dim):
                          end = dim
                  except Exception:
                      logger.warning(f"Unable to determine if {end} <= {dim}, treat as equal")
                      end = dim

              start = handle_negative_index(raw_start, dim)
              if is_literal(dim) and is_literal(start):
                  start = max(0, min(start, dim))

              new_sympy_shape[axis] = sympy.simplify((end - start + step + (-1 if step > 0 else 1)) // step)

          self._update_computed_dims(new_sympy_shape)

      vi = self.known_vi_[node.output[0]]
      vi.CopyFrom(
          helper.make_tensor_value_info(
              node.output[0],
              vi.type.tensor_type.elem_type,
              get_shape_from_sympy_shape(new_sympy_shape),
          )
      )

      # handle sympy_data if needed, for slice in shape computation
      if (
          node.input[0] in self.sympy_data_
          and axes == [0]
          and starts is not None
          and len(starts) == 1
          and ends is not None
          and len(ends) == 1
          and steps is not None
          and len(steps) == 1
      ):
          input_sympy_data = self.sympy_data_[node.input[0]]
          # np.ndarray is the array class; np.array is only a factory function, so a
          # `type(x) is np.array` check can never succeed and 1-D arrays were skipped.
          if isinstance(input_sympy_data, list) or (
              isinstance(input_sympy_data, np.ndarray) and len(input_sympy_data.shape) == 1
          ):
              self.sympy_data_[node.output[0]] = input_sympy_data[starts[0] : ends[0] : steps[0]]
  def _infer_SoftmaxCrossEntropyLoss(self, node):  # noqa: N802
      """Give output 0 an empty shape and, if present, give output 1 the shape of input 0.

      The element type is input 0's unless the "output_type" attribute is set.
      """
      loss_vi = self.known_vi_[node.output[0]]
      # If output type is explicit specified in attribute, we use it as output tensor type.
      requested_type = get_attribute(node, "output_type", None)
      input_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      elem_type = input_type if requested_type is None else requested_type
      loss_vi.type.tensor_type.elem_type = elem_type
      loss_vi.type.tensor_type.shape.CopyFrom(onnx.TensorShapeProto())

      if len(node.output) > 1:
          in_shape = self._get_shape(node, 0)
          second_vi = self.known_vi_[node.output[1]]
          second_vi.CopyFrom(helper.make_tensor_value_info(second_vi.name, elem_type, in_shape))
  def _infer_Split_Common(self, node, make_value_info_func):  # noqa: N802
      """Shared inference for split-style nodes.

      The split sizes come from the "split" attribute below opset 13 and from
      input 1 otherwise. When they are absent, the split axis is divided
      evenly across the outputs. ``make_value_info_func`` builds the value
      info for each output from (name, elem_type, shape).
      """
      in_dims = self._get_sympy_shape(node, 0)
      axis = handle_negative_axis(get_attribute(node, "axis", 0), len(in_dims))
      # Depending on op-version 'split' are provided as attribute or via 2nd input
      if get_opset(self.out_mp_) < 13:
          split = get_attribute(node, "split")
          assert self._try_get_value(node, 1) is None
      else:
          split = self._try_get_value(node, 1)
          assert get_attribute(node, "split") is None

      if split is not None:
          split = [sympy.Integer(size) for size in split]
      else:
          num_outputs = len(node.output)
          split = [in_dims[axis] / sympy.Integer(num_outputs)] * num_outputs
          self._update_computed_dims(split)

      before = in_dims[:axis]
      after = in_dims[axis + 1 :]
      for out_idx, piece in enumerate(split):
          out_name = node.output[out_idx]
          out_vi = self.known_vi_[out_name]
          elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
          out_vi.CopyFrom(
              make_value_info_func(out_name, elem_type, get_shape_from_sympy_shape([*before, piece, *after]))
          )
          self.known_vi_[out_vi.name] = out_vi
  def _infer_Split(self, node):  # noqa: N802
      """Run the shared split inference, building tensor value infos for the outputs."""
      make_tensor_vi = helper.make_tensor_value_info
      self._infer_Split_Common(node, make_tensor_vi)
  def _infer_SplitToSequence(self, node):  # noqa: N802
      """Run the shared split inference, building sequence value infos for the outputs."""
      make_sequence_vi = helper.make_sequence_value_info
      self._infer_Split_Common(node, make_sequence_vi)
  def _infer_Squeeze(self, node):  # noqa: N802
      """Infer the output shape of a Squeeze node and pass sympy data through.

      Axes come from the "axes" attribute below opset 13 and from input 1
      otherwise. Without axes, every dimension equal to 1 is dropped and
      symbolic dimensions are assumed not to be 1. With axes, the listed
      dimensions are dropped and must be 1 or symbolic.
      """
      in_shape = self._get_shape(node, 0)
      # Depending on op-version 'axes' are provided as attribute or via 2nd input
      if get_opset(self.out_mp_) < 13:
          axes = get_attribute(node, "axes")
          assert self._try_get_value(node, 1) is None
      else:
          axes = self._try_get_value(node, 1)
          assert get_attribute(node, "axes") is None

      if axes is not None:
          axes = [handle_negative_axis(axis, len(in_shape)) for axis in axes]
          out_shape = []
          for i, dim in enumerate(in_shape):
              if i not in axes:
                  out_shape.append(dim)
                  continue
              is_symbolic = type(dim) != int  # noqa: E721
              assert dim == 1 or is_symbolic
              if self.verbose_ > 0 and is_symbolic:
                  logger.debug(
                      f"Symbolic dimensions in input shape of op: '{node.op_type}' node: '{node.name}'. "
                      f"Assuming the dimension '{in_shape[i]}' at index {i} of the input to be equal to 1."
                  )
      else:
          # No axes have been provided (neither via attribute nor via input).
          # In this case the 'Shape' op should remove all axis with dimension 1.
          # For symbolic dimensions we guess they are !=1.
          out_shape = [dim for dim in in_shape if dim != 1]
          if self.verbose_ > 0:
              symbolic_dimensions = [dim for dim in in_shape if type(dim) != int]  # noqa: E721
              if len(symbolic_dimensions) > 0:
                  logger.debug(
                      f"Symbolic dimensions in input shape of op: '{node.op_type}' node: '{node.name}'. "
                      f"Assuming the following dimensions are never equal to 1: {symbolic_dimensions}"
                  )

      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, out_shape))
      self._pass_on_sympy_data(node)
  def _infer_Tile(self, node):  # noqa: N802
      """Multiply each input dimension by its repeat count (input 1).

      When the repeats are unknown, the output gets new symbolic dimensions of
      the input rank. The output keeps its existing element type.
      """
      repeats = self._try_get_value(node, 1)
      if repeats is None:
          out_dims = self._new_symbolic_shape(self._get_shape_rank(node, 0), node)
      else:
          in_dims = self._get_sympy_shape(node, 0)
          out_dims = [dim * repeats[pos] for pos, dim in enumerate(in_dims)]
          self._update_computed_dims(out_dims)
      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      out_vi.CopyFrom(
          helper.make_tensor_value_info(
              out_name, out_vi.type.tensor_type.elem_type, get_shape_from_sympy_shape(out_dims)
          )
      )
  def _infer_TopK(self, node):  # noqa: N802
      """Replace the "axis" dimension (default -1) of the input shape with k on every output.

      k is the "k" attribute for opset <= 9 and input 1 otherwise; an unknown
      k becomes a new symbolic dimension. Each output keeps its own element type.
      """
      rank = self._get_shape_rank(node, 0)
      axis = handle_negative_axis(get_attribute(node, "axis", -1), rank)
      out_shape = self._get_shape(node, 0)

      if get_opset(self.out_mp_) <= 9:
          k = get_attribute(node, "k")
      else:
          k = self._get_int_or_float_values(node)[1]
      k = self._new_symbolic_dim_from_output(node) if k is None else as_scalar(k)

      if type(k) in [int, str]:
          out_shape[axis] = k
      else:
          out_dims = self._get_sympy_shape(node, 0)
          out_dims[axis] = k
          # note that TopK dim could be computed in sympy_data, so need to update computed_dims when it enters shape
          self._update_computed_dims(out_dims)
          out_shape = get_shape_from_sympy_shape(out_dims)

      for out_name in node.output:
          out_vi = self.known_vi_[out_name]
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, out_vi.type.tensor_type.elem_type, out_shape))
  def _infer_Transpose(self, node):  # noqa: N802
      """Propagate known symbolic data through a Transpose node.

      Nothing is done unless input 0 has an entry in ``sympy_data_``. In that
      case the data is reshaped to the input shape, permuted by ``perm``
      (reversed axis order when the attribute is absent) and stored flattened
      for the output.
      """
      source_name = node.input[0]
      if source_name not in self.sympy_data_:
          return

      data_shape = self._get_shape(node, 0)
      default_perm = reversed(list(range(len(data_shape))))
      perm = get_attribute(node, "perm", default_perm)
      values = np.array(self.sympy_data_[source_name]).reshape(*data_shape)
      self.sympy_data_[node.output[0]] = np.transpose(values, axes=tuple(perm)).flatten().tolist()
  def _infer_Unsqueeze(self, node):  # noqa: N802
      """Infer the output shape of Unsqueeze by inserting size-1 dims at ``axes``.

      Depending on the opset, ``axes`` comes from the attribute (< 13) or from
      the value of input 1 (>= 13); the other source is asserted to be absent.
      """
      input_shape = self._get_shape(node, 0)
      if get_opset(self.out_mp_) >= 13:
          axes = self._try_get_value(node, 1)
          assert get_attribute(node, "axes") is None
      else:
          axes = get_attribute(node, "axes")
          assert self._try_get_value(node, 1) is None

      output_rank = len(input_shape) + len(axes)
      inserted = [handle_negative_axis(a, output_rank) for a in axes]

      output_shape = []
      src = 0  # index of the next input dim to copy
      for pos in range(output_rank):
          if pos not in inserted:
              output_shape.append(input_shape[src])
              src += 1
              continue
          output_shape.append(1)

      out_name = node.output[0]
      out_vi = self.known_vi_[out_name]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi.CopyFrom(helper.make_tensor_value_info(out_name, elem_type, output_shape))
      self._pass_on_sympy_data(node)
  def _infer_ZipMap(self, node):  # noqa: N802
      """Set the ZipMap output type to a sequence of maps with FLOAT tensor values.

      The map key type is INT64 when ``classlabels_int64s`` is set and STRING
      when ``classlabels_strings`` is set; one of them must be present.
      """
      if get_attribute(node, "classlabels_int64s") is not None:
          key_type = onnx.TensorProto.INT64
      elif get_attribute(node, "classlabels_strings") is not None:
          key_type = onnx.TensorProto.STRING
      else:
          key_type = None
      assert key_type is not None

      zipmap_vi = onnx.ValueInfoProto()
      zipmap_vi.name = node.output[0]
      map_type = zipmap_vi.type.sequence_type.elem_type.map_type
      map_type.value_type.tensor_type.elem_type = onnx.TensorProto.FLOAT
      map_type.key_type = key_type

      self.known_vi_[node.output[0]].CopyFrom(zipmap_vi)
  def _infer_Attention(self, node):  # noqa: N802
      """Infer output (and optional present) shapes for the Attention op.

      Output 0 takes the rank-3 input shape with its last dim replaced by
      ``qkv_hidden_sizes[2]`` when that attribute is set, or by a third of the
      bias length / weight column count when that value is an int.

      When a second output exists, its shape is derived from the past input
      (input 4) if it is rank 5, otherwise it is built from the input shape and
      the ``num_heads`` attribute. A symbolic (non-int) hidden size yields a
      symbolic ``"<hidden>/<num_heads>"`` head size instead of raising
      ``TypeError`` on integer division.
      """
      shape = self._get_shape(node, 0)
      shape_weights = self._get_shape(node, 1)
      shape_bias = self._try_get_shape(node, 2)
      if shape_bias is not None:
          assert len(shape_bias) == 1
      tripled_hidden_size = shape_bias[0] if shape_bias is not None else shape_weights[1]
      if shape and len(shape) == 3:
          qkv_hidden_sizes_attr = get_attribute(node, "qkv_hidden_sizes")
          if qkv_hidden_sizes_attr is not None:
              assert len(qkv_hidden_sizes_attr) == 3
              shape[2] = int(qkv_hidden_sizes_attr[2])
          elif isinstance(tripled_hidden_size, int):
              shape[2] = int(tripled_hidden_size / 3)
          output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
          vi = self.known_vi_[node.output[0]]
          vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, shape))

          if len(node.output) > 1:
              # input shape: (batch_size, sequence_length, hidden_size)
              # past shape: (2, batch_size, num_heads, past_sequence_length, head_size)
              # mask shape: (batch_size, total_sequence_length) or (batch_size, sequence_length, total_sequence_length)
              #             or (batch_size, 1, max_seq_len, max_seq_len)
              # present shape: (2, batch_size, num_heads, total_sequence_length, head_size),
              #                where total_sequence_length=sequence_length+past_sequence_length
              input_shape = self._get_shape(node, 0)
              past_shape = self._get_shape(node, 4) if len(node.input) > 4 and node.input[4] else []
              mask_shape = self._get_shape(node, 3) if len(node.input) > 3 and node.input[3] else []

              if past_shape and len(past_shape) == 5:
                  if mask_shape and len(mask_shape) in [2, 3]:
                      past_shape[3] = mask_shape[-1]
                  elif input_shape and len(input_shape) == 3:
                      if isinstance(input_shape[1], int) and isinstance(past_shape[3], int):
                          past_shape[3] = input_shape[1] + past_shape[3]
                      else:
                          past_shape[3] = f"{past_shape[3]}+{input_shape[1]}"
                  vi = self.known_vi_[node.output[1]]
                  vi.CopyFrom(helper.make_tensor_value_info(vi.name, output_dtype, past_shape))
              # No past input but present output still exists
              else:
                  num_heads = get_attribute(node, "num_heads")
                  hidden_size = input_shape[2]
                  # The hidden size may be symbolic; keep the head size symbolic in that case.
                  if isinstance(hidden_size, int):
                      head_size = hidden_size // num_heads
                  else:
                      head_size = f"{hidden_size}/{num_heads}"
                  present_shape = [2, input_shape[0], num_heads, input_shape[1], head_size]
                  vi = self.known_vi_[node.output[1]]
                  vi.CopyFrom(helper.make_tensor_value_info(vi.name, output_dtype, present_shape))
  def _infer_GatedRelativePositionBias(self, node):  # noqa: N802
      """Infer the (batch_size, num_heads, seq_len, seq_len) output shape.

      When padding is removed:
        query_layer: (token_count, num_heads x head_size)
        token_offset: (batch_size, seq_len)
      Otherwise:
        query_layer: (batch_size, seq_len, num_heads x head_size)
        token_offset: None
      """
      num_heads = get_attribute(node, "num_heads")

      token_offset_shape = self._try_get_shape(node, 6)
      if token_offset_shape is None:
          query_layer_shape = self._get_shape(node, 0)
          assert query_layer_shape is not None and len(query_layer_shape) == 3
          batch_size, seq_len = query_layer_shape[0], query_layer_shape[1]
      else:
          batch_size, seq_len = token_offset_shape[0], token_offset_shape[1]
      output_shape = [batch_size, num_heads, seq_len, seq_len]

      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi = self.known_vi_[node.output[0]]
      out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, output_shape))
  def _infer_PackedAttention(self, node):  # noqa: N802
      """Infer the output shape of PackedAttention from its rank-2 input.

      The last dim becomes ``qkv_hidden_sizes[2]`` when that attribute is set,
      or a third of the bias length / weight column count when that is an int.
      Nothing is written when the input shape is not rank 2.
      """
      packed_shape = self._get_shape(node, 0)
      weights_shape = self._get_shape(node, 1)
      bias_shape = self._try_get_shape(node, 2)
      if bias_shape is None:
          tripled_hidden_size = weights_shape[1]
      else:
          assert len(bias_shape) == 1
          tripled_hidden_size = bias_shape[0]

      if not packed_shape or len(packed_shape) != 2:
          return

      qkv_hidden_sizes = get_attribute(node, "qkv_hidden_sizes")
      if qkv_hidden_sizes is not None:
          assert len(qkv_hidden_sizes) == 3
          packed_shape[1] = int(qkv_hidden_sizes[2])
      elif isinstance(tripled_hidden_size, int):
          packed_shape[1] = int(tripled_hidden_size / 3)

      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi = self.known_vi_[node.output[0]]
      out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, packed_shape))
  def _infer_PackedMultiHeadAttention(self, node):  # noqa: N802
      """Infer the output shape of PackedMultiHeadAttention.

      If input 2 has a rank-2 shape, the output takes that shape. Otherwise
      input 0 must be rank 4 and the output is
      ``[shape[0], shape[1] * shape[3]]``. When either factor is symbolic the
      product is kept as the symbolic string ``"<a>*<b>"`` rather than applying
      ``*`` to a str (which would repeat the name or raise ``TypeError``).
      """
      shape_value = self._try_get_shape(node, 2)
      if shape_value is not None and len(shape_value) == 2:
          output_shape = shape_value
      else:
          shape_query = self._get_shape(node, 0)
          assert shape_query is not None and len(shape_query) == 4
          if isinstance(shape_query[1], int) and isinstance(shape_query[3], int):
              hidden_size = shape_query[1] * shape_query[3]
          else:
              hidden_size = f"{shape_query[1]}*{shape_query[3]}"
          output_shape = [shape_query[0], hidden_size]

      output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      vi = self.known_vi_[node.output[0]]
      vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, output_shape))
  def _infer_RemovePadding(self, node):  # noqa: N802
      """Infer the four RemovePadding outputs from a rank-3 input shape.

      Output 0 keeps the input element type with shape ["token_count", shape[2]];
      outputs 1-3 are INT32 with shapes [shape[0], shape[1]], ["batch_size + 1"]
      and [1]. Nothing is written when the input shape is not rank 3.
      """
      shape = self._get_shape(node, 0)
      if not shape or len(shape) != 3:
          return

      int32 = onnx.TensorProto.INT32
      output_specs = (
          (self.known_vi_[node.input[0]].type.tensor_type.elem_type, ["token_count", shape[2]]),
          (int32, [shape[0], shape[1]]),
          (int32, ["batch_size + 1"]),
          (int32, [1]),
      )
      for out_index, (elem_type, dims) in enumerate(output_specs):
          out_vi = self.known_vi_[node.output[out_index]]
          out_vi.CopyFrom(helper.make_tensor_value_info(node.output[out_index], elem_type, dims))
  def _infer_RestorePadding(self, node):  # noqa: N802
      """Infer the RestorePadding output as [offset[0], offset[1], input[1]].

      Both the input (input 0) and the token offset (input 1) must have rank-2
      shapes; otherwise nothing is written.
      """
      packed_shape = self._get_shape(node, 0)
      offset_shape = self._get_shape(node, 1)
      if not (packed_shape and len(packed_shape) == 2 and offset_shape and len(offset_shape) == 2):
          return

      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi = self.known_vi_[node.output[0]]
      restored_shape = [offset_shape[0], offset_shape[1], packed_shape[1]]
      out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, restored_shape))
  2116. def _infer_BiasGelu(self, node): # noqa: N802
  2117. self._propagate_shape_and_type(node)
  def _infer_MultiHeadAttention(self, node):  # noqa: N802
      """Infer output 0 and, when present, the present key/value shapes.

      Output 0 has shape (batch_size, sequence_length, v_hidden_size).
      Q, K and V without packing:
        Input 0 (query) has shape (batch_size, sequence_length, hidden_size)
        Input 1 (key) has shape (batch_size, kv_sequence_length, hidden_size)
          or (batch_size, num_heads, kv_sequence_length, head_size)
        Input 2 (value) has shape (batch_size, kv_sequence_length, v_hidden_size)
          or (batch_size, num_heads, kv_sequence_length, head_size)
      Packed KV:
        Input 0 (query) has shape (batch_size, sequence_length, hidden_size)
        Input 1 (batch_size, kv_sequence_length, num_heads, 2, head_size)
        Input 2 nullptr
      Packed QKV:
        Input 0 (batch_size, sequence_length, num_heads, 3, head_size)
        Input 1 nullptr
        Input 2 nullptr
      """
      query_shape = self._get_shape(node, 0)
      if query_shape is None:
          return

      total_sequence_length = None
      output_dtype = None
      query_rank = len(query_shape)

      if query_rank == 3:
          key_shape = self._try_get_shape(node, 1)
          # By default, hidden size is same for Q/K/V. Only need check v_hidden_size when value is provided.
          # NOTE: output_shape aliases query_shape, so the write below is also
          # visible through query_shape further down.
          output_shape = query_shape
          if key_shape is not None and len(key_shape) == 3:
              value_shape = self._try_get_shape(node, 2)
              if value_shape is not None and len(value_shape) == 3:
                  output_shape[2] = value_shape[2]
              total_sequence_length = key_shape[1]
      elif query_rank == 5:
          heads_dim, head_dim = query_shape[2], query_shape[4]
          if isinstance(heads_dim, int) and isinstance(head_dim, int):
              hidden_dim = heads_dim * head_dim
          else:
              hidden_dim = f"{heads_dim}*{head_dim}"
          output_shape = [query_shape[0], query_shape[1], hidden_dim]
          total_sequence_length = query_shape[1]

      if query_rank in (3, 5):
          output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
          out_vi = self.known_vi_[node.output[0]]
          out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, output_shape))

      if len(node.output) <= 1:
          return

      batch_size = query_shape[0]
      num_heads = get_attribute(node, "num_heads")
      if query_rank != 3:
          head_size = query_shape[4]
      elif isinstance(query_shape[2], int):
          head_size = int(query_shape[2] / num_heads)
      else:
          head_size = f"{query_shape[2]}/{num_heads}"

      past_shape = self._try_get_shape(node, 6)
      if past_shape is not None:
          past_length = past_shape[2]
          if isinstance(past_length, int) and isinstance(total_sequence_length, int):
              total_sequence_length = past_length + total_sequence_length
          else:
              total_sequence_length = f"{past_length}+{total_sequence_length}"

      present_shape = [batch_size, num_heads, total_sequence_length, head_size]

      assert output_dtype is not None
      if len(node.output) > 2 and node.output[1] and node.output[2]:
          for present_index in (1, 2):
              present_vi = self.known_vi_[node.output[present_index]]
              present_vi.CopyFrom(helper.make_tensor_value_info(present_vi.name, output_dtype, present_shape))
  def _infer_DecoderMaskedMultiHeadAttention(self, node):  # noqa: N802
      """Propagate the query shape to output 0 and the past shape to outputs 1 and 2.

      Output 0 has shape (batch_size, 1, v_hidden_size)
      Q, K and V without packing:
        Input 0 (query) has shape (batch_size, 1, hidden_size)
        Input 5 (past_key) if exists has shape (batch_size, num_heads, max_sequence_length, head_size)
      """
      query_shape = self._get_shape(node, 0)
      if query_shape is None:
          return

      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      assert elem_type is not None
      out_vi = self.known_vi_[node.output[0]]
      out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, query_shape))

      if not (len(node.output) > 2 and node.output[1] and node.output[2]):
          return
      past_shape = self._try_get_shape(node, 5)
      if past_shape is None:
          return
      for present_index in (1, 2):
          present_vi = self.known_vi_[node.output[present_index]]
          present_vi.CopyFrom(helper.make_tensor_value_info(present_vi.name, elem_type, past_shape))
  def _infer_UnfoldTensor(self, node):  # noqa: N802
      """Infer the output shape of UnfoldTensor.

      The unfolded dim becomes ``(input_dim - size) // step + 1`` and a new
      trailing dim of length ``size`` is appended. Nothing is written when the
      input shape is unknown.

      Any negative ``dim`` is normalized against the input rank (not only -1),
      so that it indexes the same axis in the longer output shape. Missing
      ``dim``/``step`` attributes fall back to -1 and 1.
      NOTE(review): those fallbacks are assumed to match the operator schema
      defaults -- confirm against the contrib-op definition.
      """
      input_shape = self._get_shape(node, 0)
      if input_shape is None:
          return

      output_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      assert output_dtype is not None

      rank = len(input_shape)
      dim, size, step = -1, None, 1
      for attr in node.attribute:
          if attr.name == "dim":
              dim = attr.i
          elif attr.name == "size":
              size = attr.i
          elif attr.name == "step":
              step = attr.i
      assert size is not None, f"UnfoldTensor '{node.name}' has no size attribute."

      # Normalize before the output shape grows by one element; a negative index
      # applied after the append would address a different axis.
      if dim < 0:
          dim += rank

      output_shape = input_shape.copy()
      output_shape[dim] = (input_shape[dim] - size) // step + 1
      output_shape.append(size)

      vi = self.known_vi_[node.output[0]]
      vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_dtype, output_shape))
  def _infer_DynamicTimeWarping(self, node):  # noqa: N802
      """Infer the (2, O) output of DynamicTimeWarping as a FLOAT tensor.

      Input 0 has shape M x N or 1 x M x N
      Output 0 has shape (2, O) where max(M, N) <= O < M + N
      """
      input_shape = self._get_shape(node, 0)
      if input_shape is None:
          return

      rank = len(input_shape)
      assert rank in (2, 3)
      M = input_shape[rank - 2]  # noqa: N806
      N = input_shape[rank - 1]  # noqa: N806
      path_shape = [2, f"max({M}, {N}) <= O < {M} + {N}"]
      out_vi = self.known_vi_[node.output[0]]
      out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], onnx.TensorProto.FLOAT, path_shape))
  2232. def _infer_FastGelu(self, node): # noqa: N802
  2233. self._propagate_shape_and_type(node)
  2234. def _infer_Gelu(self, node): # noqa: N802
  2235. self._propagate_shape_and_type(node)
  2236. def _infer_QuickGelu(self, node): # noqa: N802
  2237. self._propagate_shape_and_type(node)
  2238. def _infer_GemmFastGelu(self, node): # noqa: N802
  2239. self._compute_matmul_shape(node)
  2240. def _infer_GemmFloat8(self, node): # noqa: N802
  2241. self._compute_matmul_shape(node)
  def _infer_LayerNormalization(self, node):  # noqa: N802
      """Infer shapes for LayerNormalization and its optional extra outputs.

      Output 0 mirrors input 0. Outputs 1 and 2, when present, get the input
      shape with every dim from ``axis`` (default -1) onward replaced by 1.
      Their element type follows the input, except that FLOAT16/BFLOAT16
      inputs produce FLOAT.
      """
      self._propagate_shape_and_type(node)
      if len(node.output) <= 1:
          return

      axis_attr = get_attribute(node, "axis")
      axis = -1 if axis_attr is None else axis_attr
      x_shape = self._get_shape(node, 0)
      if x_shape is None:
          return

      rank = len(x_shape)
      axis = handle_negative_axis(axis, rank)
      stats_shape = x_shape[:axis] + [1] * (rank - axis)
      stats_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      if stats_dtype in (onnx.TensorProto.FLOAT16, onnx.TensorProto.BFLOAT16):
          stats_dtype = onnx.TensorProto.FLOAT

      # Output 1 always exists here; output 2 only when the node declares it.
      for stats_name in node.output[1:3]:
          stats_vi = self.known_vi_[stats_name]
          stats_vi.CopyFrom(helper.make_tensor_value_info(stats_name, stats_dtype, stats_shape))
  2261. def _infer_LongformerAttention(self, node): # noqa: N802
  2262. self._propagate_shape_and_type(node)
  def _infer_EmbedLayerNormalization(self, node):  # noqa: N802
      """Infer the outputs of EmbedLayerNormalization.

      Output 0 (and optional output 2) are [*input_ids_shape, word_embedding_shape[1]]
      with the word-embedding element type; optional output 1 is INT32 with
      shape [input_ids_shape[0]].
      """
      input_ids_shape = self._get_shape(node, 0)
      word_embedding_shape = self._get_shape(node, 2)
      assert len(input_ids_shape) == 2 and len(word_embedding_shape) == 2

      embedded_shape = [*input_ids_shape, word_embedding_shape[1]]
      embedding_dtype = self.known_vi_[node.input[2]].type.tensor_type.elem_type
      num_outputs = len(node.output)

      main_vi = self.known_vi_[node.output[0]]
      main_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], embedding_dtype, embedded_shape))

      if num_outputs > 1 and node.output[1]:
          mask_vi = self.known_vi_[node.output[1]]
          mask_vi.CopyFrom(
              helper.make_tensor_value_info(node.output[1], onnx.TensorProto.INT32, [input_ids_shape[0]])
          )

      if num_outputs > 2:
          # Optional output of add before layer normalization is done
          # shape is same as the output
          sum_vi = self.known_vi_[node.output[2]]
          sum_vi.CopyFrom(helper.make_tensor_value_info(node.output[2], embedding_dtype, embedded_shape))
  2280. def _infer_SkipLayerNormalization(self, node): # noqa: N802
  2281. self._propagate_shape_and_type(node)
  2282. # If the SkipLayerNormalization node contains the optional
  2283. # output for inference, infer the shape and type for it too
  2284. if len(node.output) > 3:
  2285. self._propagate_shape_and_type(node, 0, 3)
  2286. def _infer_GroupNorm(self, node): # noqa: N802
  2287. self._propagate_shape_and_type(node)
  2288. def _infer_PagedAttention(self, node): # noqa: N802
  2289. self._propagate_shape_and_type(node)
  def _infer_GroupQueryAttention(self, node):  # noqa: N802
      """Infer present key/value and output shapes for GroupQueryAttention.

      With separate key/value inputs, output 0 mirrors input 0. With combined
      qkv (inputs 1 and 2 empty), the last dim of input 0 is
      num_heads * head_size + 2 * kv_num_heads * head_size and output 0 gets
      num_heads * head_size there; this is only done when that dim is an int.
      """
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type

      past_shape = self._try_get_shape(node, 3)
      if past_shape is not None:
          # When past and present has the maximum sequence length, we can propagate the shape from past to present.
          # Note that GQA also supports different sequence lengths for past and present, but it is rarely used.
          for present_index in (1, 2):
              present_vi = self.known_vi_[node.output[present_index]]
              present_vi.CopyFrom(helper.make_tensor_value_info(present_vi.name, elem_type, past_shape))

      if node.input[1] != "" and node.input[2] != "":
          self._propagate_shape_and_type(node, 0, 0)
          return

      # combined qkv: (batch_size, sequence_length, num_heads * head_size + 2 * kv_num_heads * head_size)
      assert node.input[1] == "" and node.input[2] == ""
      num_heads = get_attribute(node, "num_heads")
      kv_num_heads = get_attribute(node, "kv_num_heads")
      query_shape = self._get_shape(node, 0)
      if query_shape is None:
          return
      packed_hidden_size = query_shape[2]
      if not isinstance(packed_hidden_size, int):
          return

      head_size = int(packed_hidden_size / (num_heads + 2 * kv_num_heads))
      query_shape[2] = num_heads * head_size
      out_vi = self.known_vi_[node.output[0]]
      out_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], elem_type, query_shape))
  2315. def _infer_SparseAttention(self, node): # noqa: N802
  2316. self._infer_GroupQueryAttention(node)
  2317. def _infer_SkipGroupNorm(self, node): # noqa: N802
  2318. self._propagate_shape_and_type(node, 0, 0)
  2319. if len(node.output) > 1:
  2320. self._propagate_shape_and_type(node, 0, 1)
  def _infer_BiasSplitGelu(self, node):  # noqa: N802
      """Infer the BiasSplitGelu output: the input shape with dim 2 set to half the bias length.

      Nothing is written unless both shapes are known and non-empty and the
      bias length is an int.
      """
      input_shape = self._get_shape(node, 0)
      bias_shape = self._get_shape(node, 1)
      if not (input_shape and bias_shape and isinstance(bias_shape[0], int)):
          return

      input_shape[2] = int(bias_shape[0] / 2)
      out_vi = self.known_vi_[node.output[0]]
      elem_type = self.known_vi_[node.input[0]].type.tensor_type.elem_type
      out_vi.CopyFrom(helper.make_tensor_value_info(out_vi.name, elem_type, input_shape))
  2330. def _infer_BiasAdd(self, node): # noqa: N802
  2331. self._propagate_shape_and_type(node)
  2332. def _infer_RotaryEmbedding(self, node): # noqa: N802
  2333. if len(node.output) == 1:
  2334. self._propagate_shape_and_type(node)
  2335. elif len(node.output) == 2:
  2336. # Extraneous constant nodes outputted by RotaryEmbedding function made with `export_modules_as_functions`
  2337. self._propagate_shape_and_type(node, input_index=1, output_index=0)
  2338. self._propagate_shape_and_type(node, input_index=0, output_index=1) # true output
  2339. elif len(node.output) == 3:
  2340. # Extraneous constant nodes outputted by RotaryEmbedding function made with `export_modules_as_functions`
  2341. self._propagate_shape_and_type(node, input_index=1, output_index=0)
  2342. self._propagate_shape_and_type(node, input_index=1, output_index=1)
  2343. self._propagate_shape_and_type(node, input_index=0, output_index=2) # true output
  def _infer_PythonOp(self, node):  # noqa: N802
      """Infer output types and shapes for a PythonOp node.

      Output 0 is always set to a rank-0 INT64 tensor. For the remaining
      outputs, a shape inference function registered under the node's
      ``func_name`` is used when one exists; otherwise each output gets a
      fresh symbolic shape of the rank listed in ``output_tensor_ranks`` and
      the type listed in ``output_tensor_types``.
      """
      output_tensor_types = get_attribute(node, "output_tensor_types")
      assert output_tensor_types, f"PythonOp '{node.name}' has no output_tensor_types attribute."
      output_tensor_ranks = get_attribute(node, "output_tensor_ranks")
      assert output_tensor_ranks, f"PythonOp '{node.name}' has no output_tensor_ranks attribute."

      from onnxruntime.capi._pybind_state import get_shape_inference_function  # noqa: PLC0415

      func_name = get_attribute(node, "func_name").decode()
      shape_inferer = get_shape_inference_function(func_name)

      # Set the context output separately.
      # The first output is torch.autograd.Function's context.
      ctx_vi = self.known_vi_[node.output[0]]
      ctx_vi.CopyFrom(helper.make_tensor_value_info(node.output[0], onnx.TensorProto.INT64, []))

      tensor_output_names = node.output[1:]

      if shape_inferer is None:
          # General shape inference for PythonOp.
          # Outputs after torch.autograd.Function's context are tensors.
          # We assume their ranks are fixed for different model inputs.
          for i, out_name in enumerate(tensor_output_names):
              out_vi = self.known_vi_[out_name]
              sympy_shape = self._new_symbolic_shape(output_tensor_ranks[i], node)
              dims = get_shape_from_sympy_shape(sympy_shape)
              out_vi.CopyFrom(helper.make_tensor_value_info(out_name, output_tensor_types[i], dims))
          return

      input_shapes = []
      input_dtypes = []
      for input_index, input_name in enumerate(node.input):
          input_shapes.append(self._get_shape(node, input_index))
          input_dtypes.append(self.known_vi_[input_name].type.tensor_type.elem_type)

      output_shapes, output_dtypes = shape_inferer(node, input_shapes, input_dtypes)
      assert len(output_shapes) == len(output_dtypes) == (len(node.output) - 1), (
          f"PythonOp '{func_name}' returned {len(output_shapes)} shapes and {len(output_dtypes)} dtypes, "
          f"but expected {len(node.output) - 1} outputs."
      )
      for i, out_name in enumerate(tensor_output_names):
          out_vi = self.known_vi_[out_name]
          out_vi.CopyFrom(helper.make_tensor_value_info(out_name, output_dtypes[i], output_shapes[i]))
  def _propagate_shape_and_type(self, node, input_index=0, output_index=0):
      """Copy the shape and element type of one node input onto one node output.

      Args:
          node: the node whose value info is updated.
          input_index: index of the input to read shape and type from.
          output_index: index of the output whose value info is overwritten.
      """
      propagated_shape = self._get_shape(node, input_index)
      elem_type = self.known_vi_[node.input[input_index]].type.tensor_type.elem_type
      target_name = node.output[output_index]
      target_vi = self.known_vi_[target_name]
      target_vi.CopyFrom(helper.make_tensor_value_info(target_name, elem_type, propagated_shape))
  2391. def _is_none_dim(self, dim_value):
  2392. if type(dim_value) != str: # noqa: E721
  2393. return False
  2394. if "unk__" not in dim_value:
  2395. return False
  2396. if dim_value in self.symbolic_dims_:
  2397. return False
  2398. return True
  2399. def _is_shape_contains_none_dim(self, out_shape):
  2400. for out in out_shape:
  2401. if self._is_none_dim(out):
  2402. return out
  2403. return None
  2404. def _infer_impl(self, start_sympy_data=None):
  2405. self.sympy_data_ = start_sympy_data or {}
  2406. self.out_mp_.graph.ClearField("value_info")
  2407. self._apply_suggested_merge(graph_input_only=True)
  2408. self.input_symbols_ = set()
  2409. for i in self.out_mp_.graph.input:
  2410. input_shape = get_shape_from_value_info(i)
  2411. if input_shape is None:
  2412. continue
  2413. if is_sequence(i.type):
  2414. input_dims = i.type.sequence_type.elem_type.tensor_type.shape.dim
  2415. else:
  2416. input_dims = i.type.tensor_type.shape.dim
  2417. for i_dim, dim in enumerate(input_shape):
  2418. if dim is None:
  2419. # some models use None for symbolic dim in input, replace it with a string
  2420. input_dims[i_dim].dim_param = str(self._new_symbolic_dim(i.name, i_dim))
  2421. self.input_symbols_.update([d for d in input_shape if type(d) is str])
  2422. for s in self.input_symbols_:
  2423. if s in self.suggested_merge_:
  2424. s_merge = self.suggested_merge_[s]
  2425. assert s_merge in self.symbolic_dims_
  2426. self.symbolic_dims_[s] = self.symbolic_dims_[s_merge]
  2427. else:
  2428. # Since inputs are not produced by other ops, we can assume positivity
  2429. self.symbolic_dims_[s] = sympy.Symbol(s, integer=True, positive=True)
  2430. # create a temporary ModelProto for single node inference
  2431. # note that we remove initializer to have faster inference
  2432. # for tensor ops like Reshape/Tile/Expand that read initializer, we need to do sympy computation based inference anyways
  2433. self.tmp_mp_ = onnx.ModelProto()
  2434. self.tmp_mp_.CopyFrom(self.out_mp_)
  2435. self.tmp_mp_.graph.ClearField("initializer")
  2436. # compute prerequesite for node for topological sort
  2437. # node with subgraphs may have dependency on implicit inputs, which will affect topological sort
  2438. prereq_for_node = {} # map from node to all its inputs, including implicit ones in subgraph
  2439. def get_prereq(node):
  2440. names = {i for i in node.input if i}
  2441. subgraphs = []
  2442. if node.op_type == "If":
  2443. subgraphs = [
  2444. get_attribute(node, "then_branch"),
  2445. get_attribute(node, "else_branch"),
  2446. ]
  2447. elif node.op_type in ["Loop", "Scan"]:
  2448. subgraphs = [get_attribute(node, "body")]
  2449. for g in subgraphs:
  2450. g_outputs_and_initializers = {i.name for i in g.initializer}
  2451. g_prereq = set()
  2452. for n in g.node:
  2453. g_outputs_and_initializers.update(n.output)
  2454. for n in g.node:
  2455. g_prereq.update([i for i in get_prereq(n) if i not in g_outputs_and_initializers])
  2456. names.update(g_prereq)
  2457. # remove subgraph inputs from g_prereq since those are local-only
  2458. for i in g.input:
  2459. names.discard(i.name)
  2460. return names
  2461. for n in self.tmp_mp_.graph.node:
  2462. prereq_for_node[n.output[0]] = get_prereq(n)
  2463. # topological sort nodes, note there might be dead nodes so we check if all graph outputs are reached to terminate
  2464. sorted_nodes = []
  2465. sorted_known_vi = {i.name for i in list(self.out_mp_.graph.input) + list(self.out_mp_.graph.initializer)}
  2466. if any(o.name in sorted_known_vi for o in self.out_mp_.graph.output):
  2467. # Loop/Scan will have some graph output in graph inputs, so don't do topological sort
  2468. sorted_nodes = self.out_mp_.graph.node
  2469. else:
  2470. while not all(o.name in sorted_known_vi for o in self.out_mp_.graph.output):
  2471. old_sorted_nodes_len = len(sorted_nodes)
  2472. for node in self.out_mp_.graph.node:
  2473. if (node.output[0] not in sorted_known_vi) and all(
  2474. i in sorted_known_vi for i in prereq_for_node[node.output[0]] if i
  2475. ):
  2476. sorted_known_vi.update(node.output)
  2477. sorted_nodes.append(node)
  2478. if old_sorted_nodes_len == len(sorted_nodes) and not all(
  2479. o.name in sorted_known_vi for o in self.out_mp_.graph.output
  2480. ):
  2481. raise Exception("Invalid model with cyclic graph")
  2482. for node in sorted_nodes:
  2483. assert all(i in self.known_vi_ for i in node.input if i)
  2484. self._onnx_infer_single_node(node)
  2485. known_aten_op = False
  2486. if node.op_type in self.dispatcher_:
  2487. self.dispatcher_[node.op_type](node)
  2488. elif node.op_type in ["ConvTranspose"]:
  2489. # onnx shape inference ops like ConvTranspose may have empty shape for symbolic input
  2490. # before adding symbolic compute for them
  2491. # mark the output type as UNDEFINED to allow guessing of rank
  2492. vi = self.known_vi_[node.output[0]]
  2493. if len(vi.type.tensor_type.shape.dim) == 0:
  2494. vi.type.tensor_type.elem_type = onnx.TensorProto.UNDEFINED
  2495. elif node.op_type == "ATen" and node.domain == "org.pytorch.aten":
  2496. for attr in node.attribute:
  2497. # TODO: Is overload_name needed?
  2498. if attr.name == "operator":
  2499. aten_op_name = attr.s.decode("utf-8") if isinstance(attr.s, bytes) else attr.s
  2500. if aten_op_name in self.aten_op_dispatcher_:
  2501. known_aten_op = True
  2502. self.aten_op_dispatcher_[aten_op_name](node)
  2503. break
  2504. if self.verbose_ > 2:
  2505. logger.debug(node.op_type + ": " + node.name) # noqa: G003
  2506. for i, name in enumerate(node.input):
  2507. logger.debug(" Input %s: %s %s", i, name, "initializer" if name in self.initializers_ else "")
  2508. # onnx automatically merge dims with value, i.e. Mul(['aaa', 'bbb'], [1000, 1]) -> [1000, 'bbb']
  2509. # symbolic shape inference needs to apply merge of 'aaa' -> 1000 in this case
  2510. if node.op_type in [
  2511. "Add",
  2512. "Sub",
  2513. "Mul",
  2514. "Div",
  2515. "MatMul",
  2516. "MatMulInteger",
  2517. "MatMulInteger16",
  2518. "Where",
  2519. "Sum",
  2520. ]:
  2521. vi = self.known_vi_[node.output[0]]
  2522. out_rank = len(get_shape_from_type_proto(vi.type))
  2523. in_shapes = [self._get_shape(node, i) for i in range(len(node.input))]
  2524. for d in range(out_rank - (2 if node.op_type in ["MatMul", "MatMulInteger", "MatMulInteger16"] else 0)):
  2525. in_dims = [s[len(s) - out_rank + d] for s in in_shapes if len(s) + d >= out_rank]
  2526. if len(in_dims) > 1:
  2527. self._check_merged_dims(in_dims, allow_broadcast=True)
  2528. for i_o in range(len(node.output)):
  2529. # Special cases:
  2530. # 1) We do not care about the training related outputs of SkipLayerNormalization
  2531. # 2) We do not care about the extraneous constant outputs in RotaryEmbedding because
  2532. # the RotaryEmbedding op created during export can be replaced by the RotaryEmbedding
  2533. # contrib op
  2534. if (
  2535. node.op_type == "SkipLayerNormalization" or node.op_type == "SkipSimplifiedLayerNormalization"
  2536. ) and i_o in [1, 2]:
  2537. continue
  2538. if node.op_type == "RotaryEmbedding" and len(node.output) > 1:
  2539. # Skip symbolic shape inference for RotaryEmbedding functions that have extraneous outputs
  2540. # generated by `export_modules_as_functions`
  2541. continue
  2542. vi = self.known_vi_[node.output[i_o]]
  2543. out_type = vi.type
  2544. out_type_kind = out_type.WhichOneof("value")
  2545. # do not process shape for non-tensors
  2546. if out_type_kind not in ["tensor_type", "sparse_tensor_type", None]:
  2547. if self.verbose_ > 2:
  2548. if out_type_kind == "sequence_type":
  2549. seq_cls_type = out_type.sequence_type.elem_type.WhichOneof("value")
  2550. if seq_cls_type == "tensor_type":
  2551. logger.debug(
  2552. " {}: sequence of {} {}".format( # noqa: G001
  2553. node.output[i_o],
  2554. str(get_shape_from_value_info(vi)),
  2555. onnx.TensorProto.DataType.Name(
  2556. vi.type.sequence_type.elem_type.tensor_type.elem_type
  2557. ),
  2558. )
  2559. )
  2560. else:
  2561. logger.debug(f" {node.output[i_o]}: sequence of {seq_cls_type}")
  2562. else:
  2563. logger.debug(f" {node.output[i_o]}: {out_type_kind}")
  2564. continue
  2565. out_shape = get_shape_from_value_info(vi)
  2566. out_type_undefined = out_type.tensor_type.elem_type == onnx.TensorProto.UNDEFINED
  2567. if self.verbose_ > 2:
  2568. logger.debug(
  2569. f" {node.output[i_o]}: {out_shape!s} {onnx.TensorProto.DataType.Name(vi.type.tensor_type.elem_type)}"
  2570. )
  2571. if node.output[i_o] in self.sympy_data_:
  2572. logger.debug(" Sympy Data: " + str(self.sympy_data_[node.output[i_o]])) # noqa: G003
  2573. # onnx >= 1.11.0, use unk__#index instead of None when the shape dim is uncertain
  2574. if (
  2575. out_shape is not None and (None in out_shape or self._is_shape_contains_none_dim(out_shape))
  2576. ) or out_type_undefined:
  2577. if self.auto_merge_:
  2578. if node.op_type in [
  2579. "Add",
  2580. "Sub",
  2581. "Mul",
  2582. "Div",
  2583. "MatMul",
  2584. "MatMulInteger",
  2585. "MatMulInteger16",
  2586. "Concat",
  2587. "Where",
  2588. "Sum",
  2589. "Equal",
  2590. "Less",
  2591. "Greater",
  2592. "LessOrEqual",
  2593. "GreaterOrEqual",
  2594. "Min",
  2595. "Max",
  2596. ]:
  2597. shapes = [self._get_shape(node, i) for i in range(len(node.input))]
  2598. if node.op_type in [
  2599. "MatMul",
  2600. "MatMulInteger",
  2601. "MatMulInteger16",
  2602. ]:
  2603. if None in out_shape or self._is_shape_contains_none_dim(out_shape):
  2604. if None in out_shape:
  2605. idx = out_shape.index(None)
  2606. else:
  2607. idx = out_shape.index(self._is_shape_contains_none_dim(out_shape))
  2608. dim_idx = [len(s) - len(out_shape) + idx for s in shapes]
  2609. # only support auto merge for MatMul for dim < rank-2 when rank > 2
  2610. assert len(shapes[0]) > 2 and dim_idx[0] < len(shapes[0]) - 2
  2611. assert len(shapes[1]) > 2 and dim_idx[1] < len(shapes[1]) - 2
  2612. elif node.op_type == "Expand":
  2613. # auto merge for cases like Expand([min(batch, 1), min(seq, 512)], [batch, seq])
  2614. shapes = [
  2615. self._get_shape(node, 0),
  2616. self._get_value(node, 1),
  2617. ]
  2618. else:
  2619. shapes = []
  2620. if shapes:
  2621. for idx in range(len(out_shape)):
  2622. if out_shape[idx] is not None and not self._is_none_dim(out_shape[idx]):
  2623. continue
  2624. # note that the broadcasting rule aligns from right to left
  2625. # if a tensor has a lower rank (dim_idx[idx] < 0), it would automatically broadcast and need no merge
  2626. dim_idx = [len(s) - len(out_shape) + idx for s in shapes]
  2627. if len(dim_idx) > 0:
  2628. self._add_suggested_merge(
  2629. [
  2630. s[i] if is_literal(s[i]) else str(s[i])
  2631. for s, i in zip(shapes, dim_idx, strict=False)
  2632. if i >= 0
  2633. ]
  2634. )
  2635. self.run_ = True
  2636. else:
  2637. self.run_ = False
  2638. else:
  2639. self.run_ = False
  2640. # create new dynamic dims for ops not handled by symbolic shape inference
  2641. if self.run_ is False and node.op_type not in self.dispatcher_ and not known_aten_op:
  2642. is_unknown_op = out_type_undefined and (out_shape is None or len(out_shape) == 0)
  2643. if is_unknown_op:
  2644. # unknown op to ONNX, maybe from higher opset or other domain
  2645. # only guess the output rank from input 0 when using guess_output_rank option
  2646. out_rank = self._get_shape_rank(node, 0) if self.guess_output_rank_ else -1
  2647. else:
  2648. # valid ONNX op, but not handled by symbolic shape inference, just assign dynamic shape
  2649. out_rank = len(out_shape)
  2650. if out_rank >= 0:
  2651. new_shape = self._new_symbolic_shape(out_rank, node, i_o)
  2652. if out_type_undefined:
  2653. # guess output data type from input vi if not defined
  2654. out_dtype = self.known_vi_[node.input[0]].type.tensor_type.elem_type
  2655. else:
  2656. # otherwise, use original data type
  2657. out_dtype = vi.type.tensor_type.elem_type
  2658. vi.CopyFrom(
  2659. helper.make_tensor_value_info(
  2660. vi.name,
  2661. out_dtype,
  2662. get_shape_from_sympy_shape(new_shape),
  2663. )
  2664. )
  2665. if self.verbose_ > 0:
  2666. if is_unknown_op:
  2667. logger.debug(
  2668. f"Possible unknown op: {node.op_type} node: {node.name}, guessing {vi.name} shape"
  2669. )
  2670. if self.verbose_ > 2:
  2671. logger.debug(f" {node.output[i_o]}: {new_shape!s} {vi.type.tensor_type.elem_type}")
  2672. self.run_ = True
  2673. continue # continue the inference after guess, no need to stop as no merge is needed
  2674. if self.verbose_ > 0 or not self.auto_merge_ or out_type_undefined:
  2675. logger.debug("Stopping at incomplete shape inference at %s: %s", node.op_type, node.name)
  2676. logger.debug("node inputs:")
  2677. for i in node.input:
  2678. if i in self.known_vi_:
  2679. logger.debug(self.known_vi_[i])
  2680. else:
  2681. logger.debug(f"not in known_vi_ for {i}")
  2682. logger.debug("node outputs:")
  2683. for o in node.output:
  2684. if o in self.known_vi_:
  2685. logger.debug(self.known_vi_[o])
  2686. else:
  2687. logger.debug(f"not in known_vi_ for {o}")
  2688. if self.auto_merge_ and not out_type_undefined:
  2689. logger.debug("Merging: " + str(self.suggested_merge_)) # noqa: G003
  2690. return False
  2691. self.run_ = False
  2692. return True
  def _update_output_from_vi(self):
      """Refresh the graph outputs of ``self.out_mp_`` from ``self.known_vi_``.

      Each graph output whose name has an entry in ``self.known_vi_`` is
      overwritten in place (via ``CopyFrom``) with that entry. Outputs without
      an entry are left untouched.
      """
      known = self.known_vi_
      for graph_output in self.out_mp_.graph.output:
          key = graph_output.name
          if key not in known:
              # Nothing recorded for this output; keep what the model already has.
              continue
          graph_output.CopyFrom(known[key])
  @staticmethod
  def infer_shapes(in_mp, int_max=2**31 - 1, auto_merge=False, guess_output_rank=False, verbose=0):
      """Run symbolic shape inference on ``in_mp`` and return the resulting model.

      ``int_max``, ``auto_merge``, ``guess_output_rank`` and ``verbose`` are
      passed through, in that order, to the ``SymbolicShapeInference``
      constructor.

      Returns:
          ``None`` (after logging a warning) when ``get_opset(in_mp)`` is falsy
          or below 7; otherwise the ``out_mp_`` attribute of the inference
          object.

      Raises:
          Exception: when the last inference pass did not report success (or
              no pass ran at all). Before raising, the partially inferred model
              is saved to ``sym_shape_infer_temp.onnx`` with external data
              enabled.
      """
      opset_version = get_opset(in_mp)
      if not opset_version or opset_version < 7:
          logger.warning("Only support models of onnx opset 7 and above.")
          return None

      engine = SymbolicShapeInference(int_max, auto_merge, guess_output_rank, verbose)
      engine._preprocess(in_mp)

      # Keep running passes for as long as the engine asks for another one;
      # only the outcome of the final pass decides success.
      succeeded = False
      while engine.run_:
          succeeded = engine._infer_impl()
      engine._update_output_from_vi()

      if succeeded:
          return engine.out_mp_

      # Persist whatever was inferred so far, then report the failure.
      onnx.save_model(engine.out_mp_, "sym_shape_infer_temp.onnx", save_as_external_data=True)
      raise Exception("Incomplete symbolic shape inference")
  def parse_arguments():
      """Build the command-line parser and parse the process arguments.

      Returns:
          The ``argparse.Namespace`` produced by ``parse_args()``, with the
          attributes ``input``, ``output``, ``auto_merge``, ``int_max``,
          ``guess_output_rank``, ``verbose``, ``save_as_external_data``,
          ``all_tensors_to_one_file``, ``external_data_location`` and
          ``external_data_size_threshold``.
      """
      # (flag, add_argument keyword options), registered in this order so the
      # generated --help output keeps the same layout.
      option_table = (
          ("--input", {"required": True, "help": "The input model file"}),
          ("--output", {"help": "The output model file"}),
          (
              "--auto_merge",
              {
                  "help": "Automatically merge symbolic dims when confliction happens",
                  "action": "store_true",
                  "default": False,
              },
          ),
          (
              "--int_max",
              {
                  "help": "maximum value for integer to be treated as boundless for ops like slice",
                  "type": int,
                  "default": 2**31 - 1,
              },
          ),
          (
              "--guess_output_rank",
              {
                  "help": "guess output rank to be the same as input 0 for unknown ops",
                  "action": "store_true",
                  "default": False,
              },
          ),
          (
              "--verbose",
              {
                  "help": "Prints detailed logs of inference, 0: turn off, 1: warnings, 3: detailed",
                  "type": int,
                  "default": 0,
              },
          ),
          (
              "--save_as_external_data",
              {
                  "help": "Saving an ONNX model to external data",
                  "action": "store_true",
                  "default": False,
              },
          ),
          (
              "--all_tensors_to_one_file",
              {
                  "help": "Saving all the external data to one file",
                  "action": "store_true",
                  "default": False,
              },
          ),
          (
              "--external_data_location",
              {
                  "help": "The file location to save the external file",
                  "default": "./",
              },
          ),
          (
              "--external_data_size_threshold",
              {
                  "help": "The size threshold for external data",
                  "type": int,
                  "default": 1024,
              },
          ),
      )

      cli = argparse.ArgumentParser()
      for flag, options in option_table:
          cli.add_argument(flag, **options)
      return cli.parse_args()
  if __name__ == "__main__":
      # Script entry point: parse the CLI, run symbolic shape inference on the
      # input model and, when an output path was given and inference returned a
      # model, write the result to disk.
      args = parse_arguments()

      logger.info("input model: " + args.input)  # noqa: G003
      if args.output:
          logger.info("output model " + args.output)  # noqa: G003
      logger.info("Doing symbolic shape inference...")

      out_mp = SymbolicShapeInference.infer_shapes(
          in_mp=onnx.load(args.input),
          int_max=args.int_max,
          auto_merge=args.auto_merge,
          guess_output_rank=args.guess_output_rank,
          verbose=args.verbose,
      )

      if args.output and out_mp:
          if not args.save_as_external_data:
              # Plain save: everything goes into the single output file.
              onnx.save(out_mp, args.output)
          else:
              # External-data save, configured from the CLI options.
              external_data_options = {
                  "save_as_external_data": True,
                  "all_tensors_to_one_file": args.all_tensors_to_one_file,
                  "location": args.external_data_location,
                  "size_threshold": args.external_data_size_threshold,
                  "convert_attribute": False,
              }
              onnx.save_model(out_mp, args.output, **external_data_options)
          logger.info("Done!")