# byobnet.py
""" Bring-Your-Own-Blocks Network

A flexible network w/ dataclass based config for stacking those NN blocks.

This model is currently used to implement the following networks:

GPU Efficient (ResNets) - gernet_l/m/s (original versions called genet, but this was already used (by SENet author)).
Paper: `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
Code and weights: https://github.com/idstcv/GPU-Efficient-Networks, licensed Apache 2.0

RepVGG - repvgg_*
Paper: `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
Code and weights: https://github.com/DingXiaoH/RepVGG, licensed MIT

MobileOne - mobileone_*
Paper: `MobileOne: An Improved One millisecond Mobile Backbone` - https://arxiv.org/abs/2206.04040
Code and weights: https://github.com/apple/ml-mobileone, licensed MIT

In all cases the models have been modified to fit within the design of ByobNet. I've remapped
the original weights and verified accuracies.

For GPU Efficient nets, I used the original names for the blocks since they were for the most part
the same as original residual blocks in ResNe(X)t, DarkNet, and other existing models. Note also some
changes introduced in RegNet were also present in the stem and bottleneck blocks for this model.

A significant number of different network archs can be implemented here, including variants of the
above nets that include attention.

Hacked together by / copyright Ross Wightman, 2021.
"""
  22. import math
  23. from dataclasses import dataclass, field, replace
  24. from functools import partial
  25. from typing import Tuple, List, Dict, Optional, Union, Any, Callable, Sequence, Type
  26. import torch
  27. import torch.nn as nn
  28. from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
  29. from timm.layers import (
  30. ClassifierHead,
  31. NormMlpClassifierHead,
  32. ConvNormAct,
  33. BatchNormAct2d,
  34. DropBlock2d,
  35. EvoNorm2dS0a,
  36. AttentionPool2d,
  37. RotAttentionPool2d,
  38. DropPath,
  39. calculate_drop_path_rates,
  40. AvgPool2dSame,
  41. create_conv2d,
  42. get_act_layer,
  43. get_norm_act_layer,
  44. get_attn,
  45. make_divisible,
  46. to_2tuple,
  47. )
  48. from ._builder import build_model_with_cfg
  49. from ._features import feature_take_indices
  50. from ._manipulate import named_apply, checkpoint_seq
  51. from ._registry import generate_default_cfgs, register_model
  52. __all__ = ['ByobNet', 'ByoModelCfg', 'ByoBlockCfg', 'create_byob_stem', 'create_block']
  53. @dataclass
  54. class ByoBlockCfg:
  55. """Block configuration for Bring-Your-Own-Blocks.
  56. Defines configuration for a single block or stage of blocks.
  57. """
  58. type: Union[str, nn.Module]
  59. d: int # block depth (number of block repeats in stage)
  60. c: int # number of output channels for each block in stage
  61. s: int = 2 # stride of stage (first block)
  62. gs: Optional[Union[int, Callable]] = None # group-size of blocks in stage, conv is depthwise if gs == 1
  63. br: float = 1. # bottleneck-ratio of blocks in stage
  64. # NOTE: these config items override the model cfgs that are applied to all blocks by default
  65. attn_layer: Optional[str] = None
  66. attn_kwargs: Optional[Dict[str, Any]] = None
  67. self_attn_layer: Optional[str] = None
  68. self_attn_kwargs: Optional[Dict[str, Any]] = None
  69. block_kwargs: Optional[Dict[str, Any]] = None
  70. @dataclass
  71. class ByoModelCfg:
  72. """Model configuration for Bring-Your-Own-Blocks network.
  73. Defines overall architecture configuration.
  74. """
  75. blocks: Tuple[Union[ByoBlockCfg, Tuple[ByoBlockCfg, ...]], ...]
  76. downsample: str = 'conv1x1'
  77. stem_type: str = '3x3'
  78. stem_pool: Optional[str] = 'maxpool'
  79. stem_chs: Union[int, List[int], Tuple[int, ...]] = 32
  80. width_factor: float = 1.0
  81. num_features: int = 0 # num out_channels for final conv, no final 1x1 conv if 0
  82. zero_init_last: bool = True # zero init last weight (usually bn) in residual path
  83. fixed_input_size: bool = False # model constrained to a fixed-input size / img_size must be provided on creation
  84. # layer config
  85. act_layer: str = 'relu'
  86. norm_layer: str = 'batchnorm'
  87. aa_layer: str = ''
  88. # Head config
  89. head_hidden_size: Optional[int] = None # feat dim of MLP head or AttentionPool output
  90. head_type: str = 'classifier'
  91. # Block config
  92. # NOTE: these config items will be overridden by the block cfg (per-block) if they are set there
  93. attn_layer: Optional[str] = None
  94. attn_kwargs: dict = field(default_factory=lambda: dict())
  95. self_attn_layer: Optional[str] = None
  96. self_attn_kwargs: dict = field(default_factory=lambda: dict())
  97. block_kwargs: Dict[str, Any] = field(default_factory=lambda: dict())
  98. def _rep_vgg_bcfg(d: Tuple[int, ...] = (4, 6, 16, 1), wf: Tuple[float, ...] = (1., 1., 1., 1.), groups: int = 0) -> \
  99. Tuple[ByoBlockCfg, ...]:
  100. """Create RepVGG block configuration.
  101. Args:
  102. d: Depth (number of blocks) per stage.
  103. wf: Width factor per stage.
  104. groups: Number of groups for grouped convolution.
  105. Returns:
  106. Tuple of block configurations.
  107. """
  108. c = (64, 128, 256, 512)
  109. group_size = 0
  110. if groups > 0:
  111. group_size = lambda chs, idx: chs // groups if (idx + 1) % 2 == 0 else 0
  112. bcfg = tuple([ByoBlockCfg(type='rep', d=d, c=c * wf, gs=group_size) for d, c, wf in zip(d, c, wf)])
  113. return bcfg
  114. def _mobileone_bcfg(
  115. d: Tuple[int, ...] = (2, 8, 10, 1),
  116. wf: Tuple[float, ...] = (1., 1., 1., 1.),
  117. se_blocks: Tuple[int, ...] = (),
  118. num_conv_branches: int = 1
  119. ) -> List[List[ByoBlockCfg]]:
  120. """Create MobileOne block configuration.
  121. Args:
  122. d: Depth (number of blocks) per stage.
  123. wf: Width factor per stage.
  124. se_blocks: Number of SE blocks per stage.
  125. num_conv_branches: Number of conv branches.
  126. Returns:
  127. List of block configurations per stage.
  128. """
  129. c = (64, 128, 256, 512)
  130. prev_c = min(64, c[0] * wf[0])
  131. se_blocks = se_blocks or (0,) * len(d)
  132. bcfg = []
  133. for d, c, w, se in zip(d, c, wf, se_blocks):
  134. scfg = []
  135. for i in range(d):
  136. out_c = c * w
  137. bk = dict(num_conv_branches=num_conv_branches)
  138. ak = {}
  139. if i >= d - se:
  140. ak['attn_layer'] = 'se'
  141. scfg += [ByoBlockCfg(type='one', d=1, c=prev_c, gs=1, block_kwargs=bk, **ak)] # depthwise block
  142. scfg += [ByoBlockCfg(
  143. type='one', d=1, c=out_c, gs=0, block_kwargs=dict(kernel_size=1, **bk), **ak)] # pointwise block
  144. prev_c = out_c
  145. bcfg += [scfg]
  146. return bcfg
  147. def interleave_blocks(
  148. types: Tuple[str, str],
  149. d: int,
  150. every: Union[int, List[int]] = 1,
  151. first: bool = False,
  152. **kwargs,
  153. ) -> Tuple[ByoBlockCfg, ...]:
  154. """Interleave 2 block types in stack.
  155. Args:
  156. types: Two block type names to interleave.
  157. d: Total depth of blocks.
  158. every: Interval for alternating blocks.
  159. first: Whether to start with alternate block.
  160. **kwargs: Additional block arguments.
  161. Returns:
  162. Tuple of interleaved block configurations.
  163. """
  164. assert len(types) == 2
  165. if isinstance(every, int):
  166. every = list(range(0 if first else every, d, every + 1))
  167. if not every:
  168. every = [d - 1]
  169. set(every)
  170. blocks = []
  171. for i in range(d):
  172. block_type = types[1] if i in every else types[0]
  173. blocks += [ByoBlockCfg(type=block_type, d=1, **kwargs)]
  174. return tuple(blocks)
  175. def expand_blocks_cfg(stage_blocks_cfg: Union[ByoBlockCfg, Sequence[ByoBlockCfg]]) -> List[ByoBlockCfg]:
  176. """Expand block config into individual block instances.
  177. Args:
  178. stage_blocks_cfg: Block configuration(s) for a stage.
  179. Returns:
  180. List of individual block configurations.
  181. """
  182. if not isinstance(stage_blocks_cfg, Sequence):
  183. stage_blocks_cfg = (stage_blocks_cfg,)
  184. block_cfgs = []
  185. for i, cfg in enumerate(stage_blocks_cfg):
  186. block_cfgs += [replace(cfg, d=1) for _ in range(cfg.d)]
  187. return block_cfgs
  188. def num_groups(group_size: Optional[int], channels: int) -> int:
  189. """Calculate number of groups for grouped convolution.
  190. Args:
  191. group_size: Size of each group (1 for depthwise).
  192. channels: Number of channels.
  193. Returns:
  194. Number of groups.
  195. """
  196. if not group_size: # 0 or None
  197. return 1 # normal conv with 1 group
  198. else:
  199. # NOTE group_size == 1 -> depthwise conv
  200. assert channels % group_size == 0
  201. return channels // group_size
  202. @dataclass
  203. class LayerFn:
  204. """Container for layer factory functions."""
  205. conv_norm_act: Type[nn.Module] = ConvNormAct
  206. norm_act: Type[nn.Module] = BatchNormAct2d
  207. act: Type[nn.Module] = nn.ReLU
  208. attn: Optional[Type[nn.Module]] = None
  209. self_attn: Optional[Type[nn.Module]] = None
  210. class DownsampleAvg(nn.Module):
  211. """Average pool downsampling module.
  212. AvgPool Downsampling as in 'D' ResNet variants.
  213. """
  214. def __init__(
  215. self,
  216. in_chs: int,
  217. out_chs: int,
  218. stride: int = 1,
  219. dilation: int = 1,
  220. apply_act: bool = False,
  221. layers: Optional[LayerFn] = None,
  222. device=None,
  223. dtype=None,
  224. ):
  225. """Initialize DownsampleAvg.
  226. Args:
  227. in_chs: Number of input channels.
  228. out_chs: Number of output channels.
  229. stride: Stride for downsampling.
  230. dilation: Dilation rate.
  231. apply_act: Whether to apply activation.
  232. layers: Layer factory functions.
  233. """
  234. dd = {'device': device, 'dtype': dtype}
  235. super().__init__()
  236. layers = layers or LayerFn()
  237. avg_stride = stride if dilation == 1 else 1
  238. if stride > 1 or dilation > 1:
  239. avg_pool_fn = AvgPool2dSame if avg_stride == 1 and dilation > 1 else nn.AvgPool2d
  240. self.pool = avg_pool_fn(2, avg_stride, ceil_mode=True, count_include_pad=False)
  241. else:
  242. self.pool = nn.Identity()
  243. self.conv = layers.conv_norm_act(in_chs, out_chs, 1, apply_act=apply_act, **dd)
  244. def forward(self, x: torch.Tensor) -> torch.Tensor:
  245. """Forward pass.
  246. Args:
  247. x: Input tensor.
  248. Returns:
  249. Output tensor.
  250. """
  251. return self.conv(self.pool(x))
  252. def create_shortcut(
  253. downsample_type: str,
  254. in_chs: int,
  255. out_chs: int,
  256. stride: int,
  257. dilation: Tuple[int, int],
  258. layers: LayerFn,
  259. **kwargs,
  260. ) -> Optional[nn.Module]:
  261. """Create shortcut connection for residual blocks.
  262. Args:
  263. downsample_type: Type of downsampling ('avg', 'conv1x1', or '').
  264. in_chs: Input channels.
  265. out_chs: Output channels.
  266. stride: Stride for downsampling.
  267. dilation: Dilation rates.
  268. layers: Layer factory functions.
  269. **kwargs: Additional arguments.
  270. Returns:
  271. Shortcut module or None.
  272. """
  273. assert downsample_type in ('avg', 'conv1x1', '')
  274. if in_chs != out_chs or stride != 1 or dilation[0] != dilation[1]:
  275. if not downsample_type:
  276. return None # no shortcut
  277. elif downsample_type == 'avg':
  278. return DownsampleAvg(in_chs, out_chs, stride=stride, dilation=dilation[0], **kwargs)
  279. else:
  280. return layers.conv_norm_act(in_chs, out_chs, kernel_size=1, stride=stride, dilation=dilation[0], **kwargs)
  281. else:
  282. return nn.Identity() # identity shortcut
  283. class BasicBlock(nn.Module):
  284. """ ResNet Basic Block - kxk + kxk
  285. """
  286. def __init__(
  287. self,
  288. in_chs: int,
  289. out_chs: int,
  290. kernel_size: int = 3,
  291. stride: int = 1,
  292. dilation: Tuple[int, int] = (1, 1),
  293. group_size: Optional[int] = None,
  294. bottle_ratio: float = 1.0,
  295. downsample: str = 'avg',
  296. attn_last: bool = True,
  297. linear_out: bool = False,
  298. layers: LayerFn = None,
  299. drop_block: Callable = None,
  300. drop_path_rate: float = 0.,
  301. device=None,
  302. dtype=None,
  303. ):
  304. dd = {'device': device, 'dtype': dtype}
  305. super().__init__()
  306. layers = layers or LayerFn()
  307. mid_chs = make_divisible(out_chs * bottle_ratio)
  308. groups = num_groups(group_size, mid_chs)
  309. self.shortcut = create_shortcut(
  310. downsample,
  311. in_chs,
  312. out_chs,
  313. stride=stride,
  314. dilation=dilation,
  315. apply_act=False,
  316. layers=layers,
  317. **dd,
  318. )
  319. self.conv1_kxk = layers.conv_norm_act(in_chs, mid_chs, kernel_size, stride=stride, dilation=dilation[0], **dd)
  320. self.attn = nn.Identity() if attn_last or layers.attn is None else layers.attn(mid_chs)
  321. self.conv2_kxk = layers.conv_norm_act(
  322. mid_chs,
  323. out_chs,
  324. kernel_size,
  325. dilation=dilation[1],
  326. groups=groups,
  327. drop_layer=drop_block,
  328. apply_act=False,
  329. **dd,
  330. )
  331. self.attn_last = nn.Identity() if not attn_last or layers.attn is None else layers.attn(out_chs, **dd)
  332. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  333. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  334. def init_weights(self, zero_init_last: bool = False):
  335. if zero_init_last and self.shortcut is not None and getattr(self.conv2_kxk.bn, 'weight', None) is not None:
  336. nn.init.zeros_(self.conv2_kxk.bn.weight)
  337. for attn in (self.attn, self.attn_last):
  338. if hasattr(attn, 'reset_parameters'):
  339. attn.reset_parameters()
  340. def forward(self, x):
  341. shortcut = x
  342. x = self.conv1_kxk(x)
  343. x = self.attn(x)
  344. x = self.conv2_kxk(x)
  345. x = self.attn_last(x)
  346. x = self.drop_path(x)
  347. if self.shortcut is not None:
  348. x = x + self.shortcut(shortcut)
  349. return self.act(x)
  350. class BottleneckBlock(nn.Module):
  351. """ ResNet-like Bottleneck Block - 1x1 - kxk - 1x1
  352. """
  353. def __init__(
  354. self,
  355. in_chs: int,
  356. out_chs: int,
  357. kernel_size: int = 3,
  358. stride: int = 1,
  359. dilation: Tuple[int, int] = (1, 1),
  360. bottle_ratio: float = 1.,
  361. group_size: Optional[int] = None,
  362. downsample: str = 'avg',
  363. attn_last: bool = False,
  364. linear_out: bool = False,
  365. extra_conv: bool = False,
  366. bottle_in: bool = False,
  367. layers: LayerFn = None,
  368. drop_block: Callable = None,
  369. drop_path_rate: float = 0.,
  370. device=None,
  371. dtype=None,
  372. ):
  373. dd = {'device': device, 'dtype': dtype}
  374. super().__init__()
  375. layers = layers or LayerFn()
  376. mid_chs = make_divisible((in_chs if bottle_in else out_chs) * bottle_ratio)
  377. groups = num_groups(group_size, mid_chs)
  378. self.shortcut = create_shortcut(
  379. downsample,
  380. in_chs,
  381. out_chs,
  382. stride=stride,
  383. dilation=dilation,
  384. apply_act=False,
  385. layers=layers,
  386. **dd,
  387. )
  388. self.conv1_1x1 = layers.conv_norm_act(in_chs, mid_chs, 1, **dd)
  389. self.conv2_kxk = layers.conv_norm_act(
  390. mid_chs,
  391. mid_chs,
  392. kernel_size,
  393. stride=stride,
  394. dilation=dilation[0],
  395. groups=groups,
  396. drop_layer=drop_block,
  397. **dd,
  398. )
  399. if extra_conv:
  400. self.conv2b_kxk = layers.conv_norm_act(
  401. mid_chs,
  402. mid_chs,
  403. kernel_size,
  404. dilation=dilation[1],
  405. groups=groups,
  406. **dd,
  407. )
  408. else:
  409. self.conv2b_kxk = nn.Identity()
  410. self.attn = nn.Identity() if attn_last or layers.attn is None else layers.attn(mid_chs, **dd)
  411. self.conv3_1x1 = layers.conv_norm_act(mid_chs, out_chs, 1, apply_act=False, **dd)
  412. self.attn_last = nn.Identity() if not attn_last or layers.attn is None else layers.attn(out_chs, **dd)
  413. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  414. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  415. def init_weights(self, zero_init_last: bool = False):
  416. if zero_init_last and self.shortcut is not None and getattr(self.conv3_1x1.bn, 'weight', None) is not None:
  417. nn.init.zeros_(self.conv3_1x1.bn.weight)
  418. for attn in (self.attn, self.attn_last):
  419. if hasattr(attn, 'reset_parameters'):
  420. attn.reset_parameters()
  421. def forward(self, x):
  422. shortcut = x
  423. x = self.conv1_1x1(x)
  424. x = self.conv2_kxk(x)
  425. x = self.conv2b_kxk(x)
  426. x = self.attn(x)
  427. x = self.conv3_1x1(x)
  428. x = self.attn_last(x)
  429. x = self.drop_path(x)
  430. if self.shortcut is not None:
  431. x = x + self.shortcut(shortcut)
  432. return self.act(x)
  433. class DarkBlock(nn.Module):
  434. """ DarkNet-like (1x1 + 3x3 w/ stride) block
  435. The GE-Net impl included a 1x1 + 3x3 block in their search space. It was not used in the feature models.
  436. This block is pretty much a DarkNet block (also DenseNet) hence the name. Neither DarkNet or DenseNet
  437. uses strides within the block (external 3x3 or maxpool downsampling is done in front of the block repeats).
  438. If one does want to use a lot of these blocks w/ stride, I'd recommend using the EdgeBlock (3x3 /w stride + 1x1)
  439. for more optimal compute.
  440. """
  441. def __init__(
  442. self,
  443. in_chs: int,
  444. out_chs: int,
  445. kernel_size: int = 3,
  446. stride: int = 1,
  447. dilation: Tuple[int, int] = (1, 1),
  448. bottle_ratio: float = 1.0,
  449. group_size: Optional[int] = None,
  450. downsample: str = 'avg',
  451. attn_last: bool = True,
  452. linear_out: bool = False,
  453. layers: LayerFn = None,
  454. drop_block: Callable = None,
  455. drop_path_rate: float = 0.,
  456. device=None,
  457. dtype=None,
  458. ):
  459. dd = {'device': device, 'dtype': dtype}
  460. super().__init__()
  461. layers = layers or LayerFn()
  462. mid_chs = make_divisible(out_chs * bottle_ratio)
  463. groups = num_groups(group_size, mid_chs)
  464. self.shortcut = create_shortcut(
  465. downsample,
  466. in_chs,
  467. out_chs,
  468. stride=stride,
  469. dilation=dilation,
  470. apply_act=False,
  471. layers=layers,
  472. **dd,
  473. )
  474. self.conv1_1x1 = layers.conv_norm_act(in_chs, mid_chs, 1, **dd)
  475. self.attn = nn.Identity() if attn_last or layers.attn is None else layers.attn(mid_chs, **dd)
  476. self.conv2_kxk = layers.conv_norm_act(
  477. mid_chs,
  478. out_chs,
  479. kernel_size,
  480. stride=stride,
  481. dilation=dilation[0],
  482. groups=groups,
  483. drop_layer=drop_block,
  484. apply_act=False,
  485. **dd,
  486. )
  487. self.attn_last = nn.Identity() if not attn_last or layers.attn is None else layers.attn(out_chs, **dd)
  488. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  489. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  490. def init_weights(self, zero_init_last: bool = False):
  491. if zero_init_last and self.shortcut is not None and getattr(self.conv2_kxk.bn, 'weight', None) is not None:
  492. nn.init.zeros_(self.conv2_kxk.bn.weight)
  493. for attn in (self.attn, self.attn_last):
  494. if hasattr(attn, 'reset_parameters'):
  495. attn.reset_parameters()
  496. def forward(self, x):
  497. shortcut = x
  498. x = self.conv1_1x1(x)
  499. x = self.attn(x)
  500. x = self.conv2_kxk(x)
  501. x = self.attn_last(x)
  502. x = self.drop_path(x)
  503. if self.shortcut is not None:
  504. x = x + self.shortcut(shortcut)
  505. return self.act(x)
  506. class EdgeBlock(nn.Module):
  507. """ EdgeResidual-like (3x3 + 1x1) block
  508. A two layer block like DarkBlock, but with the order of the 3x3 and 1x1 convs reversed.
  509. Very similar to the EfficientNet Edge-Residual block but this block it ends with activations, is
  510. intended to be used with either expansion or bottleneck contraction, and can use DW/group/non-grouped convs.
  511. FIXME is there a more common 3x3 + 1x1 conv block to name this after?
  512. """
  513. def __init__(
  514. self,
  515. in_chs: int,
  516. out_chs: int,
  517. kernel_size: int = 3,
  518. stride: int = 1,
  519. dilation: Tuple[int, int] = (1, 1),
  520. bottle_ratio: float = 1.0,
  521. group_size: Optional[int] = None,
  522. downsample: str = 'avg',
  523. attn_last: bool = False,
  524. linear_out: bool = False,
  525. layers: LayerFn = None,
  526. drop_block: Callable = None,
  527. drop_path_rate: float = 0.,
  528. device=None,
  529. dtype=None,
  530. ):
  531. dd = {'device': device, 'dtype': dtype}
  532. super().__init__()
  533. layers = layers or LayerFn()
  534. mid_chs = make_divisible(out_chs * bottle_ratio)
  535. groups = num_groups(group_size, mid_chs)
  536. self.shortcut = create_shortcut(
  537. downsample,
  538. in_chs,
  539. out_chs,
  540. stride=stride,
  541. dilation=dilation,
  542. apply_act=False,
  543. layers=layers,
  544. **dd,
  545. )
  546. self.conv1_kxk = layers.conv_norm_act(
  547. in_chs,
  548. mid_chs,
  549. kernel_size,
  550. stride=stride,
  551. dilation=dilation[0],
  552. groups=groups,
  553. drop_layer=drop_block,
  554. **dd,
  555. )
  556. self.attn = nn.Identity() if attn_last or layers.attn is None else layers.attn(mid_chs, **dd)
  557. self.conv2_1x1 = layers.conv_norm_act(mid_chs, out_chs, 1, apply_act=False, **dd)
  558. self.attn_last = nn.Identity() if not attn_last or layers.attn is None else layers.attn(out_chs, **dd)
  559. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  560. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  561. def init_weights(self, zero_init_last: bool = False):
  562. if zero_init_last and self.shortcut is not None and getattr(self.conv2_1x1.bn, 'weight', None) is not None:
  563. nn.init.zeros_(self.conv2_1x1.bn.weight)
  564. for attn in (self.attn, self.attn_last):
  565. if hasattr(attn, 'reset_parameters'):
  566. attn.reset_parameters()
  567. def forward(self, x):
  568. shortcut = x
  569. x = self.conv1_kxk(x)
  570. x = self.attn(x)
  571. x = self.conv2_1x1(x)
  572. x = self.attn_last(x)
  573. x = self.drop_path(x)
  574. if self.shortcut is not None:
  575. x = x + self.shortcut(shortcut)
  576. return self.act(x)
  577. class RepVggBlock(nn.Module):
  578. """ RepVGG Block.
  579. Adapted from impl at https://github.com/DingXiaoH/RepVGG
  580. """
  581. def __init__(
  582. self,
  583. in_chs: int,
  584. out_chs: int,
  585. kernel_size: int = 3,
  586. stride: int = 1,
  587. dilation: Tuple[int, int] = (1, 1),
  588. bottle_ratio: float = 1.0,
  589. group_size: Optional[int] = None,
  590. downsample: str = '',
  591. layers: LayerFn = None,
  592. drop_block: Callable = None,
  593. drop_path_rate: float = 0.,
  594. inference_mode: bool = False,
  595. device=None,
  596. dtype=None,
  597. ):
  598. dd = {'device': device, 'dtype': dtype}
  599. super().__init__()
  600. self.groups = groups = num_groups(group_size, in_chs)
  601. layers = layers or LayerFn()
  602. if inference_mode:
  603. self.reparam_conv = nn.Conv2d(
  604. in_channels=in_chs,
  605. out_channels=out_chs,
  606. kernel_size=kernel_size,
  607. stride=stride,
  608. dilation=dilation,
  609. groups=groups,
  610. bias=True,
  611. **dd,
  612. )
  613. else:
  614. self.reparam_conv = None
  615. use_ident = in_chs == out_chs and stride == 1 and dilation[0] == dilation[1]
  616. self.identity = layers.norm_act(out_chs, apply_act=False, **dd) if use_ident else None
  617. self.conv_kxk = layers.conv_norm_act(
  618. in_chs,
  619. out_chs,
  620. kernel_size,
  621. stride=stride,
  622. dilation=dilation[0],
  623. groups=groups,
  624. drop_layer=drop_block,
  625. apply_act=False,
  626. **dd,
  627. )
  628. self.conv_1x1 = layers.conv_norm_act(
  629. in_chs,
  630. out_chs,
  631. 1,
  632. stride=stride,
  633. groups=groups,
  634. apply_act=False,
  635. **dd,
  636. )
  637. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. and use_ident else nn.Identity()
  638. self.attn = nn.Identity() if layers.attn is None else layers.attn(out_chs, **dd)
  639. self.act = layers.act(inplace=True)
  640. def init_weights(self, zero_init_last: bool = False):
  641. # NOTE this init overrides that base model init with specific changes for the block type
  642. for m in self.modules():
  643. if isinstance(m, nn.BatchNorm2d):
  644. nn.init.normal_(m.weight, .1, .1)
  645. nn.init.normal_(m.bias, 0, .1)
  646. if hasattr(self.attn, 'reset_parameters'):
  647. self.attn.reset_parameters()
  648. def forward(self, x):
  649. if self.reparam_conv is not None:
  650. return self.act(self.attn(self.reparam_conv(x)))
  651. if self.identity is None:
  652. x = self.conv_1x1(x) + self.conv_kxk(x)
  653. else:
  654. identity = self.identity(x)
  655. x = self.conv_1x1(x) + self.conv_kxk(x)
  656. x = self.drop_path(x) # not in the paper / official impl, experimental
  657. x += identity
  658. x = self.attn(x) # no attn in the paper / official impl, experimental
  659. return self.act(x)
  660. def reparameterize(self):
  661. """ Following works like `RepVGG: Making VGG-style ConvNets Great Again` -
  662. https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
  663. architecture used at training time to obtain a plain CNN-like structure
  664. for inference.
  665. """
  666. if self.reparam_conv is not None:
  667. return
  668. kernel, bias = self._get_kernel_bias()
  669. self.reparam_conv = nn.Conv2d(
  670. in_channels=self.conv_kxk.conv.in_channels,
  671. out_channels=self.conv_kxk.conv.out_channels,
  672. kernel_size=self.conv_kxk.conv.kernel_size,
  673. stride=self.conv_kxk.conv.stride,
  674. padding=self.conv_kxk.conv.padding,
  675. dilation=self.conv_kxk.conv.dilation,
  676. groups=self.conv_kxk.conv.groups,
  677. bias=True,
  678. )
  679. self.reparam_conv.weight.data = kernel
  680. self.reparam_conv.bias.data = bias
  681. # Delete un-used branches
  682. for name, para in self.named_parameters():
  683. if 'reparam_conv' in name:
  684. continue
  685. para.detach_()
  686. self.__delattr__('conv_kxk')
  687. self.__delattr__('conv_1x1')
  688. self.__delattr__('identity')
  689. self.__delattr__('drop_path')
  690. def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]:
  691. """ Method to obtain re-parameterized kernel and bias.
  692. Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83
  693. """
  694. # get weights and bias of scale branch
  695. kernel_1x1 = 0
  696. bias_1x1 = 0
  697. if self.conv_1x1 is not None:
  698. kernel_1x1, bias_1x1 = self._fuse_bn_tensor(self.conv_1x1)
  699. # Pad scale branch kernel to match conv branch kernel size.
  700. pad = self.conv_kxk.conv.kernel_size[0] // 2
  701. kernel_1x1 = torch.nn.functional.pad(kernel_1x1, [pad, pad, pad, pad])
  702. # get weights and bias of skip branch
  703. kernel_identity = 0
  704. bias_identity = 0
  705. if self.identity is not None:
  706. kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
  707. # get weights and bias of conv branches
  708. kernel_conv, bias_conv = self._fuse_bn_tensor(self.conv_kxk)
  709. kernel_final = kernel_conv + kernel_1x1 + kernel_identity
  710. bias_final = bias_conv + bias_1x1 + bias_identity
  711. return kernel_final, bias_final
  712. def _fuse_bn_tensor(self, branch) -> Tuple[torch.Tensor, torch.Tensor]:
  713. """ Method to fuse batchnorm layer with preceding conv layer.
  714. Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95
  715. """
  716. if isinstance(branch, ConvNormAct):
  717. kernel = branch.conv.weight
  718. running_mean = branch.bn.running_mean
  719. running_var = branch.bn.running_var
  720. gamma = branch.bn.weight
  721. beta = branch.bn.bias
  722. eps = branch.bn.eps
  723. else:
  724. assert isinstance(branch, nn.BatchNorm2d)
  725. if not hasattr(self, 'id_tensor'):
  726. in_chs = self.conv_kxk.conv.in_channels
  727. input_dim = in_chs // self.groups
  728. kernel_size = self.conv_kxk.conv.kernel_size
  729. kernel_value = torch.zeros_like(self.conv_kxk.conv.weight)
  730. for i in range(in_chs):
  731. kernel_value[i, i % input_dim, kernel_size[0] // 2, kernel_size[1] // 2] = 1
  732. self.id_tensor = kernel_value
  733. kernel = self.id_tensor
  734. running_mean = branch.running_mean
  735. running_var = branch.running_var
  736. gamma = branch.weight
  737. beta = branch.bias
  738. eps = branch.eps
  739. std = (running_var + eps).sqrt()
  740. t = (gamma / std).reshape(-1, 1, 1, 1)
  741. return kernel * t, beta - running_mean * gamma / std
  742. class MobileOneBlock(nn.Module):
  743. """ MobileOne building block.
  744. This block has a multi-branched architecture at train-time
  745. and plain-CNN style architecture at inference time
  746. For more details, please refer to our paper:
  747. `An Improved One millisecond Mobile Backbone` -
  748. https://arxiv.org/pdf/2206.04040.pdf
  749. """
  750. def __init__(
  751. self,
  752. in_chs: int,
  753. out_chs: int,
  754. kernel_size: int = 3,
  755. stride: int = 1,
  756. dilation: Tuple[int, int] = (1, 1),
  757. bottle_ratio: float = 1.0, # unused
  758. group_size: Optional[int] = None,
  759. downsample: str = '', # unused
  760. inference_mode: bool = False,
  761. num_conv_branches: int = 1,
  762. layers: LayerFn = None,
  763. drop_block: Callable = None,
  764. drop_path_rate: float = 0.,
  765. device=None,
  766. dtype=None,
  767. ) -> None:
  768. """ Construct a MobileOneBlock module.
  769. """
  770. dd = {'device': device, 'dtype': dtype}
  771. super().__init__()
  772. self.num_conv_branches = num_conv_branches
  773. self.groups = groups = num_groups(group_size, in_chs)
  774. layers = layers or LayerFn()
  775. if inference_mode:
  776. self.reparam_conv = nn.Conv2d(
  777. in_channels=in_chs,
  778. out_channels=out_chs,
  779. kernel_size=kernel_size,
  780. stride=stride,
  781. dilation=dilation,
  782. groups=groups,
  783. bias=True,
  784. **dd,
  785. )
  786. else:
  787. self.reparam_conv = None
  788. # Re-parameterizable skip connection
  789. use_ident = in_chs == out_chs and stride == 1 and dilation[0] == dilation[1]
  790. self.identity = layers.norm_act(out_chs, apply_act=False, **dd) if use_ident else None
  791. # Re-parameterizable conv branches
  792. convs = []
  793. for _ in range(self.num_conv_branches):
  794. convs.append(layers.conv_norm_act(
  795. in_chs,
  796. out_chs,
  797. kernel_size=kernel_size,
  798. stride=stride,
  799. groups=groups,
  800. apply_act=False,
  801. **dd,
  802. ))
  803. self.conv_kxk = nn.ModuleList(convs)
  804. # Re-parameterizable scale branch
  805. self.conv_scale = None
  806. if kernel_size > 1:
  807. self.conv_scale = layers.conv_norm_act(
  808. in_chs,
  809. out_chs,
  810. kernel_size=1,
  811. stride=stride,
  812. groups=groups,
  813. apply_act=False,
  814. **dd,
  815. )
  816. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. and use_ident else nn.Identity()
  817. self.attn = nn.Identity() if layers.attn is None else layers.attn(out_chs, **dd)
  818. self.act = layers.act(inplace=True)
  819. def forward(self, x: torch.Tensor) -> torch.Tensor:
  820. """ Apply forward pass. """
  821. # Inference mode forward pass.
  822. if self.reparam_conv is not None:
  823. return self.act(self.attn(self.reparam_conv(x)))
  824. # Multi-branched train-time forward pass.
  825. # Skip branch output
  826. identity_out = 0
  827. if self.identity is not None:
  828. identity_out = self.identity(x)
  829. # Scale branch output
  830. scale_out = 0
  831. if self.conv_scale is not None:
  832. scale_out = self.conv_scale(x)
  833. # Other branches
  834. out = scale_out
  835. for ck in self.conv_kxk:
  836. out += ck(x)
  837. out = self.drop_path(out)
  838. out += identity_out
  839. return self.act(self.attn(out))
  840. def reparameterize(self):
  841. """ Following works like `RepVGG: Making VGG-style ConvNets Great Again` -
  842. https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
  843. architecture used at training time to obtain a plain CNN-like structure
  844. for inference.
  845. """
  846. if self.reparam_conv is not None:
  847. return
  848. kernel, bias = self._get_kernel_bias()
  849. self.reparam_conv = nn.Conv2d(
  850. in_channels=self.conv_kxk[0].conv.in_channels,
  851. out_channels=self.conv_kxk[0].conv.out_channels,
  852. kernel_size=self.conv_kxk[0].conv.kernel_size,
  853. stride=self.conv_kxk[0].conv.stride,
  854. padding=self.conv_kxk[0].conv.padding,
  855. dilation=self.conv_kxk[0].conv.dilation,
  856. groups=self.conv_kxk[0].conv.groups,
  857. bias=True)
  858. self.reparam_conv.weight.data = kernel
  859. self.reparam_conv.bias.data = bias
  860. # Delete un-used branches
  861. for name, para in self.named_parameters():
  862. if 'reparam_conv' in name:
  863. continue
  864. para.detach_()
  865. self.__delattr__('conv_kxk')
  866. self.__delattr__('conv_scale')
  867. self.__delattr__('identity')
  868. self.__delattr__('drop_path')
  869. def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]:
  870. """ Method to obtain re-parameterized kernel and bias.
  871. Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83
  872. """
  873. # get weights and bias of scale branch
  874. kernel_scale = 0
  875. bias_scale = 0
  876. if self.conv_scale is not None:
  877. kernel_scale, bias_scale = self._fuse_bn_tensor(self.conv_scale)
  878. # Pad scale branch kernel to match conv branch kernel size.
  879. pad = self.conv_kxk[0].conv.kernel_size[0] // 2
  880. kernel_scale = torch.nn.functional.pad(kernel_scale, [pad, pad, pad, pad])
  881. # get weights and bias of skip branch
  882. kernel_identity = 0
  883. bias_identity = 0
  884. if self.identity is not None:
  885. kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
  886. # get weights and bias of conv branches
  887. kernel_conv = 0
  888. bias_conv = 0
  889. for ix in range(self.num_conv_branches):
  890. _kernel, _bias = self._fuse_bn_tensor(self.conv_kxk[ix])
  891. kernel_conv += _kernel
  892. bias_conv += _bias
  893. kernel_final = kernel_conv + kernel_scale + kernel_identity
  894. bias_final = bias_conv + bias_scale + bias_identity
  895. return kernel_final, bias_final
  896. def _fuse_bn_tensor(self, branch) -> Tuple[torch.Tensor, torch.Tensor]:
  897. """ Method to fuse batchnorm layer with preceding conv layer.
  898. Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95
  899. """
  900. if isinstance(branch, ConvNormAct):
  901. kernel = branch.conv.weight
  902. running_mean = branch.bn.running_mean
  903. running_var = branch.bn.running_var
  904. gamma = branch.bn.weight
  905. beta = branch.bn.bias
  906. eps = branch.bn.eps
  907. else:
  908. assert isinstance(branch, nn.BatchNorm2d)
  909. if not hasattr(self, 'id_tensor'):
  910. in_chs = self.conv_kxk[0].conv.in_channels
  911. input_dim = in_chs // self.groups
  912. kernel_size = self.conv_kxk[0].conv.kernel_size
  913. kernel_value = torch.zeros_like(self.conv_kxk[0].conv.weight)
  914. for i in range(in_chs):
  915. kernel_value[i, i % input_dim, kernel_size[0] // 2, kernel_size[1] // 2] = 1
  916. self.id_tensor = kernel_value
  917. kernel = self.id_tensor
  918. running_mean = branch.running_mean
  919. running_var = branch.running_var
  920. gamma = branch.weight
  921. beta = branch.bias
  922. eps = branch.eps
  923. std = (running_var + eps).sqrt()
  924. t = (gamma / std).reshape(-1, 1, 1, 1)
  925. return kernel * t, beta - running_mean * gamma / std
  926. class SelfAttnBlock(nn.Module):
  927. """ ResNet-like Bottleneck Block - 1x1 - optional kxk - self attn - 1x1
  928. """
  929. def __init__(
  930. self,
  931. in_chs: int,
  932. out_chs: int,
  933. kernel_size: int = 3,
  934. stride: int = 1,
  935. dilation: Tuple[int, int] = (1, 1),
  936. bottle_ratio: float = 1.,
  937. group_size: Optional[int] = None,
  938. downsample: str = 'avg',
  939. extra_conv: bool = False,
  940. linear_out: bool = False,
  941. bottle_in: bool = False,
  942. post_attn_na: bool = True,
  943. feat_size: Optional[Tuple[int, int]] = None,
  944. layers: LayerFn = None,
  945. drop_block: Callable = None,
  946. drop_path_rate: float = 0.,
  947. device=None,
  948. dtype=None,
  949. ):
  950. dd = {'device': device, 'dtype': dtype}
  951. super().__init__()
  952. assert layers is not None
  953. mid_chs = make_divisible((in_chs if bottle_in else out_chs) * bottle_ratio)
  954. groups = num_groups(group_size, mid_chs)
  955. self.shortcut = create_shortcut(
  956. downsample,
  957. in_chs,
  958. out_chs,
  959. stride=stride,
  960. dilation=dilation,
  961. apply_act=False,
  962. layers=layers,
  963. **dd,
  964. )
  965. self.conv1_1x1 = layers.conv_norm_act(in_chs, mid_chs, 1, **dd)
  966. if extra_conv:
  967. self.conv2_kxk = layers.conv_norm_act(
  968. mid_chs,
  969. mid_chs,
  970. kernel_size,
  971. stride=stride,
  972. dilation=dilation[0],
  973. groups=groups,
  974. drop_layer=drop_block,
  975. **dd,
  976. )
  977. stride = 1 # striding done via conv if enabled
  978. else:
  979. self.conv2_kxk = nn.Identity()
  980. opt_kwargs = {} if feat_size is None else dict(feat_size=feat_size)
  981. # FIXME need to dilate self attn to have dilated network support, moop moop
  982. self.self_attn = layers.self_attn(mid_chs, stride=stride, **opt_kwargs, **dd)
  983. self.post_attn = layers.norm_act(mid_chs, **dd) if post_attn_na else nn.Identity()
  984. self.conv3_1x1 = layers.conv_norm_act(mid_chs, out_chs, 1, apply_act=False, **dd)
  985. self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
  986. self.act = nn.Identity() if linear_out else layers.act(inplace=True)
  987. def init_weights(self, zero_init_last: bool = False):
  988. if zero_init_last and self.shortcut is not None and getattr(self.conv3_1x1.bn, 'weight', None) is not None:
  989. nn.init.zeros_(self.conv3_1x1.bn.weight)
  990. if hasattr(self.self_attn, 'reset_parameters'):
  991. self.self_attn.reset_parameters()
  992. def forward(self, x):
  993. shortcut = x
  994. x = self.conv1_1x1(x)
  995. x = self.conv2_kxk(x)
  996. x = self.self_attn(x)
  997. x = self.post_attn(x)
  998. x = self.conv3_1x1(x)
  999. x = self.drop_path(x)
  1000. if self.shortcut is not None:
  1001. x = x + self.shortcut(shortcut)
  1002. return self.act(x)
  1003. _block_registry = dict(
  1004. basic=BasicBlock,
  1005. bottle=BottleneckBlock,
  1006. dark=DarkBlock,
  1007. edge=EdgeBlock,
  1008. rep=RepVggBlock,
  1009. one=MobileOneBlock,
  1010. self_attn=SelfAttnBlock,
  1011. )
  1012. def register_block(block_type: str, block_fn: nn.Module):
  1013. _block_registry[block_type] = block_fn
  1014. def create_block(block: Union[str, nn.Module], **kwargs):
  1015. if isinstance(block, (nn.Module, partial)):
  1016. return block(**kwargs)
  1017. assert block in _block_registry, f'Unknown block type ({block}'
  1018. return _block_registry[block](**kwargs)
  1019. class Stem(nn.Sequential):
  1020. def __init__(
  1021. self,
  1022. in_chs: int,
  1023. out_chs: Union[int, List[int], Tuple[int, ...]],
  1024. kernel_size: int = 3,
  1025. stride: int = 4,
  1026. pool: str = 'maxpool',
  1027. num_rep: int = 3,
  1028. num_act: Optional[int] = None,
  1029. chs_decay: float = 0.5,
  1030. layers: LayerFn = None,
  1031. device=None,
  1032. dtype=None,
  1033. ):
  1034. dd = {'device': device, 'dtype': dtype}
  1035. super().__init__()
  1036. assert stride in (2, 4)
  1037. layers = layers or LayerFn()
  1038. if isinstance(out_chs, (list, tuple)):
  1039. num_rep = len(out_chs)
  1040. stem_chs = out_chs
  1041. else:
  1042. stem_chs = [round(out_chs * chs_decay ** i) for i in range(num_rep)][::-1]
  1043. self.stride = stride
  1044. self.feature_info = [] # track intermediate features
  1045. prev_feat = ''
  1046. stem_strides = [2] + [1] * (num_rep - 1)
  1047. if stride == 4 and not pool:
  1048. # set last conv in stack to be strided if stride == 4 and no pooling layer
  1049. stem_strides[-1] = 2
  1050. num_act = num_rep if num_act is None else num_act
  1051. # if num_act < num_rep, first convs in stack won't have bn + act
  1052. stem_norm_acts = [False] * (num_rep - num_act) + [True] * num_act
  1053. prev_chs = in_chs
  1054. curr_stride = 1
  1055. last_feat_idx = -1
  1056. for i, (ch, s, na) in enumerate(zip(stem_chs, stem_strides, stem_norm_acts)):
  1057. layer_fn = layers.conv_norm_act if na else create_conv2d
  1058. conv_name = f'conv{i + 1}'
  1059. if i > 0 and s > 1:
  1060. last_feat_idx = i - 1
  1061. self.feature_info.append(dict(num_chs=prev_chs, reduction=curr_stride, module=prev_feat, stage=0))
  1062. self.add_module(conv_name, layer_fn(prev_chs, ch, kernel_size=kernel_size, stride=s, **dd))
  1063. prev_chs = ch
  1064. curr_stride *= s
  1065. prev_feat = conv_name
  1066. if pool:
  1067. pool = pool.lower()
  1068. assert pool in ('max', 'maxpool', 'avg', 'avgpool', 'max2', 'avg2')
  1069. last_feat_idx = i
  1070. self.feature_info.append(dict(num_chs=prev_chs, reduction=curr_stride, module=prev_feat, stage=0))
  1071. if pool == 'max2':
  1072. self.add_module('pool', nn.MaxPool2d(2))
  1073. elif pool == 'avg2':
  1074. self.add_module('pool', nn.AvgPool2d(2))
  1075. elif 'max' in pool:
  1076. self.add_module('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
  1077. elif 'avg' in pool:
  1078. self.add_module('pool', nn.AvgPool2d(kernel_size=3, stride=2, padding=1, count_include_pad=False))
  1079. curr_stride *= 2
  1080. prev_feat = 'pool'
  1081. self.last_feat_idx = last_feat_idx if last_feat_idx >= 0 else None
  1082. self.feature_info.append(dict(num_chs=prev_chs, reduction=curr_stride, module=prev_feat, stage=0))
  1083. assert curr_stride == stride
  1084. def forward_intermediates(self, x) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
  1085. intermediate: Optional[torch.Tensor] = None
  1086. for i, m in enumerate(self):
  1087. x = m(x)
  1088. if self.last_feat_idx is not None and i == self.last_feat_idx:
  1089. intermediate = x
  1090. return x, intermediate
  1091. def create_byob_stem(
  1092. in_chs: int,
  1093. out_chs: int,
  1094. stem_type: str = '',
  1095. pool_type: str = '',
  1096. feat_prefix: str = 'stem',
  1097. layers: LayerFn = None,
  1098. device=None,
  1099. dtype=None,
  1100. ):
  1101. dd = {'device': device, 'dtype': dtype}
  1102. layers = layers or LayerFn()
  1103. assert stem_type in ('', 'quad', 'quad2', 'tiered', 'deep', 'rep', 'one', '7x7', '3x3')
  1104. if 'quad' in stem_type:
  1105. # based on NFNet stem, stack of 4 3x3 convs
  1106. num_act = 2 if 'quad2' in stem_type else None
  1107. stem = Stem(in_chs, out_chs, num_rep=4, num_act=num_act, pool=pool_type, layers=layers, **dd)
  1108. elif 'tiered' in stem_type:
  1109. # 3x3 stack of 3 convs as in my ResNet-T
  1110. stem = Stem(in_chs, (3 * out_chs // 8, out_chs // 2, out_chs), pool=pool_type, layers=layers, **dd)
  1111. elif 'deep' in stem_type:
  1112. # 3x3 stack of 3 convs as in ResNet-D
  1113. stem = Stem(in_chs, out_chs, num_rep=3, chs_decay=1.0, pool=pool_type, layers=layers, **dd)
  1114. elif 'rep' in stem_type:
  1115. stem = RepVggBlock(in_chs, out_chs, stride=2, layers=layers, **dd)
  1116. elif 'one' in stem_type:
  1117. stem = MobileOneBlock(in_chs, out_chs, kernel_size=3, stride=2, layers=layers, **dd)
  1118. elif '7x7' in stem_type:
  1119. # 7x7 stem conv as in ResNet
  1120. if pool_type:
  1121. stem = Stem(in_chs, out_chs, 7, num_rep=1, pool=pool_type, layers=layers, **dd)
  1122. else:
  1123. stem = layers.conv_norm_act(in_chs, out_chs, 7, stride=2, **dd)
  1124. else:
  1125. if isinstance(out_chs, (tuple, list)):
  1126. stem = Stem(in_chs, out_chs, 3, pool=pool_type, layers=layers, **dd)
  1127. else:
  1128. # 3x3 stem conv as in RegNet is the default
  1129. if pool_type:
  1130. stem = Stem(in_chs, out_chs, 3, num_rep=1, pool=pool_type, layers=layers, **dd)
  1131. else:
  1132. stem = layers.conv_norm_act(in_chs, out_chs, 3, stride=2, **dd)
  1133. if isinstance(stem, Stem):
  1134. feature_info = [dict(f, module='.'.join([feat_prefix, f['module']])) for f in stem.feature_info]
  1135. else:
  1136. feature_info = [dict(num_chs=out_chs, reduction=2, module=feat_prefix, stage=0)]
  1137. return stem, feature_info
  1138. def reduce_feat_size(feat_size, stride=2):
  1139. return None if feat_size is None else tuple([s // stride for s in feat_size])
  1140. def override_kwargs(block_kwargs, model_kwargs):
  1141. """ Override model level attn/self-attn/block kwargs w/ block level
  1142. NOTE: kwargs are NOT merged across levels, block_kwargs will fully replace model_kwargs
  1143. for the block if set to anything that isn't None.
  1144. i.e. an empty block_kwargs dict will remove kwargs set at model level for that block
  1145. """
  1146. out_kwargs = block_kwargs if block_kwargs is not None else model_kwargs
  1147. return out_kwargs or {} # make sure None isn't returned
  1148. def update_block_kwargs(block_kwargs: Dict[str, Any], block_cfg: ByoBlockCfg, model_cfg: ByoModelCfg, ):
  1149. layer_fns = block_kwargs['layers']
  1150. # override attn layer / args with block local config
  1151. attn_set = block_cfg.attn_layer is not None
  1152. if attn_set or block_cfg.attn_kwargs is not None:
  1153. # override attn layer config
  1154. if attn_set and not block_cfg.attn_layer:
  1155. # empty string for attn_layer type will disable attn for this block
  1156. attn_layer = None
  1157. else:
  1158. attn_kwargs = override_kwargs(block_cfg.attn_kwargs, model_cfg.attn_kwargs)
  1159. attn_layer = block_cfg.attn_layer or model_cfg.attn_layer
  1160. attn_layer = partial(get_attn(attn_layer), **attn_kwargs) if attn_layer is not None else None
  1161. layer_fns = replace(layer_fns, attn=attn_layer)
  1162. # override self-attn layer / args with block local cfg
  1163. self_attn_set = block_cfg.self_attn_layer is not None
  1164. if self_attn_set or block_cfg.self_attn_kwargs is not None:
  1165. # override attn layer config
  1166. if self_attn_set and not block_cfg.self_attn_layer: # attn_layer == ''
  1167. # empty string for self_attn_layer type will disable attn for this block
  1168. self_attn_layer = None
  1169. else:
  1170. self_attn_kwargs = override_kwargs(block_cfg.self_attn_kwargs, model_cfg.self_attn_kwargs)
  1171. self_attn_layer = block_cfg.self_attn_layer or model_cfg.self_attn_layer
  1172. self_attn_layer = partial(get_attn(self_attn_layer), **self_attn_kwargs) \
  1173. if self_attn_layer is not None else None
  1174. layer_fns = replace(layer_fns, self_attn=self_attn_layer)
  1175. block_kwargs['layers'] = layer_fns
  1176. # add additional block_kwargs specified in block_cfg or model_cfg, precedence to block if set
  1177. block_kwargs.update(override_kwargs(block_cfg.block_kwargs, model_cfg.block_kwargs))
  1178. def drop_blocks(
  1179. drop_prob: float = 0.,
  1180. block_size: int = 3,
  1181. num_stages: int = 4,
  1182. ) -> List[Optional[partial]]:
  1183. """Create DropBlock layer partials for each stage.
  1184. DropBlock is applied to the last two stages only, following common practice.
  1185. The block_size specifies the size for the final stage; the second-to-last
  1186. stage uses a larger block size scaled to account for 2x larger feature maps.
  1187. Args:
  1188. drop_prob: Drop probability for DropBlock.
  1189. block_size: Block size for the final stage. Second-to-last stage
  1190. uses `block_size * 2 - 1` to scale with feature map size.
  1191. num_stages: Number of stages in the model.
  1192. Returns:
  1193. List of DropBlock partial instances or None for each stage.
  1194. """
  1195. assert num_stages >= 2
  1196. dbs = [None] * num_stages
  1197. if drop_prob:
  1198. # Scale block size for second-to-last stage (2x larger feature maps)
  1199. dbs[-2] = partial(DropBlock2d, drop_prob=drop_prob, block_size=block_size * 2 - 1, gamma_scale=0.25)
  1200. dbs[-1] = partial(DropBlock2d, drop_prob=drop_prob, block_size=block_size, gamma_scale=1.00)
  1201. return dbs
  1202. def create_byob_stages(
  1203. cfg: ByoModelCfg,
  1204. drop_path_rate: float,
  1205. output_stride: int,
  1206. stem_feat: Dict[str, Any],
  1207. drop_block_rate: float = 0.,
  1208. drop_block_size: int = 3,
  1209. feat_size: Optional[int] = None,
  1210. layers: Optional[LayerFn] = None,
  1211. block_kwargs_fn: Optional[Callable] = update_block_kwargs,
  1212. device=None,
  1213. dtype=None,
  1214. ):
  1215. layers = layers or LayerFn()
  1216. feature_info = []
  1217. block_cfgs = [expand_blocks_cfg(s) for s in cfg.blocks]
  1218. num_stages = len(block_cfgs)
  1219. depths = [sum([bc.d for bc in stage_bcs]) for stage_bcs in block_cfgs]
  1220. dpr = calculate_drop_path_rates(drop_path_rate, depths, stagewise=True)
  1221. dbs = drop_blocks(drop_block_rate, drop_block_size, num_stages)
  1222. dilation = 1
  1223. net_stride = stem_feat['reduction']
  1224. prev_chs = stem_feat['num_chs']
  1225. prev_feat = stem_feat
  1226. stages = []
  1227. for stage_idx, stage_block_cfgs in enumerate(block_cfgs):
  1228. stride = stage_block_cfgs[0].s
  1229. if stride != 1 and prev_feat:
  1230. feature_info.append(prev_feat)
  1231. if net_stride >= output_stride and stride > 1:
  1232. dilation *= stride
  1233. stride = 1
  1234. net_stride *= stride
  1235. first_dilation = 1 if dilation in (1, 2) else 2
  1236. blocks = []
  1237. for block_idx, block_cfg in enumerate(stage_block_cfgs):
  1238. out_chs = make_divisible(block_cfg.c * cfg.width_factor)
  1239. group_size = block_cfg.gs
  1240. if isinstance(group_size, Callable):
  1241. group_size = group_size(out_chs, block_idx)
  1242. block_kwargs = dict( # Blocks used in this model must accept these arguments
  1243. in_chs=prev_chs,
  1244. out_chs=out_chs,
  1245. stride=stride if block_idx == 0 else 1,
  1246. dilation=(first_dilation, dilation),
  1247. group_size=group_size,
  1248. bottle_ratio=block_cfg.br,
  1249. downsample=cfg.downsample,
  1250. drop_block=dbs[stage_idx],
  1251. drop_path_rate=dpr[stage_idx][block_idx],
  1252. layers=layers,
  1253. device=device,
  1254. dtype=dtype,
  1255. )
  1256. if block_cfg.type in ('self_attn',):
  1257. # add feat_size arg for blocks that support/need it
  1258. block_kwargs['feat_size'] = feat_size
  1259. block_kwargs_fn(block_kwargs, block_cfg=block_cfg, model_cfg=cfg)
  1260. blocks += [create_block(block_cfg.type, **block_kwargs)]
  1261. first_dilation = dilation
  1262. prev_chs = out_chs
  1263. if stride > 1 and block_idx == 0:
  1264. feat_size = reduce_feat_size(feat_size, stride)
  1265. stages += [nn.Sequential(*blocks)]
  1266. prev_feat = dict(num_chs=prev_chs, reduction=net_stride, module=f'stages.{stage_idx}', stage=stage_idx + 1)
  1267. feature_info.append(prev_feat)
  1268. return nn.Sequential(*stages), feature_info, feat_size
  1269. def get_layer_fns(cfg: ByoModelCfg, allow_aa: bool = True):
  1270. act = get_act_layer(cfg.act_layer)
  1271. norm_act = get_norm_act_layer(norm_layer=cfg.norm_layer, act_layer=act)
  1272. if cfg.aa_layer and allow_aa:
  1273. conv_norm_act = partial(ConvNormAct, norm_layer=cfg.norm_layer, act_layer=act, aa_layer=cfg.aa_layer)
  1274. else:
  1275. conv_norm_act = partial(ConvNormAct, norm_layer=cfg.norm_layer, act_layer=act)
  1276. attn = partial(get_attn(cfg.attn_layer), **cfg.attn_kwargs) if cfg.attn_layer else None
  1277. self_attn = partial(get_attn(cfg.self_attn_layer), **cfg.self_attn_kwargs) if cfg.self_attn_layer else None
  1278. layer_fn = LayerFn(conv_norm_act=conv_norm_act, norm_act=norm_act, act=act, attn=attn, self_attn=self_attn)
  1279. return layer_fn
  1280. class ByobNet(nn.Module):
  1281. """Bring-your-own-blocks Network.
  1282. A flexible network backbone that allows building model stem + blocks via
  1283. dataclass cfg definition w/ factory functions for module instantiation.
  1284. Current assumption is that both stem and blocks are in conv-bn-act order (w/ block ending in act).
  1285. """
  1286. def __init__(
  1287. self,
  1288. cfg: ByoModelCfg,
  1289. num_classes: int = 1000,
  1290. in_chans: int = 3,
  1291. global_pool: Optional[str] = None,
  1292. output_stride: int = 32,
  1293. img_size: Optional[Union[int, Tuple[int, int]]] = None,
  1294. drop_rate: float = 0.,
  1295. drop_block_rate: float = 0.,
  1296. drop_block_size: int = 3,
  1297. drop_path_rate: float = 0.,
  1298. zero_init_last: bool = True,
  1299. device=None,
  1300. dtype=None,
  1301. **kwargs,
  1302. ):
  1303. """
  1304. Args:
  1305. cfg: Model architecture configuration.
  1306. num_classes: Number of classifier classes.
  1307. in_chans: Number of input channels.
  1308. global_pool: Global pooling type.
  1309. output_stride: Output stride of network, one of (8, 16, 32).
  1310. img_size: Image size for fixed image size models (i.e. self-attn).
  1311. drop_rate: Classifier dropout rate.
  1312. drop_block_rate: DropBlock drop rate.
  1313. drop_block_size: DropBlock block size for final stage (scales up for earlier stages).
  1314. drop_path_rate: Stochastic depth drop-path rate.
  1315. zero_init_last: Zero-init last weight of residual path.
  1316. **kwargs: Extra kwargs overlayed onto cfg.
  1317. """
  1318. super().__init__()
  1319. dd = {'device': device, 'dtype': dtype}
  1320. self.num_classes = num_classes
  1321. self.in_chans = in_chans
  1322. self.drop_rate = drop_rate
  1323. self.grad_checkpointing = False
  1324. cfg = replace(cfg, **kwargs) # overlay kwargs onto cfg
  1325. stem_layers = get_layer_fns(cfg, allow_aa=False) # keep aa off for stem-layers
  1326. stage_layers = get_layer_fns(cfg)
  1327. if cfg.fixed_input_size:
  1328. assert img_size is not None, 'img_size argument is required for fixed input size model'
  1329. feat_size = to_2tuple(img_size) if img_size is not None else None
  1330. self.feature_info = []
  1331. if isinstance(cfg.stem_chs, (list, tuple)):
  1332. stem_chs = [int(round(c * cfg.width_factor)) for c in cfg.stem_chs]
  1333. else:
  1334. stem_chs = int(round((cfg.stem_chs or cfg.blocks[0].c) * cfg.width_factor))
  1335. self.stem, stem_feat = create_byob_stem(
  1336. in_chs=in_chans,
  1337. out_chs=stem_chs,
  1338. stem_type=cfg.stem_type,
  1339. pool_type=cfg.stem_pool,
  1340. layers=stem_layers,
  1341. **dd,
  1342. )
  1343. self.feature_info.extend(stem_feat[:-1])
  1344. feat_size = reduce_feat_size(feat_size, stride=stem_feat[-1]['reduction'])
  1345. self.stages, stage_feat, feat_size = create_byob_stages(
  1346. cfg,
  1347. drop_path_rate,
  1348. output_stride,
  1349. stem_feat[-1],
  1350. drop_block_rate=drop_block_rate,
  1351. drop_block_size=drop_block_size,
  1352. layers=stage_layers,
  1353. feat_size=feat_size,
  1354. **dd,
  1355. )
  1356. self.feature_info.extend(stage_feat[:-1])
  1357. reduction = stage_feat[-1]['reduction']
  1358. prev_chs = stage_feat[-1]['num_chs']
  1359. if cfg.num_features:
  1360. self.num_features = int(round(cfg.width_factor * cfg.num_features))
  1361. self.final_conv = stage_layers.conv_norm_act(prev_chs, self.num_features, 1, **dd)
  1362. else:
  1363. self.num_features = prev_chs
  1364. self.final_conv = nn.Identity()
  1365. self.feature_info += [
  1366. dict(num_chs=self.num_features, reduction=reduction, module='final_conv', stage=len(self.stages))]
  1367. self.stage_ends = [f['stage'] for f in self.feature_info]
  1368. self.head_hidden_size = self.num_features
  1369. assert cfg.head_type in ('', 'classifier', 'mlp', 'attn_abs', 'attn_rot')
  1370. if cfg.head_type == 'mlp':
  1371. if global_pool is None:
  1372. global_pool = 'avg'
  1373. self.head = NormMlpClassifierHead(
  1374. self.num_features,
  1375. num_classes,
  1376. hidden_size=cfg.head_hidden_size,
  1377. pool_type=global_pool,
  1378. norm_layer=cfg.norm_layer,
  1379. act_layer=cfg.act_layer,
  1380. drop_rate=self.drop_rate,
  1381. **dd,
  1382. )
  1383. self.head_hidden_size = self.head.hidden_size
  1384. elif cfg.head_type == 'attn_abs':
  1385. if global_pool is None:
  1386. global_pool = 'token'
  1387. assert global_pool in ('', 'token')
  1388. self.head = AttentionPool2d(
  1389. self.num_features,
  1390. embed_dim=cfg.head_hidden_size,
  1391. out_features=num_classes,
  1392. feat_size=feat_size,
  1393. pool_type=global_pool,
  1394. drop_rate=self.drop_rate,
  1395. qkv_separate=True,
  1396. **dd,
  1397. )
  1398. self.head_hidden_size = self.head.embed_dim
  1399. elif cfg.head_type == 'attn_rot':
  1400. if global_pool is None:
  1401. global_pool = 'token'
  1402. assert global_pool in ('', 'token')
  1403. self.head = RotAttentionPool2d(
  1404. self.num_features,
  1405. embed_dim=cfg.head_hidden_size,
  1406. out_features=num_classes,
  1407. ref_feat_size=feat_size,
  1408. pool_type=global_pool,
  1409. drop_rate=self.drop_rate,
  1410. qkv_separate=True,
  1411. **dd,
  1412. )
  1413. self.head_hidden_size = self.head.embed_dim
  1414. else:
  1415. if global_pool is None:
  1416. global_pool = 'avg'
  1417. assert cfg.head_hidden_size is None
  1418. self.head = ClassifierHead(
  1419. self.num_features,
  1420. num_classes,
  1421. pool_type=global_pool,
  1422. drop_rate=self.drop_rate,
  1423. **dd,
  1424. )
  1425. self.global_pool = global_pool
  1426. # init weights
  1427. named_apply(partial(_init_weights, zero_init_last=zero_init_last), self)
  1428. @torch.jit.ignore
  1429. def group_matcher(self, coarse: bool = False) -> Dict[str, Any]:
  1430. """Group matcher for parameter groups.
  1431. Args:
  1432. coarse: Whether to use coarse grouping.
  1433. Returns:
  1434. Dictionary mapping group names to patterns.
  1435. """
  1436. matcher = dict(
  1437. stem=r'^stem',
  1438. blocks=[
  1439. (r'^stages\.(\d+)' if coarse else r'^stages\.(\d+)\.(\d+)', None),
  1440. (r'^final_conv', (99999,))
  1441. ]
  1442. )
  1443. return matcher
  1444. @torch.jit.ignore
  1445. def set_grad_checkpointing(self, enable: bool = True) -> None:
  1446. """Enable or disable gradient checkpointing.
  1447. Args:
  1448. enable: Whether to enable gradient checkpointing.
  1449. """
  1450. self.grad_checkpointing = enable
  1451. @torch.jit.ignore
  1452. def get_classifier(self) -> nn.Module:
  1453. """Get classifier module.
  1454. Returns:
  1455. Classifier module.
  1456. """
  1457. return self.head.fc
  1458. def reset_classifier(self, num_classes: int, global_pool: Optional[str] = None) -> None:
  1459. """Reset classifier.
  1460. Args:
  1461. num_classes: Number of classes for new classifier.
  1462. global_pool: Global pooling type.
  1463. """
  1464. self.num_classes = num_classes
  1465. self.head.reset(num_classes, global_pool)
  1466. def forward_intermediates(
  1467. self,
  1468. x: torch.Tensor,
  1469. indices: Optional[Union[int, List[int]]] = None,
  1470. norm: bool = False,
  1471. stop_early: bool = False,
  1472. output_fmt: str = 'NCHW',
  1473. intermediates_only: bool = False,
  1474. exclude_final_conv: bool = False,
  1475. ) -> Union[List[torch.Tensor], Tuple[torch.Tensor, List[torch.Tensor]]]:
  1476. """ Forward features that returns intermediates.
  1477. Args:
  1478. x: Input image tensor
  1479. indices: Take last n blocks if int, all if None, select matching indices if sequence
  1480. norm: Apply norm layer to compatible intermediates
  1481. stop_early: Stop iterating over blocks when last desired intermediate hit
  1482. output_fmt: Shape of intermediate feature outputs
  1483. intermediates_only: Only return intermediate features
  1484. exclude_final_conv: Exclude final_conv from last intermediate
  1485. Returns:
  1486. """
  1487. assert output_fmt in ('NCHW',), 'Output shape must be NCHW.'
  1488. intermediates = []
  1489. take_indices, max_index = feature_take_indices(len(self.stage_ends), indices)
  1490. take_indices = [self.stage_ends[i] for i in take_indices]
  1491. max_index = self.stage_ends[max_index]
  1492. # forward pass
  1493. feat_idx = 0 # stem is index 0
  1494. if hasattr(self.stem, 'forward_intermediates'):
  1495. # returns last intermediate features in stem (before final stride in stride > 2 stems)
  1496. x, x_inter = self.stem.forward_intermediates(x)
  1497. else:
  1498. x, x_inter = self.stem(x), None
  1499. if feat_idx in take_indices:
  1500. intermediates.append(x if x_inter is None else x_inter)
  1501. last_idx = self.stage_ends[-1]
  1502. if torch.jit.is_scripting() or not stop_early: # can't slice blocks in torchscript
  1503. stages = self.stages
  1504. else:
  1505. stages = self.stages[:max_index]
  1506. for stage in stages:
  1507. feat_idx += 1
  1508. if self.grad_checkpointing and not torch.jit.is_scripting():
  1509. x = checkpoint_seq(stage, x)
  1510. else:
  1511. x = stage(x)
  1512. if not exclude_final_conv and feat_idx == last_idx:
  1513. # default feature_info for this model uses final_conv as the last feature output (if present)
  1514. x = self.final_conv(x)
  1515. if feat_idx in take_indices:
  1516. intermediates.append(x)
  1517. if intermediates_only:
  1518. return intermediates
  1519. if exclude_final_conv and feat_idx == last_idx:
  1520. x = self.final_conv(x)
  1521. return x, intermediates
  1522. def prune_intermediate_layers(
  1523. self,
  1524. indices: Union[int, List[int]] = 1,
  1525. prune_norm: bool = False,
  1526. prune_head: bool = True,
  1527. ) -> List[int]:
  1528. """Prune layers not required for specified intermediates.
  1529. Args:
  1530. indices: Indices of intermediate layers to keep.
  1531. prune_norm: Whether to prune normalization layer.
  1532. prune_head: Whether to prune the classifier head.
  1533. Returns:
  1534. List of indices that were kept.
  1535. """
  1536. take_indices, max_index = feature_take_indices(len(self.stage_ends), indices)
  1537. max_index = self.stage_ends[max_index]
  1538. self.stages = self.stages[:max_index] # truncate blocks w/ stem as idx 0
  1539. if max_index < self.stage_ends[-1]:
  1540. self.final_conv = nn.Identity()
  1541. if prune_head:
  1542. self.reset_classifier(0, '')
  1543. return take_indices
  1544. def forward_features(self, x: torch.Tensor) -> torch.Tensor:
  1545. """Forward pass through feature extraction.
  1546. Args:
  1547. x: Input tensor.
  1548. Returns:
  1549. Feature tensor.
  1550. """
  1551. x = self.stem(x)
  1552. if self.grad_checkpointing and not torch.jit.is_scripting():
  1553. x = checkpoint_seq(self.stages, x)
  1554. else:
  1555. x = self.stages(x)
  1556. x = self.final_conv(x)
  1557. return x
  1558. def forward_head(self, x: torch.Tensor, pre_logits: bool = False) -> torch.Tensor:
  1559. """Forward pass through head.
  1560. Args:
  1561. x: Input features.
  1562. pre_logits: Return features before final linear layer.
  1563. Returns:
  1564. Classification logits or features.
  1565. """
  1566. return self.head(x, pre_logits=pre_logits) if pre_logits else self.head(x)
  1567. def forward(self, x: torch.Tensor) -> torch.Tensor:
  1568. """Forward pass.
  1569. Args:
  1570. x: Input tensor.
  1571. Returns:
  1572. Output logits.
  1573. """
  1574. x = self.forward_features(x)
  1575. x = self.forward_head(x)
  1576. return x
  1577. def _init_weights(module: nn.Module, name: str = '', zero_init_last: bool = False) -> None:
  1578. """Initialize weights.
  1579. Args:
  1580. module: Module to initialize.
  1581. name: Module name.
  1582. zero_init_last: Zero-initialize last layer.
  1583. """
  1584. if isinstance(module, nn.Conv2d):
  1585. fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
  1586. fan_out //= module.groups
  1587. module.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
  1588. if module.bias is not None:
  1589. module.bias.data.zero_()
  1590. elif isinstance(module, nn.Linear):
  1591. nn.init.normal_(module.weight, mean=0.0, std=0.01)
  1592. if module.bias is not None:
  1593. nn.init.zeros_(module.bias)
  1594. elif isinstance(module, nn.BatchNorm2d):
  1595. nn.init.ones_(module.weight)
  1596. nn.init.zeros_(module.bias)
  1597. elif hasattr(module, 'init_weights'):
  1598. module.init_weights(zero_init_last=zero_init_last)
  1599. model_cfgs = dict(
  1600. gernet_l=ByoModelCfg(
  1601. blocks=(
  1602. ByoBlockCfg(type='basic', d=1, c=128, s=2, gs=0, br=1.),
  1603. ByoBlockCfg(type='basic', d=2, c=192, s=2, gs=0, br=1.),
  1604. ByoBlockCfg(type='bottle', d=6, c=640, s=2, gs=0, br=1 / 4),
  1605. ByoBlockCfg(type='bottle', d=5, c=640, s=2, gs=1, br=3.),
  1606. ByoBlockCfg(type='bottle', d=4, c=640, s=1, gs=1, br=3.),
  1607. ),
  1608. stem_chs=32,
  1609. stem_pool=None,
  1610. num_features=2560,
  1611. ),
  1612. gernet_m=ByoModelCfg(
  1613. blocks=(
  1614. ByoBlockCfg(type='basic', d=1, c=128, s=2, gs=0, br=1.),
  1615. ByoBlockCfg(type='basic', d=2, c=192, s=2, gs=0, br=1.),
  1616. ByoBlockCfg(type='bottle', d=6, c=640, s=2, gs=0, br=1 / 4),
  1617. ByoBlockCfg(type='bottle', d=4, c=640, s=2, gs=1, br=3.),
  1618. ByoBlockCfg(type='bottle', d=1, c=640, s=1, gs=1, br=3.),
  1619. ),
  1620. stem_chs=32,
  1621. stem_pool=None,
  1622. num_features=2560,
  1623. ),
  1624. gernet_s=ByoModelCfg(
  1625. blocks=(
  1626. ByoBlockCfg(type='basic', d=1, c=48, s=2, gs=0, br=1.),
  1627. ByoBlockCfg(type='basic', d=3, c=48, s=2, gs=0, br=1.),
  1628. ByoBlockCfg(type='bottle', d=7, c=384, s=2, gs=0, br=1 / 4),
  1629. ByoBlockCfg(type='bottle', d=2, c=560, s=2, gs=1, br=3.),
  1630. ByoBlockCfg(type='bottle', d=1, c=256, s=1, gs=1, br=3.),
  1631. ),
  1632. stem_chs=13,
  1633. stem_pool=None,
  1634. num_features=1920,
  1635. ),
  1636. repvgg_a0=ByoModelCfg(
  1637. blocks=_rep_vgg_bcfg(d=(2, 4, 14, 1), wf=(0.75, 0.75, 0.75, 2.5)),
  1638. stem_type='rep',
  1639. stem_chs=48,
  1640. ),
  1641. repvgg_a1=ByoModelCfg(
  1642. blocks=_rep_vgg_bcfg(d=(2, 4, 14, 1), wf=(1, 1, 1, 2.5)),
  1643. stem_type='rep',
  1644. stem_chs=64,
  1645. ),
  1646. repvgg_a2=ByoModelCfg(
  1647. blocks=_rep_vgg_bcfg(d=(2, 4, 14, 1), wf=(1.5, 1.5, 1.5, 2.75)),
  1648. stem_type='rep',
  1649. stem_chs=64,
  1650. ),
  1651. repvgg_b0=ByoModelCfg(
  1652. blocks=_rep_vgg_bcfg(wf=(1., 1., 1., 2.5)),
  1653. stem_type='rep',
  1654. stem_chs=64,
  1655. ),
  1656. repvgg_b1=ByoModelCfg(
  1657. blocks=_rep_vgg_bcfg(wf=(2., 2., 2., 4.)),
  1658. stem_type='rep',
  1659. stem_chs=64,
  1660. ),
  1661. repvgg_b1g4=ByoModelCfg(
  1662. blocks=_rep_vgg_bcfg(wf=(2., 2., 2., 4.), groups=4),
  1663. stem_type='rep',
  1664. stem_chs=64,
  1665. ),
  1666. repvgg_b2=ByoModelCfg(
  1667. blocks=_rep_vgg_bcfg(wf=(2.5, 2.5, 2.5, 5.)),
  1668. stem_type='rep',
  1669. stem_chs=64,
  1670. ),
  1671. repvgg_b2g4=ByoModelCfg(
  1672. blocks=_rep_vgg_bcfg(wf=(2.5, 2.5, 2.5, 5.), groups=4),
  1673. stem_type='rep',
  1674. stem_chs=64,
  1675. ),
  1676. repvgg_b3=ByoModelCfg(
  1677. blocks=_rep_vgg_bcfg(wf=(3., 3., 3., 5.)),
  1678. stem_type='rep',
  1679. stem_chs=64,
  1680. ),
  1681. repvgg_b3g4=ByoModelCfg(
  1682. blocks=_rep_vgg_bcfg(wf=(3., 3., 3., 5.), groups=4),
  1683. stem_type='rep',
  1684. stem_chs=64,
  1685. ),
  1686. repvgg_d2se=ByoModelCfg(
  1687. blocks=_rep_vgg_bcfg(d=(8, 14, 24, 1), wf=(2.5, 2.5, 2.5, 5.)),
  1688. stem_type='rep',
  1689. stem_chs=64,
  1690. attn_layer='se',
  1691. attn_kwargs=dict(rd_ratio=0.0625, rd_divisor=1),
  1692. ),
  1693. # 4 x conv stem w/ 2 act, no maxpool, 2,4,6,4 repeats, group size 32 in first 3 blocks
  1694. # DW convs in last block, 2048 pre-FC, silu act
  1695. resnet51q=ByoModelCfg(
  1696. blocks=(
  1697. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
  1698. ByoBlockCfg(type='bottle', d=4, c=512, s=2, gs=32, br=0.25),
  1699. ByoBlockCfg(type='bottle', d=6, c=1536, s=2, gs=32, br=0.25),
  1700. ByoBlockCfg(type='bottle', d=4, c=1536, s=2, gs=1, br=1.0),
  1701. ),
  1702. stem_chs=128,
  1703. stem_type='quad2',
  1704. stem_pool=None,
  1705. num_features=2048,
  1706. act_layer='silu',
  1707. ),
  1708. # 4 x conv stem w/ 4 act, no maxpool, 1,4,6,4 repeats, edge block first, group size 32 in next 2 blocks
  1709. # DW convs in last block, 4 conv for each bottle block, 2048 pre-FC, silu act
  1710. resnet61q=ByoModelCfg(
  1711. blocks=(
  1712. ByoBlockCfg(type='edge', d=1, c=256, s=1, gs=0, br=1.0, block_kwargs=dict()),
  1713. ByoBlockCfg(type='bottle', d=4, c=512, s=2, gs=32, br=0.25),
  1714. ByoBlockCfg(type='bottle', d=6, c=1536, s=2, gs=32, br=0.25),
  1715. ByoBlockCfg(type='bottle', d=4, c=1536, s=2, gs=1, br=1.0),
  1716. ),
  1717. stem_chs=128,
  1718. stem_type='quad',
  1719. stem_pool=None,
  1720. num_features=2048,
  1721. act_layer='silu',
  1722. block_kwargs=dict(extra_conv=True),
  1723. ),
  1724. # A series of ResNeXt-26 models w/ one of none, GC, SE, ECA, BAT attn, group size 32, SiLU act,
  1725. # and a tiered stem w/ maxpool
  1726. resnext26ts=ByoModelCfg(
  1727. blocks=(
  1728. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
  1729. ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
  1730. ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
  1731. ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
  1732. ),
  1733. stem_chs=64,
  1734. stem_type='tiered',
  1735. stem_pool='maxpool',
  1736. act_layer='silu',
  1737. ),
  1738. gcresnext26ts=ByoModelCfg(
  1739. blocks=(
  1740. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
  1741. ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
  1742. ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
  1743. ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
  1744. ),
  1745. stem_chs=64,
  1746. stem_type='tiered',
  1747. stem_pool='maxpool',
  1748. act_layer='silu',
  1749. attn_layer='gca',
  1750. ),
  1751. seresnext26ts=ByoModelCfg(
  1752. blocks=(
  1753. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
  1754. ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
  1755. ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
  1756. ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
  1757. ),
  1758. stem_chs=64,
  1759. stem_type='tiered',
  1760. stem_pool='maxpool',
  1761. act_layer='silu',
  1762. attn_layer='se',
  1763. ),
  1764. eca_resnext26ts=ByoModelCfg(
  1765. blocks=(
  1766. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
  1767. ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
  1768. ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
  1769. ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
  1770. ),
  1771. stem_chs=64,
  1772. stem_type='tiered',
  1773. stem_pool='maxpool',
  1774. act_layer='silu',
  1775. attn_layer='eca',
  1776. ),
  1777. bat_resnext26ts=ByoModelCfg(
  1778. blocks=(
  1779. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=32, br=0.25),
  1780. ByoBlockCfg(type='bottle', d=2, c=512, s=2, gs=32, br=0.25),
  1781. ByoBlockCfg(type='bottle', d=2, c=1024, s=2, gs=32, br=0.25),
  1782. ByoBlockCfg(type='bottle', d=2, c=2048, s=2, gs=32, br=0.25),
  1783. ),
  1784. stem_chs=64,
  1785. stem_type='tiered',
  1786. stem_pool='maxpool',
  1787. act_layer='silu',
  1788. attn_layer='bat',
  1789. attn_kwargs=dict(block_size=8)
  1790. ),
  1791. # ResNet-32 (2, 3, 3, 2) models w/ no attn, no groups, SiLU act, no pre-fc feat layer, tiered stem w/o maxpool
  1792. resnet32ts=ByoModelCfg(
  1793. blocks=(
  1794. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
  1795. ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
  1796. ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
  1797. ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
  1798. ),
  1799. stem_chs=64,
  1800. stem_type='tiered',
  1801. stem_pool='',
  1802. num_features=0,
  1803. act_layer='silu',
  1804. ),
  1805. # ResNet-33 (2, 3, 3, 2) models w/ no attn, no groups, SiLU act, 1280 pre-FC feat, tiered stem w/o maxpool
  1806. resnet33ts=ByoModelCfg(
  1807. blocks=(
  1808. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
  1809. ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
  1810. ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
  1811. ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
  1812. ),
  1813. stem_chs=64,
  1814. stem_type='tiered',
  1815. stem_pool='',
  1816. num_features=1280,
  1817. act_layer='silu',
  1818. ),
  1819. # A series of ResNet-33 (2, 3, 3, 2) models w/ one of GC, SE, ECA attn, no groups, SiLU act, 1280 pre-FC feat
  1820. # and a tiered stem w/ no maxpool
  1821. gcresnet33ts=ByoModelCfg(
  1822. blocks=(
  1823. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
  1824. ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
  1825. ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
  1826. ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
  1827. ),
  1828. stem_chs=64,
  1829. stem_type='tiered',
  1830. stem_pool='',
  1831. num_features=1280,
  1832. act_layer='silu',
  1833. attn_layer='gca',
  1834. ),
  1835. seresnet33ts=ByoModelCfg(
  1836. blocks=(
  1837. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
  1838. ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
  1839. ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
  1840. ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
  1841. ),
  1842. stem_chs=64,
  1843. stem_type='tiered',
  1844. stem_pool='',
  1845. num_features=1280,
  1846. act_layer='silu',
  1847. attn_layer='se',
  1848. ),
  1849. eca_resnet33ts=ByoModelCfg(
  1850. blocks=(
  1851. ByoBlockCfg(type='bottle', d=2, c=256, s=1, gs=0, br=0.25),
  1852. ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=0, br=0.25),
  1853. ByoBlockCfg(type='bottle', d=3, c=1536, s=2, gs=0, br=0.25),
  1854. ByoBlockCfg(type='bottle', d=2, c=1536, s=2, gs=0, br=0.25),
  1855. ),
  1856. stem_chs=64,
  1857. stem_type='tiered',
  1858. stem_pool='',
  1859. num_features=1280,
  1860. act_layer='silu',
  1861. attn_layer='eca',
  1862. ),
  1863. gcresnet50t=ByoModelCfg(
  1864. blocks=(
  1865. ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
  1866. ByoBlockCfg(type='bottle', d=4, c=512, s=2, br=0.25),
  1867. ByoBlockCfg(type='bottle', d=6, c=1024, s=2, br=0.25),
  1868. ByoBlockCfg(type='bottle', d=3, c=2048, s=2, br=0.25),
  1869. ),
  1870. stem_chs=64,
  1871. stem_type='tiered',
  1872. stem_pool='',
  1873. attn_layer='gca',
  1874. ),
  1875. gcresnext50ts=ByoModelCfg(
  1876. blocks=(
  1877. ByoBlockCfg(type='bottle', d=3, c=256, s=1, gs=32, br=0.25),
  1878. ByoBlockCfg(type='bottle', d=4, c=512, s=2, gs=32, br=0.25),
  1879. ByoBlockCfg(type='bottle', d=6, c=1024, s=2, gs=32, br=0.25),
  1880. ByoBlockCfg(type='bottle', d=3, c=2048, s=2, gs=32, br=0.25),
  1881. ),
  1882. stem_chs=64,
  1883. stem_type='tiered',
  1884. stem_pool='maxpool',
  1885. act_layer='silu',
  1886. attn_layer='gca',
  1887. ),
  1888. # experimental models, closer to a RegNetZ than a ResNet. Similar to EfficientNets but w/ groups instead of DW
  1889. regnetz_b16=ByoModelCfg(
  1890. blocks=(
  1891. ByoBlockCfg(type='bottle', d=2, c=48, s=2, gs=16, br=3),
  1892. ByoBlockCfg(type='bottle', d=6, c=96, s=2, gs=16, br=3),
  1893. ByoBlockCfg(type='bottle', d=12, c=192, s=2, gs=16, br=3),
  1894. ByoBlockCfg(type='bottle', d=2, c=288, s=2, gs=16, br=3),
  1895. ),
  1896. stem_chs=32,
  1897. stem_pool='',
  1898. downsample='',
  1899. num_features=1536,
  1900. act_layer='silu',
  1901. attn_layer='se',
  1902. attn_kwargs=dict(rd_ratio=0.25),
  1903. block_kwargs=dict(bottle_in=True, linear_out=True),
  1904. ),
  1905. regnetz_c16=ByoModelCfg(
  1906. blocks=(
  1907. ByoBlockCfg(type='bottle', d=2, c=48, s=2, gs=16, br=4),
  1908. ByoBlockCfg(type='bottle', d=6, c=96, s=2, gs=16, br=4),
  1909. ByoBlockCfg(type='bottle', d=12, c=192, s=2, gs=16, br=4),
  1910. ByoBlockCfg(type='bottle', d=2, c=288, s=2, gs=16, br=4),
  1911. ),
  1912. stem_chs=32,
  1913. stem_pool='',
  1914. downsample='',
  1915. num_features=1536,
  1916. act_layer='silu',
  1917. attn_layer='se',
  1918. attn_kwargs=dict(rd_ratio=0.25),
  1919. block_kwargs=dict(bottle_in=True, linear_out=True),
  1920. ),
  1921. regnetz_d32=ByoModelCfg(
  1922. blocks=(
  1923. ByoBlockCfg(type='bottle', d=3, c=64, s=1, gs=32, br=4),
  1924. ByoBlockCfg(type='bottle', d=6, c=128, s=2, gs=32, br=4),
  1925. ByoBlockCfg(type='bottle', d=12, c=256, s=2, gs=32, br=4),
  1926. ByoBlockCfg(type='bottle', d=3, c=384, s=2, gs=32, br=4),
  1927. ),
  1928. stem_chs=64,
  1929. stem_type='tiered',
  1930. stem_pool='',
  1931. downsample='',
  1932. num_features=1792,
  1933. act_layer='silu',
  1934. attn_layer='se',
  1935. attn_kwargs=dict(rd_ratio=0.25),
  1936. block_kwargs=dict(bottle_in=True, linear_out=True),
  1937. ),
  1938. regnetz_d8=ByoModelCfg(
  1939. blocks=(
  1940. ByoBlockCfg(type='bottle', d=3, c=64, s=1, gs=8, br=4),
  1941. ByoBlockCfg(type='bottle', d=6, c=128, s=2, gs=8, br=4),
  1942. ByoBlockCfg(type='bottle', d=12, c=256, s=2, gs=8, br=4),
  1943. ByoBlockCfg(type='bottle', d=3, c=384, s=2, gs=8, br=4),
  1944. ),
  1945. stem_chs=64,
  1946. stem_type='tiered',
  1947. stem_pool='',
  1948. downsample='',
  1949. num_features=1792,
  1950. act_layer='silu',
  1951. attn_layer='se',
  1952. attn_kwargs=dict(rd_ratio=0.25),
  1953. block_kwargs=dict(bottle_in=True, linear_out=True),
  1954. ),
  1955. regnetz_e8=ByoModelCfg(
  1956. blocks=(
  1957. ByoBlockCfg(type='bottle', d=3, c=96, s=1, gs=8, br=4),
  1958. ByoBlockCfg(type='bottle', d=8, c=192, s=2, gs=8, br=4),
  1959. ByoBlockCfg(type='bottle', d=16, c=384, s=2, gs=8, br=4),
  1960. ByoBlockCfg(type='bottle', d=3, c=512, s=2, gs=8, br=4),
  1961. ),
  1962. stem_chs=64,
  1963. stem_type='tiered',
  1964. stem_pool='',
  1965. downsample='',
  1966. num_features=2048,
  1967. act_layer='silu',
  1968. attn_layer='se',
  1969. attn_kwargs=dict(rd_ratio=0.25),
  1970. block_kwargs=dict(bottle_in=True, linear_out=True),
  1971. ),
  1972. # experimental EvoNorm configs
  1973. regnetz_b16_evos=ByoModelCfg(
  1974. blocks=(
  1975. ByoBlockCfg(type='bottle', d=2, c=48, s=2, gs=16, br=3),
  1976. ByoBlockCfg(type='bottle', d=6, c=96, s=2, gs=16, br=3),
  1977. ByoBlockCfg(type='bottle', d=12, c=192, s=2, gs=16, br=3),
  1978. ByoBlockCfg(type='bottle', d=2, c=288, s=2, gs=16, br=3),
  1979. ),
  1980. stem_chs=32,
  1981. stem_pool='',
  1982. downsample='',
  1983. num_features=1536,
  1984. act_layer='silu',
  1985. norm_layer=partial(EvoNorm2dS0a, group_size=16),
  1986. attn_layer='se',
  1987. attn_kwargs=dict(rd_ratio=0.25),
  1988. block_kwargs=dict(bottle_in=True, linear_out=True),
  1989. ),
  1990. regnetz_c16_evos=ByoModelCfg(
  1991. blocks=(
  1992. ByoBlockCfg(type='bottle', d=2, c=48, s=2, gs=16, br=4),
  1993. ByoBlockCfg(type='bottle', d=6, c=96, s=2, gs=16, br=4),
  1994. ByoBlockCfg(type='bottle', d=12, c=192, s=2, gs=16, br=4),
  1995. ByoBlockCfg(type='bottle', d=2, c=288, s=2, gs=16, br=4),
  1996. ),
  1997. stem_chs=32,
  1998. stem_pool='',
  1999. downsample='',
  2000. num_features=1536,
  2001. act_layer='silu',
  2002. norm_layer=partial(EvoNorm2dS0a, group_size=16),
  2003. attn_layer='se',
  2004. attn_kwargs=dict(rd_ratio=0.25),
  2005. block_kwargs=dict(bottle_in=True, linear_out=True),
  2006. ),
  2007. regnetz_d8_evos=ByoModelCfg(
  2008. blocks=(
  2009. ByoBlockCfg(type='bottle', d=3, c=64, s=1, gs=8, br=4),
  2010. ByoBlockCfg(type='bottle', d=6, c=128, s=2, gs=8, br=4),
  2011. ByoBlockCfg(type='bottle', d=12, c=256, s=2, gs=8, br=4),
  2012. ByoBlockCfg(type='bottle', d=3, c=384, s=2, gs=8, br=4),
  2013. ),
  2014. stem_chs=64,
  2015. stem_type='deep',
  2016. stem_pool='',
  2017. downsample='',
  2018. num_features=1792,
  2019. act_layer='silu',
  2020. norm_layer=partial(EvoNorm2dS0a, group_size=16),
  2021. attn_layer='se',
  2022. attn_kwargs=dict(rd_ratio=0.25),
  2023. block_kwargs=dict(bottle_in=True, linear_out=True),
  2024. ),
  2025. mobileone_s0=ByoModelCfg(
  2026. blocks=_mobileone_bcfg(wf=(0.75, 1.0, 1.0, 2.), num_conv_branches=4),
  2027. stem_type='one',
  2028. stem_chs=48,
  2029. ),
  2030. mobileone_s1=ByoModelCfg(
  2031. blocks=_mobileone_bcfg(wf=(1.5, 1.5, 2.0, 2.5)),
  2032. stem_type='one',
  2033. stem_chs=64,
  2034. ),
  2035. mobileone_s2=ByoModelCfg(
  2036. blocks=_mobileone_bcfg(wf=(1.5, 2.0, 2.5, 4.0)),
  2037. stem_type='one',
  2038. stem_chs=64,
  2039. ),
  2040. mobileone_s3=ByoModelCfg(
  2041. blocks=_mobileone_bcfg(wf=(2.0, 2.5, 3.0, 4.0)),
  2042. stem_type='one',
  2043. stem_chs=64,
  2044. ),
  2045. mobileone_s4=ByoModelCfg(
  2046. blocks=_mobileone_bcfg(wf=(3.0, 3.5, 3.5, 4.0), se_blocks=(0, 0, 5, 1)),
  2047. stem_type='one',
  2048. stem_chs=64,
  2049. ),
  2050. resnet50_clip=ByoModelCfg(
  2051. blocks=(
  2052. ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
  2053. ByoBlockCfg(type='bottle', d=4, c=512, s=2, br=0.25),
  2054. ByoBlockCfg(type='bottle', d=6, c=1024, s=2, br=0.25),
  2055. ByoBlockCfg(type='bottle', d=3, c=2048, s=2, br=0.25),
  2056. ),
  2057. stem_chs=(32, 32, 64),
  2058. stem_type='',
  2059. stem_pool='avg2',
  2060. downsample='avg',
  2061. aa_layer='avg',
  2062. head_type='attn_abs',
  2063. ),
  2064. resnet101_clip=ByoModelCfg(
  2065. blocks=(
  2066. ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
  2067. ByoBlockCfg(type='bottle', d=4, c=512, s=2, br=0.25),
  2068. ByoBlockCfg(type='bottle', d=23, c=1024, s=2, br=0.25),
  2069. ByoBlockCfg(type='bottle', d=3, c=2048, s=2, br=0.25),
  2070. ),
  2071. stem_chs=(32, 32, 64),
  2072. stem_type='',
  2073. stem_pool='avg2',
  2074. downsample='avg',
  2075. aa_layer='avg',
  2076. head_type='attn_abs',
  2077. ),
  2078. resnet50x4_clip=ByoModelCfg(
  2079. blocks=(
  2080. ByoBlockCfg(type='bottle', d=4, c=256, s=1, br=0.25),
  2081. ByoBlockCfg(type='bottle', d=6, c=512, s=2, br=0.25),
  2082. ByoBlockCfg(type='bottle', d=10, c=1024, s=2, br=0.25),
  2083. ByoBlockCfg(type='bottle', d=6, c=2048, s=2, br=0.25),
  2084. ),
  2085. width_factor=1.25,
  2086. stem_chs=(32, 32, 64),
  2087. stem_type='',
  2088. stem_pool='avg2',
  2089. downsample='avg',
  2090. aa_layer='avg',
  2091. head_type='attn_abs',
  2092. ),
  2093. resnet50x16_clip=ByoModelCfg(
  2094. blocks=(
  2095. ByoBlockCfg(type='bottle', d=6, c=256, s=1, br=0.25),
  2096. ByoBlockCfg(type='bottle', d=8, c=512, s=2, br=0.25),
  2097. ByoBlockCfg(type='bottle', d=18, c=1024, s=2, br=0.25),
  2098. ByoBlockCfg(type='bottle', d=8, c=2048, s=2, br=0.25),
  2099. ),
  2100. width_factor=1.5,
  2101. stem_chs=(32, 32, 64),
  2102. stem_type='',
  2103. stem_pool='avg2',
  2104. downsample='avg',
  2105. aa_layer='avg',
  2106. head_type='attn_abs',
  2107. ),
  2108. resnet50x64_clip=ByoModelCfg(
  2109. blocks=(
  2110. ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
  2111. ByoBlockCfg(type='bottle', d=15, c=512, s=2, br=0.25),
  2112. ByoBlockCfg(type='bottle', d=36, c=1024, s=2, br=0.25),
  2113. ByoBlockCfg(type='bottle', d=10, c=2048, s=2, br=0.25),
  2114. ),
  2115. width_factor=2.0,
  2116. stem_chs=(32, 32, 64),
  2117. stem_type='',
  2118. stem_pool='avg2',
  2119. downsample='avg',
  2120. aa_layer='avg',
  2121. head_type='attn_abs',
  2122. ),
  2123. resnet50_mlp=ByoModelCfg(
  2124. blocks=(
  2125. ByoBlockCfg(type='bottle', d=3, c=256, s=1, br=0.25),
  2126. ByoBlockCfg(type='bottle', d=4, c=512, s=2, br=0.25),
  2127. ByoBlockCfg(type='bottle', d=6, c=1024, s=2, br=0.25),
  2128. ByoBlockCfg(type='bottle', d=3, c=2048, s=2, br=0.25),
  2129. ),
  2130. stem_chs=(32, 32, 64),
  2131. stem_type='',
  2132. stem_pool='avg2',
  2133. downsample='avg',
  2134. aa_layer='avg',
  2135. head_hidden_size=1024,
  2136. head_type='mlp',
  2137. ),
  2138. test_byobnet=ByoModelCfg(
  2139. blocks=(
  2140. ByoBlockCfg(type='edge', d=1, c=32, s=2, gs=0, br=0.5),
  2141. ByoBlockCfg(type='dark', d=1, c=64, s=2, gs=0, br=0.5),
  2142. ByoBlockCfg(type='basic', d=1, c=128, s=2, gs=32, br=0.25),
  2143. ByoBlockCfg(type='bottle', d=1, c=256, s=2, gs=64, br=0.25),
  2144. ),
  2145. stem_chs=24,
  2146. downsample='avg',
  2147. stem_pool='',
  2148. act_layer='relu',
  2149. attn_layer='se',
  2150. attn_kwargs=dict(rd_ratio=0.25),
  2151. ),
  2152. )
  2153. for k in ('resnet50_clip', 'resnet101_clip', 'resnet50x4_clip', 'resnet50x16_clip', 'resnet50x64_clip'):
  2154. model_cfgs[k + '_gap'] = replace(model_cfgs[k], head_type='classifier')
  2155. def _convert_openai_clip(
  2156. state_dict: Dict[str, torch.Tensor],
  2157. model: ByobNet,
  2158. prefix: str = 'visual.',
  2159. ) -> Dict[str, torch.Tensor]:
  2160. model_has_attn_pool = isinstance(model.head, (RotAttentionPool2d, AttentionPool2d))
  2161. import re
  2162. def _stage_sub(m):
  2163. stage_idx = int(m.group(1)) - 1
  2164. layer_idx, layer_type, layer_id = int(m.group(2)), m.group(3), int(m.group(4))
  2165. prefix_str = f'stages.{stage_idx}.{layer_idx}.'
  2166. id_map = {1: 'conv1_1x1.', 2: 'conv2_kxk.', 3: 'conv3_1x1.'}
  2167. suffix_str = id_map[layer_id] + layer_type
  2168. return prefix_str + suffix_str
  2169. def _down_sub(m):
  2170. stage_idx = int(m.group(1)) - 1
  2171. layer_idx, layer_id = int(m.group(2)), int(m.group(3))
  2172. return f'stages.{stage_idx}.{layer_idx}.shortcut.' + ('conv.conv' if layer_id == 0 else 'conv.bn')
  2173. out_dict = {}
  2174. for k, v in state_dict.items():
  2175. if not k.startswith(prefix):
  2176. continue
  2177. k = re.sub(rf'{prefix}conv([0-9])', r'stem.conv\1.conv', k)
  2178. k = re.sub(rf'{prefix}bn([0-9])', r'stem.conv\1.bn', k)
  2179. k = re.sub(rf'{prefix}layer([0-9])\.([0-9]+)\.([a-z]+)([0-9])', _stage_sub, k)
  2180. k = re.sub(rf'{prefix}layer([0-9])\.([0-9]+)\.downsample\.([0-9])', _down_sub, k)
  2181. if k.startswith(f'{prefix}attnpool'):
  2182. if not model_has_attn_pool:
  2183. continue
  2184. k = k.replace(prefix + 'attnpool', 'head') # 'attn_pool')
  2185. k = k.replace('positional_embedding', 'pos_embed')
  2186. k = k.replace('q_proj', 'q')
  2187. k = k.replace('k_proj', 'k')
  2188. k = k.replace('v_proj', 'v')
  2189. k = k.replace('c_proj', 'proj')
  2190. out_dict[k] = v
  2191. return out_dict
  2192. def checkpoint_filter_fn(
  2193. state_dict: Dict[str, torch.Tensor],
  2194. model: ByobNet
  2195. ):
  2196. if 'visual.conv1.weight' in state_dict:
  2197. state_dict = _convert_openai_clip(state_dict, model)
  2198. return state_dict
  2199. def _create_byobnet(variant: str, pretrained: bool = False, **kwargs) -> ByobNet:
  2200. """Create a ByobNet model.
  2201. Args:
  2202. variant: Model variant name.
  2203. pretrained: Load pretrained weights.
  2204. **kwargs: Additional model arguments.
  2205. Returns:
  2206. ByobNet model instance.
  2207. """
  2208. return build_model_with_cfg(
  2209. ByobNet, variant, pretrained,
  2210. model_cfg=model_cfgs[variant],
  2211. pretrained_filter_fn=checkpoint_filter_fn,
  2212. feature_cfg=dict(flatten_sequential=True),
  2213. **kwargs,
  2214. )
  2215. def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
  2216. """Create default configuration dictionary.
  2217. Args:
  2218. url: Model weight URL.
  2219. **kwargs: Additional configuration options.
  2220. Returns:
  2221. Configuration dictionary.
  2222. """
  2223. return {
  2224. 'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7),
  2225. 'crop_pct': 0.875, 'interpolation': 'bilinear',
  2226. 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
  2227. 'first_conv': 'stem.conv', 'classifier': 'head.fc',
  2228. 'license': 'apache-2.0',
  2229. **kwargs
  2230. }
  2231. def _cfgr(url: str = '', **kwargs) -> Dict[str, Any]:
  2232. """Create RepVGG configuration dictionary.
  2233. Args:
  2234. url: Model weight URL.
  2235. **kwargs: Additional configuration options.
  2236. Returns:
  2237. Configuration dictionary.
  2238. """
  2239. return {
  2240. 'url': url, 'num_classes': 1000, 'input_size': (3, 256, 256), 'pool_size': (8, 8),
  2241. 'crop_pct': 0.9, 'interpolation': 'bicubic',
  2242. 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
  2243. 'first_conv': 'stem.conv1.conv', 'classifier': 'head.fc',
  2244. 'license': 'apache-2.0',
  2245. **kwargs
  2246. }
# Pretrained-weight and preprocessing defaults, keyed as '<model name>.<tag>'.
# Entries are built with `_cfg` (224 / bilinear defaults) or `_cfgr` (256 / bicubic
# defaults); keyword arguments on each entry override those defaults.
default_cfgs = generate_default_cfgs({
    # GPU-Efficient (ResNet) weights
    'gernet_s.idstcv_in1k': _cfg(hf_hub_id='timm/'),
    'gernet_m.idstcv_in1k': _cfg(hf_hub_id='timm/'),
    'gernet_l.idstcv_in1k': _cfg(hf_hub_id='timm/', input_size=(3, 256, 256), pool_size=(8, 8)),

    # RepVGG weights
    # (first_conv is a pair: both the kxk and the 1x1 stem conv are listed)
    'repvgg_a0.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_a1.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_a2.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b0.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b1.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b1g4.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b2.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b2g4.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b3.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_b3g4.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit'),
    'repvgg_d2se.rvgg_in1k': _cfg(
        hf_hub_id='timm/',
        first_conv=('stem.conv_kxk.conv', 'stem.conv_1x1.conv'), license='mit',
        input_size=(3, 320, 320), pool_size=(10, 10), crop_pct=1.0,
    ),

    # experimental ResNet configs
    # NOTE: resnet51q is built with `_cfg` (bilinear, crop_pct 0.875 defaults), resnet61q with `_cfgr`.
    'resnet51q.ra2_in1k': _cfg(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet51q_ra2-d47dcc76.pth',
        first_conv='stem.conv1', input_size=(3, 256, 256), pool_size=(8, 8),
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'resnet61q.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet61q_ra2-6afc536c.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    # ResNeXt-26 models with different attention in Bottleneck blocks
    'resnext26ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/resnext26ts_256_ra2-8bbd9106.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'seresnext26ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/seresnext26ts_256-6f0d74a3.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'gcresnext26ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/gcresnext26ts_256-e414378b.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'eca_resnext26ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/eca_resnext26ts_256-5a1d030f.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    # This entry sets min_input_size and has no test-time size override.
    'bat_resnext26ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/bat_resnext26ts_256-fa6fd595.pth',
        min_input_size=(3, 256, 256)),

    # ResNet-32 / 33 models with different attention in Bottleneck blocks
    'resnet32ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/resnet32ts_256-aacf5250.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'resnet33ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/resnet33ts_256-e91b09a4.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'gcresnet33ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/gcresnet33ts_256-0e0cd345.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'seresnet33ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/seresnet33ts_256-f8ad44d9.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'eca_resnet33ts.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/eca_resnet33ts_256-8f98face.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    'gcresnet50t.ra2_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/gcresnet50t_256-96374d1c.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'gcresnext50ts.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/gcresnext50ts_256-3e0f515e.pth',
        test_input_size=(3, 288, 288), test_crop_pct=1.0),

    # custom `timm` specific RegNetZ inspired models w/ different sizing from paper
    # (all regnetz entries override mean/std with 0.5)
    'regnetz_b16.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_b_raa-677d9606.pth',
        first_conv='stem.conv', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        input_size=(3, 224, 224), pool_size=(7, 7), crop_pct=0.94, test_input_size=(3, 288, 288), test_crop_pct=1.0),
    'regnetz_c16.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_c_rab2_256-a54bf36a.pth',
        first_conv='stem.conv', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        crop_pct=0.94, test_input_size=(3, 320, 320), test_crop_pct=1.0),
    'regnetz_d32.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_d_rab_256-b8073a89.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=0.95, test_input_size=(3, 320, 320)),
    'regnetz_d8.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_d8_bh-afc03c55.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=0.94, test_input_size=(3, 320, 320), test_crop_pct=1.0),
    'regnetz_e8.ra3_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_e8_bh-aace8e6e.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=0.94, test_input_size=(3, 320, 320), test_crop_pct=1.0),

    # RegNetZ '_evos' variants; the b16 entry is tagged 'untrained' and names no hub id or url.
    'regnetz_b16_evos.untrained': _cfgr(
        first_conv='stem.conv', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        input_size=(3, 224, 224), pool_size=(7, 7), crop_pct=0.95, test_input_size=(3, 288, 288)),
    'regnetz_c16_evos.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/regnetz_c16_evos_ch-d8311942.pth',
        first_conv='stem.conv', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5),
        crop_pct=0.95, test_input_size=(3, 320, 320)),
    'regnetz_d8_evos.ch_in1k': _cfgr(
        hf_hub_id='timm/',
        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/regnetz_d8_evos_ch-2bc12646.pth',
        mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5), crop_pct=0.95, test_input_size=(3, 320, 320), test_crop_pct=1.0),

    # MobileOne weights (s0 keeps crop_pct 0.875, s1-s4 use 0.9)
    'mobileone_s0.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.875,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),
    'mobileone_s1.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.9,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),
    'mobileone_s2.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.9,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),
    'mobileone_s3.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.9,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),
    'mobileone_s4.apple_in1k': _cfg(
        hf_hub_id='timm/',
        crop_pct=0.9,
        first_conv=('stem.conv_kxk.0.conv', 'stem.conv_scale.conv'),
        license='mobileone-license',
    ),

    # original attention pool head variants
    # (fixed_input_size=True; num_classes is set per entry and the classifier is 'head.proj')
    'resnet50_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=1024, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),
    'resnet101_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=512, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50x4_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=640, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 288, 288), pool_size=(9, 9),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50x16_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=768, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 384, 384), pool_size=(12, 12),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50x64_clip.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=1024, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 448, 448), pool_size=(14, 14),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50_clip.cc12m': _cfgr(
        hf_hub_id='timm/',
        num_classes=1024, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),
    'resnet50_clip.yfcc15m': _cfgr(
        hf_hub_id='timm/',
        num_classes=1024, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),
    'resnet101_clip.yfcc15m': _cfgr(
        hf_hub_id='timm/',
        num_classes=512, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        fixed_input_size=True, input_size=(3, 224, 224), pool_size=(7, 7),
        classifier='head.proj',
        license='mit',
    ),

    # avg-pool w/ optional standard classifier head variants
    # (num_classes=0 on every entry; no fixed_input_size flag)
    'resnet50_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),
    'resnet101_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),
    'resnet50x4_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 288, 288), pool_size=(9, 9),
        license='mit',
    ),
    'resnet50x16_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 384, 384), pool_size=(12, 12),
        license='mit',
    ),
    'resnet50x64_clip_gap.openai': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 448, 448), pool_size=(14, 14),
        license='mit',
    ),
    'resnet50_clip_gap.cc12m': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),
    'resnet50_clip_gap.yfcc15m': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),
    'resnet101_clip_gap.yfcc15m': _cfgr(
        hf_hub_id='timm/',
        num_classes=0, mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD,
        input_size=(3, 224, 224), pool_size=(7, 7),
        license='mit',
    ),

    # Tagged 'untrained': no hub id or url is given.
    'resnet50_mlp.untrained': _cfgr(
        input_size=(3, 256, 256), pool_size=(8, 8),
    ),

    # Small test model config at 160x160.
    'test_byobnet.r160_in1k': _cfgr(
        hf_hub_id='timm/',
        first_conv='stem.conv',
        input_size=(3, 160, 160), crop_pct=0.95, pool_size=(5, 5),
    ),
})
  2528. @register_model
  2529. def gernet_l(pretrained=False, **kwargs) -> ByobNet:
  2530. """ GEResNet-Large (GENet-Large from official impl)
  2531. `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
  2532. """
  2533. return _create_byobnet('gernet_l', pretrained=pretrained, **kwargs)
  2534. @register_model
  2535. def gernet_m(pretrained=False, **kwargs) -> ByobNet:
  2536. """ GEResNet-Medium (GENet-Normal from official impl)
  2537. `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
  2538. """
  2539. return _create_byobnet('gernet_m', pretrained=pretrained, **kwargs)
  2540. @register_model
  2541. def gernet_s(pretrained=False, **kwargs) -> ByobNet:
  2542. """ EResNet-Small (GENet-Small from official impl)
  2543. `Neural Architecture Design for GPU-Efficient Networks` - https://arxiv.org/abs/2006.14090
  2544. """
  2545. return _create_byobnet('gernet_s', pretrained=pretrained, **kwargs)
  2546. @register_model
  2547. def repvgg_a0(pretrained=False, **kwargs) -> ByobNet:
  2548. """ RepVGG-A0
  2549. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2550. """
  2551. return _create_byobnet('repvgg_a0', pretrained=pretrained, **kwargs)
  2552. @register_model
  2553. def repvgg_a1(pretrained=False, **kwargs) -> ByobNet:
  2554. """ RepVGG-A1
  2555. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2556. """
  2557. return _create_byobnet('repvgg_a1', pretrained=pretrained, **kwargs)
  2558. @register_model
  2559. def repvgg_a2(pretrained=False, **kwargs) -> ByobNet:
  2560. """ RepVGG-A2
  2561. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2562. """
  2563. return _create_byobnet('repvgg_a2', pretrained=pretrained, **kwargs)
  2564. @register_model
  2565. def repvgg_b0(pretrained=False, **kwargs) -> ByobNet:
  2566. """ RepVGG-B0
  2567. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2568. """
  2569. return _create_byobnet('repvgg_b0', pretrained=pretrained, **kwargs)
  2570. @register_model
  2571. def repvgg_b1(pretrained=False, **kwargs) -> ByobNet:
  2572. """ RepVGG-B1
  2573. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2574. """
  2575. return _create_byobnet('repvgg_b1', pretrained=pretrained, **kwargs)
  2576. @register_model
  2577. def repvgg_b1g4(pretrained=False, **kwargs) -> ByobNet:
  2578. """ RepVGG-B1g4
  2579. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2580. """
  2581. return _create_byobnet('repvgg_b1g4', pretrained=pretrained, **kwargs)
  2582. @register_model
  2583. def repvgg_b2(pretrained=False, **kwargs) -> ByobNet:
  2584. """ RepVGG-B2
  2585. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2586. """
  2587. return _create_byobnet('repvgg_b2', pretrained=pretrained, **kwargs)
  2588. @register_model
  2589. def repvgg_b2g4(pretrained=False, **kwargs) -> ByobNet:
  2590. """ RepVGG-B2g4
  2591. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2592. """
  2593. return _create_byobnet('repvgg_b2g4', pretrained=pretrained, **kwargs)
  2594. @register_model
  2595. def repvgg_b3(pretrained=False, **kwargs) -> ByobNet:
  2596. """ RepVGG-B3
  2597. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2598. """
  2599. return _create_byobnet('repvgg_b3', pretrained=pretrained, **kwargs)
  2600. @register_model
  2601. def repvgg_b3g4(pretrained=False, **kwargs) -> ByobNet:
  2602. """ RepVGG-B3g4
  2603. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2604. """
  2605. return _create_byobnet('repvgg_b3g4', pretrained=pretrained, **kwargs)
  2606. @register_model
  2607. def repvgg_d2se(pretrained=False, **kwargs) -> ByobNet:
  2608. """ RepVGG-D2se
  2609. `Making VGG-style ConvNets Great Again` - https://arxiv.org/abs/2101.03697
  2610. """
  2611. return _create_byobnet('repvgg_d2se', pretrained=pretrained, **kwargs)
  2612. @register_model
  2613. def resnet51q(pretrained=False, **kwargs) -> ByobNet:
  2614. """
  2615. """
  2616. return _create_byobnet('resnet51q', pretrained=pretrained, **kwargs)
  2617. @register_model
  2618. def resnet61q(pretrained=False, **kwargs) -> ByobNet:
  2619. """
  2620. """
  2621. return _create_byobnet('resnet61q', pretrained=pretrained, **kwargs)
  2622. @register_model
  2623. def resnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2624. """
  2625. """
  2626. return _create_byobnet('resnext26ts', pretrained=pretrained, **kwargs)
  2627. @register_model
  2628. def gcresnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2629. """
  2630. """
  2631. return _create_byobnet('gcresnext26ts', pretrained=pretrained, **kwargs)
  2632. @register_model
  2633. def seresnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2634. """
  2635. """
  2636. return _create_byobnet('seresnext26ts', pretrained=pretrained, **kwargs)
  2637. @register_model
  2638. def eca_resnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2639. """
  2640. """
  2641. return _create_byobnet('eca_resnext26ts', pretrained=pretrained, **kwargs)
  2642. @register_model
  2643. def bat_resnext26ts(pretrained=False, **kwargs) -> ByobNet:
  2644. """
  2645. """
  2646. return _create_byobnet('bat_resnext26ts', pretrained=pretrained, **kwargs)
  2647. @register_model
  2648. def resnet32ts(pretrained=False, **kwargs) -> ByobNet:
  2649. """
  2650. """
  2651. return _create_byobnet('resnet32ts', pretrained=pretrained, **kwargs)
  2652. @register_model
  2653. def resnet33ts(pretrained=False, **kwargs) -> ByobNet:
  2654. """
  2655. """
  2656. return _create_byobnet('resnet33ts', pretrained=pretrained, **kwargs)
  2657. @register_model
  2658. def gcresnet33ts(pretrained=False, **kwargs) -> ByobNet:
  2659. """
  2660. """
  2661. return _create_byobnet('gcresnet33ts', pretrained=pretrained, **kwargs)
  2662. @register_model
  2663. def seresnet33ts(pretrained=False, **kwargs) -> ByobNet:
  2664. """
  2665. """
  2666. return _create_byobnet('seresnet33ts', pretrained=pretrained, **kwargs)
  2667. @register_model
  2668. def eca_resnet33ts(pretrained=False, **kwargs) -> ByobNet:
  2669. """
  2670. """
  2671. return _create_byobnet('eca_resnet33ts', pretrained=pretrained, **kwargs)
  2672. @register_model
  2673. def gcresnet50t(pretrained=False, **kwargs) -> ByobNet:
  2674. """
  2675. """
  2676. return _create_byobnet('gcresnet50t', pretrained=pretrained, **kwargs)
  2677. @register_model
  2678. def gcresnext50ts(pretrained=False, **kwargs) -> ByobNet:
  2679. """
  2680. """
  2681. return _create_byobnet('gcresnext50ts', pretrained=pretrained, **kwargs)
  2682. @register_model
  2683. def regnetz_b16(pretrained=False, **kwargs) -> ByobNet:
  2684. """
  2685. """
  2686. return _create_byobnet('regnetz_b16', pretrained=pretrained, **kwargs)
  2687. @register_model
  2688. def regnetz_c16(pretrained=False, **kwargs) -> ByobNet:
  2689. """
  2690. """
  2691. return _create_byobnet('regnetz_c16', pretrained=pretrained, **kwargs)
  2692. @register_model
  2693. def regnetz_d32(pretrained=False, **kwargs) -> ByobNet:
  2694. """
  2695. """
  2696. return _create_byobnet('regnetz_d32', pretrained=pretrained, **kwargs)
  2697. @register_model
  2698. def regnetz_d8(pretrained=False, **kwargs) -> ByobNet:
  2699. """
  2700. """
  2701. return _create_byobnet('regnetz_d8', pretrained=pretrained, **kwargs)
  2702. @register_model
  2703. def regnetz_e8(pretrained=False, **kwargs) -> ByobNet:
  2704. """
  2705. """
  2706. return _create_byobnet('regnetz_e8', pretrained=pretrained, **kwargs)
  2707. @register_model
  2708. def regnetz_b16_evos(pretrained=False, **kwargs) -> ByobNet:
  2709. """
  2710. """
  2711. return _create_byobnet('regnetz_b16_evos', pretrained=pretrained, **kwargs)
  2712. @register_model
  2713. def regnetz_c16_evos(pretrained=False, **kwargs) -> ByobNet:
  2714. """
  2715. """
  2716. return _create_byobnet('regnetz_c16_evos', pretrained=pretrained, **kwargs)
  2717. @register_model
  2718. def regnetz_d8_evos(pretrained=False, **kwargs) -> ByobNet:
  2719. """
  2720. """
  2721. return _create_byobnet('regnetz_d8_evos', pretrained=pretrained, **kwargs)
  2722. @register_model
  2723. def mobileone_s0(pretrained=False, **kwargs) -> ByobNet:
  2724. """
  2725. """
  2726. return _create_byobnet('mobileone_s0', pretrained=pretrained, **kwargs)
  2727. @register_model
  2728. def mobileone_s1(pretrained=False, **kwargs) -> ByobNet:
  2729. """
  2730. """
  2731. return _create_byobnet('mobileone_s1', pretrained=pretrained, **kwargs)
  2732. @register_model
  2733. def mobileone_s2(pretrained=False, **kwargs) -> ByobNet:
  2734. """
  2735. """
  2736. return _create_byobnet('mobileone_s2', pretrained=pretrained, **kwargs)
  2737. @register_model
  2738. def mobileone_s3(pretrained=False, **kwargs) -> ByobNet:
  2739. """
  2740. """
  2741. return _create_byobnet('mobileone_s3', pretrained=pretrained, **kwargs)
  2742. @register_model
  2743. def mobileone_s4(pretrained=False, **kwargs) -> ByobNet:
  2744. """
  2745. """
  2746. return _create_byobnet('mobileone_s4', pretrained=pretrained, **kwargs)
  2747. @register_model
  2748. def resnet50_clip(pretrained=False, **kwargs) -> ByobNet:
  2749. """ OpenAI Modified ResNet-50 CLIP image tower
  2750. """
  2751. return _create_byobnet('resnet50_clip', pretrained=pretrained, **kwargs)
  2752. @register_model
  2753. def resnet101_clip(pretrained=False, **kwargs) -> ByobNet:
  2754. """ OpenAI Modified ResNet-101 CLIP image tower
  2755. """
  2756. return _create_byobnet('resnet101_clip', pretrained=pretrained, **kwargs)
  2757. @register_model
  2758. def resnet50x4_clip(pretrained=False, **kwargs) -> ByobNet:
  2759. """ OpenAI Modified ResNet-50x4 CLIP image tower
  2760. """
  2761. return _create_byobnet('resnet50x4_clip', pretrained=pretrained, **kwargs)
  2762. @register_model
  2763. def resnet50x16_clip(pretrained=False, **kwargs) -> ByobNet:
  2764. """ OpenAI Modified ResNet-50x16 CLIP image tower
  2765. """
  2766. return _create_byobnet('resnet50x16_clip', pretrained=pretrained, **kwargs)
  2767. @register_model
  2768. def resnet50x64_clip(pretrained=False, **kwargs) -> ByobNet:
  2769. """ OpenAI Modified ResNet-50x64 CLIP image tower
  2770. """
  2771. return _create_byobnet('resnet50x64_clip', pretrained=pretrained, **kwargs)
  2772. @register_model
  2773. def resnet50_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2774. """ OpenAI Modified ResNet-50 CLIP image tower w/ avg pool (no attention pool)
  2775. """
  2776. return _create_byobnet('resnet50_clip_gap', pretrained=pretrained, **kwargs)
  2777. @register_model
  2778. def resnet101_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2779. """ OpenAI Modified ResNet-101 CLIP image tower w/ avg pool (no attention pool)
  2780. """
  2781. return _create_byobnet('resnet101_clip_gap', pretrained=pretrained, **kwargs)
  2782. @register_model
  2783. def resnet50x4_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2784. """ OpenAI Modified ResNet-50x4 CLIP image tower w/ avg pool (no attention pool)
  2785. """
  2786. return _create_byobnet('resnet50x4_clip_gap', pretrained=pretrained, **kwargs)
  2787. @register_model
  2788. def resnet50x16_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2789. """ OpenAI Modified ResNet-50x16 CLIP image tower w/ avg pool (no attention pool)
  2790. """
  2791. return _create_byobnet('resnet50x16_clip_gap', pretrained=pretrained, **kwargs)
  2792. @register_model
  2793. def resnet50x64_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  2794. """ OpenAI Modified ResNet-50x64 CLIP image tower w/ avg pool (no attention pool)
  2795. """
  2796. return _create_byobnet('resnet50x64_clip_gap', pretrained=pretrained, **kwargs)
  2797. @register_model
  2798. def resnet50_mlp(pretrained=False, **kwargs) -> ByobNet:
  2799. """
  2800. """
  2801. return _create_byobnet('resnet50_mlp', pretrained=pretrained, **kwargs)
  2802. @register_model
  2803. def test_byobnet(pretrained=False, **kwargs) -> ByobNet:
  2804. """ Minimal test ResNet (BYOB based) model.
  2805. """
  2806. return _create_byobnet('test_byobnet', pretrained=pretrained, **kwargs)