pooling.py 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550
  1. import torch.nn.functional as F
  2. from torch import Tensor
  3. from torch.nn.common_types import (
  4. _ratio_2_t,
  5. _ratio_3_t,
  6. _size_1_t,
  7. _size_2_opt_t,
  8. _size_2_t,
  9. _size_3_opt_t,
  10. _size_3_t,
  11. _size_any_opt_t,
  12. _size_any_t,
  13. )
  14. from .module import Module
  15. from .utils import _pair, _single, _triple
# Public API of this module: only the concrete pooling layers listed here are
# exported by ``from ... import *``. The private ``_*Nd`` base classes defined
# in this file are not part of the list.
__all__ = [
    "MaxPool1d",
    "MaxPool2d",
    "MaxPool3d",
    "MaxUnpool1d",
    "MaxUnpool2d",
    "MaxUnpool3d",
    "AvgPool1d",
    "AvgPool2d",
    "AvgPool3d",
    "FractionalMaxPool2d",
    "FractionalMaxPool3d",
    "LPPool1d",
    "LPPool2d",
    "LPPool3d",
    "AdaptiveMaxPool1d",
    "AdaptiveMaxPool2d",
    "AdaptiveMaxPool3d",
    "AdaptiveAvgPool1d",
    "AdaptiveAvgPool2d",
    "AdaptiveAvgPool3d",
]
  38. class _MaxPoolNd(Module):
  39. __constants__ = [
  40. "kernel_size",
  41. "stride",
  42. "padding",
  43. "dilation",
  44. "return_indices",
  45. "ceil_mode",
  46. ]
  47. return_indices: bool
  48. ceil_mode: bool
  49. def __init__(
  50. self,
  51. kernel_size: _size_any_t,
  52. stride: _size_any_t | None = None,
  53. padding: _size_any_t = 0,
  54. dilation: _size_any_t = 1,
  55. return_indices: bool = False,
  56. ceil_mode: bool = False,
  57. ) -> None:
  58. super().__init__()
  59. self.kernel_size = kernel_size
  60. self.stride = stride if (stride is not None) else kernel_size
  61. self.padding = padding
  62. self.dilation = dilation
  63. self.return_indices = return_indices
  64. self.ceil_mode = ceil_mode
  65. def extra_repr(self) -> str:
  66. return (
  67. "kernel_size={kernel_size}, stride={stride}, padding={padding}"
  68. ", dilation={dilation}, ceil_mode={ceil_mode}".format(**self.__dict__)
  69. )
  70. class MaxPool1d(_MaxPoolNd):
  71. r"""Applies a 1D max pooling over an input signal composed of several input planes.
  72. In the simplest case, the output value of the layer with input size :math:`(N, C, L)`
  73. and output :math:`(N, C, L_{out})` can be precisely described as:
  74. .. math::
  75. out(N_i, C_j, k) = \max_{m=0, \ldots, \text{kernel\_size} - 1}
  76. input(N_i, C_j, stride \times k + m)
  77. If :attr:`padding` is non-zero, then the input is implicitly padded with negative infinity on both sides
  78. for :attr:`padding` number of points. :attr:`dilation` is the stride between the elements within the
  79. sliding window. This `link`_ has a nice visualization of the pooling parameters.
  80. Note:
  81. When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
  82. or the input. Sliding windows that would start in the right padded region are ignored.
  83. Args:
  84. kernel_size: The size of the sliding window, must be > 0.
  85. stride: The stride of the sliding window, must be > 0. Default value is :attr:`kernel_size`.
  86. padding: Implicit negative infinity padding to be added on both sides, must be >= 0 and <= kernel_size / 2.
  87. dilation: The stride between elements within a sliding window, must be > 0.
  88. return_indices: If ``True``, will return the argmax along with the max values.
  89. Useful for :class:`torch.nn.MaxUnpool1d` later
  90. ceil_mode: If ``True``, will use `ceil` instead of `floor` to compute the output shape. This
  91. ensures that every element in the input tensor is covered by a sliding window.
  92. Shape:
  93. - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
  94. - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`,
  95. where ``ceil_mode = False``
  96. .. math::
  97. L_{out} = \left\lfloor \frac{L_{in} + 2 \times \text{padding} - \text{dilation}
  98. \times (\text{kernel\_size} - 1) - 1}{\text{stride}}\right\rfloor + 1
  99. where ``ceil_mode = True``
  100. .. math::
  101. L_{out} = \left\lceil \frac{L_{in} + 2 \times \text{padding} - \text{dilation}
  102. \times (\text{kernel\_size} - 1) - 1 + (stride - 1)}{\text{stride}}\right\rceil + 1
  103. - Ensure that the last pooling starts inside the image, make :math:`L_{out} = L_{out} - 1`
  104. when :math:`(L_{out} - 1) * \text{stride} >= L_{in} + \text{padding}`.
  105. Examples::
  106. >>> # pool of size=3, stride=2
  107. >>> m = nn.MaxPool1d(3, stride=2)
  108. >>> input = torch.randn(20, 16, 50)
  109. >>> output = m(input)
  110. .. _link:
  111. https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
  112. """
  113. kernel_size: _size_1_t
  114. stride: _size_1_t
  115. padding: _size_1_t
  116. dilation: _size_1_t
  117. def forward(self, input: Tensor):
  118. """Runs the forward pass."""
  119. return F.max_pool1d(
  120. input,
  121. self.kernel_size,
  122. self.stride,
  123. self.padding,
  124. self.dilation,
  125. ceil_mode=self.ceil_mode,
  126. return_indices=self.return_indices,
  127. )
  128. class MaxPool2d(_MaxPoolNd):
  129. r"""Applies a 2D max pooling over an input signal composed of several input planes.
  130. In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
  131. output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
  132. can be precisely described as:
  133. .. math::
  134. \begin{aligned}
  135. out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
  136. & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
  137. \text{stride[1]} \times w + n)
  138. \end{aligned}
  139. If :attr:`padding` is non-zero, then the input is implicitly padded with negative infinity on both sides
  140. for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points.
  141. It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
  142. Note:
  143. When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
  144. or the input. Sliding windows that would start in the right padded region are ignored.
  145. The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
  146. - a single ``int`` -- in which case the same value is used for the height and width dimension
  147. - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
  148. and the second `int` for the width dimension
  149. Args:
  150. kernel_size: the size of the window to take a max over
  151. stride: the stride of the window. Default value is :attr:`kernel_size`
  152. padding: Implicit negative infinity padding to be added on both sides
  153. dilation: a parameter that controls the stride of elements in the window
  154. return_indices: if ``True``, will return the max indices along with the outputs.
  155. Useful for :class:`torch.nn.MaxUnpool2d` later
  156. ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
  157. Shape:
  158. - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`
  159. - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where
  160. .. math::
  161. H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]}
  162. \times (\text{kernel\_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor
  163. .. math::
  164. W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]}
  165. \times (\text{kernel\_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor
  166. Examples::
  167. >>> # pool of square window of size=3, stride=2
  168. >>> m = nn.MaxPool2d(3, stride=2)
  169. >>> # pool of non-square window
  170. >>> m = nn.MaxPool2d((3, 2), stride=(2, 1))
  171. >>> input = torch.randn(20, 16, 50, 32)
  172. >>> output = m(input)
  173. .. _link:
  174. https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
  175. """
  176. kernel_size: _size_2_t
  177. stride: _size_2_t
  178. padding: _size_2_t
  179. dilation: _size_2_t
  180. def forward(self, input: Tensor):
  181. """Runs the forward pass."""
  182. return F.max_pool2d(
  183. input,
  184. self.kernel_size,
  185. self.stride,
  186. self.padding,
  187. self.dilation,
  188. ceil_mode=self.ceil_mode,
  189. return_indices=self.return_indices,
  190. )
  191. class MaxPool3d(_MaxPoolNd):
  192. r"""Applies a 3D max pooling over an input signal composed of several input planes.
  193. In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
  194. output :math:`(N, C, D_{out}, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kD, kH, kW)`
  195. can be precisely described as:
  196. .. math::
  197. \begin{aligned}
  198. \text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, kD-1} \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
  199. & \text{input}(N_i, C_j, \text{stride[0]} \times d + k,
  200. \text{stride[1]} \times h + m, \text{stride[2]} \times w + n)
  201. \end{aligned}
  202. If :attr:`padding` is non-zero, then the input is implicitly padded with negative infinity on both sides
  203. for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points.
  204. It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
  205. Note:
  206. When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
  207. or the input. Sliding windows that would start in the right padded region are ignored.
  208. The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:
  209. - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
  210. - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
  211. the second `int` for the height dimension and the third `int` for the width dimension
  212. Args:
  213. kernel_size: the size of the window to take a max over
  214. stride: the stride of the window. Default value is :attr:`kernel_size`
  215. padding: Implicit negative infinity padding to be added on all three sides
  216. dilation: a parameter that controls the stride of elements in the window
  217. return_indices: if ``True``, will return the max indices along with the outputs.
  218. Useful for :class:`torch.nn.MaxUnpool3d` later
  219. ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
  220. Shape:
  221. - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
  222. - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(C, D_{out}, H_{out}, W_{out})`, where
  223. .. math::
  224. D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
  225. (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
  226. .. math::
  227. H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
  228. (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
  229. .. math::
  230. W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
  231. (\text{kernel\_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor
  232. Examples::
  233. >>> # pool of square window of size=3, stride=2
  234. >>> m = nn.MaxPool3d(3, stride=2)
  235. >>> # pool of non-square window
  236. >>> m = nn.MaxPool3d((3, 2, 2), stride=(2, 1, 2))
  237. >>> input = torch.randn(20, 16, 50, 44, 31)
  238. >>> output = m(input)
  239. .. _link:
  240. https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
  241. """
  242. kernel_size: _size_3_t
  243. stride: _size_3_t
  244. padding: _size_3_t
  245. dilation: _size_3_t
  246. def forward(self, input: Tensor):
  247. """Runs the forward pass."""
  248. return F.max_pool3d(
  249. input,
  250. self.kernel_size,
  251. self.stride,
  252. self.padding,
  253. self.dilation,
  254. ceil_mode=self.ceil_mode,
  255. return_indices=self.return_indices,
  256. )
  257. class _MaxUnpoolNd(Module):
  258. def extra_repr(self) -> str:
  259. return f"kernel_size={self.kernel_size}, stride={self.stride}, padding={self.padding}"
  260. class MaxUnpool1d(_MaxUnpoolNd):
  261. r"""Computes a partial inverse of :class:`MaxPool1d`.
  262. :class:`MaxPool1d` is not fully invertible, since the non-maximal values are lost.
  263. :class:`MaxUnpool1d` takes in as input the output of :class:`MaxPool1d`
  264. including the indices of the maximal values and computes a partial inverse
  265. in which all non-maximal values are set to zero.
  266. Note:
  267. This operation may behave nondeterministically when the input indices has repeat values.
  268. See https://github.com/pytorch/pytorch/issues/80827 and :doc:`/notes/randomness` for more information.
  269. .. note:: :class:`MaxPool1d` can map several input sizes to the same output
  270. sizes. Hence, the inversion process can get ambiguous.
  271. To accommodate this, you can provide the needed output size
  272. as an additional argument :attr:`output_size` in the forward call.
  273. See the Inputs and Example below.
  274. Args:
  275. kernel_size (int or tuple): Size of the max pooling window.
  276. stride (int or tuple): Stride of the max pooling window.
  277. It is set to :attr:`kernel_size` by default.
  278. padding (int or tuple): Padding that was added to the input
  279. Inputs:
  280. - `input`: the input Tensor to invert
  281. - `indices`: the indices given out by :class:`~torch.nn.MaxPool1d`
  282. - `output_size` (optional): the targeted output size
  283. Shape:
  284. - Input: :math:`(N, C, H_{in})` or :math:`(C, H_{in})`.
  285. - Output: :math:`(N, C, H_{out})` or :math:`(C, H_{out})`, where
  286. .. math::
  287. H_{out} = (H_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{kernel\_size}[0]
  288. or as given by :attr:`output_size` in the call operator
  289. Example::
  290. >>> # xdoctest: +IGNORE_WANT("do other tests modify the global state?")
  291. >>> pool = nn.MaxPool1d(2, stride=2, return_indices=True)
  292. >>> unpool = nn.MaxUnpool1d(2, stride=2)
  293. >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8]]])
  294. >>> output, indices = pool(input)
  295. >>> unpool(output, indices)
  296. tensor([[[ 0., 2., 0., 4., 0., 6., 0., 8.]]])
  297. >>> # Example showcasing the use of output_size
  298. >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8, 9]]])
  299. >>> output, indices = pool(input)
  300. >>> unpool(output, indices, output_size=input.size())
  301. tensor([[[ 0., 2., 0., 4., 0., 6., 0., 8., 0.]]])
  302. >>> unpool(output, indices)
  303. tensor([[[ 0., 2., 0., 4., 0., 6., 0., 8.]]])
  304. """
  305. kernel_size: _size_1_t
  306. stride: _size_1_t
  307. padding: _size_1_t
  308. def __init__(
  309. self,
  310. kernel_size: _size_1_t,
  311. stride: _size_1_t | None = None,
  312. padding: _size_1_t = 0,
  313. ) -> None:
  314. super().__init__()
  315. self.kernel_size = _single(kernel_size)
  316. self.stride = _single(stride if (stride is not None) else kernel_size)
  317. self.padding = _single(padding)
  318. def forward(
  319. self, input: Tensor, indices: Tensor, output_size: list[int] | None = None
  320. ) -> Tensor:
  321. """Runs the forward pass."""
  322. return F.max_unpool1d(
  323. input, indices, self.kernel_size, self.stride, self.padding, output_size
  324. )
  325. class MaxUnpool2d(_MaxUnpoolNd):
  326. r"""Computes a partial inverse of :class:`MaxPool2d`.
  327. :class:`MaxPool2d` is not fully invertible, since the non-maximal values are lost.
  328. :class:`MaxUnpool2d` takes in as input the output of :class:`MaxPool2d`
  329. including the indices of the maximal values and computes a partial inverse
  330. in which all non-maximal values are set to zero.
  331. Note:
  332. This operation may behave nondeterministically when the input indices has repeat values.
  333. See https://github.com/pytorch/pytorch/issues/80827 and :doc:`/notes/randomness` for more information.
  334. .. note:: :class:`MaxPool2d` can map several input sizes to the same output
  335. sizes. Hence, the inversion process can get ambiguous.
  336. To accommodate this, you can provide the needed output size
  337. as an additional argument :attr:`output_size` in the forward call.
  338. See the Inputs and Example below.
  339. Args:
  340. kernel_size (int or tuple): Size of the max pooling window.
  341. stride (int or tuple): Stride of the max pooling window.
  342. It is set to :attr:`kernel_size` by default.
  343. padding (int or tuple): Padding that was added to the input
  344. Inputs:
  345. - `input`: the input Tensor to invert
  346. - `indices`: the indices given out by :class:`~torch.nn.MaxPool2d`
  347. - `output_size` (optional): the targeted output size
  348. Shape:
  349. - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
  350. - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where
  351. .. math::
  352. H_{out} = (H_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}
  353. .. math::
  354. W_{out} = (W_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}
  355. or as given by :attr:`output_size` in the call operator
  356. Example::
  357. >>> pool = nn.MaxPool2d(2, stride=2, return_indices=True)
  358. >>> unpool = nn.MaxUnpool2d(2, stride=2)
  359. >>> input = torch.tensor([[[[ 1., 2., 3., 4.],
  360. [ 5., 6., 7., 8.],
  361. [ 9., 10., 11., 12.],
  362. [13., 14., 15., 16.]]]])
  363. >>> output, indices = pool(input)
  364. >>> unpool(output, indices)
  365. tensor([[[[ 0., 0., 0., 0.],
  366. [ 0., 6., 0., 8.],
  367. [ 0., 0., 0., 0.],
  368. [ 0., 14., 0., 16.]]]])
  369. >>> # Now using output_size to resolve an ambiguous size for the inverse
  370. >>> input = torch.tensor([[[[ 1., 2., 3., 4., 5.],
  371. [ 6., 7., 8., 9., 10.],
  372. [11., 12., 13., 14., 15.],
  373. [16., 17., 18., 19., 20.]]]])
  374. >>> output, indices = pool(input)
  375. >>> # This call will not work without specifying output_size
  376. >>> unpool(output, indices, output_size=input.size())
  377. tensor([[[[ 0., 0., 0., 0., 0.],
  378. [ 0., 7., 0., 9., 0.],
  379. [ 0., 0., 0., 0., 0.],
  380. [ 0., 17., 0., 19., 0.]]]])
  381. """
  382. kernel_size: _size_2_t
  383. stride: _size_2_t
  384. padding: _size_2_t
  385. def __init__(
  386. self,
  387. kernel_size: _size_2_t,
  388. stride: _size_2_t | None = None,
  389. padding: _size_2_t = 0,
  390. ) -> None:
  391. super().__init__()
  392. self.kernel_size = _pair(kernel_size)
  393. self.stride = _pair(stride if (stride is not None) else kernel_size)
  394. self.padding = _pair(padding)
  395. def forward(
  396. self, input: Tensor, indices: Tensor, output_size: list[int] | None = None
  397. ) -> Tensor:
  398. """Runs the forward pass."""
  399. return F.max_unpool2d(
  400. input, indices, self.kernel_size, self.stride, self.padding, output_size
  401. )
  402. class MaxUnpool3d(_MaxUnpoolNd):
  403. r"""Computes a partial inverse of :class:`MaxPool3d`.
  404. :class:`MaxPool3d` is not fully invertible, since the non-maximal values are lost.
  405. :class:`MaxUnpool3d` takes in as input the output of :class:`MaxPool3d`
  406. including the indices of the maximal values and computes a partial inverse
  407. in which all non-maximal values are set to zero.
  408. Note:
  409. This operation may behave nondeterministically when the input indices has repeat values.
  410. See https://github.com/pytorch/pytorch/issues/80827 and :doc:`/notes/randomness` for more information.
  411. .. note:: :class:`MaxPool3d` can map several input sizes to the same output
  412. sizes. Hence, the inversion process can get ambiguous.
  413. To accommodate this, you can provide the needed output size
  414. as an additional argument :attr:`output_size` in the forward call.
  415. See the Inputs section below.
  416. Args:
  417. kernel_size (int or tuple): Size of the max pooling window.
  418. stride (int or tuple): Stride of the max pooling window.
  419. It is set to :attr:`kernel_size` by default.
  420. padding (int or tuple): Padding that was added to the input
  421. Inputs:
  422. - `input`: the input Tensor to invert
  423. - `indices`: the indices given out by :class:`~torch.nn.MaxPool3d`
  424. - `output_size` (optional): the targeted output size
  425. Shape:
  426. - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
  427. - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(C, D_{out}, H_{out}, W_{out})`, where
  428. .. math::
  429. D_{out} = (D_{in} - 1) \times \text{stride[0]} - 2 \times \text{padding[0]} + \text{kernel\_size[0]}
  430. .. math::
  431. H_{out} = (H_{in} - 1) \times \text{stride[1]} - 2 \times \text{padding[1]} + \text{kernel\_size[1]}
  432. .. math::
  433. W_{out} = (W_{in} - 1) \times \text{stride[2]} - 2 \times \text{padding[2]} + \text{kernel\_size[2]}
  434. or as given by :attr:`output_size` in the call operator
  435. Example::
  436. >>> # pool of square window of size=3, stride=2
  437. >>> pool = nn.MaxPool3d(3, stride=2, return_indices=True)
  438. >>> unpool = nn.MaxUnpool3d(3, stride=2)
  439. >>> output, indices = pool(torch.randn(20, 16, 51, 33, 15))
  440. >>> unpooled_output = unpool(output, indices)
  441. >>> unpooled_output.size()
  442. torch.Size([20, 16, 51, 33, 15])
  443. """
  444. kernel_size: _size_3_t
  445. stride: _size_3_t
  446. padding: _size_3_t
  447. def __init__(
  448. self,
  449. kernel_size: _size_3_t,
  450. stride: _size_3_t | None = None,
  451. padding: _size_3_t = 0,
  452. ) -> None:
  453. super().__init__()
  454. self.kernel_size = _triple(kernel_size)
  455. self.stride = _triple(stride if (stride is not None) else kernel_size)
  456. self.padding = _triple(padding)
  457. def forward(
  458. self, input: Tensor, indices: Tensor, output_size: list[int] | None = None
  459. ) -> Tensor:
  460. """Runs the forward pass."""
  461. return F.max_unpool3d(
  462. input, indices, self.kernel_size, self.stride, self.padding, output_size
  463. )
  464. class _AvgPoolNd(Module):
  465. __constants__ = [
  466. "kernel_size",
  467. "stride",
  468. "padding",
  469. "ceil_mode",
  470. "count_include_pad",
  471. ]
  472. def extra_repr(self) -> str:
  473. return f"kernel_size={self.kernel_size}, stride={self.stride}, padding={self.padding}"
  474. class AvgPool1d(_AvgPoolNd):
  475. r"""Applies a 1D average pooling over an input signal composed of several input planes.
  476. In the simplest case, the output value of the layer with input size :math:`(N, C, L)`,
  477. output :math:`(N, C, L_{out})` and :attr:`kernel_size` :math:`k`
  478. can be precisely described as:
  479. .. math::
  480. \text{out}(N_i, C_j, l) = \frac{1}{k} \sum_{m=0}^{k-1}
  481. \text{input}(N_i, C_j, \text{stride} \times l + m)
  482. If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
  483. for :attr:`padding` number of points.
  484. Note:
  485. When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
  486. or the input. Sliding windows that would start in the right padded region are ignored.
  487. .. note::
  488. pad should be at most half of effective kernel size.
  489. The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can each be
  490. an ``int`` or a one-element tuple.
  491. Args:
  492. kernel_size: the size of the window
  493. stride: the stride of the window. Default value is :attr:`kernel_size`
  494. padding: implicit zero padding to be added on both sides
  495. ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
  496. count_include_pad: when True, will include the zero-padding in the averaging calculation
  497. Shape:
  498. - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
  499. - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where
  500. .. math::
  501. L_{out} = \left\lfloor \frac{L_{in} +
  502. 2 \times \text{padding} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor
  503. Per the note above, if ``ceil_mode`` is True and :math:`(L_{out} - 1) \times \text{stride} \geq L_{in}
  504. + \text{padding}`, we skip the last window as it would start in the right padded region, resulting in
  505. :math:`L_{out}` being reduced by one.
  506. Examples::
  507. >>> # pool with window of size=3, stride=2
  508. >>> m = nn.AvgPool1d(3, stride=2)
  509. >>> m(torch.tensor([[[1., 2, 3, 4, 5, 6, 7]]]))
  510. tensor([[[2., 4., 6.]]])
  511. """
  512. kernel_size: _size_1_t
  513. stride: _size_1_t
  514. padding: _size_1_t
  515. ceil_mode: bool
  516. count_include_pad: bool
  517. def __init__(
  518. self,
  519. kernel_size: _size_1_t,
  520. stride: _size_1_t = None,
  521. padding: _size_1_t = 0,
  522. ceil_mode: bool = False,
  523. count_include_pad: bool = True,
  524. ) -> None:
  525. super().__init__()
  526. self.kernel_size = _single(kernel_size)
  527. self.stride = _single(stride if stride is not None else kernel_size)
  528. self.padding = _single(padding)
  529. self.ceil_mode = ceil_mode
  530. self.count_include_pad = count_include_pad
  531. def forward(self, input: Tensor) -> Tensor:
  532. """Runs the forward pass."""
  533. return F.avg_pool1d(
  534. input,
  535. self.kernel_size,
  536. self.stride,
  537. self.padding,
  538. self.ceil_mode,
  539. self.count_include_pad,
  540. )
  541. class AvgPool2d(_AvgPoolNd):
  542. r"""Applies a 2D average pooling over an input signal composed of several input planes.
  543. In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
  544. output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
  545. can be precisely described as:
  546. .. math::
  547. out(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
  548. input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
  549. If :attr:`padding` is non-zero, then the input is implicitly zero-padded on both sides
  550. for :attr:`padding` number of points.
  551. Note:
  552. When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
  553. or the input. Sliding windows that would start in the right padded region are ignored.
  554. .. note::
  555. pad should be at most half of effective kernel size.
  556. The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding` can either be:
  557. - a single ``int`` or a single-element tuple -- in which case the same value is used for the height and width dimension
  558. - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
  559. and the second `int` for the width dimension
  560. Args:
  561. kernel_size: the size of the window
  562. stride: the stride of the window. Default value is :attr:`kernel_size`
  563. padding: implicit zero padding to be added on both sides
  564. ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
  565. count_include_pad: when True, will include the zero-padding in the averaging calculation
  566. divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used.
  567. Shape:
  568. - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
  569. - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where
  570. .. math::
  571. H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] -
  572. \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor
  573. .. math::
  574. W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] -
  575. \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor
  576. Per the note above, if ``ceil_mode`` is True and :math:`(H_{out} - 1)\times \text{stride}[0]\geq H_{in}
  577. + \text{padding}[0]`, we skip the last window as it would start in the bottom padded region,
  578. resulting in :math:`H_{out}` being reduced by one.
  579. The same applies for :math:`W_{out}`.
  580. Examples::
  581. >>> # pool of square window of size=3, stride=2
  582. >>> m = nn.AvgPool2d(3, stride=2)
  583. >>> # pool of non-square window
  584. >>> m = nn.AvgPool2d((3, 2), stride=(2, 1))
  585. >>> input = torch.randn(20, 16, 50, 32)
  586. >>> output = m(input)
  587. """
  588. __constants__ = [
  589. "kernel_size",
  590. "stride",
  591. "padding",
  592. "ceil_mode",
  593. "count_include_pad",
  594. "divisor_override",
  595. ]
  596. kernel_size: _size_2_t
  597. stride: _size_2_t
  598. padding: _size_2_t
  599. ceil_mode: bool
  600. count_include_pad: bool
  601. def __init__(
  602. self,
  603. kernel_size: _size_2_t,
  604. stride: _size_2_t | None = None,
  605. padding: _size_2_t = 0,
  606. ceil_mode: bool = False,
  607. count_include_pad: bool = True,
  608. divisor_override: int | None = None,
  609. ) -> None:
  610. super().__init__()
  611. self.kernel_size = kernel_size
  612. self.stride = stride if (stride is not None) else kernel_size
  613. self.padding = padding
  614. self.ceil_mode = ceil_mode
  615. self.count_include_pad = count_include_pad
  616. self.divisor_override = divisor_override
  617. def forward(self, input: Tensor) -> Tensor:
  618. """Runs the forward pass."""
  619. return F.avg_pool2d(
  620. input,
  621. self.kernel_size,
  622. self.stride,
  623. self.padding,
  624. self.ceil_mode,
  625. self.count_include_pad,
  626. self.divisor_override,
  627. )
  628. class AvgPool3d(_AvgPoolNd):
  629. r"""Applies a 3D average pooling over an input signal composed of several input planes.
  630. In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
  631. output :math:`(N, C, D_{out}, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kD, kH, kW)`
  632. can be precisely described as:
  633. .. math::
  634. \begin{aligned}
  635. \text{out}(N_i, C_j, d, h, w) ={} & \sum_{k=0}^{kD-1} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1} \\
  636. & \frac{\text{input}(N_i, C_j, \text{stride}[0] \times d + k,
  637. \text{stride}[1] \times h + m, \text{stride}[2] \times w + n)}
  638. {kD \times kH \times kW}
  639. \end{aligned}
  640. If :attr:`padding` is non-zero, then the input is implicitly zero-padded on all three sides
  641. for :attr:`padding` number of points.
  642. Note:
  643. When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
  644. or the input. Sliding windows that would start in the right padded region are ignored.
  645. .. note::
  646. pad should be at most half of effective kernel size.
  647. The parameters :attr:`kernel_size`, :attr:`stride` can either be:
  648. - a single ``int`` -- in which case the same value is used for the depth, height and width dimension
  649. - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
  650. the second `int` for the height dimension and the third `int` for the width dimension
  651. Args:
  652. kernel_size: the size of the window
  653. stride: the stride of the window. Default value is :attr:`kernel_size`
  654. padding: implicit zero padding to be added on all three sides
  655. ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
  656. count_include_pad: when True, will include the zero-padding in the averaging calculation
  657. divisor_override: if specified, it will be used as divisor, otherwise :attr:`kernel_size` will be used
  658. Shape:
  659. - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
  660. - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or
  661. :math:`(C, D_{out}, H_{out}, W_{out})`, where
  662. .. math::
  663. D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] -
  664. \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor
  665. .. math::
  666. H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] -
  667. \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor
  668. .. math::
  669. W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] -
  670. \text{kernel\_size}[2]}{\text{stride}[2]} + 1\right\rfloor
  671. Per the note above, if ``ceil_mode`` is True and :math:`(D_{out} - 1)\times \text{stride}[0]\geq D_{in}
  672. + \text{padding}[0]`, we skip the last window as it would start in the padded region,
  673. resulting in :math:`D_{out}` being reduced by one.
  674. The same applies for :math:`W_{out}` and :math:`H_{out}`.
  675. Examples::
  676. >>> # pool of square window of size=3, stride=2
  677. >>> m = nn.AvgPool3d(3, stride=2)
  678. >>> # pool of non-square window
  679. >>> m = nn.AvgPool3d((3, 2, 2), stride=(2, 1, 2))
  680. >>> input = torch.randn(20, 16, 50, 44, 31)
  681. >>> output = m(input)
  682. """
  683. __constants__ = [
  684. "kernel_size",
  685. "stride",
  686. "padding",
  687. "ceil_mode",
  688. "count_include_pad",
  689. "divisor_override",
  690. ]
  691. kernel_size: _size_3_t
  692. stride: _size_3_t
  693. padding: _size_3_t
  694. ceil_mode: bool
  695. count_include_pad: bool
  696. def __init__(
  697. self,
  698. kernel_size: _size_3_t,
  699. stride: _size_3_t | None = None,
  700. padding: _size_3_t = 0,
  701. ceil_mode: bool = False,
  702. count_include_pad: bool = True,
  703. divisor_override: int | None = None,
  704. ) -> None:
  705. super().__init__()
  706. self.kernel_size = kernel_size
  707. self.stride = stride if (stride is not None) else kernel_size
  708. self.padding = padding
  709. self.ceil_mode = ceil_mode
  710. self.count_include_pad = count_include_pad
  711. self.divisor_override = divisor_override
  712. def forward(self, input: Tensor) -> Tensor:
  713. """Runs the forward pass."""
  714. return F.avg_pool3d(
  715. input,
  716. self.kernel_size,
  717. self.stride,
  718. self.padding,
  719. self.ceil_mode,
  720. self.count_include_pad,
  721. self.divisor_override,
  722. )
  723. def __setstate__(self, d):
  724. super().__setstate__(d)
  725. self.__dict__.setdefault("padding", 0)
  726. self.__dict__.setdefault("ceil_mode", False)
  727. self.__dict__.setdefault("count_include_pad", True)
  728. class FractionalMaxPool2d(Module):
  729. r"""Applies a 2D fractional max pooling over an input signal composed of several input planes.
  730. Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham
  731. The max-pooling operation is applied in :math:`kH \times kW` regions by a stochastic
  732. step size determined by the target output size.
  733. The number of output features is equal to the number of input planes.
  734. .. note:: Exactly one of ``output_size`` or ``output_ratio`` must be defined.
  735. Args:
  736. kernel_size: the size of the window to take a max over.
  737. Can be a single number k (for a square kernel of k x k) or a tuple `(kh, kw)`
  738. output_size: the target output size of the image of the form `oH x oW`.
  739. Can be a tuple `(oH, oW)` or a single number oH for a square image `oH x oH`.
  740. Note that we must have :math:`kH + oH - 1 <= H_{in}` and :math:`kW + oW - 1 <= W_{in}`
  741. output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
  742. This has to be a number or tuple in the range (0, 1).
  743. Note that we must have :math:`kH + (output\_ratio\_H * H_{in}) - 1 <= H_{in}`
  744. and :math:`kW + (output\_ratio\_W * W_{in}) - 1 <= W_{in}`
  745. return_indices: if ``True``, will return the indices along with the outputs.
  746. Useful to pass to :meth:`nn.MaxUnpool2d`. Default: ``False``
  747. Shape:
  748. - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
  749. - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where
  750. :math:`(H_{out}, W_{out})=\text{output\_size}` or
  751. :math:`(H_{out}, W_{out})=\text{output\_ratio} \times (H_{in}, W_{in})`.
  752. Examples:
  753. >>> # pool of square window of size=3, and target output size 13x12
  754. >>> m = nn.FractionalMaxPool2d(3, output_size=(13, 12))
  755. >>> # pool of square window and target output size being half of input image size
  756. >>> m = nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5))
  757. >>> input = torch.randn(20, 16, 50, 32)
  758. >>> output = m(input)
  759. .. _Fractional MaxPooling:
  760. https://arxiv.org/abs/1412.6071
  761. """
  762. __constants__ = ["kernel_size", "return_indices", "output_size", "output_ratio"]
  763. kernel_size: _size_2_t
  764. return_indices: bool
  765. output_size: _size_2_t
  766. output_ratio: _ratio_2_t
  767. def __init__(
  768. self,
  769. kernel_size: _size_2_t,
  770. output_size: _size_2_t | None = None,
  771. output_ratio: _ratio_2_t | None = None,
  772. return_indices: bool = False,
  773. _random_samples=None,
  774. ) -> None:
  775. super().__init__()
  776. self.kernel_size = _pair(kernel_size)
  777. self.return_indices = return_indices
  778. self.register_buffer("_random_samples", _random_samples)
  779. self.output_size = _pair(output_size) if output_size is not None else None
  780. self.output_ratio = _pair(output_ratio) if output_ratio is not None else None
  781. if output_size is None and output_ratio is None:
  782. raise ValueError(
  783. "FractionalMaxPool2d requires specifying either "
  784. "an output size, or a pooling ratio"
  785. )
  786. if output_size is not None and output_ratio is not None:
  787. raise ValueError(
  788. "only one of output_size and output_ratio may be specified"
  789. )
  790. if self.output_ratio is not None:
  791. if not (0 < self.output_ratio[0] < 1 and 0 < self.output_ratio[1] < 1):
  792. raise ValueError(
  793. f"output_ratio must be between 0 and 1 (got {output_ratio})"
  794. )
  795. def forward(self, input: Tensor):
  796. return F.fractional_max_pool2d(
  797. input,
  798. self.kernel_size,
  799. self.output_size,
  800. self.output_ratio,
  801. self.return_indices,
  802. _random_samples=self._random_samples,
  803. )
  804. class FractionalMaxPool3d(Module):
  805. r"""Applies a 3D fractional max pooling over an input signal composed of several input planes.
  806. Fractional MaxPooling is described in detail in the paper `Fractional MaxPooling`_ by Ben Graham
  807. The max-pooling operation is applied in :math:`kT \times kH \times kW` regions by a stochastic
  808. step size determined by the target output size.
  809. The number of output features is equal to the number of input planes.
  810. .. note:: Exactly one of ``output_size`` or ``output_ratio`` must be defined.
  811. Args:
  812. kernel_size: the size of the window to take a max over.
  813. Can be a single number `k` (for a square kernel of `k x k x k`) or a tuple `(kt x kh x kw)`,
  814. `k` must greater than 0.
  815. output_size: the target output size of the image of the form `oT x oH x oW`.
  816. Can be a tuple `(oT, oH, oW)` or a single number oH for a square image `oH x oH x oH`
  817. output_ratio: If one wants to have an output size as a ratio of the input size, this option can be given.
  818. This has to be a number or tuple in the range (0, 1)
  819. return_indices: if ``True``, will return the indices along with the outputs.
  820. Useful to pass to :meth:`nn.MaxUnpool3d`. Default: ``False``
  821. Shape:
  822. - Input: :math:`(N, C, T_{in}, H_{in}, W_{in})` or :math:`(C, T_{in}, H_{in}, W_{in})`.
  823. - Output: :math:`(N, C, T_{out}, H_{out}, W_{out})` or :math:`(C, T_{out}, H_{out}, W_{out})`, where
  824. :math:`(T_{out}, H_{out}, W_{out})=\text{output\_size}` or
  825. :math:`(T_{out}, H_{out}, W_{out})=\text{output\_ratio} \times (T_{in}, H_{in}, W_{in})`
  826. Examples:
  827. >>> # pool of cubic window of size=3, and target output size 13x12x11
  828. >>> m = nn.FractionalMaxPool3d(3, output_size=(13, 12, 11))
  829. >>> # pool of cubic window and target output size being half of input size
  830. >>> m = nn.FractionalMaxPool3d(3, output_ratio=(0.5, 0.5, 0.5))
  831. >>> input = torch.randn(20, 16, 50, 32, 16)
  832. >>> output = m(input)
  833. .. _Fractional MaxPooling:
  834. https://arxiv.org/abs/1412.6071
  835. """
  836. __constants__ = ["kernel_size", "return_indices", "output_size", "output_ratio"]
  837. kernel_size: _size_3_t
  838. return_indices: bool
  839. output_size: _size_3_t
  840. output_ratio: _ratio_3_t
  841. def __init__(
  842. self,
  843. kernel_size: _size_3_t,
  844. output_size: _size_3_t | None = None,
  845. output_ratio: _ratio_3_t | None = None,
  846. return_indices: bool = False,
  847. _random_samples=None,
  848. ) -> None:
  849. super().__init__()
  850. if (isinstance(kernel_size, int) and kernel_size <= 0) or (
  851. isinstance(kernel_size, (tuple, list))
  852. and not all(k > 0 for k in kernel_size)
  853. ):
  854. raise ValueError(f"kernel_size must greater than 0, but got {kernel_size}")
  855. self.kernel_size = _triple(kernel_size)
  856. self.return_indices = return_indices
  857. self.register_buffer("_random_samples", _random_samples)
  858. self.output_size = _triple(output_size) if output_size is not None else None
  859. self.output_ratio = _triple(output_ratio) if output_ratio is not None else None
  860. if output_size is None and output_ratio is None:
  861. raise ValueError(
  862. "FractionalMaxPool3d requires specifying either "
  863. "an output size, or a pooling ratio"
  864. )
  865. if output_size is not None and output_ratio is not None:
  866. raise ValueError(
  867. "only one of output_size and output_ratio may be specified"
  868. )
  869. if self.output_ratio is not None:
  870. if not (
  871. 0 < self.output_ratio[0] < 1
  872. and 0 < self.output_ratio[1] < 1
  873. and 0 < self.output_ratio[2] < 1
  874. ):
  875. raise ValueError(
  876. f"output_ratio must be between 0 and 1 (got {output_ratio})"
  877. )
  878. def forward(self, input: Tensor):
  879. return F.fractional_max_pool3d(
  880. input,
  881. self.kernel_size,
  882. self.output_size,
  883. self.output_ratio,
  884. self.return_indices,
  885. _random_samples=self._random_samples,
  886. )
  887. class _LPPoolNd(Module):
  888. __constants__ = ["norm_type", "kernel_size", "stride", "ceil_mode"]
  889. norm_type: float
  890. ceil_mode: bool
  891. def __init__(
  892. self,
  893. norm_type: float,
  894. kernel_size: _size_any_t,
  895. stride: _size_any_t | None = None,
  896. ceil_mode: bool = False,
  897. ) -> None:
  898. super().__init__()
  899. self.norm_type = norm_type
  900. self.kernel_size = kernel_size
  901. self.stride = stride
  902. self.ceil_mode = ceil_mode
  903. def extra_repr(self) -> str:
  904. return (
  905. "norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, "
  906. "ceil_mode={ceil_mode}".format(**self.__dict__)
  907. )
  908. class LPPool1d(_LPPoolNd):
  909. r"""Applies a 1D power-average pooling over an input signal composed of several input planes.
  910. On each window, the function computed is:
  911. .. math::
  912. f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}
  913. - At p = :math:`\infty`, one gets Max Pooling
  914. - At p = 1, one gets Sum Pooling (which is proportional to Average Pooling)
  915. .. note:: If the sum to the power of `p` is zero, the gradient of this function is
  916. not defined. This implementation will set the gradient to zero in this case.
  917. Args:
  918. kernel_size: a single int, the size of the window
  919. stride: a single int, the stride of the window. Default value is :attr:`kernel_size`
  920. ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
  921. Note:
  922. When :attr:`ceil_mode` is ``True``, sliding windows may go off-bounds if they start within the
  923. left padding or the input. Sliding windows that would start in the right padded region are ignored.
  924. Shape:
  925. - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
  926. - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where
  927. .. math::
  928. L_{out} = \left\lfloor\frac{L_{in} - \text{kernel\_size}}{\text{stride}} + 1\right\rfloor
  929. Examples::
  930. >>> # power-2 pool of window of length 3, with stride 2.
  931. >>> m = nn.LPPool1d(2, 3, stride=2)
  932. >>> input = torch.randn(20, 16, 50)
  933. >>> output = m(input)
  934. """
  935. kernel_size: _size_1_t
  936. stride: _size_1_t
  937. def forward(self, input: Tensor) -> Tensor:
  938. """Runs the forward pass."""
  939. return F.lp_pool1d(
  940. input, float(self.norm_type), self.kernel_size, self.stride, self.ceil_mode
  941. )
  942. class LPPool2d(_LPPoolNd):
  943. r"""Applies a 2D power-average pooling over an input signal composed of several input planes.
  944. On each window, the function computed is:
  945. .. math::
  946. f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}
  947. - At p = :math:`\infty`, one gets Max Pooling
  948. - At p = 1, one gets Sum Pooling (which is proportional to average pooling)
  949. The parameters :attr:`kernel_size`, :attr:`stride` can either be:
  950. - a single ``int`` -- in which case the same value is used for the height and width dimension
  951. - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
  952. and the second `int` for the width dimension
  953. .. note:: If the sum to the power of `p` is zero, the gradient of this function is
  954. not defined. This implementation will set the gradient to zero in this case.
  955. Args:
  956. kernel_size: the size of the window
  957. stride: the stride of the window. Default value is :attr:`kernel_size`
  958. ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
  959. Note:
  960. When :attr:`ceil_mode` is ``True``, sliding windows may go off-bounds if they start within the
  961. left padding or the input. Sliding windows that would start in the right padded region are ignored.
  962. Shape:
  963. - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
  964. - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where
  965. .. math::
  966. H_{out} = \left\lfloor\frac{H_{in} - \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor
  967. .. math::
  968. W_{out} = \left\lfloor\frac{W_{in} - \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor
  969. Examples::
  970. >>> # power-2 pool of square window of size=3, stride=2
  971. >>> m = nn.LPPool2d(2, 3, stride=2)
  972. >>> # pool of non-square window of power 1.2
  973. >>> m = nn.LPPool2d(1.2, (3, 2), stride=(2, 1))
  974. >>> input = torch.randn(20, 16, 50, 32)
  975. >>> output = m(input)
  976. """
  977. kernel_size: _size_2_t
  978. stride: _size_2_t
  979. def forward(self, input: Tensor) -> Tensor:
  980. """Runs the forward pass."""
  981. return F.lp_pool2d(
  982. input, float(self.norm_type), self.kernel_size, self.stride, self.ceil_mode
  983. )
  984. class LPPool3d(_LPPoolNd):
  985. r"""Applies a 3D power-average pooling over an input signal composed of several input planes.
  986. On each window, the function computed is:
  987. .. math::
  988. f(X) = \sqrt[p]{\sum_{x \in X} x^{p}}
  989. - At p = :math:`\infty`, one gets Max Pooling
  990. - At p = 1, one gets Sum Pooling (which is proportional to average pooling)
  991. The parameters :attr:`kernel_size`, :attr:`stride` can either be:
  992. - a single ``int`` -- in which case the same value is used for the height, width and depth dimension
  993. - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension,
  994. the second `int` for the height dimension and the third `int` for the width dimension
  995. .. note:: If the sum to the power of `p` is zero, the gradient of this function is
  996. not defined. This implementation will set the gradient to zero in this case.
  997. Args:
  998. kernel_size: the size of the window
  999. stride: the stride of the window. Default value is :attr:`kernel_size`
  1000. ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape
  1001. Note:
  1002. When :attr:`ceil_mode` is ``True``, sliding windows may go off-bounds if they start within the
  1003. left padding or the input. Sliding windows that would start in the right padded region are ignored.
  1004. Shape:
  1005. - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
  1006. - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or
  1007. :math:`(C, D_{out}, H_{out}, W_{out})`, where
  1008. .. math::
  1009. D_{out} = \left\lfloor\frac{D_{in} - \text{kernel\_size}[0]}{\text{stride}[0]} + 1\right\rfloor
  1010. .. math::
  1011. H_{out} = \left\lfloor\frac{H_{in} - \text{kernel\_size}[1]}{\text{stride}[1]} + 1\right\rfloor
  1012. .. math::
  1013. W_{out} = \left\lfloor\frac{W_{in} - \text{kernel\_size}[2]}{\text{stride}[2]} + 1\right\rfloor
  1014. Examples::
  1015. >>> # power-2 pool of square window of size=3, stride=2
  1016. >>> m = nn.LPPool3d(2, 3, stride=2)
  1017. >>> # pool of non-square window of power 1.2
  1018. >>> m = nn.LPPool3d(1.2, (3, 2, 2), stride=(2, 1, 2))
  1019. >>> input = torch.randn(20, 16, 50, 44, 31)
  1020. >>> output = m(input)
  1021. """
  1022. kernel_size: _size_3_t
  1023. stride: _size_3_t
  1024. def forward(self, input: Tensor) -> Tensor:
  1025. """Runs the forward pass."""
  1026. return F.lp_pool3d(
  1027. input, float(self.norm_type), self.kernel_size, self.stride, self.ceil_mode
  1028. )
  1029. class _AdaptiveMaxPoolNd(Module):
  1030. __constants__ = ["output_size", "return_indices"]
  1031. return_indices: bool
  1032. def __init__(
  1033. self, output_size: _size_any_opt_t, return_indices: bool = False
  1034. ) -> None:
  1035. super().__init__()
  1036. self.output_size = output_size
  1037. self.return_indices = return_indices
  1038. def extra_repr(self) -> str:
  1039. return f"output_size={self.output_size}"
  1040. # FIXME (by @ssnl): Improve adaptive pooling docs: specify what the input and
  1041. # output shapes are, and how the operation computes output.
  1042. class AdaptiveMaxPool1d(_AdaptiveMaxPoolNd):
  1043. r"""Applies a 1D adaptive max pooling over an input signal composed of several input planes.
  1044. The output size is :math:`L_{out}`, for any input size.
  1045. The number of output features is equal to the number of input planes.
  1046. Args:
  1047. output_size: the target output size :math:`L_{out}`.
  1048. return_indices: if ``True``, will return the indices along with the outputs.
  1049. Useful to pass to nn.MaxUnpool1d. Default: ``False``
  1050. Shape:
  1051. - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
  1052. - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where
  1053. :math:`L_{out}=\text{output\_size}`.
  1054. Examples:
  1055. >>> # target output size of 5
  1056. >>> m = nn.AdaptiveMaxPool1d(5)
  1057. >>> input = torch.randn(1, 64, 8)
  1058. >>> output = m(input)
  1059. """
  1060. output_size: _size_1_t
  1061. def forward(self, input: Tensor):
  1062. """Runs the forward pass."""
  1063. return F.adaptive_max_pool1d(input, self.output_size, self.return_indices)
  1064. class AdaptiveMaxPool2d(_AdaptiveMaxPoolNd):
  1065. r"""Applies a 2D adaptive max pooling over an input signal composed of several input planes.
  1066. The output is of size :math:`H_{out} \times W_{out}`, for any input size.
  1067. The number of output features is equal to the number of input planes.
  1068. Args:
  1069. output_size: the target output size of the image of the form :math:`H_{out} \times W_{out}`.
  1070. Can be a tuple :math:`(H_{out}, W_{out})` or a single :math:`H_{out}` for a
  1071. square image :math:`H_{out} \times H_{out}`. :math:`H_{out}` and :math:`W_{out}`
  1072. can be either a ``int``, or ``None`` which means the size will be the same as that
  1073. of the input.
  1074. return_indices: if ``True``, will return the indices along with the outputs.
  1075. Useful to pass to nn.MaxUnpool2d. Default: ``False``
  1076. Shape:
  1077. - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
  1078. - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where
  1079. :math:`(H_{out}, W_{out})=\text{output\_size}`.
  1080. Examples:
  1081. >>> # target output size of 5x7
  1082. >>> m = nn.AdaptiveMaxPool2d((5, 7))
  1083. >>> input = torch.randn(1, 64, 8, 9)
  1084. >>> output = m(input)
  1085. >>> # target output size of 7x7 (square)
  1086. >>> m = nn.AdaptiveMaxPool2d(7)
  1087. >>> input = torch.randn(1, 64, 10, 9)
  1088. >>> output = m(input)
  1089. >>> # target output size of 10x7
  1090. >>> m = nn.AdaptiveMaxPool2d((None, 7))
  1091. >>> input = torch.randn(1, 64, 10, 9)
  1092. >>> output = m(input)
  1093. """
  1094. output_size: _size_2_opt_t
  1095. def forward(self, input: Tensor):
  1096. """Runs the forward pass."""
  1097. return F.adaptive_max_pool2d(input, self.output_size, self.return_indices)
  1098. class AdaptiveMaxPool3d(_AdaptiveMaxPoolNd):
  1099. r"""Applies a 3D adaptive max pooling over an input signal composed of several input planes.
  1100. The output is of size :math:`D_{out} \times H_{out} \times W_{out}`, for any input size.
  1101. The number of output features is equal to the number of input planes.
  1102. Args:
  1103. output_size: the target output size of the image of the form :math:`D_{out} \times H_{out} \times W_{out}`.
  1104. Can be a tuple :math:`(D_{out}, H_{out}, W_{out})` or a single
  1105. :math:`D_{out}` for a cube :math:`D_{out} \times D_{out} \times D_{out}`.
  1106. :math:`D_{out}`, :math:`H_{out}` and :math:`W_{out}` can be either a
  1107. ``int``, or ``None`` which means the size will be the same as that of the input.
  1108. return_indices: if ``True``, will return the indices along with the outputs.
  1109. Useful to pass to nn.MaxUnpool3d. Default: ``False``
  1110. Shape:
  1111. - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
  1112. - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})` or :math:`(C, D_{out}, H_{out}, W_{out})`,
  1113. where :math:`(D_{out}, H_{out}, W_{out})=\text{output\_size}`.
  1114. Examples:
  1115. >>> # target output size of 5x7x9
  1116. >>> m = nn.AdaptiveMaxPool3d((5, 7, 9))
  1117. >>> input = torch.randn(1, 64, 8, 9, 10)
  1118. >>> output = m(input)
  1119. >>> # target output size of 7x7x7 (cube)
  1120. >>> m = nn.AdaptiveMaxPool3d(7)
  1121. >>> input = torch.randn(1, 64, 10, 9, 8)
  1122. >>> output = m(input)
  1123. >>> # target output size of 7x9x8
  1124. >>> m = nn.AdaptiveMaxPool3d((7, None, None))
  1125. >>> input = torch.randn(1, 64, 10, 9, 8)
  1126. >>> output = m(input)
  1127. """
  1128. output_size: _size_3_opt_t
  1129. def forward(self, input: Tensor):
  1130. """Runs the forward pass."""
  1131. return F.adaptive_max_pool3d(input, self.output_size, self.return_indices)
  1132. class _AdaptiveAvgPoolNd(Module):
  1133. __constants__ = ["output_size"]
  1134. def __init__(self, output_size: _size_any_opt_t) -> None:
  1135. super().__init__()
  1136. self.output_size = output_size
  1137. def extra_repr(self) -> str:
  1138. return f"output_size={self.output_size}"
  1139. class AdaptiveAvgPool1d(_AdaptiveAvgPoolNd):
  1140. r"""Applies a 1D adaptive average pooling over an input signal composed of several input planes.
  1141. The output size is :math:`L_{out}`, for any input size.
  1142. The number of output features is equal to the number of input planes.
  1143. Args:
  1144. output_size: the target output size :math:`L_{out}`.
  1145. Shape:
  1146. - Input: :math:`(N, C, L_{in})` or :math:`(C, L_{in})`.
  1147. - Output: :math:`(N, C, L_{out})` or :math:`(C, L_{out})`, where
  1148. :math:`L_{out}=\text{output\_size}`.
  1149. Examples:
  1150. >>> # target output size of 5
  1151. >>> m = nn.AdaptiveAvgPool1d(5)
  1152. >>> input = torch.randn(1, 64, 8)
  1153. >>> output = m(input)
  1154. """
  1155. output_size: _size_1_t
  1156. def forward(self, input: Tensor) -> Tensor:
  1157. """
  1158. Runs the forward pass.
  1159. """
  1160. return F.adaptive_avg_pool1d(input, self.output_size)
  1161. class AdaptiveAvgPool2d(_AdaptiveAvgPoolNd):
  1162. r"""Applies a 2D adaptive average pooling over an input signal composed of several input planes.
  1163. The output is of size H x W, for any input size.
  1164. The number of output features is equal to the number of input planes.
  1165. Args:
  1166. output_size: the target output size of the image of the form H x W.
  1167. Can be a tuple (H, W) or a single H for a square image H x H.
  1168. H and W can be either a ``int``, or ``None`` which means the size will
  1169. be the same as that of the input.
  1170. Shape:
  1171. - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`.
  1172. - Output: :math:`(N, C, S_{0}, S_{1})` or :math:`(C, S_{0}, S_{1})`, where
  1173. :math:`S=\text{output\_size}`.
  1174. Examples:
  1175. >>> # target output size of 5x7
  1176. >>> m = nn.AdaptiveAvgPool2d((5, 7))
  1177. >>> input = torch.randn(1, 64, 8, 9)
  1178. >>> output = m(input)
  1179. >>> # target output size of 7x7 (square)
  1180. >>> m = nn.AdaptiveAvgPool2d(7)
  1181. >>> input = torch.randn(1, 64, 10, 9)
  1182. >>> output = m(input)
  1183. >>> # target output size of 10x7
  1184. >>> m = nn.AdaptiveAvgPool2d((None, 7))
  1185. >>> input = torch.randn(1, 64, 10, 9)
  1186. >>> output = m(input)
  1187. """
  1188. output_size: _size_2_opt_t
  1189. def forward(self, input: Tensor) -> Tensor:
  1190. """Runs the forward pass."""
  1191. return F.adaptive_avg_pool2d(input, self.output_size)
  1192. class AdaptiveAvgPool3d(_AdaptiveAvgPoolNd):
  1193. r"""Applies a 3D adaptive average pooling over an input signal composed of several input planes.
  1194. The output is of size D x H x W, for any input size.
  1195. The number of output features is equal to the number of input planes.
  1196. Args:
  1197. output_size: the target output size of the form D x H x W.
  1198. Can be a tuple (D, H, W) or a single number D for a cube D x D x D.
  1199. D, H and W can be either a ``int``, or ``None`` which means the size will
  1200. be the same as that of the input.
  1201. Shape:
  1202. - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` or :math:`(C, D_{in}, H_{in}, W_{in})`.
  1203. - Output: :math:`(N, C, S_{0}, S_{1}, S_{2})` or :math:`(C, S_{0}, S_{1}, S_{2})`,
  1204. where :math:`S=\text{output\_size}`.
  1205. Examples:
  1206. >>> # target output size of 5x7x9
  1207. >>> m = nn.AdaptiveAvgPool3d((5, 7, 9))
  1208. >>> input = torch.randn(1, 64, 8, 9, 10)
  1209. >>> output = m(input)
  1210. >>> # target output size of 7x7x7 (cube)
  1211. >>> m = nn.AdaptiveAvgPool3d(7)
  1212. >>> input = torch.randn(1, 64, 10, 9, 8)
  1213. >>> output = m(input)
  1214. >>> # target output size of 7x9x8
  1215. >>> m = nn.AdaptiveAvgPool3d((7, None, None))
  1216. >>> input = torch.randn(1, 64, 10, 9, 8)
  1217. >>> output = m(input)
  1218. """
  1219. output_size: _size_3_opt_t
  1220. def forward(self, input: Tensor) -> Tensor:
  1221. """Runs the forward pass."""
  1222. return F.adaptive_avg_pool3d(input, self.output_size)