| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- """ Global Context Attention Block
- Paper: `GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond`
- - https://arxiv.org/abs/1904.11492
- Official code consulted as reference: https://github.com/xvjiarui/GCNet
- Hacked together by / Copyright 2021 Ross Wightman
- """
- from typing import Optional, Tuple, Type, Union
- from torch import nn as nn
- import torch.nn.functional as F
- from .create_act import create_act_layer, get_act_layer
- from .helpers import make_divisible
- from .mlp import ConvMlp
- from .norm import LayerNorm2d
class GlobalContext(nn.Module):
    """Global context attention block (GCNet-style) for 4D feature maps.

    A per-channel context vector is pooled from the input, either as a
    softmax-weighted sum over spatial positions (weights produced by a 1x1
    conv) or as a plain spatial mean. The context is then fed through up to
    two ``ConvMlp`` branches: one whose gated output multiplies the input and
    one whose output is added to it.

    Args:
        channels: Number of channels of the input feature map.
        use_attn: Pool with the learned softmax attention when True,
            otherwise with a spatial mean.
        fuse_add: Build the additive branch (``mlp_add``).
        fuse_scale: Build the multiplicative, gated branch (``mlp_scale``).
        init_last_zero: Stored on the module as ``init_last_zero``.
            NOTE(review): ``reset_parameters`` does not consult this flag; it
            zeroes ``mlp_add.fc2.weight`` whenever the additive branch exists.
        rd_ratio: Fraction of ``channels`` used to derive ``rd_channels``
            when that argument is not given.
        rd_channels: Second positional argument handed to ``ConvMlp``
            (presumably its hidden width -- confirm against ``ConvMlp``).
            Overrides ``rd_ratio`` when set.
        rd_divisor: Passed to ``make_divisible`` along with the scaled
            channel count (presumably the rounding multiple -- confirm).
        act_layer: Activation for the MLP branches, resolved through
            ``get_act_layer``.
        gate_layer: Gate applied to the scale branch output, built by
            ``create_act_layer``.
        device: Forwarded to the conv and MLP constructors.
        dtype: Forwarded to the conv and MLP constructors.
    """

    def __init__(
            self,
            channels: int,
            use_attn: bool = True,
            fuse_add: bool = False,
            fuse_scale: bool = True,
            init_last_zero: bool = False,
            rd_ratio: float = 1. / 8,
            rd_channels: Optional[int] = None,
            rd_divisor: int = 1,
            act_layer: Type[nn.Module] = nn.ReLU,
            gate_layer: Union[str, Type[nn.Module]] = 'sigmoid',
            device=None,
            dtype=None,
    ):
        factory_kwargs = dict(device=device, dtype=dtype)
        super().__init__()
        act_layer = get_act_layer(act_layer)

        # Submodules are created in a fixed order (attention conv, add branch,
        # scale branch, gate) so parameter registration order stays stable.
        if use_attn:
            self.conv_attn = nn.Conv2d(channels, 1, kernel_size=1, bias=True, **factory_kwargs)
        else:
            self.conv_attn = None

        if rd_channels is None:
            rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)

        def _build_branch():
            # Both branches share the same configuration.
            return ConvMlp(
                channels,
                rd_channels,
                act_layer=act_layer,
                norm_layer=LayerNorm2d,
                **factory_kwargs,
            )

        self.mlp_add = _build_branch() if fuse_add else None
        self.mlp_scale = _build_branch() if fuse_scale else None

        self.gate = create_act_layer(gate_layer)
        self.init_last_zero = init_last_zero
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the attention conv weight and zero the add branch's last weight."""
        attn_conv = self.conv_attn
        if attn_conv is not None:
            nn.init.kaiming_normal_(attn_conv.weight, mode='fan_in', nonlinearity='relu')

        add_branch = self.mlp_add
        if add_branch is not None:
            # With a zero fc2 weight the additive branch contributes only its
            # remaining terms (e.g. any bias) right after initialisation.
            nn.init.zeros_(add_branch.fc2.weight)

    def forward(self, x):
        """Apply global-context scaling and/or addition to ``x``.

        Args:
            x: 4D tensor unpacked as (B, C, H, W).

        Returns:
            Tensor produced by broadcasting the (B, C, 1, 1) context-derived
            terms against ``x``.
        """
        batch, chans, height, width = x.shape

        if self.conv_attn is None:
            # No learned attention: plain spatial average, (B, C, 1, 1).
            pooled = x.mean(dim=(2, 3), keepdim=True)
        else:
            num_pos = height * width
            weights = self.conv_attn(x).reshape(batch, 1, num_pos)  # (B, 1, H * W)
            weights = F.softmax(weights, dim=-1).unsqueeze(3)  # (B, 1, H * W, 1)
            feats = x.reshape(batch, chans, num_pos).unsqueeze(1)  # (B, 1, C, H * W)
            # Batched matmul sums features over positions using the weights.
            pooled = (feats @ weights).view(batch, chans, 1, 1)

        if self.mlp_scale is not None:
            x = x * self.gate(self.mlp_scale(pooled))
        if self.mlp_add is not None:
            x = x + self.mlp_add(pooled)
        return x
|