__init__.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. import torch
  2. import torch.nn as nn
  3. import torch.nn.functional as F
  4. from romatch.utils.utils import get_grid, get_autocast_params
  5. from .layers.block import Block
  6. from .layers.attention import MemEffAttention
  7. from .dinov2 import vit_large
  8. class TransformerDecoder(nn.Module):
  9. def __init__(self, blocks, hidden_dim, out_dim, is_classifier = False, *args,
  10. amp = False, pos_enc = True, learned_embeddings = False, embedding_dim = None, amp_dtype = torch.float16, **kwargs) -> None:
  11. super().__init__(*args, **kwargs)
  12. self.blocks = blocks
  13. self.to_out = nn.Linear(hidden_dim, out_dim)
  14. self.hidden_dim = hidden_dim
  15. self.out_dim = out_dim
  16. self._scales = [16]
  17. self.is_classifier = is_classifier
  18. self.amp = amp
  19. self.amp_dtype = amp_dtype
  20. self.pos_enc = pos_enc
  21. self.learned_embeddings = learned_embeddings
  22. if self.learned_embeddings:
  23. self.learned_pos_embeddings = nn.Parameter(nn.init.kaiming_normal_(torch.empty((1, hidden_dim, embedding_dim, embedding_dim))))
  24. def scales(self):
  25. return self._scales.copy()
  26. def forward(self, gp_posterior, features, old_stuff, new_scale):
  27. autocast_device, autocast_enabled, autocast_dtype = get_autocast_params(gp_posterior.device, enabled=self.amp, dtype=self.amp_dtype)
  28. with torch.autocast(autocast_device, enabled=autocast_enabled, dtype = autocast_dtype):
  29. B,C,H,W = gp_posterior.shape
  30. x = torch.cat((gp_posterior, features), dim = 1)
  31. B,C,H,W = x.shape
  32. grid = get_grid(B, H, W, x.device).reshape(B,H*W,2)
  33. if self.learned_embeddings:
  34. pos_enc = F.interpolate(self.learned_pos_embeddings, size = (H,W), mode = 'bilinear', align_corners = False).permute(0,2,3,1).reshape(1,H*W,C)
  35. else:
  36. pos_enc = 0
  37. tokens = x.reshape(B,C,H*W).permute(0,2,1) + pos_enc
  38. z = self.blocks(tokens)
  39. out = self.to_out(z)
  40. out = out.permute(0,2,1).reshape(B, self.out_dim, H, W)
  41. warp, certainty = out[:, :-1], out[:, -1:]
  42. return warp, certainty, None