modeling_shieldgemma2.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
# Copyright 2025 Google Inc. HuggingFace Inc. team. All rights reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  15. from dataclasses import dataclass
  16. import torch
  17. from ...cache_utils import Cache
  18. from ...modeling_outputs import ImageClassifierOutputWithNoAttention
  19. from ...modeling_utils import PreTrainedModel
  20. from ...utils import (
  21. auto_docstring,
  22. logging,
  23. )
  24. from ..auto import AutoModelForImageTextToText
  25. from .configuration_shieldgemma2 import ShieldGemma2Config
  26. logger = logging.get_logger(__name__)
  27. @dataclass
  28. class ShieldGemma2ImageClassifierOutputWithNoAttention(ImageClassifierOutputWithNoAttention):
  29. """ShieldGemma2 classifies imags as violative or not relative to a specific policy
  30. Args:
  31. """
  32. probabilities: torch.Tensor | None = None
  33. @auto_docstring
  34. class ShieldGemma2ForImageClassification(PreTrainedModel):
  35. config: ShieldGemma2Config
  36. input_modalities = ("image", "text")
  37. base_model_prefix = "model"
  38. def __init__(self, config: ShieldGemma2Config):
  39. super().__init__(config=config)
  40. self.yes_token_index = getattr(config, "yes_token_index", 10_784)
  41. self.no_token_index = getattr(config, "no_token_index", 3771)
  42. self.model = AutoModelForImageTextToText.from_config(config=config)
  43. self.post_init()
  44. def get_input_embeddings(self):
  45. return self.model.get_decoder().get_input_embeddings()
  46. def set_input_embeddings(self, value):
  47. self.model.get_decoder().set_input_embeddings(value)
  48. def get_output_embeddings(self):
  49. return self.model.get_decoder().get_output_embeddings()
  50. def set_output_embeddings(self, new_embeddings):
  51. self.model.get_decoder().set_output_embeddings(new_embeddings)
  52. @auto_docstring
  53. def forward(
  54. self,
  55. input_ids: torch.LongTensor | None = None,
  56. pixel_values: torch.FloatTensor | None = None,
  57. attention_mask: torch.Tensor | None = None,
  58. position_ids: torch.LongTensor | None = None,
  59. past_key_values: Cache | None = None,
  60. token_type_ids: torch.LongTensor | None = None,
  61. inputs_embeds: torch.FloatTensor | None = None,
  62. labels: torch.LongTensor | None = None,
  63. use_cache: bool | None = None,
  64. output_attentions: bool | None = None,
  65. output_hidden_states: bool | None = None,
  66. return_dict: bool | None = None,
  67. logits_to_keep: int | torch.Tensor = 0,
  68. **lm_kwargs,
  69. ) -> ShieldGemma2ImageClassifierOutputWithNoAttention:
  70. r"""
  71. Returns:
  72. A `ShieldGemma2ImageClassifierOutputWithNoAttention` instance containing the logits and probabilities
  73. associated with the model predicting the `Yes` or `No` token as the response to that prompt, captured in the
  74. following properties.
  75. * `logits` (`torch.Tensor` of shape `(batch_size, 2)`):
  76. The first position along dim=1 is the logits for the `Yes` token and the second position along dim=1 is
  77. the logits for the `No` token.
  78. * `probabilities` (`torch.Tensor` of shape `(batch_size, 2)`):
  79. The first position along dim=1 is the probability of predicting the `Yes` token and the second position
  80. along dim=1 is the probability of predicting the `No` token.
  81. ShieldGemma prompts are constructed such that predicting the `Yes` token means the content *does violate* the
  82. policy as described. If you are only interested in the violative condition, use
  83. `violated = outputs.probabilities[:, 1]` to extract that slice from the output tensors.
  84. When used with the `ShieldGemma2Processor`, the `batch_size` will be equal to `len(images) * len(policies)`,
  85. and the order within the batch will be img1_policy1, ... img1_policyN, ... imgM_policyN.
  86. """
  87. outputs = self.model(
  88. input_ids=input_ids,
  89. pixel_values=pixel_values,
  90. attention_mask=attention_mask,
  91. position_ids=position_ids,
  92. past_key_values=past_key_values,
  93. token_type_ids=token_type_ids,
  94. inputs_embeds=inputs_embeds,
  95. labels=labels,
  96. use_cache=use_cache,
  97. output_attentions=output_attentions,
  98. output_hidden_states=output_hidden_states,
  99. return_dict=return_dict,
  100. logits_to_keep=logits_to_keep,
  101. **lm_kwargs,
  102. )
  103. logits = outputs.logits
  104. selected_logits = logits[:, -1, [self.yes_token_index, self.no_token_index]]
  105. probabilities = torch.softmax(selected_logits, dim=-1)
  106. return ShieldGemma2ImageClassifierOutputWithNoAttention(
  107. logits=selected_logits,
  108. probabilities=probabilities,
  109. )
  110. __all__ = [
  111. "ShieldGemma2ForImageClassification",
  112. ]