Image-Text-to-Text
Transformers
TensorBoard
Safetensors
feature-extraction
conversational
custom_code
Yin-Xie committed on
Commit
1e8c3bf
·
verified ·
1 Parent(s): 7852b29

Upload modeling_llavaonevision1_5.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_llavaonevision1_5.py +2 -0
modeling_llavaonevision1_5.py CHANGED
@@ -508,6 +508,7 @@ class LLaVAOneVision1_5_Attention(nn.Module):
508
  super().__init__()
509
  self.config = config
510
  self.layer_idx = layer_idx
 
511
  self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
512
  self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
513
  self.scaling = self.head_dim**-0.5
@@ -544,6 +545,7 @@ class LLaVAOneVision1_5_Attention(nn.Module):
544
  **kwargs: Unpack[FlashAttentionKwargs],
545
  ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
546
  input_shape = hidden_states.shape[:-1]
 
547
  hidden_shape = (*input_shape, -1, self.head_dim)
548
 
549
  query_states = self.q_norm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
 
508
  super().__init__()
509
  self.config = config
510
  self.layer_idx = layer_idx
511
+ self.num_heads = config.num_attention_heads
512
  self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
513
  self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
514
  self.scaling = self.head_dim**-0.5
 
545
  **kwargs: Unpack[FlashAttentionKwargs],
546
  ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
547
  input_shape = hidden_states.shape[:-1]
548
+ bsz = input_shape[0]
549
  hidden_shape = (*input_shape, -1, self.head_dim)
550
 
551
  query_states = self.q_norm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)