Upload modeling_llavaonevision1_5.py with huggingface_hub
modeling_llavaonevision1_5.py
CHANGED
@@ -508,6 +508,7 @@ class LLaVAOneVision1_5_Attention(nn.Module):
         super().__init__()
         self.config = config
         self.layer_idx = layer_idx
+        self.num_heads = config.num_attention_heads
         self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
         self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
         self.scaling = self.head_dim**-0.5
@@ -544,6 +545,7 @@ class LLaVAOneVision1_5_Attention(nn.Module):
         **kwargs: Unpack[FlashAttentionKwargs],
     ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
         input_shape = hidden_states.shape[:-1]
+        bsz = input_shape[0]
         hidden_shape = (*input_shape, -1, self.head_dim)

         query_states = self.q_norm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
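The two added lines cache values the rest of the attention module can use directly: self.num_heads mirrors config.num_attention_heads, and bsz is the batch dimension peeled off input_shape. A minimal sketch of the shape bookkeeping around these lines, using hypothetical config values and omitting the q_proj/q_norm projections (illustrative only, not the exact code in this file):

import torch

# Hypothetical config values for illustration (not from this checkpoint).
hidden_size, num_attention_heads, num_key_value_heads = 1024, 16, 4

num_heads = num_attention_heads
head_dim = hidden_size // num_attention_heads                      # 64; the getattr fallback in __init__
num_key_value_groups = num_attention_heads // num_key_value_heads  # 4 query heads share each KV head
scaling = head_dim ** -0.5                                         # 1/sqrt(head_dim) attention scale

# Forward-pass shape handling mirroring the diff.
bsz, seq_len = 2, 32
hidden_states = torch.randn(bsz, seq_len, hidden_size)
input_shape = hidden_states.shape[:-1]              # (bsz, seq_len); bsz = input_shape[0]
hidden_shape = (*input_shape, -1, head_dim)         # -1 resolves to num_heads

query_states = hidden_states.view(hidden_shape).transpose(1, 2)
print(query_states.shape)  # torch.Size([2, 16, 32, 64]) -> (bsz, num_heads, seq_len, head_dim)

Precomputing num_heads and bsz this way is a common pattern when an attention backend needs explicit head counts and batch size for reshaping, e.g. merging heads back to (bsz, seq_len, num_heads * head_dim) before the output projection.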