Upload modeling_llavaonevision1_5.py with huggingface_hub
Browse files
modeling_llavaonevision1_5.py
CHANGED
|
@@ -508,6 +508,7 @@ class LLaVAOneVision1_5_Attention(nn.Module):
|
|
| 508 |
super().__init__()
|
| 509 |
self.config = config
|
| 510 |
self.layer_idx = layer_idx
|
|
|
|
| 511 |
self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
|
| 512 |
self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
|
| 513 |
self.scaling = self.head_dim**-0.5
|
|
@@ -544,6 +545,7 @@ class LLaVAOneVision1_5_Attention(nn.Module):
|
|
| 544 |
**kwargs: Unpack[FlashAttentionKwargs],
|
| 545 |
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
|
| 546 |
input_shape = hidden_states.shape[:-1]
|
|
|
|
| 547 |
hidden_shape = (*input_shape, -1, self.head_dim)
|
| 548 |
|
| 549 |
query_states = self.q_norm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
|
|
|
|
| 508 |
super().__init__()
|
| 509 |
self.config = config
|
| 510 |
self.layer_idx = layer_idx
|
| 511 |
+
self.num_heads = config.num_attention_heads
|
| 512 |
self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
|
| 513 |
self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
|
| 514 |
self.scaling = self.head_dim**-0.5
|
|
|
|
| 545 |
**kwargs: Unpack[FlashAttentionKwargs],
|
| 546 |
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
|
| 547 |
input_shape = hidden_states.shape[:-1]
|
| 548 |
+
bsz = input_shape[0]
|
| 549 |
hidden_shape = (*input_shape, -1, self.head_dim)
|
| 550 |
|
| 551 |
query_states = self.q_norm(self.q_proj(hidden_states).view(hidden_shape)).transpose(1, 2)
|