| """ | |
| LoRA (Low-Rank Adaptation) implementation for convolutional layers. | |
| """ | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| from torchvision import models | |
| class LoRALayer(nn.Module): | |
| """ | |
| LoRA (Low-Rank Adaptation) wrapper for convolutional layers. | |
| Args: | |
| original_layer: The Conv2d layer to adapt | |
| rank: LoRA rank (default=8) | |
| - Lower rank (4): Fewer parameters, less overfitting risk, less capacity | |
| - Medium rank (8-16): Balanced trade-off (recommended for most tasks) | |
| - Higher rank (32+): More capacity but approaches full fine-tuning | |
| For small datasets (<1000 images), rank=8 provides sufficient | |
| adaptation capacity while keeping parameters low (~2% of original layer). | |
| """ | |
| def __init__(self, original_layer, rank=8): | |
| super().__init__() | |
| self.original_layer = original_layer | |
| self.rank = rank | |
| # Get dimensions from original layer | |
| out_channels = original_layer.out_channels | |
| in_channels = original_layer.in_channels | |
| kernel_size = original_layer.kernel_size | |
| # LoRA matrices: A (down-projection) and B (up-projection) | |
| # A reduces dimensions: in_channels -> rank | |
| # Initialized with small random values to break symmetry | |
| self.lora_A = nn.Parameter( | |
| torch.randn(rank, in_channels, *kernel_size) * 0.01 | |
| ) | |
| # B expands dimensions: rank -> out_channels | |
| # Initialized to zeros so LoRA starts as identity (preserves pretrained weights) | |
| # This initialization strategy follows the original LoRA paper | |
| self.lora_B = nn.Parameter( | |
| torch.zeros(out_channels, rank, 1, 1) | |
| ) | |
| # Freeze original weights (preserve ImageNet knowledge) | |
| self.original_layer.weight.requires_grad = False | |
| if self.original_layer.bias is not None: | |
| self.original_layer.bias.requires_grad = False | |
| def forward(self, x): | |
| """ | |
| Forward pass combining original frozen weights with LoRA adaptation. | |
| Mathematical formulation: | |
| output = W_frozen * x + (B * (A * x)) | |
| where * denotes convolution operation. | |
| """ | |
| # Original forward pass (frozen pretrained weights) | |
| original_output = self.original_layer(x) | |
| # LoRA adaptation pathway (low-rank decomposition) | |
| # Step 1: Down-project with A (in_channels β rank) | |
| lora_output = F.conv2d( | |
| x, | |
| self.lora_A, | |
| stride=self.original_layer.stride, | |
| padding=self.original_layer.padding | |
| ) | |
| # Step 2: Up-project with B (rank β out_channels) | |
| # These two sequential convolutions approximate a low-rank adaptation | |
| lora_output = F.conv2d(lora_output, self.lora_B) | |
| # Combine: W*x + (B*(A*x)) where * denotes convolution | |
| return original_output + lora_output | |
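

# Illustrative sketch (added for clarity, not part of the training pipeline):
# wraps a single 3x3 conv in LoRALayer to show that the output shape is unchanged
# and that the adapter adds only a small fraction of the layer's parameters,
# consistent with the "~2% of the original layer" estimate above. The sizes here
# (512 channels, 14x14 input) are arbitrary example values.
def _demo_lora_layer(rank=8):
    conv = nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False)
    lora_conv = LoRALayer(conv, rank=rank)

    x = torch.randn(1, 512, 14, 14)
    assert lora_conv(x).shape == conv(x).shape  # adapter does not change the output shape

    frozen = conv.weight.numel()                                  # 512*512*3*3 = 2,359,296
    added = lora_conv.lora_A.numel() + lora_conv.lora_B.numel()   # 8*512*3*3 + 512*8 = 40,960
    print(f"LoRA adds {added:,} params = {100.0 * added / frozen:.2f}% of the frozen conv")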
def get_model(num_classes=2, pretrained=True):
    """
    Load ResNet34 with optional pretrained weights.

    Args:
        num_classes: Number of output classes
        pretrained: Whether to load ImageNet pretrained weights

    Returns:
        ResNet34 model with a new classification head
    """
    if pretrained:
        model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
    else:
        model = models.resnet34(weights=None)

    # Replace the final fully connected layer for the target classification task
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)

    return model
def apply_lora_to_model(model, target_layers=('layer3', 'layer4'), rank=8):
    """
    Apply LoRA adapters to specific layers in ResNet34.

    Strategy: We target layer3 and layer4 (high-level feature extractors) because:
        - layer1 & layer2: Extract low-level features (edges, textures) that are
          universal across tasks -> keep frozen, no adaptation needed
        - layer3 & layer4: Extract high-level semantic features (objects, contexts)
          that are task-specific -> need slight adaptation for smoking detection
        - fc: Brand new classifier head -> fully trainable

    This approach gives us the sweet spot:
        - Full fine-tuning: 21.8M params (overfitting risk with small datasets)
        - Only fc training: ~1K params (may underfit, features not adapted)
        - LoRA on layer3+layer4: ~465K params (2.14% of the model, balanced approach)

    Args:
        model: ResNet34 model
        target_layers: Names of the ResNet layers to apply LoRA to
        rank: LoRA rank (default=8, adds ~2% params per adapted layer)

    Returns:
        The model with LoRA adapters applied (modified in place)
    """
    # Freeze ALL layers first (preserve ImageNet features)
    for param in model.parameters():
        param.requires_grad = False

    # Unfreeze only the new classification head
    for param in model.fc.parameters():
        param.requires_grad = True

    # Wrap conv1/conv2 of every block in the requested layers with LoRA adapters
    for layer_name in target_layers:
        layer = getattr(model, layer_name)
        for block in layer:
            if hasattr(block, 'conv1'):
                block.conv1 = LoRALayer(block.conv1, rank=rank)
            if hasattr(block, 'conv2'):
                block.conv2 = LoRALayer(block.conv2, rank=rank)

    return model
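

# Illustrative helper (added for clarity, not part of the original module): a quick
# sanity check that, after apply_lora_to_model(), only the LoRA matrices and the
# classifier head remain trainable. The printed names are those produced by the
# wrapping above (lora_A / lora_B inside layer3/layer4 blocks, plus fc.weight / fc.bias).
def _list_trainable_parameters(model):
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(f"{name}: {tuple(param.shape)}")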
def count_parameters(model):
    """
    Count total and trainable parameters in the model.

    Returns:
        tuple: (total_params, trainable_params, trainable_percentage)
    """
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    trainable_pct = 100.0 * trainable_params / total_params

    return total_params, trainable_params, trainable_pct
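

# Minimal end-to-end sketch (added for illustration): builds the 2-class ResNet34,
# attaches LoRA adapters to layer3/layer4 at rank 8, and reports the parameter budget
# via count_parameters(). Exact counts depend on the torchvision version, but should
# be close to the ~465K trainable / ~2.14% figures quoted above.
if __name__ == "__main__":
    model = get_model(num_classes=2, pretrained=False)  # set pretrained=True to load ImageNet weights
    model = apply_lora_to_model(model, target_layers=('layer3', 'layer4'), rank=8)

    total, trainable, pct = count_parameters(model)
    print(f"Total parameters:     {total:,}")
    print(f"Trainable parameters: {trainable:,} ({pct:.2f}%)")

    # A single forward pass to confirm the adapted model still runs end to end
    dummy = torch.randn(1, 3, 224, 224)
    print("Output shape:", model(dummy).shape)  # expected: torch.Size([1, 2])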