"""
LoRA (Low-Rank Adaptation) implementation for convolutional layers.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
class LoRALayer(nn.Module):
"""
LoRA (Low-Rank Adaptation) wrapper for convolutional layers.
Args:
original_layer: The Conv2d layer to adapt
rank: LoRA rank (default=8)
- Lower rank (4): Fewer parameters, less overfitting risk, less capacity
- Medium rank (8-16): Balanced trade-off (recommended for most tasks)
- Higher rank (32+): More capacity but approaches full fine-tuning
    For small datasets (<1000 images), rank=8 provides sufficient adaptation
    capacity while adding only a small number of parameters (roughly 2-3% of
    the original layer's parameter count).
"""
def __init__(self, original_layer, rank=8):
super().__init__()
self.original_layer = original_layer
self.rank = rank
# Get dimensions from original layer
out_channels = original_layer.out_channels
in_channels = original_layer.in_channels
kernel_size = original_layer.kernel_size
# LoRA matrices: A (down-projection) and B (up-projection)
# A reduces dimensions: in_channels -> rank
# Initialized with small random values to break symmetry
self.lora_A = nn.Parameter(
torch.randn(rank, in_channels, *kernel_size) * 0.01
)
# B expands dimensions: rank -> out_channels
# Initialized to zeros so LoRA starts as identity (preserves pretrained weights)
# This initialization strategy follows the original LoRA paper
self.lora_B = nn.Parameter(
torch.zeros(out_channels, rank, 1, 1)
)
# Freeze original weights (preserve ImageNet knowledge)
self.original_layer.weight.requires_grad = False
if self.original_layer.bias is not None:
self.original_layer.bias.requires_grad = False
def forward(self, x):
"""
Forward pass combining original frozen weights with LoRA adaptation.
Mathematical formulation:
output = W_frozen * x + (B * (A * x))
where * denotes convolution operation.
"""
# Original forward pass (frozen pretrained weights)
original_output = self.original_layer(x)
        # LoRA adaptation pathway (low-rank decomposition)
        # Step 1: Down-project with A (in_channels → rank), mirroring the original
        # layer's stride, padding and dilation so spatial dimensions stay aligned
        # (grouped convolutions with groups > 1 are not supported by this adapter)
        lora_output = F.conv2d(
            x,
            self.lora_A,
            stride=self.original_layer.stride,
            padding=self.original_layer.padding,
            dilation=self.original_layer.dilation
        )
        # Step 2: Up-project with B (rank → out_channels) using a 1x1 convolution
        # These two sequential convolutions approximate a low-rank weight update
        lora_output = F.conv2d(lora_output, self.lora_B)
# Combine: W*x + (B*(A*x)) where * denotes convolution
return original_output + lora_output
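
# Illustrative sketch (not part of the original training code): a quick sanity
# check of LoRALayer on a standalone Conv2d. The channel sizes and input shape
# below are arbitrary and chosen only for the example. It verifies that (a) the
# wrapper preserves the wrapped layer's output shape and (b) with lora_B
# initialized to zeros the adapted layer initially reproduces the frozen
# layer's output exactly, as described in the class docstring.
def _demo_lora_layer(rank=8):
    conv = nn.Conv2d(256, 256, kernel_size=3, padding=1)
    lora_conv = LoRALayer(conv, rank=rank)
    x = torch.randn(2, 256, 14, 14)
    with torch.no_grad():
        assert lora_conv(x).shape == conv(x).shape
        assert torch.allclose(lora_conv(x), conv(x))  # identity at initialization
    # Only the LoRA matrices remain trainable; the wrapped conv stays frozen
    trainable = [name for name, p in lora_conv.named_parameters() if p.requires_grad]
    assert sorted(trainable) == ['lora_A', 'lora_B']
    return lora_conv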
def get_model(num_classes=2, pretrained=True):
"""
Load ResNet34 with optional pretrained weights.
Args:
num_classes: Number of output classes
pretrained: Whether to load ImageNet pretrained weights
Returns:
ResNet34 model
"""
if pretrained:
model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
else:
model = models.resnet34(weights=None)
# Modify last layer for classification
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)
return model
def apply_lora_to_model(model, target_layers=['layer3', 'layer4'], rank=8):
"""
Apply LoRA adapters to specific layers in ResNet34.
Strategy: We target layer3 and layer4 (high-level feature extractors) because:
- layer1 & layer2: Extract low-level features (edges, textures) that are
      universal across tasks → keep frozen, no adaptation needed
    - layer3 & layer4: Extract high-level semantic features (objects, contexts)
      that are task-specific → need slight adaptation for smoking detection
    - fc: Brand new classifier head → fully trainable
This approach gives us the sweet spot:
- Full fine-tuning: 21.8M params (overfitting risk with small datasets)
- Only fc training: ~1K params (may underfit, features not adapted)
- LoRA on layer3+layer4: ~465K params (2.14% of model, balanced approach)
Args:
model: ResNet34 model
target_layers: List of layer names to apply LoRA to
        rank: LoRA rank (default=8, adds roughly 2-3% of each adapted layer's parameters)
    Returns:
        The model with LoRA adapters applied to the target layers
"""
# Freeze ALL layers first (preserve ImageNet features)
for param in model.parameters():
param.requires_grad = False
# Unfreeze only the new classification head
for param in model.fc.parameters():
param.requires_grad = True
    # Wrap conv1 and conv2 in each residual block of the requested layers
    for layer_name in target_layers:
        layer = getattr(model, layer_name)
        for block in layer:
            if hasattr(block, 'conv1'):
                block.conv1 = LoRALayer(block.conv1, rank=rank)
            if hasattr(block, 'conv2'):
                block.conv2 = LoRALayer(block.conv2, rank=rank)
return model
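
# Illustrative sketch (assumed usage, not part of the original training code):
# after apply_lora_to_model, only the LoRA matrices in the target layers and the
# new fc head should require gradients; everything else stays frozen.
# pretrained=False is used here only to avoid downloading ImageNet weights.
def _demo_apply_lora(rank=8):
    model = apply_lora_to_model(get_model(num_classes=2, pretrained=False), rank=rank)
    trainable = [name for name, p in model.named_parameters() if p.requires_grad]
    assert all(('lora_' in name) or name.startswith('fc.') for name in trainable)
    return model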
def count_parameters(model):
"""
Count total and trainable parameters in the model.
Returns:
tuple: (total_params, trainable_params, trainable_percentage)
"""
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
trainable_pct = 100. * trainable_params / total_params
return total_params, trainable_params, trainable_pct
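

# Minimal smoke test (an assumed usage sketch, not part of the original training
# pipeline): build the model, apply LoRA, report parameter counts, and run a
# dummy forward pass. pretrained=False avoids downloading ImageNet weights here;
# the setup described in the docstrings above uses pretrained=True.
if __name__ == "__main__":
    model = get_model(num_classes=2, pretrained=False)
    model = apply_lora_to_model(model, target_layers=['layer3', 'layer4'], rank=8)
    total, trainable, pct = count_parameters(model)
    print(f"Total params:     {total:,}")
    print(f"Trainable params: {trainable:,} ({pct:.2f}%)")
    # Dummy forward pass with an ImageNet-sized input
    dummy = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        logits = model(dummy)
    print(f"Output shape: {tuple(logits.shape)}")  # expected: (1, 2)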