|
|
|
import torch |
|
import torch.nn as nn |
|
import timm |
|
from huggingface_hub import PyTorchModelHubMixin |
|
|
|
class KeypointModel(nn.Module, PyTorchModelHubMixin):
    """Predict a single-channel heatmap from an image with a ConvNeXt V2 trunk.

    The network is a timm ``convnextv2_base`` backbone with its last two child
    modules removed, followed by a small convolutional head that resizes the
    feature map to ``heatmap_size`` and projects it to one output channel.

    Args:
        config: Model configuration. Either a dict containing the key
            ``"heatmap_size"`` (e.g. a config deserialized from JSON) or any
            object exposing a ``heatmap_size`` attribute. The value is passed
            unchanged as ``size`` to ``nn.Upsample``.
        **kwargs: Accepted for call-site compatibility and ignored.

    Raises:
        KeyError: If ``config`` is a dict without ``"heatmap_size"``.
        AttributeError: If ``config`` is not a dict and has no
            ``heatmap_size`` attribute.
    """

    def __init__(self, config, **kwargs):
        super().__init__()

        # Support both mapping-style and attribute-style configs: a config
        # that has been round-tripped through JSON comes back as a plain
        # dict, for which ``config.heatmap_size`` would raise AttributeError.
        if isinstance(config, dict):
            upsample_size = config["heatmap_size"]
        else:
            upsample_size = config.heatmap_size

        # pretrained=False: only the architecture is built here.
        # NOTE(review): presumably weights are loaded afterwards from a
        # checkpoint — confirm with the caller.
        backbone = timm.create_model(
            'convnextv2_base.fcmae_ft_in22k_in1k_384',
            pretrained=False,
        )

        # Keep every child module except the last two.
        # NOTE(review): for timm's ConvNeXt these look like the final
        # norm/classifier modules — verify against the installed timm
        # version. Attribute names and Sequential ordering are kept as-is so
        # existing state-dict keys continue to match.
        self.feature_extractor = nn.Sequential(*list(backbone.children())[:-2])

        in_channels = backbone.num_features

        self.head = nn.Sequential(
            nn.Conv2d(in_channels, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Resize to the configured heatmap resolution before the final
            # 1x1 projection.
            nn.Upsample(size=upsample_size, mode='bilinear', align_corners=False),
            nn.Conv2d(256, 1, kernel_size=1),
        )

    def forward(self, image: torch.Tensor) -> torch.Tensor:
        """Run the backbone and head on ``image`` and return the heatmap.

        Args:
            image: Input batch fed directly to the feature extractor
                (assumes a 4-D ``(N, C, H, W)`` image tensor — TODO confirm
                expected channels/normalization with the data pipeline).

        Returns:
            The output of the head: one channel, spatially sized by the
            configured ``heatmap_size``. No activation is applied to it here.
        """
        features = self.feature_extractor(image)
        heatmap = self.head(features)
        return heatmap