import torch
import torch.nn as nn
import timm
from huggingface_hub import PyTorchModelHubMixin


class KeypointModel(nn.Module, PyTorchModelHubMixin):
    """Single-channel heatmap regressor on top of a timm ConvNeXt-V2 backbone.

    The backbone is created without pretrained weights; its trailing two child
    modules are dropped and the remainder is used as a feature extractor. A
    small convolutional head then produces a one-channel map that is bilinearly
    resized to ``config.heatmap_size``.
    """

    def __init__(self, config, **kwargs):
        """Build the feature extractor and the heatmap head.

        Args:
            config: Either a ``dict`` with a ``"heatmap_size"`` key or any
                object exposing a ``heatmap_size`` attribute. The value is
                forwarded as ``size`` to ``nn.Upsample``.
            **kwargs: Accepted for interface compatibility; not used here.

        Raises:
            KeyError: If ``config`` is a dict without ``"heatmap_size"``.
            AttributeError: If ``config`` is an object without
                ``heatmap_size``.
        """
        super().__init__()

        # huggingface_hub's mixin hands back the saved config as a plain dict
        # when reloading a model, while direct callers may pass an
        # attribute-style object; support both.
        if isinstance(config, dict):
            upsample_size = config["heatmap_size"]
        else:
            upsample_size = config.heatmap_size

        backbone = timm.create_model(
            'convnextv2_base.fcmae_ft_in22k_in1k_384',
            pretrained=False,
        )

        # Keep every top-level child except the last two (presumably the
        # pre-head norm and the classifier head in timm's ConvNeXt — confirm
        # against the installed timm version).
        # NOTE: nn.Sequential names its children by position, so parameter
        # keys look like ``feature_extractor.<idx>.*``. Changing this slicing
        # would rename state_dict keys and break existing checkpoints.
        self.feature_extractor = nn.Sequential(*list(backbone.children())[:-2])

        # Channel count reported by the backbone; assumed to match the
        # channel dimension of the feature extractor's output.
        in_channels = backbone.num_features

        self.head = nn.Sequential(
            nn.Conv2d(in_channels, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Upsample(size=upsample_size, mode='bilinear', align_corners=False),
            # 1x1 projection down to a single heatmap channel.
            nn.Conv2d(256, 1, kernel_size=1),
        )

    def forward(self, image: torch.Tensor) -> torch.Tensor:
        """Run the backbone and the head on ``image`` and return the heatmap.

        Args:
            image: Input batch; assumed to be a 4-D image tensor accepted by
                the ConvNeXt backbone (TODO confirm expected resolution and
                normalisation with the caller).

        Returns:
            The head's raw output: a one-channel map whose spatial size is
            the configured ``heatmap_size``. No activation is applied.
        """
        features = self.feature_extractor(image)
        heatmap = self.head(features)
        return heatmap