import torch
import torch.nn as nn
import timm
from huggingface_hub import PyTorchModelHubMixin

class KeypointModel(nn.Module, PyTorchModelHubMixin):
    """Single-channel heatmap predictor built on a ConvNeXt V2 backbone.

    The network is a truncated timm ``convnextv2_base`` model used as a
    feature extractor, followed by a small convolutional head that resizes
    the feature map to ``heatmap_size`` and projects it to one channel.

    The backbone is created with ``pretrained=False``; weights are expected
    to come from a checkpoint loaded afterwards (e.g. via the hub mixin).
    """

    def __init__(self, config, **kwargs):
        """Build the feature extractor and the heatmap head.

        Args:
            config: Object exposing ``heatmap_size`` either as an attribute
                or, for plain dicts (e.g. a JSON-decoded config), as a key.
                The value is forwarded unchanged to ``nn.Upsample(size=...)``.
            **kwargs: Accepted for call-site compatibility and ignored.

        Raises:
            AttributeError: If ``config`` is not a dict and has no
                ``heatmap_size`` attribute.
            KeyError: If ``config`` is a dict without a ``heatmap_size`` key.
        """
        super().__init__()

        upsample_size = self._read_heatmap_size(config)

        backbone = timm.create_model('convnextv2_base.fcmae_ft_in22k_in1k_384', pretrained=False)

        # Keep every child module except the last two.
        # NOTE(review): presumably the dropped modules are the final norm and
        # the classifier head; this depends on the child order of the
        # installed timm version -- confirm before upgrading timm.
        # The Sequential layout also determines state-dict key names, so it
        # must not be restructured without migrating existing checkpoints.
        self.feature_extractor = nn.Sequential(*list(backbone.children())[:-2])
        in_channels = backbone.num_features
        self.head = nn.Sequential(
            nn.Conv2d(in_channels, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # Resize to the requested heatmap resolution before the final
            # 1x1 projection, so the output spatial size equals upsample_size.
            nn.Upsample(size=upsample_size, mode='bilinear', align_corners=False),
            nn.Conv2d(256, 1, kernel_size=1)
        )

    @staticmethod
    def _read_heatmap_size(config):
        """Return ``heatmap_size`` from an attribute-style or dict config."""
        try:
            # Attribute access first: identical to the historical behavior
            # for every config object that already worked.
            return config.heatmap_size
        except AttributeError:
            # NOTE(review): PyTorchModelHubMixin appears to pass a plain dict
            # for an unannotated ``config`` when re-creating the model from a
            # saved config.json -- confirm against the installed
            # huggingface_hub version.
            if isinstance(config, dict):
                return config["heatmap_size"]
            raise

    def forward(self, image):
        """Compute the heatmap for ``image``.

        Args:
            image: Input tensor fed to the feature extractor.
                Assumes a batched image tensor of shape (N, 3, H, W) --
                TODO confirm against the caller.

        Returns:
            The raw output of the head: one channel, spatially resized to the
            configured heatmap size. No activation is applied after the final
            convolution.
        """
        features = self.feature_extractor(image)
        heatmap = self.head(features)
        return heatmap