from models.asit.data_transformations import DataAugmentation
from models.asit.vision_transformer import vit_base
from models.transformer_wrapper import BaseModelWrapper


class ASiTWrapper(BaseModelWrapper):
    """Wraps the ASiT vision transformer behind the shared BaseModelWrapper interface."""

    def __init__(self) -> None:
        super().__init__()
        # ASiT's mel-spectrogram preprocessing / augmentation pipeline.
        self.asit_mel = DataAugmentation()
        # ViT-Base backbone for single-channel 128 x 592 spectrogram inputs, split into
        # non-overlapping 16 x 16 patches; num_classes=0 drops the classification head.
        self.asit = vit_base(
            patch_size=[16, 16],
            audio_size=[128, 592],
            stride=[16, 16],
            in_chans=1,
            num_classes=0,
        )

    def mel_forward(self, x):
        # Apply ASiT's mel-spectrogram preprocessing to the raw audio batch.
        return self.asit_mel(x)

    def forward(self, spec):
        # Embed a batch of spectrograms with the ASiT backbone.
        return self.asit(spec)
    def separate_params(self):
        """Split parameters into 12 depth-wise groups (one per transformer block);
        embedding-related parameters join block 0 and the final norm joins block 11.
        Groups are returned deepest-first."""
        pt_params = [[] for _ in range(12)]
        for k, p in self.named_parameters():
            # Embedding-related parameters share the shallowest group with block 0.
            if any(t in k for t in ('cls_token', 'pos_embed', 'norm_stats', 'patch_embed')):
                pt_params[0].append(p)
            # The final LayerNorm joins the deepest group with block 11.
            elif 'asit.norm.weight' in k or 'asit.norm.bias' in k:
                pt_params[11].append(p)
            else:
                # Parameters of transformer block i go into group i.
                for i in range(12):
                    if f'blocks.{i}.' in k:
                        pt_params[i].append(p)
                        break
                else:
                    raise ValueError(f"Check separate params for ASiT! Unknown key: {k}")
        # Deepest blocks first.
        return list(reversed(pt_params))
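

# Illustrative usage sketch (an assumption, not part of the original module): the
# deepest-first groups returned by separate_params() pair naturally with a
# layer-wise learning-rate decay schedule. The optimizer choice and the base_lr /
# decay values below are hypothetical.
if __name__ == "__main__":
    import torch

    wrapper = ASiTWrapper()
    param_groups = wrapper.separate_params()  # deepest block group comes first

    base_lr, decay = 1e-4, 0.75  # hypothetical hyperparameters
    optimizer = torch.optim.AdamW(
        [{"params": params, "lr": base_lr * (decay ** depth)}
         for depth, params in enumerate(param_groups)]
    )
    print(f"Built optimizer with {len(optimizer.param_groups)} parameter groups")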