### Push UDLM to Hub

In [None]:
import os

import huggingface_hub
import torch
import transformers

from models.hf import UDLMConfig
from models.hf import UDLM
from models.ema import ExponentialMovingAverage

In [None]:
# Locate the Hugging Face home directory. HF_HOME is optional, so fall back to
# the library's documented default (~/.cache/huggingface) instead of raising
# KeyError when the variable is not set.
# NOTE(review): the fallback does not consult XDG_CACHE_HOME.
hf_home = os.environ.get(
    'HF_HOME',
    os.path.join(os.path.expanduser('~'), '.cache', 'huggingface'))
token_path = os.path.join(hf_home, 'token')

# Read a cached access token if one exists; otherwise pass None to login().
if os.path.exists(token_path):
    with open(token_path, 'r') as f:
        # An empty/whitespace-only file is treated the same as "no token".
        token = f.read().strip() or None
else:
    token = None
huggingface_hub.login(token=token)

In [None]:
# Register the custom classes with the transformers Auto* machinery.
# The config is registered under 'AutoConfig' (the default target, spelled out
# here), and the model under 'AutoModelForMaskedLM'.
UDLMConfig.register_for_auto_class(auto_class='AutoConfig')
UDLM.register_for_auto_class(auto_class='AutoModelForMaskedLM')

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on hosts without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Tokenizer to pair with the checkpoint being uploaded:
#   LM1B -> 'bert-base-uncased'
#   QM9  -> 'yairschiff/qm9-tokenizer'
# Keep this choice in sync with `name_or_path` below.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    'bert-base-uncased',
    trust_remote_code=True,
)
# tokenizer = transformers.AutoTokenizer.from_pretrained('yairschiff/qm9-tokenizer', trust_remote_code=True)

# Hub repository id the model is pushed to (and later re-downloaded from):
#   LM1B -> 'kuleshov-group/udlm-lm1b'
#   QM9  -> 'kuleshov-group/udlm-qm9'
name_or_path = 'kuleshov-group/udlm-lm1b'
# name_or_path = 'kuleshov-group/udlm-qm9'

In [None]:
# Keyword arguments for the UDLM configuration, collected in one mapping and
# unpacked into the constructor. The vocabulary size is taken from the
# tokenizer loaded above; the remaining values are fixed for this upload.
# NOTE(review): these must match the settings the checkpoint was trained
# with -- confirm against the training config.
config_kwargs = {
    'vocab_size': tokenizer.vocab_size,
    'model_length': 128,
    'hidden_dim': 768,
    'cond_dim': 128,
    'n_blocks': 12,
    'n_heads': 12,
    'dropout': 0.1,
    'time_conditioning': True,
    'cfg': False,
    'cfg_num_classes': -1,
    'return_dict': False,
}
config = UDLMConfig(**config_kwargs)

In [None]:
# Instantiate the model from the config built above.
model = UDLM(config)

# EMA helper over the backbone parameters only.
# NOTE(review): decay=0.0 suggests this object is used purely as a container
# for the checkpointed EMA weights loaded later in the notebook -- confirm.
ema = ExponentialMovingAverage(model.backbone.parameters(), decay=0.0)

In [None]:
# Record the Hub repo id on the config and tell the Auto* classes where to
# find the custom code, using '<repo>--<module>.<Class>' references.
model.config._name_or_path = name_or_path
model.config.auto_map = {
    # Fixed module name: was misspelt 'configuraction_udlm', which does not
    # follow the 'configuration_*' / 'modeling_*' naming used by the sibling
    # entry below.
    # NOTE(review): assumes the config class lives in configuration_udlm.py.
    'AutoConfig': f'{name_or_path}--configuration_udlm.UDLMConfig',
    'AutoModelForMaskedLM': f'{name_or_path}--modeling_udlm.UDLM',
}

In [None]:
# Path to the training checkpoint to export; must be filled in before running.
ckpt_path = ''
# Load onto the CPU regardless of the device the checkpoint was saved from, so
# loading does not require (or fill) a GPU. The model is moved to `device`
# in a later cell.
ckpt = torch.load(ckpt_path, map_location='cpu')

In [None]:
# Restore the EMA state stored under the checkpoint's 'ema' key, then copy it
# into the backbone parameters (the same parameter set the EMA helper was
# constructed from). Order matters: the state must be loaded before copying.
ema.load_state_dict(ckpt['ema'])
ema.copy_to(model.backbone.parameters())
# Move the populated model to the target device.
model = model.to(device)

In [None]:
# Confirm EMA params loaded.
# 1) The EMA helper's shadow params must match the ones in the checkpoint.
for c, m in zip(ema.shadow_params, ckpt['ema']['shadow_params']):
    if not torch.allclose(c.to(device), m.to(device)):
        print('Issue with EMA!')

# 2) The model must now hold those same values. Compare against the backbone
# parameters specifically: the EMA was built from and copied into
# `model.backbone.parameters()`, so zipping against `model.parameters()` could
# pair up unrelated tensors if the model has parameters outside the backbone.
for c, m in zip(ema.shadow_params, model.backbone.parameters()):
    if not torch.allclose(c.to(device), m.to(device)):
        print('Issue with EMA!')

In [None]:
# Upload the model to the Hub repository named by `name_or_path`;
# private=False requests a publicly visible repository.
model.push_to_hub(name_or_path, private=False)

### Test Model from Hub

In [None]:
# Re-load the just-pushed model from the Hub through the generic Auto class.
# trust_remote_code=True is passed so that custom code hosted in the repo may
# be used to build the model.
model_test = transformers.AutoModelForMaskedLM.from_pretrained(name_or_path, trust_remote_code=True)

In [None]:
# Dummy batch for a smoke test: a 2 x 10 tensor of random integer ids drawn
# from [0, 10), created on the CPU and then moved to the target device.
input_ids = torch.randint(low=0, high=10, size=(2, 10)).to(device)

# Place the downloaded model on the same device and switch it to eval mode.
model_test = model_test.to(device)
model_test.eval()

In [None]:
# Smoke-test the downloaded model. Run the forward pass once (instead of once
# per print) and without autograd tracking, since no gradients are needed.
# The second argument is a length-2 tensor of zeros, one entry per batch row.
with torch.no_grad():
    output = model_test(input_ids, torch.zeros(2,).to(device))
print(output.shape)
print(output.mean())