shivik-m1-2b / tokenization_shivik_m1.py
ziadrone's picture
Upload folder using huggingface_hub
d83aab4 verified
raw
history blame
357 Bytes
from transformers import PreTrainedTokenizerFast
class ShivikM1Tokenizer(PreTrainedTokenizerFast):
    """Fast tokenizer for the Shivik-M1 model.

    A thin wrapper around :class:`PreTrainedTokenizerFast` that loads its
    state from a serialized ``tokenizer_file`` and exposes the standard
    model inputs.
    """

    # Tensors this tokenizer emits for the model's forward pass.
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(self, tokenizer_file=None, **kwargs):
        """Delegate construction entirely to the fast-tokenizer base class.

        Args:
            tokenizer_file: Path to a serialized ``tokenizer.json`` file,
                forwarded unchanged to the base constructor.
            **kwargs: Any additional tokenizer options, forwarded as-is.
        """
        super().__init__(tokenizer_file=tokenizer_file, **kwargs)

    @property
    def vocab_size(self):
        """Vocabulary size, computed as the length of ``get_vocab()``.

        NOTE(review): this counts every entry ``get_vocab()`` returns —
        presumably including added tokens; confirm against the base-class
        ``vocab_size`` semantics if the distinction matters to callers.
        """
        return len(self.get_vocab())