from transformers import PreTrainedTokenizerFast


class ShivikM1Tokenizer(PreTrainedTokenizerFast):
    """Fast tokenizer for the Shivik-M1 model.

    Thin wrapper around :class:`~transformers.PreTrainedTokenizerFast`
    that loads its vocabulary/merges from a serialized ``tokenizers``
    JSON file (``tokenizer_file``).
    """

    # Inputs this tokenizer produces for the model's forward pass.
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(self, tokenizer_file=None, **kwargs):
        """Initialize from a ``tokenizers`` JSON file.

        Args:
            tokenizer_file: Path to the serialized fast-tokenizer JSON
                file. Forwarded to the base class, which handles loading.
            **kwargs: Any other ``PreTrainedTokenizerFast`` options
                (special tokens, padding side, etc.).
        """
        super().__init__(tokenizer_file=tokenizer_file, **kwargs)

    @property
    def vocab_size(self) -> int:
        """Number of tokens in the vocabulary.

        Declared as a property to match the base-class contract:
        ``PreTrainedTokenizerFast.vocab_size`` is a property, so a plain
        method override would make ``tok.vocab_size`` evaluate to a bound
        method instead of an ``int``, breaking callers.
        """
        return len(self.get_vocab())