{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "<|unk|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "<|pad|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "<|im_start|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "<|im_sep|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 4,
      "content": "<|im_end|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "ByteLevel",
    "add_prefix_space": true,
    "trim_offsets": true,
    "use_regex": true
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": "<|unk|>",
    "continuing_subword_prefix": null,
    "end_of_word_suffix": null,
    "fuse_unk": false,
    "byte_fallback": false,
    "ignore_merges": false,
    "vocab": {
      "<|unk|>": 0,
      "<|pad|>": 1,
      "<|im_start|>": 2,
      "<|im_sep|>": 3,
      "<|im_end|>": 4,
      "!": 5,
      "'": 6,
      ",": 7,
      "I": 8,
      "T": 9,
      "d": 10,
      "e": 11,
      "g": 12,
      "h": 13,
      "i": 14,
      "m": 15,
      "n": 16,
      "o": 17,
      "r": 18,
      "s": 19,
      "t": 20,
      "u": 21,
      "y": 22,
      "Ċ": 23,
      "Ġ": 24,
      "hi": 25,
      "ng": 26,
      "re": 27,
      "Ġs": 28
    },
    "merges": [
      [
        "h",
        "i"
      ]
    ]
  }
}