datastet-models / datasets-BERT_CRF /preprocessor.json
lfoppiano's picture
migrate models to HF
5c9bd68
{
"padding": true,
"return_lengths": false,
"return_word_embeddings": false,
"return_casing": false,
"return_features": false,
"return_chars": false,
"return_bert_embeddings": true,
"vocab_char": {
"<PAD>": 0,
"<UNK>": 1,
"!": 2,
"\"": 3,
"#": 4,
"$": 5,
"%": 6,
"&": 7,
"'": 8,
"(": 9,
")": 10,
"*": 11,
"+": 12,
",": 13,
"-": 14,
".": 15,
"/": 16,
"0": 17,
"1": 18,
"2": 19,
"3": 20,
"4": 21,
"5": 22,
"6": 23,
"7": 24,
"8": 25,
"9": 26,
":": 27,
";": 28,
"<": 29,
"=": 30,
">": 31,
"?": 32,
"@": 33,
"A": 34,
"B": 35,
"C": 36,
"D": 37,
"E": 38,
"F": 39,
"G": 40,
"H": 41,
"I": 42,
"J": 43,
"K": 44,
"L": 45,
"M": 46,
"N": 47,
"O": 48,
"P": 49,
"Q": 50,
"R": 51,
"S": 52,
"T": 53,
"U": 54,
"V": 55,
"W": 56,
"X": 57,
"Y": 58,
"Z": 59,
"[": 60,
"\\": 61,
"]": 62,
"^": 63,
"_": 64,
"`": 65,
"a": 66,
"b": 67,
"c": 68,
"d": 69,
"e": 70,
"f": 71,
"g": 72,
"h": 73,
"i": 74,
"j": 75,
"k": 76,
"l": 77,
"m": 78,
"n": 79,
"o": 80,
"p": 81,
"q": 82,
"r": 83,
"s": 84,
"t": 85,
"u": 86,
"v": 87,
"w": 88,
"x": 89,
"y": 90,
"z": 91,
"{": 92,
"|": 93,
"}": 94,
"~": 95,
"\u00a1": 96,
"\u00a2": 97,
"\u00a4": 98,
"\u00a7": 99,
"\u00a8": 100,
"\u00ae": 101,
"\u00b0": 102,
"\u00b1": 103,
"\u00b4": 104,
"\u00b5": 105,
"\u00b7": 106,
"\u00bc": 107,
"\u00bd": 108,
"\u00c2": 109,
"\u00c4": 110,
"\u00c5": 111,
"\u00c9": 112,
"\u00d2": 113,
"\u00d5": 114,
"\u00d7": 115,
"\u00d8": 116,
"\u00de": 117,
"\u00df": 118,
"\u00e1": 119,
"\u00e2": 120,
"\u00e3": 121,
"\u00e4": 122,
"\u00e5": 123,
"\u00e7": 124,
"\u00e9": 125,
"\u00ea": 126,
"\u00ed": 127,
"\u00ef": 128,
"\u00f0": 129,
"\u00f1": 130,
"\u00f3": 131,
"\u00f4": 132,
"\u00f8": 133,
"\u00fc": 134,
"\u0109": 135,
"\u012a": 136,
"\u012b": 137,
"\u0131": 138,
"\u0142": 139,
"\u015d": 140,
"\u0177": 141,
"\u017b": 142,
"\u017e": 143,
"\u01eb": 144,
"\u0219": 145,
"\u0263": 146,
"\u02da": 147,
"\u02dd": 148,
"\u0387": 149,
"\u0394": 150,
"\u0398": 151,
"\u039b": 152,
"\u03a3": 153,
"\u03a6": 154,
"\u03a8": 155,
"\u03b1": 156,
"\u03b2": 157,
"\u03b3": 158,
"\u03b4": 159,
"\u03b5": 160,
"\u03b6": 161,
"\u03b7": 162,
"\u03b8": 163,
"\u03ba": 164,
"\u03bb": 165,
"\u03bc": 166,
"\u03bd": 167,
"\u03be": 168,
"\u03c0": 169,
"\u03c1": 170,
"\u03c3": 171,
"\u03c4": 172,
"\u03c5": 173,
"\u03c6": 174,
"\u03c7": 175,
"\u03c8": 176,
"\u03c9": 177,
"\u03d5": 178,
"\u03f5": 179,
"\u0434": 180,
"\u0740": 181,
"\u0742": 182,
"\u0750": 183,
"\u0751": 184,
"\u0753": 185,
"\u123a": 186,
"\u123b": 187,
"\u1ef9": 188,
"\u2016": 189,
"\u2019": 190,
"\u2022": 191,
"\u2026": 192,
"\u202a": 193,
"\u202b": 194,
"\u202c": 195,
"\u2032": 196,
"\u2033": 197,
"\u2113": 198,
"\u211d": 199,
"\u2122": 200,
"\u2126": 201,
"\u212b": 202,
"\u2190": 203,
"\u2192": 204,
"\u2194": 205,
"\u21e1": 206,
"\u21e5": 207,
"\u2200": 208,
"\u2202": 209,
"\u2206": 210,
"\u2208": 211,
"\u2212": 212,
"\u221a": 213,
"\u221d": 214,
"\u221e": 215,
"\u222a": 216,
"\u223c": 217,
"\u2248": 218,
"\u2264": 219,
"\u2265": 220,
"\u226a": 221,
"\u2282": 222,
"\u2286": 223,
"\u22a5": 224,
"\u2303": 225,
"\u23af": 226,
"\u25b3": 227,
"\u27e8": 228,
"\u27e9": 229,
"\uf071": 230,
"\uf0a2": 231,
"\uf731": 232,
"\ufffd": 233
},
"vocab_tag": {
"<PAD>": 0,
"B-data_device": 1,
"B-dataset": 2,
"B-dataset_name": 3,
"I-data_device": 4,
"I-dataset": 5,
"I-dataset_name": 6,
"O": 7
},
"vocab_case": [
"<PAD>",
"numeric",
"allLower",
"allUpper",
"initialUpper",
"other",
"mainly_numeric",
"contains_digit"
],
"max_char_length": 30,
"feature_preprocessor": null,
"indice_tag": {
"0": "<PAD>",
"1": "B-data_device",
"2": "B-dataset",
"3": "B-dataset_name",
"4": "I-data_device",
"5": "I-dataset",
"6": "I-dataset_name",
"7": "O"
}
}