Spaces:
Running
on
Zero
Running
on
Zero
from os import PathLike
from typing import Dict, List, Optional, Union

from wenet.text.char_tokenizer import CharTokenizer
from wenet.text.tokenize_utils import tokenize_by_seg_dict
def read_seg_dict(path: Union[str, PathLike]) -> Dict[str, str]:
    """Load a two-column, tab-separated table from ``path`` into a dict.

    Each line is stripped of surrounding whitespace and split on tab
    characters; the first field becomes the key and the second field the
    value. If a key occurs on several lines, the last occurrence wins.

    Args:
        path: Location of the UTF-8 encoded file to read.

    Returns:
        A dict mapping the first field of every line to its second field.

    Raises:
        AssertionError: If a line does not split into exactly two fields
            (blank lines included). The check is an ``assert`` statement,
            so it is skipped when Python runs with ``-O``.
    """
    seg_table = {}
    with open(path, 'r', encoding='utf8') as fin:
        for line_no, line in enumerate(fin, start=1):
            # strip() also removes leading/trailing tabs, so a line with an
            # empty first or last field collapses to fewer than two fields.
            arr = line.strip().split('\t')
            assert len(arr) == 2, (
                f'{path}:{line_no}: expected 2 tab-separated fields, '
                f'got {len(arr)}')
            seg_table[arr[0]] = arr[1]
    return seg_table