Spaces:
Running
on
Zero
Running
on
Zero
File size: 440 Bytes
841f290 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
from os import PathLike
from typing import Dict, List, Optional, Union
from wenet.text.char_tokenizer import CharTokenizer
from wenet.text.tokenize_utils import tokenize_by_seg_dict
def read_seg_dict(path: Union[str, PathLike]) -> Dict[str, str]:
    """Load a two-column, tab-separated file into a dictionary.

    Each line is stripped of surrounding whitespace and split on tab
    characters. The first field becomes the key and the second field the
    value. If a key occurs more than once, the last occurrence wins.

    Args:
        path: Location of the UTF-8 encoded file to read.

    Returns:
        A dict mapping the first field of every line to its second field.

    Raises:
        AssertionError: If a line does not consist of exactly two
            tab-separated fields.
    """
    seg_table: Dict[str, str] = {}
    with open(path, 'r', encoding='utf8') as fin:
        for lineno, line in enumerate(fin, start=1):
            fields = line.strip().split('\t')
            # Raise explicitly rather than using an ``assert`` statement:
            # ``assert`` is removed under ``python -O``, which would let
            # malformed lines through (or fail later with an IndexError).
            # AssertionError is kept so the exception type is unchanged.
            if len(fields) != 2:
                raise AssertionError(
                    f'{path}:{lineno}: expected 2 tab-separated fields, '
                    f'got {len(fields)}')
            key, value = fields
            seg_table[key] = value
    return seg_table
|