Spaces:
No application file
No application file
import argparse | |
import os | |
def handle_diarization(args):
    """Entry point for the ``diarization`` subcommand.

    Prints the source and target directories, checks that the source path
    exists, then forwards the whole namespace to ``diarization_dir``.

    Args:
        args: Parsed command-line namespace. ``source_dir`` and
            ``target_dir`` are read here; every other attribute is consumed
            by ``diarization_dir``.

    Raises:
        FileNotFoundError: If ``args.source_dir`` does not exist.
    """
    print(f"handle_diarization from {args.source_dir} to {args.target_dir}")
    # Explicit check rather than ``assert``: assertions are removed when
    # Python runs with ``-O``, which would let a bad path slip through.
    if not os.path.exists(args.source_dir):
        raise FileNotFoundError(
            f"source_dir does not exist: {args.source_dir}"
        )
    # Imported lazily so the dependency is only loaded for this subcommand.
    from subfix.solution.diarization import diarization_dir
    diarization_dir(args)
def handle_format_convert(args):
    """Entry point for the ``format_convert`` subcommand.

    Loads ``args.source`` with the format chosen from its file extension and
    saves the loaded data to ``args.target`` with the format chosen from the
    target's extension.

    Args:
        args: Parsed command-line namespace providing ``source`` and
            ``target`` file paths.
    """
    from .format import FormatBertvits2, FormatJson

    def pick_format(path):
        # A '.list' extension selects FormatBertvits2; anything else
        # falls back to FormatJson.
        is_list_file = os.path.splitext(path)[1] == '.list'
        return FormatBertvits2() if is_list_file else FormatJson()

    print(os.path.splitext(args.source)[1])
    reader = pick_format(args.source)
    writer = pick_format(args.target)
    loaded = reader.load(args.source)
    writer.save(args.target, loaded)
def handle_webui(args):
    """Entry point for the ``webui`` subcommand.

    Converts ``args.force_delete`` into a real ``bool`` (in place) and then
    passes the namespace to ``startwebui``.

    Args:
        args: Parsed command-line namespace. ``force_delete`` may be a string
            such as ``"True"``/``"false"`` or an actual ``bool``; it is
            replaced with ``True`` only when it spells "true" (any case,
            surrounding whitespace ignored).
    """
    # An exact comparison with "True" would silently treat "true", "TRUE" or
    # a boolean True as False, so normalise the textual form before comparing.
    args.force_delete = str(args.force_delete).strip().lower() == "true"
    # Imported lazily so the web UI module is only loaded for this subcommand.
    from .webui import startwebui
    startwebui(args)
def handle_create(args):
    """Entry point for the ``create`` subcommands.

    Prints the parsed arguments, then runs the task that matches
    ``args.solution``. Any other value results in no further action.

    Args:
        args: Parsed command-line namespace; ``solution`` selects the
            backend and the whole namespace is forwarded to it.
    """
    print(f"Checkout command with args: {args}")
    backend = args.solution
    # Each backend is imported only when selected, so the other one's
    # module is never loaded.
    if backend == "modelscope":
        from .solution.modelscope_multi_lang import run_task as launch
    elif backend == "whisper":
        from .solution.whisper_multi_lang import run_whisper_task as launch
    else:
        return
    launch(args)
def cli(argv=None):
    """Build the command-line parser and dispatch to the selected handler.

    Registered subcommands are ``webui``, ``create modelscope``,
    ``create whisper``, ``format_convert`` and ``diarization``. Each leaf
    parser stores its handler under ``func``; when the parsed namespace has
    no ``func`` (no subcommand, or ``create`` without a solution) the
    top-level help is printed instead.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read the process command line.
    """
    parser = argparse.ArgumentParser(description="a tool to check or create TTS dataset")
    subparsers = parser.add_subparsers(dest='command')

    # webui
    parser_webui = subparsers.add_parser('webui',
                                         help='webui to modify audios')
    parser_webui.add_argument('--load_json', default="None", help='source file, like demo.json')
    parser_webui.add_argument('--load_list', default="None", help='source file, like demo.list')
    parser_webui.add_argument('--json_key_text', default="text", type=str, help='the text key name in json, Default: text')
    parser_webui.add_argument('--json_key_path', default="wav_path", type=str, help='the path key name in json, Default: wav_path')
    parser_webui.add_argument('--g_batch', default=10, type=int, help='max number g_batch wav to display, Default: 10')
    parser_webui.add_argument('--webui_language', default="en", type=str, help='webui language: en or zh, Default: en')
    parser_webui.add_argument('--force_delete', default="False", type=str, help='delete file in disk while delete items, True or False, Default: False')
    parser_webui.set_defaults(func=handle_webui)

    # create
    parser_create = subparsers.add_parser('create',
                                          help='create dataset by origin audio dirctory: subfix create [modelscope|whisper]')
    create_subparsers = parser_create.add_subparsers(dest='solution',
                                                     help='auto asr solution, modelscope or whisper')

    # create modelscope
    modelscope_subparsers = create_subparsers.add_parser('modelscope',
                                                         help='modelscope models')
    modelscope_subparsers.add_argument("--source_dir", type=str, default="origin", help="Source directory path, Default: origin")
    modelscope_subparsers.add_argument("--target_dir", type=str, default="dataset", help="Target directory path, Default: dataset")
    modelscope_subparsers.add_argument("--cache_dir", type=str, default="cache", help="cache directory path, Default: cache")
    modelscope_subparsers.add_argument("--sample_rate", type=int, default=44100, help="Sample rate, Default: 44100")
    # Help text lists the choices separately from the actual default (ZH).
    modelscope_subparsers.add_argument("--language", type=str, default="ZH", help="Language: ZH|JA|KO|EN|DE|RU, Default: ZH")
    modelscope_subparsers.add_argument("--output", type=str, default="demo.list", help="List file, Default: demo.list")
    modelscope_subparsers.add_argument("--max_seconds", type=int, default=15, help="Max sliced voice length(seconds), Default: 15")
    modelscope_subparsers.set_defaults(func=handle_create)

    # create whisper
    whisper_subparsers = create_subparsers.add_parser('whisper',
                                                      help='whisper models')
    whisper_subparsers.add_argument("--source_dir", type=str, default="origin", help="Source directory path, Default: origin")
    whisper_subparsers.add_argument("--target_dir", type=str, default="dataset", help="Target directory path, Default: dataset")
    whisper_subparsers.add_argument("--cache_dir", type=str, default="cache", help="cache directory path, Default: cache")
    # Help text states the real default, matching ``default="large-v3"``.
    whisper_subparsers.add_argument("--model", type=str, default="large-v3", help="whisper model small/medium/large-v3, Default: large-v3")
    whisper_subparsers.add_argument("--sample_rate", type=int, default=44100, help="Sample rate, Default: 44100")
    whisper_subparsers.add_argument("--language", type=str, default="ZH", help="Any Language whisper support, Default: ZH")
    whisper_subparsers.add_argument("--output", type=str, default="demo.list", help="List file, Default: demo.list")
    whisper_subparsers.add_argument("--max_seconds", type=int, default=15, help="Max sliced voice length(seconds), Default: 15")
    whisper_subparsers.set_defaults(func=handle_create)

    # format_convert
    parser_format_convert = subparsers.add_parser('format_convert',
                                                  help='format_convert: format_convert --source demo.json --target demo.list')
    parser_format_convert.add_argument('--source', default="demo.list", help='source file, like demo.json/list')
    parser_format_convert.add_argument('--target', default="demo.json", help='target file, like demo.list/json')
    parser_format_convert.set_defaults(func=handle_format_convert)

    # diarization
    parser_diarization = subparsers.add_parser('diarization',
                                               help='diarization: diarization -h')
    parser_diarization.add_argument('--source_dir', default="origin", help='source dir, Default: origin')
    parser_diarization.add_argument('--target_dir', default="diarization", help='target dir, Default: diarization')
    parser_diarization.add_argument('--cache_dir', default="cache", help='cache dir, Default: cache')
    parser_diarization.add_argument('--min_seconds', default=3.0, type=float, help='slice must bigger than min_seconds, Default: 3.0')
    parser_diarization.add_argument('--top_of_number', default=1, type=int, help='The n items with the highest frequency of occurrence. Default: 1')
    parser_diarization.add_argument('--interval', default=1.0, type=float, help='The interval between two slice audio. Default: 1.0')
    parser_diarization.add_argument("--sample_rate", type=int, default=44100, help="Sample rate, Default: 44100")
    parser_diarization.add_argument("--oracle_num", type=int, default=0, help="oracle number, the person number you think maybe in audio, Default: 0")
    parser_diarization.set_defaults(func=handle_diarization)

    # run
    # ``parse_args(None)`` reads sys.argv[1:], so calling ``cli()`` with no
    # arguments behaves as a normal console entry point.
    args = parser.parse_args(argv)
    if hasattr(args, 'func'):
        args.func(args)
    else:
        parser.print_help()