import spaces
import sys
import torch
import gradio as gr
import opencc
# Add the bundled third-party library (Matcha-TTS) to the import path
sys.path.append('third_party/Matcha-TTS')
from cosyvoice.cli.cosyvoice import CosyVoice2
from cosyvoice.utils.file_utils import load_wav
from huggingface_hub import hf_hub_download
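# Note: hf_hub_download is imported above but never called; the CosyVoice2
# constructors below are passed Hub repo ids directly. If that lookup ever
# needs to be made explicit, the checkpoints could be pre-fetched first
# (a sketch, not part of the original flow):
#   from huggingface_hub import snapshot_download
#   model_dir = snapshot_download("ASLP-lab/Cosyvoice2-Yue")  # hypothetical explicit download
#   cosyvoice_base = CosyVoice2(model_dir, load_jit=False, load_trt=False, load_vllm=False, fp16=False)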
# Simplified-to-Traditional Chinese converter
converter = opencc.OpenCC('s2t.json')
# Preset reference (prompt) audio options
PRESET_AUDIO_OPTIONS = {
"ZoengJyutGaai": "asset/张悦楷.wav",
"Trump": "asset/特朗普.wav",
"Taiyi Zhenren": "asset/太乙真人.wav",
"自定义上传": None
}
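# The dropdown labels above map to bundled prompt wavs; "Custom upload" maps to
# None, which tells tts_inference to fall back to the audio uploaded by the user.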
# Load the TTS models
cosyvoice_base = CosyVoice2(
'ASLP-lab/Cosyvoice2-Yue',
load_jit=False, load_trt=False, load_vllm=False, fp16=False
)
print('Loaded ASLP-lab/Cosyvoice2-Yue')
cosyvoice_zjg = CosyVoice2(
'ASLP-lab/Cosyvoice2-Yue-ZoengJyutGaai',
load_jit=False, load_trt=False, load_vllm=False, fp16=False
)
print('Loaded ASLP-lab/Cosyvoice2-Yue-ZoengJyutGaai')
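# Both models are instantiated on CPU at startup and moved to the GPU per
# request inside the @spaces.GPU-decorated handler (model.model.cuda() below),
# which is the usual pattern for ZeroGPU Spaces.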
# cosyvoice_biaobei = CosyVoice2(
# 'pretrained_models/CosyVoice2-yue-biaobei',
# load_jit=False, load_trt=False, load_vllm=False, fp16=False
# )
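# On Hugging Face ZeroGPU Spaces, @spaces.GPU requests a GPU only for the
# duration of each call to the decorated function; a duration= argument
# (e.g. @spaces.GPU(duration=120)) can raise the default time budget if
# synthesis of long texts runs out of time.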
@spaces.GPU
def tts_inference(model_choice, text, preset_audio_choice, prompt_audio):
    # Select the model and its default prompt audio
    if model_choice == "CosyVoice2-Yue-ZoengJyutGaai":
        model = cosyvoice_zjg
        prompt_audio = "asset/sg_017_090.wav"
    elif model_choice == "CosyVoice2-精品女音":
        # Only reachable if the commented-out model list below is re-enabled
        model = cosyvoice_base
        prompt_audio = "asset/F01_中立_20054.wav"
    elif model_choice == "CosyVoice2-Yue":
        model = cosyvoice_base
        if preset_audio_choice == "Custom upload":
            # prompt_audio already holds the user-uploaded file path
            if prompt_audio is None:
                return None, "Please upload a reference audio or choose a preset one"
        else:
            prompt_audio = PRESET_AUDIO_OPTIONS[preset_audio_choice]
            if prompt_audio is None:
                return None, "Please choose a valid reference audio"
    else:
        return None, "Unknown model"
    model.model.cuda()
    # Convert Simplified Chinese input to Traditional Chinese
    text = converter.convert(text)
    # Load the reference audio, resampled to 16 kHz
    prompt_speech_16k = load_wav(prompt_audio, 16000)
    all_speech = []
    # The instruction "用粤语说这句话" tells the model to speak the text in Cantonese
    for j in model.inference_instruct2(
        text, "用粤语说这句话", prompt_speech_16k, stream=False
    ):
        all_speech.append(j['tts_speech'])
    concatenated_speech = torch.cat(all_speech, dim=1)
    audio_numpy = concatenated_speech.squeeze(0).cpu().numpy()
    sample_rate = model.sample_rate
    return (sample_rate, audio_numpy), f"Generation succeeded: {text}"
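# A minimal local smoke test (hypothetical; kept commented out so the Spaces
# runtime only serves the Gradio app):
#   audio, status = tts_inference("CosyVoice2-Yue", "你好,世界", "ZoengJyutGaai", None)
#   print(status)            # expected: "Generation succeeded: ..."
#   sample_rate, wav = audio  # wav is a numpy array ready for playback or saving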
# ---- Gradio Interface ----
demo = gr.Interface(
    fn=tts_inference,
    inputs=[
        gr.Dropdown(
            ["CosyVoice2-Yue", "CosyVoice2-Yue-ZoengJyutGaai"],
            # ["CosyVoice2-base", "CosyVoice2-张悦楷粤语评书", "CosyVoice2-精品女音"],
            label="Select model", value="CosyVoice2-Yue"
        ),
        gr.Textbox(lines=2, label="Input text"),
        gr.Dropdown(
            choices=list(PRESET_AUDIO_OPTIONS.keys()),
            value="ZoengJyutGaai",
            label="Preset prompt audio (CosyVoice2-Yue only)"
        ),
        # The legacy Gradio 3.x signature used source="upload"; Gradio 4 takes sources=[...]
        gr.Audio(sources=["upload"], type="filepath",
                 label="Upload prompt audio (CosyVoice2-Yue only)")
    ],
    outputs=[
        gr.Audio(type="numpy", label="Generated speech"),
        gr.Textbox(label="Status")
    ]
)
demo.launch()
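# On Spaces the platform handles hosting; when running locally,
# demo.launch(share=True) would additionally expose a temporary public link.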