"""Gradio demo that turns Chinese text into speech with a pretrained model.

At import time the script loads a processor and a model from the Hugging Face
Hub and moves the model to the GPU when one is available. It then builds a
single-textbox Gradio interface whose callback synthesizes a WAV file.
"""

import tempfile

import gradio as gr
import scipy.io.wavfile
import torch
# NOTE(review): `AutoModelForSpeechT5` does not appear to be a class exported
# by `transformers`; this import is kept as written but will likely raise
# ImportError. Confirm the intended loader for this checkpoint.
from transformers import AutoProcessor, AutoModelForSpeechT5

# Hub identifier shared by the processor and the model.
MODEL_ID = "FunAudioLLM/CosyVoice2-0.5B"

# Sample rate written into the WAV header.
# NOTE(review): assumes the model emits 16 kHz audio -- TODO confirm against
# the checkpoint's configuration.
SAMPLE_RATE = 16000

# Load the model and the processor.
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForSpeechT5.from_pretrained(MODEL_ID)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)


def tts_fn(text):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    Args:
        text: The text to speak, as entered in the Gradio textbox.

    Returns:
        The path of a newly created temporary ``.wav`` file, or ``None`` when
        ``text`` is empty or contains only whitespace. The file is created
        with ``delete=False`` and is not removed by this function.
    """
    # Nothing to synthesize; skip the model call entirely.
    if not text or not text.strip():
        return None

    inputs = processor(text, return_tensors="pt").to(device)
    with torch.no_grad():
        speech = model.generate(**inputs)

    # scipy treats a 2-D array as (samples, channels), so a batched (1, N)
    # output would be written as one sample with N channels. Dropping
    # singleton dimensions avoids that.
    # NOTE(review): presumes `generate` returns a waveform tensor -- verify.
    speech = speech.squeeze().cpu().numpy()

    # Reserve a path and close the handle before writing: re-opening a
    # NamedTemporaryFile by name while it is still open is platform-dependent.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wav_path = f.name
    scipy.io.wavfile.write(wav_path, rate=SAMPLE_RATE, data=speech)
    return wav_path


# Gradio interface.
demo = gr.Interface(
    fn=tts_fn,
    inputs=gr.Textbox(label="请输入中文文本", placeholder="例如:你好,这是 CosyVoice2 的语音演示"),
    outputs=gr.Audio(label="合成语音"),
    title="CosyVoice2 中文语音合成演示",
)

# Start the server only when run as a script, so importing this module
# (for example to reuse `tts_fn`) does not block on the web server.
if __name__ == "__main__":
    demo.launch()