Qwen-TTS-Demo / app.py
multimodalart's picture
feat: Enable MCP
9b73212 verified
raw
history blame
1.79 kB
import os
import requests
import tempfile
import dashscope
import gradio as gr
# Read the API key from the API_KEY environment variable (or replace this
# lookup with your key string directly).
# Indexing os.environ raises KeyError at import time if API_KEY is not set.
API_KEY = os.environ['API_KEY']
def tts_gradio(text: str, voice: str) -> str:
    """
    Synthesize speech with the Qwen-TTS API, save the returned WAV audio to a
    temporary file, and return that file's path so Gradio can play it.
    Blank text is rejected with ValueError; a response without an audio URL
    is reported as RuntimeError; download failures propagate from requests.

    Args:
        text: The input text to be converted to speech.
        voice: The voice/speaker to use for text-to-speech synthesis. Options include "Dylan", "Sunny", "Jada", "Cherry", "Ethan", "Serena", "Chelsie".

    Returns:
        The file path of the generated audio file in WAV format.
    """
    # Reject blank input up front instead of spending an API call on it.
    if not text or not text.strip():
        raise ValueError("text must be a non-empty string")

    # Request the synthesis.
    response = dashscope.audio.qwen_tts.SpeechSynthesizer.call(
        model="qwen-tts-latest",
        api_key=API_KEY,
        text=text,
        voice=voice,
    )

    # NOTE(review): the exact shape of a failed response is not visible here;
    # presumably output/audio can be missing or None. Convert any such lookup
    # failure into one descriptive error instead of an opaque AttributeError.
    try:
        audio_url = response.output.audio["url"]
    except (AttributeError, KeyError, TypeError) as err:
        raise RuntimeError(
            f"Qwen-TTS response did not contain an audio URL: {response!r}"
        ) from err
    if not audio_url:
        raise RuntimeError(
            f"Qwen-TTS response carried an empty audio URL: {response!r}"
        )

    # Download the audio. The (connect, read) timeout keeps a stalled
    # connection from blocking this request forever.
    resp = requests.get(audio_url, timeout=(10, 60))
    resp.raise_for_status()

    # delete=False leaves the file on disk after it is closed, so the returned
    # path stays valid; the with-block closes the handle even if write() fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(resp.content)

    # Hand the path back; Gradio plays the file from it.
    return tmp.name
# Define the Gradio interface: a text box and a speaker dropdown feed
# tts_gradio, and the returned audio file path is shown in an audio player.
demo = gr.Interface(
    fn=tts_gradio,
    inputs=[
        gr.Textbox(lines=4, label="input"),
        gr.Dropdown(
            choices=["Dylan", "Sunny", "Jada", "Cherry", "Ethan", "Serena", "Chelsie"],
            value="Dylan",
            label="speaker",
        ),
    ],
    outputs=gr.Audio(label="output"),
    title="Qwen-TTS Gradio demo",
    # Single-quoted literal so the double quotes around "submit" are part of
    # the text; nesting them inside a double-quoted literal is a SyntaxError.
    description='input text,choose speaker,click "submit"',
    allow_flagging="never",
)
if __name__ == "__main__":
    # For local debugging, open localhost:7860.
    # Options are gathered first and then unpacked into launch().
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "mcp_server": True,
    }
    demo.launch(**launch_options)