# Piper-TTS-DEMO / app.py
# drewThomasson's picture
# Update app.py
# 346bc15 verified
import gradio as gr
import os
import json
import requests
from tqdm import tqdm
import wave
import time
from piper import PiperVoice
# Load predefined voices from voices.json (read once, at import time).
# Each entry is expected to carry a "files" mapping whose keys are file paths
# relative to BASE_URL; download_voice_files() relies on that shape.
# JSON is UTF-8 by specification, so decode explicitly instead of depending on
# the platform's default encoding.
with open("voices.json", "r", encoding="utf-8") as f:
    voices_data = json.load(f)

# Prefix that turns a relative voice file path into a direct download URL.
BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main/"
def download_file(url, dest_path, timeout=30):
    """Stream ``url`` into ``dest_path`` while showing a tqdm progress bar.

    Args:
        url: Address of the file to fetch.
        dest_path: Local path to write; an existing file is overwritten.
        timeout: Seconds to wait for the server to connect or to send more
            data before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # The context manager releases the connection even if the transfer fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly rather than saving an HTML error page as a model file.
        response.raise_for_status()

        # tqdm treats ``None`` as "unknown length"; a literal 0 would render a
        # meaningless 0-byte total when the header is absent.
        total_size = int(response.headers.get('content-length', 0)) or None

        with open(dest_path, 'wb') as file, tqdm(
            desc=f"Downloading {os.path.basename(dest_path)}",
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            # 64 KiB chunks keep per-chunk overhead low for model-sized files.
            for data in response.iter_content(chunk_size=64 * 1024):
                # Skip empty chunks instead of stopping, so a stray empty
                # chunk can never truncate the file.
                if data:
                    file.write(data)
                    bar.update(len(data))
def download_voice_files(voice_key):
    """Make sure a predefined voice's files exist locally and locate them.

    Every file listed under ``voices_data[voice_key]["files"]`` is stored in
    ``<current working directory>/<voice_key>/``. Files already present there
    are not downloaded again.

    Args:
        voice_key: Key of the voice in ``voices_data``.

    Returns:
        A ``(model_file_path, config_file_path)`` tuple of local paths.

    Raises:
        KeyError: If ``voice_key`` (or its "files" entry) is missing.
        FileNotFoundError: If the listing contains no ``.onnx`` model or no
            ``.json`` config file.
    """
    files = voices_data[voice_key]["files"]

    # The folder where we'll store the model files
    voice_dir = os.path.join(os.getcwd(), voice_key)
    os.makedirs(voice_dir, exist_ok=True)

    model_file_path = None
    config_file_path = None

    for file_path in files:
        local_file_name = os.path.basename(file_path)
        local_file_path = os.path.join(voice_dir, local_file_name)

        if os.path.exists(local_file_path):
            print(f"File '{local_file_name}' already exists. Skipping download.")
        else:
            url = BASE_URL + file_path
            print(f"Downloading {url}")
            # Download under a temporary name and rename only on success, so
            # an interrupted transfer is never mistaken for a complete file by
            # the "already exists" check on a later call.
            partial_path = local_file_path + ".part"
            download_file(url, partial_path)
            os.replace(partial_path, local_file_path)

        # Identify model and config files
        if file_path.endswith('.onnx'):
            model_file_path = local_file_path
        elif file_path.endswith('.json'):  # also matches "*.onnx.json"
            config_file_path = local_file_path
        # Other files (e.g. MODEL_CARD) are downloaded but not returned.

    if not model_file_path or not config_file_path:
        raise FileNotFoundError(f"Model or config file not found for voice '{voice_key}'.")

    return model_file_path, config_file_path
def load_piper_tts(model_file_path, config_file_path):
    """Load a Piper voice and publish it as the module-level ``voice``.

    Args:
        model_file_path: Path to the ``.onnx`` model file.
        config_file_path: Path to the model's JSON config file.

    Raises:
        FileNotFoundError: If either file is missing. Before raising, the
            function prints which file is absent and, when possible, the
            contents of the model's directory to help debugging.
    """
    global voice

    model_exists = os.path.exists(model_file_path)
    config_exists = os.path.exists(config_file_path)

    if not (model_exists and config_exists):
        print(f"Model file exists: {model_exists}")
        print(f"Config file exists: {config_exists}")

        # A bare file name has an empty dirname, which means the current
        # directory. Guard the listing so the diagnostics can never raise an
        # unrelated error that hides the message below.
        model_dir = os.path.dirname(model_file_path) or "."
        if os.path.isdir(model_dir):
            print("Contents of the directory:")
            for item in os.listdir(model_dir):
                print(item)

        raise FileNotFoundError("Model or config file not found.")

    # CPU inference only (use_cuda=False).
    voice = PiperVoice.load(model_file_path, config_path=config_file_path, use_cuda=False)
    print("Model loaded successfully.")
def piper_to_tts(text_to_generate, output_audio_name):
    """Synthesize text with the loaded Piper voice into a WAV file.

    Args:
        text_to_generate: Text to speak.
        output_audio_name: Path of the WAV file to create; an existing file
            is overwritten.

    Raises:
        RuntimeError: If no voice has been loaded yet (the module-level
            ``voice`` set by ``load_piper_tts`` is absent).
    """
    print(f"Input Text: {text_to_generate}")

    loaded_voice = globals().get("voice")
    if loaded_voice is None:
        raise RuntimeError("Piper TTS model is not loaded. Please load it first.")

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    with wave.open(output_audio_name, 'wb') as wav_file:
        wav_file.setnchannels(1)  # Mono channel
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setframerate(loaded_voice.config.sample_rate)

        # NOTE(review): newer Piper releases expose synthesize_wav() for WAV
        # output, while synthesize() there only returns a lazy chunk generator
        # and would leave the file empty. Older releases only have
        # synthesize(text, wav_file). Confirm against the installed version.
        if hasattr(loaded_voice, "synthesize_wav"):
            loaded_voice.synthesize_wav(text_to_generate, wav_file)
        else:
            loaded_voice.synthesize(text_to_generate, wav_file)
    elapsed = time.perf_counter() - start_time

    print(f"Audio generated and saved to {output_audio_name} in {elapsed:.2f} seconds")
def tts_model(model_link, json_link, text, selected_voice):
    """Gradio handler: obtain a voice, load it and synthesize ``text``.

    Args:
        model_link: Optional URL of a custom ``.onnx`` model.
        json_link: Optional URL of the custom model's JSON config.
        text: Text to convert to speech.
        selected_voice: Key of the predefined voice to use when the two
            custom URLs are not both provided.

    Returns:
        Path of the generated WAV file (``"output.wav"``).
    """
    # Treat a missing value like an empty textbox, and use the *stripped*
    # URLs for the download as well: whitespace pasted around a URL would
    # otherwise pass the check below but break the HTTP request.
    model_url = (model_link or "").strip()
    config_url = (json_link or "").strip()

    # Decide which model and config files to use
    if model_url and config_url:
        # Use custom URLs if provided
        voice_dir = "custom_model"
        os.makedirs(voice_dir, exist_ok=True)
        model_path = os.path.join(voice_dir, "custom_model.onnx")
        config_path = os.path.join(voice_dir, "custom_model.json")
        download_file(model_url, model_path)
        download_file(config_url, config_path)
    else:
        # Use selected voice files if no custom links are given
        model_path, config_path = download_voice_files(selected_voice)

    # Load the model, then generate the audio with it
    load_piper_tts(model_path, config_path)

    output_file = "output.wav"
    piper_to_tts(text, output_file)
    return output_file
# Gradio interface
# Build the list of voice names once and reuse it for both the choices and
# the default selection.
_voice_choices = list(voices_data)

iface = gr.Interface(
    fn=tts_model,
    # Input order must match the parameter order of tts_model().
    inputs=[
        gr.Textbox(label="Custom Model File URL (Optional)"),
        gr.Textbox(label="Custom Config JSON URL (Optional)"),
        gr.Textbox(label="Enter Text to Convert"),
        gr.Dropdown(
            label="Select Predefined Voice",
            choices=_voice_choices,
            # An empty voices.json must not crash the import with IndexError.
            value=_voice_choices[0] if _voice_choices else None,
        ),
    ],
    outputs=gr.Audio(label="Generated Speech"),
)

if __name__ == "__main__":
    iface.launch()