# Piper-TTS-DEMO
#
# Runtime error
#
# File size: 5,393 Bytes

import gradio as gr
import os
import json
import requests
from tqdm import tqdm
import wave
import time
from piper import PiperVoice

# Load predefined voices from voices.json. The encoding is given explicitly so
# that reading the file does not depend on the platform's default encoding.
with open("voices.json", "r", encoding="utf-8") as f:
    voices_data = json.load(f)

# Prefix prepended to a voice file path to form its download URL.
BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main/"

def download_file(url, dest_path, timeout=30, chunk_size=64 * 1024):
    """Stream the resource at *url* into *dest_path*, showing a progress bar.

    Args:
        url: Address fetched with an HTTP GET request.
        dest_path: Local file to write; it is opened in ``'wb'`` mode, so an
            existing file is overwritten.
        timeout: Value passed to ``requests.get`` as its ``timeout``.
        chunk_size: Number of bytes requested from the stream per iteration.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: If the request itself fails.
    """
    # Using the response as a context manager releases the streamed
    # connection even when writing fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail before dest_path is opened so that an error page is never
        # stored on disk as if it were the requested file.
        response.raise_for_status()

        total_size = int(response.headers.get('content-length', 0))
        with open(dest_path, 'wb') as file, tqdm(
            desc=f"Downloading {os.path.basename(dest_path)}",
            # Without a content-length header the size is unknown; None lets
            # tqdm show a running count instead of a bogus 0-byte total.
            total=total_size or None,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in response.iter_content(chunk_size):
                if not data:
                    break
                file.write(data)
                bar.update(len(data))

def download_voice_files(voice_key):
    """Make sure the files of a predefined voice exist locally.

    Every path listed under ``voices_data[voice_key]["files"]`` is fetched
    from ``BASE_URL`` into ``<current working directory>/<voice_key>/`` unless
    a file with the same base name is already present there.

    Args:
        voice_key: Key of the voice in ``voices_data``.

    Returns:
        A ``(model_file_path, config_file_path)`` tuple of local paths: the
        listed file ending in ``.onnx`` and the listed file ending in
        ``.json``.

    Raises:
        KeyError: If ``voice_key`` is not in ``voices_data`` or its entry has
            no ``"files"`` key.
        FileNotFoundError: If the voice lists no ``.onnx`` or no ``.json``
            file.
    """
    files = voices_data[voice_key]["files"]

    # The folder where we'll store the model files
    voice_dir = os.path.join(os.getcwd(), voice_key)
    os.makedirs(voice_dir, exist_ok=True)

    model_file_path = None
    config_file_path = None

    for file_path in files:
        local_file_name = os.path.basename(file_path)
        local_file_path = os.path.join(voice_dir, local_file_name)

        if os.path.exists(local_file_path):
            print(f"File '{local_file_name}' already exists. Skipping download.")
        else:
            url = BASE_URL + file_path
            print(f"Downloading {url}")
            try:
                # Reuse the shared helper instead of repeating its loop here.
                download_file(url, local_file_path)
            except BaseException:
                # A partial file would satisfy the existence check above on
                # the next call and be used as if it were complete.
                if os.path.exists(local_file_path):
                    os.remove(local_file_path)
                raise

        # Identify model and config files. '.onnx.json' ends in '.json', so a
        # single suffix test covers both spellings of the config name.
        if file_path.endswith('.onnx'):
            model_file_path = local_file_path
        elif file_path.endswith('.json'):
            config_file_path = local_file_path
        # Other files, e.g. MODEL_CARD, are downloaded but not returned.

    if not model_file_path or not config_file_path:
        raise FileNotFoundError(f"Model or config file not found for voice '{voice_key}'.")

    return model_file_path, config_file_path

def load_piper_tts(model_file_path, config_file_path):
    """Load a Piper voice and store it in the module-level ``voice`` global.

    Args:
        model_file_path: Path of the model file handed to ``PiperVoice.load``.
        config_file_path: Path of the config file handed to
            ``PiperVoice.load`` as ``config_path``.

    Raises:
        FileNotFoundError: If either path does not exist. Diagnostics about
            which file is missing are printed first.
    """
    global voice

    model_exists = os.path.exists(model_file_path)
    config_exists = os.path.exists(config_file_path)
    if not (model_exists and config_exists):
        print(f"Model file exists: {model_exists}")
        print(f"Config file exists: {config_exists}")
        # dirname is "" for a bare file name; fall back to the current
        # directory, which is where such a name would be resolved.
        model_dir = os.path.dirname(model_file_path) or "."
        # The listing is diagnostic only: when the directory itself is
        # missing, os.listdir would raise and hide the error raised below.
        if os.path.isdir(model_dir):
            print("Contents of the directory:")
            for item in os.listdir(model_dir):
                print(item)
        raise FileNotFoundError("Model or config file not found.")

    voice = PiperVoice.load(model_file_path, config_path=config_file_path, use_cuda=False)
    print("Model loaded successfully.")

def piper_to_tts(text_to_generate, output_audio_name):
    """Synthesize speech for a text with the loaded voice and save it as WAV.

    Args:
        text_to_generate: Text handed to the voice's ``synthesize`` method.
        output_audio_name: Path of the WAV file to write.

    Raises:
        RuntimeError: If no module-level ``voice`` has been set yet.
    """
    print(f"Input Text: {text_to_generate}")

    module_scope = globals()
    if 'voice' not in module_scope:
        raise RuntimeError("Piper TTS model is not loaded. Please load it first.")
    synthesizer = module_scope['voice']

    started = time.time()

    with wave.open(output_audio_name, 'wb') as wav_out:
        # Header fields: one channel, two bytes per sample, and the frame
        # rate taken from the voice's configuration.
        wav_out.setnchannels(1)
        wav_out.setsampwidth(2)
        wav_out.setframerate(synthesizer.config.sample_rate)
        synthesizer.synthesize(text_to_generate, wav_out)

    elapsed = time.time() - started
    print(f"Audio generated and saved to {output_audio_name} in {elapsed:.2f} seconds")

def tts_model(model_link, json_link, text, selected_voice):
    """Generate speech for *text* and return the path of the WAV file.

    Args:
        model_link: Optional URL of a custom model file.
        json_link: Optional URL of the matching config JSON.
        text: Text to convert to speech.
        selected_voice: Key of a predefined voice; used unless both custom
            URLs are given.

    Returns:
        The path of the generated audio file, ``"output.wav"``.
    """
    # Normalise once so the value that is validated is also the value that is
    # downloaded; a missing (None) input counts as "not provided".
    model_url = (model_link or "").strip()
    config_url = (json_link or "").strip()

    # Decide which model and config files to use
    if model_url and config_url:
        # NOTE(review): these URLs come straight from the user interface and
        # the downloaded model is loaded afterwards; consider restricting the
        # allowed hosts if this app is reachable by untrusted users.
        voice_dir = "custom_model"
        os.makedirs(voice_dir, exist_ok=True)
        model_path = os.path.join(voice_dir, "custom_model.onnx")
        config_path = os.path.join(voice_dir, "custom_model.json")
        download_file(model_url, model_path)
        download_file(config_url, config_path)
    else:
        # Use selected voice files if no custom links are given
        model_path, config_path = download_voice_files(selected_voice)

    # Load the model so that piper_to_tts can find it
    load_piper_tts(model_path, config_path)

    # Generate the audio
    output_file = "output.wav"
    piper_to_tts(text, output_file)

    return output_file

# Gradio interface
# The voice names are listed once and reused for the dropdown's choices and
# for its initial value (the first name).
_voice_names = list(voices_data)

# Input components, in the order of tts_model's parameters.
_inputs = [
    gr.Textbox(label="Custom Model File URL (Optional)"),
    gr.Textbox(label="Custom Config JSON URL (Optional)"),
    gr.Textbox(label="Enter Text to Convert"),
    gr.Dropdown(
        label="Select Predefined Voice",
        choices=_voice_names,
        value=_voice_names[0],
    ),
]

iface = gr.Interface(
    fn=tts_model,
    inputs=_inputs,
    outputs=gr.Audio(label="Generated Speech"),
)

# Start the web interface only when the file is run as a script.
if __name__ == "__main__":
    iface.launch()