import json
import os
import time
import wave

import gradio as gr
import requests
from piper import PiperVoice
from tqdm import tqdm

# Load predefined voices from voices.json.
# The encoding is explicit so the load does not depend on the platform's
# default locale encoding.
with open("voices.json", "r", encoding="utf-8") as f:
    voices_data = json.load(f)

BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main/"


def download_file(url, dest_path, timeout=30):
    """Stream *url* into *dest_path* while showing a tqdm progress bar.

    The payload is first written to ``<dest_path>.part`` and only moved to
    *dest_path* once the transfer finished, so an interrupted or failed
    download never leaves a truncated file at the final location.

    Args:
        url: Address of the file to fetch.
        dest_path: Local path the file is stored under. Its directory must
            already exist.
        timeout: Seconds to wait for the connection and for each read
            before giving up.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Any other network failure.
    """
    partial_path = f"{dest_path}.part"
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Without this an HTML error page would be saved as the model file.
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))
        try:
            with open(partial_path, 'wb') as file, tqdm(
                desc=f"Downloading {os.path.basename(dest_path)}",
                # total=None lets tqdm show a plain counter when the server
                # does not announce the size.
                total=total_size or None,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in response.iter_content(chunk_size=64 * 1024):
                    if data:
                        file.write(data)
                        bar.update(len(data))
        except BaseException:
            # Remove the incomplete file before propagating the error.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
    os.replace(partial_path, dest_path)


def download_voice_files(voice_key):
    """Make sure all files of a predefined voice exist locally.

    Files listed under ``voices_data[voice_key]["files"]`` are stored in a
    directory named after *voice_key* inside the current working directory.
    Files that are already present are not downloaded again.

    Args:
        voice_key: Key of the voice in ``voices_data``.

    Returns:
        A ``(model_file_path, config_file_path)`` tuple of local paths.

    Raises:
        KeyError: *voice_key* is not present in ``voices_data``.
        FileNotFoundError: The voice lists no ``.onnx`` or no ``.json`` file.
        requests.RequestException: A download failed.
    """
    voice_info = voices_data[voice_key]
    files = voice_info["files"]

    # The folder where we'll store the model files
    voice_dir = os.path.join(os.getcwd(), voice_key)
    os.makedirs(voice_dir, exist_ok=True)

    model_file_path = None
    config_file_path = None

    for file_path in files:
        local_file_name = os.path.basename(file_path)
        local_file_path = os.path.join(voice_dir, local_file_name)

        if os.path.exists(local_file_path):
            print(f"File '{local_file_name}' already exists. Skipping download.")
        else:
            url = BASE_URL + file_path
            print(f"Downloading {url}")
            download_file(url, local_file_path)

        # Identify model and config files; anything else (e.g. MODEL_CARD)
        # is downloaded but not returned.
        if file_path.endswith('.onnx'):
            model_file_path = local_file_path
        elif file_path.endswith('.json'):  # also matches '.onnx.json'
            config_file_path = local_file_path

    if not model_file_path or not config_file_path:
        raise FileNotFoundError(f"Model or config file not found for voice '{voice_key}'.")

    return model_file_path, config_file_path


def load_piper_tts(model_file_path, config_file_path):
    """Load a Piper voice into the module-level ``voice`` global.

    Args:
        model_file_path: Path to the ``.onnx`` model file.
        config_file_path: Path to the model's JSON config file.

    Raises:
        FileNotFoundError: Either file does not exist. Before raising, the
            state of both paths and the content of the model directory are
            printed to help diagnose the problem.
    """
    model_exists = os.path.exists(model_file_path)
    config_exists = os.path.exists(config_file_path)
    if not model_exists or not config_exists:
        print(f"Model file exists: {model_exists}")
        print(f"Config file exists: {config_exists}")
        print("Contents of the directory:")
        # A bare file name has an empty dirname; fall back to the CWD, and
        # skip the listing when the directory itself is missing so the
        # intended error below is the one that surfaces.
        model_dir = os.path.dirname(model_file_path) or "."
        if os.path.isdir(model_dir):
            for item in os.listdir(model_dir):
                print(item)
        raise FileNotFoundError("Model or config file not found.")

    global voice
    voice = PiperVoice.load(model_file_path, config_path=config_file_path, use_cuda=False)
    print("Model loaded successfully.")


def piper_to_tts(text_to_generate, output_audio_name):
    """Synthesize *text_to_generate* with the loaded voice into a WAV file.

    Args:
        text_to_generate: Text to speak.
        output_audio_name: Path of the WAV file to write.

    Raises:
        RuntimeError: ``load_piper_tts`` has not been called yet.
    """
    print(f"Input Text: {text_to_generate}")
    if 'voice' not in globals():
        raise RuntimeError("Piper TTS model is not loaded. Please load it first.")

    start_time = time.time()
    with wave.open(output_audio_name, 'wb') as wav_file:
        wav_file.setnchannels(1)  # Mono channel
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setframerate(voice.config.sample_rate)
        voice.synthesize(text_to_generate, wav_file)
    end_time = time.time()
    print(f"Audio generated and saved to {output_audio_name} in {end_time - start_time:.2f} seconds")


def tts_model(model_link, json_link, text, selected_voice):
    """Gradio callback: fetch a voice, load it and synthesize *text*.

    When both *model_link* and *json_link* are non-blank, the custom model is
    downloaded into ``custom_model/``; otherwise the predefined
    *selected_voice* is used.

    Args:
        model_link: Optional URL of a custom ``.onnx`` model.
        json_link: Optional URL of the custom model's JSON config.
        text: Text to convert to speech.
        selected_voice: Key of a predefined voice in ``voices_data``.

    Returns:
        Path of the generated WAV file.
    """
    # An empty field may arrive as None rather than "".
    model_link = (model_link or "").strip()
    json_link = (json_link or "").strip()

    if model_link and json_link:
        # Use custom URLs if provided
        voice_dir = "custom_model"
        os.makedirs(voice_dir, exist_ok=True)
        model_path = os.path.join(voice_dir, "custom_model.onnx")
        config_path = os.path.join(voice_dir, "custom_model.json")

        download_file(model_link, model_path)
        download_file(json_link, config_path)
    else:
        # Use selected voice files if no custom links are given
        model_path, config_path = download_voice_files(selected_voice)

    load_piper_tts(model_path, config_path)

    # Generate the audio
    output_file = "output.wav"
    piper_to_tts(text, output_file)

    return output_file


# Gradio interface
iface = gr.Interface(
    fn=tts_model,
    inputs=[
        gr.Textbox(label="Custom Model File URL (Optional)"),
        gr.Textbox(label="Custom Config JSON URL (Optional)"),
        gr.Textbox(label="Enter Text to Convert"),
        gr.Dropdown(
            label="Select Predefined Voice",
            choices=list(voices_data.keys()),
            value=list(voices_data.keys())[0],
        ),
    ],
    outputs=gr.Audio(label="Generated Speech"),
)

if __name__ == "__main__":
    iface.launch()