# Source listing captured from a Hugging Face Space page.
# Space status at capture time: Runtime error.
# File size: 5,393 bytes.
# (The page's per-line commit-hash and line-number gutters are not part of the file.)
import gradio as gr
import os
import json
import requests
from tqdm import tqdm
import wave
import time
from piper import PiperVoice
# Load predefined voices from voices.json. JSON text is UTF-8, so the encoding
# is pinned instead of being left to the platform's locale default.
with open("voices.json", "r", encoding="utf-8") as f:
    voices_data = json.load(f)

# Prefix prepended to each relative path listed under a voice's "files" entry.
BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main/"
def download_file(url, dest_path, *, timeout=30):
    """Stream the resource at ``url`` into ``dest_path`` with a progress bar.

    Args:
        url: Address to fetch.
        dest_path: Local path that receives the bytes; an existing file is
            overwritten.
        timeout: Seconds to wait for the connection and for each read before
            giving up (passed straight to ``requests.get``).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # The context manager releases the connection on every exit path, and the
    # timeout keeps a stalled server from blocking the caller indefinitely.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before touching dest_path so an error page is never
        # written to disk in place of the requested file.
        response.raise_for_status()

        # A missing Content-Length becomes None, which tqdm treats as
        # "size unknown" rather than as an already-complete 0-byte transfer.
        total_size = int(response.headers.get('content-length', 0)) or None

        with open(dest_path, 'wb') as file, tqdm(
            desc=f"Downloading {os.path.basename(dest_path)}",
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in response.iter_content(chunk_size=64 * 1024):
                if not data:
                    break
                file.write(data)
                bar.update(len(data))
def download_voice_files(voice_key):
    """Make sure a predefined voice's files exist locally and locate them.

    Every path listed under ``voices_data[voice_key]["files"]`` is stored by
    its base name inside ``<current working directory>/<voice_key>``. Files
    already present there are not downloaded again.

    Args:
        voice_key: Key of the voice in ``voices_data``.

    Returns:
        A ``(model_file_path, config_file_path)`` tuple of local paths: the
        listed file ending in ``.onnx`` and the one ending in ``.json``.

    Raises:
        KeyError: If ``voice_key`` is not present in ``voices_data``.
        FileNotFoundError: If the voice does not list both a model and a
            config file.
    """
    files = voices_data[voice_key]["files"]

    # The folder where we'll store the model files
    voice_dir = os.path.join(os.getcwd(), voice_key)
    os.makedirs(voice_dir, exist_ok=True)

    model_file_path = None
    config_file_path = None

    for file_path in files:
        local_file_name = os.path.basename(file_path)
        local_file_path = os.path.join(voice_dir, local_file_name)

        if os.path.exists(local_file_path):
            print(f"File '{local_file_name}' already exists. Skipping download.")
        else:
            url = BASE_URL + file_path
            print(f"Downloading {url}")
            # Download under a temporary name and rename only on success: an
            # interrupted transfer must not leave a truncated file that the
            # existence check above would accept on the next call.
            partial_path = local_file_path + ".part"
            try:
                download_file(url, partial_path)
                os.replace(partial_path, local_file_path)
            finally:
                # Still present only when the download or rename failed.
                if os.path.exists(partial_path):
                    os.remove(partial_path)

        # Identify model and config files. ".onnx.json" does not end in
        # ".onnx", so it falls through to the ".json" branch.
        if file_path.endswith('.onnx'):
            model_file_path = local_file_path
        elif file_path.endswith('.json'):
            config_file_path = local_file_path
        # Other files (e.g. MODEL_CARD) are fetched but not returned.

    if not model_file_path or not config_file_path:
        raise FileNotFoundError(f"Model or config file not found for voice '{voice_key}'.")

    return model_file_path, config_file_path
def load_piper_tts(model_file_path, config_file_path):
    """Load a Piper voice and store it in the module-level ``voice`` global.

    Args:
        model_file_path: Path to the model file.
        config_file_path: Path to the model's config file.

    Raises:
        FileNotFoundError: If either path does not exist. Before raising,
            which file is missing and the contents of the model's directory
            (when that directory exists) are printed to help debugging.
    """
    global voice

    model_exists = os.path.exists(model_file_path)
    config_exists = os.path.exists(config_file_path)

    if not (model_exists and config_exists):
        print(f"Model file exists: {model_exists}")
        print(f"Config file exists: {config_exists}")

        # A bare file name has an empty dirname, which means "current directory".
        model_dir = os.path.dirname(model_file_path) or "."
        # List the directory only if it is there; otherwise os.listdir would
        # raise its own error and hide the message below.
        if os.path.isdir(model_dir):
            print("Contents of the directory:")
            for item in os.listdir(model_dir):
                print(item)

        raise FileNotFoundError("Model or config file not found.")

    voice = PiperVoice.load(model_file_path, config_path=config_file_path, use_cuda=False)
    print("Model loaded successfully.")
def piper_to_tts(text_to_generate, output_audio_name):
    """Synthesize text with the loaded Piper voice and write it as a WAV file.

    Args:
        text_to_generate: Text to convert to speech.
        output_audio_name: Path of the WAV file to create or overwrite.

    Raises:
        RuntimeError: If no module-level ``voice`` has been set yet (it is
            assigned by ``load_piper_tts``).
    """
    print(f"Input Text: {text_to_generate}")

    if 'voice' not in globals():
        raise RuntimeError("Piper TTS model is not loaded. Please load it first.")

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    started = time.perf_counter()

    with wave.open(output_audio_name, 'wb') as wav_file:
        wav_file.setnchannels(1)  # Mono channel
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setframerate(voice.config.sample_rate)
        # NOTE(review): this call signature depends on the installed piper
        # version; newer releases appear to provide ``synthesize_wav`` for
        # writing into a wave file - confirm against the pinned dependency.
        voice.synthesize(text_to_generate, wav_file)

    elapsed = time.perf_counter() - started
    print(f"Audio generated and saved to {output_audio_name} in {elapsed:.2f} seconds")
def tts_model(model_link, json_link, text, selected_voice):
    """Gradio callback: pick a voice, load it, and synthesize ``text``.

    When both ``model_link`` and ``json_link`` are non-blank, the two URLs are
    downloaded into ``custom_model/`` and used as the voice. Otherwise (also
    when only one of them is given) the predefined ``selected_voice`` is used.

    Args:
        model_link: Optional URL of a custom model file.
        json_link: Optional URL of the matching config JSON.
        text: Text to convert to speech.
        selected_voice: Key of a predefined voice, used as the fallback.

    Returns:
        Path of the generated WAV file (``"output.wav"``).
    """
    # Tolerate None as well as blank strings, and drop stray whitespace so the
    # downloader receives a clean URL.
    model_url = (model_link or "").strip()
    config_url = (json_link or "").strip()

    if model_url and config_url:
        # Use custom URLs if provided.
        # NOTE(review): these URLs come from the UI and are fetched as-is;
        # restrict schemes/hosts if the app is exposed to untrusted users.
        voice_dir = "custom_model"
        os.makedirs(voice_dir, exist_ok=True)
        model_path = os.path.join(voice_dir, "custom_model.onnx")
        config_path = os.path.join(voice_dir, "custom_model.json")
        download_file(model_url, model_path)
        download_file(config_url, config_path)
    else:
        # Use selected voice files if no custom links are given
        model_path, config_path = download_voice_files(selected_voice)

    load_piper_tts(model_path, config_path)

    # Generate the audio
    output_file = "output.wav"
    piper_to_tts(text, output_file)
    return output_file
# Gradio interface
# Build the list of voice keys once. The dropdown defaults to the first key,
# or to no selection when the catalogue is empty (indexing [0] on an empty
# list would raise IndexError at import time).
_voice_choices = list(voices_data)

iface = gr.Interface(
    fn=tts_model,
    inputs=[
        gr.Textbox(label="Custom Model File URL (Optional)"),
        gr.Textbox(label="Custom Config JSON URL (Optional)"),
        gr.Textbox(label="Enter Text to Convert"),
        gr.Dropdown(
            label="Select Predefined Voice",
            choices=_voice_choices,
            value=_voice_choices[0] if _voice_choices else None,
        ),
    ],
    outputs=gr.Audio(label="Generated Speech"),
)

if __name__ == "__main__":
    iface.launch()
|