File size: 5,393 Bytes
6f430c8
28aabd9
8e8c284
 
7f9a819
 
 
 
6f430c8
8e8c284
 
 
6f430c8
8e8c284
 
7f9a819
 
 
 
 
 
 
 
 
 
 
9521202
 
7f9a819
 
 
8e8c284
 
7f9a819
 
 
 
 
 
9521202
 
fcf6d97
9521202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f9a819
9521202
 
7f9a819
9521202
 
 
 
 
 
 
 
7f9a819
9521202
 
7f9a819
9521202
7f9a819
 
 
346bc15
7f9a819
 
 
 
 
 
 
 
 
 
 
 
 
8e8c284
 
 
7f9a819
8e8c284
7f9a819
 
 
 
 
 
8e8c284
 
9521202
7f9a819
 
9521202
8e8c284
7f9a819
8e8c284
7f9a819
 
6f430c8
 
 
 
 
 
8e8c284
 
 
 
6f430c8
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import gradio as gr
import os
import json
import requests
from tqdm import tqdm
import wave
import time
from piper import PiperVoice

# Load predefined voices from voices.json.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding (which is not UTF-8 everywhere).
with open("voices.json", "r", encoding="utf-8") as f:
    voices_data = json.load(f)

# Root URL that the per-voice file paths from voices.json are appended to.
BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main/"

def download_file(url, dest_path, timeout=30):
    """Stream ``url`` into ``dest_path`` while showing a tqdm progress bar.

    Args:
        url: Address of the file to fetch.
        dest_path: Local path the response body is written to; an existing
            file at that path is overwritten.
        timeout: Seconds ``requests`` waits for the server to connect or to
            send more data before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If ``dest_path`` cannot be written.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail before opening dest_path so an error page is never stored
        # under the name of a model/config file.
        response.raise_for_status()

        # Without a Content-Length header the size is unknown; tqdm expects
        # ``None`` (not 0) in that case.
        total_size = int(response.headers.get('content-length', 0)) or None

        with open(dest_path, 'wb') as file, tqdm(
            desc=f"Downloading {os.path.basename(dest_path)}",
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            for data in response.iter_content(chunk_size=64 * 1024):
                if data:
                    file.write(data)
                    bar.update(len(data))

def download_voice_files(voice_key):
    """Make sure the files of a predefined voice exist locally and locate them.

    Every file listed under ``voices_data[voice_key]["files"]`` is fetched from
    ``BASE_URL`` into ``<current working directory>/<voice_key>/`` unless a
    file with the same name is already there.

    Args:
        voice_key: Key of the voice inside ``voices_data``.

    Returns:
        A ``(model_file_path, config_file_path)`` tuple with the local paths
        of the ``.onnx`` model and of its ``.json`` config.

    Raises:
        KeyError: If ``voice_key`` is not present in ``voices_data``.
        FileNotFoundError: If the voice lists no ``.onnx`` or no ``.json`` file.
    """
    files = voices_data[voice_key]["files"]

    # The folder where we'll store the model files
    voice_dir = os.path.join(os.getcwd(), voice_key)
    os.makedirs(voice_dir, exist_ok=True)

    model_file_path = None
    config_file_path = None

    for file_path in files:
        local_file_name = os.path.basename(file_path)
        local_file_path = os.path.join(voice_dir, local_file_name)

        if os.path.exists(local_file_path):
            print(f"File '{local_file_name}' already exists. Skipping download.")
        else:
            url = BASE_URL + file_path
            print(f"Downloading {url}")
            # Download under a temporary name and rename only on success, so
            # an interrupted transfer is never mistaken for a complete file
            # by the "already exists" check on the next run.
            partial_path = local_file_path + ".part"
            try:
                download_file(url, partial_path)
                os.replace(partial_path, local_file_path)
            finally:
                # Only still present when the download or rename failed.
                if os.path.exists(partial_path):
                    os.remove(partial_path)

        # Identify model and config files; anything else (e.g. MODEL_CARD)
        # is downloaded but not returned. '.onnx.json' also ends in '.json'.
        if file_path.endswith('.onnx'):
            model_file_path = local_file_path
        elif file_path.endswith('.json'):
            config_file_path = local_file_path

    if not model_file_path or not config_file_path:
        raise FileNotFoundError(f"Model or config file not found for voice '{voice_key}'.")

    return model_file_path, config_file_path

def load_piper_tts(model_file_path, config_file_path):
    """Load a Piper voice and store it in the module-level ``voice`` global.

    Args:
        model_file_path: Path to the ``.onnx`` model file.
        config_file_path: Path to the model's JSON config file.

    Raises:
        FileNotFoundError: If either file does not exist. Before raising,
            diagnostics about both paths and the model's directory are
            printed.
    """
    global voice

    model_exists = os.path.exists(model_file_path)
    config_exists = os.path.exists(config_file_path)
    if not (model_exists and config_exists):
        print(f"Model file exists: {model_exists}")
        print(f"Config file exists: {config_exists}")

        # A bare file name has an empty dirname; list the current directory
        # in that case instead of calling os.listdir('').
        model_dir = os.path.dirname(model_file_path) or "."
        try:
            entries = os.listdir(model_dir)
        except OSError as exc:
            # The listing is only a debugging aid; it must not replace the
            # real error raised below.
            print(f"Could not list directory '{model_dir}': {exc}")
        else:
            print("Contents of the directory:")
            for item in entries:
                print(item)
        raise FileNotFoundError("Model or config file not found.")

    voice = PiperVoice.load(model_file_path, config_path=config_file_path, use_cuda=False)
    print("Model loaded successfully.")

def piper_to_tts(text_to_generate, output_audio_name):
    """Synthesize ``text_to_generate`` with the loaded voice into a WAV file.

    Args:
        text_to_generate: Text handed to ``voice.synthesize``.
        output_audio_name: Path of the WAV file to write.

    Raises:
        RuntimeError: If no module-level ``voice`` has been set yet.
    """
    print(f"Input Text: {text_to_generate}")

    model_loaded = 'voice' in globals()
    if not model_loaded:
        raise RuntimeError("Piper TTS model is not loaded. Please load it first.")

    started_at = time.time()

    with wave.open(output_audio_name, 'wb') as out_wav:
        # Output format: the voice's own sample rate, 2 bytes (16 bits) per
        # sample, a single (mono) channel.
        out_wav.setframerate(voice.config.sample_rate)
        out_wav.setsampwidth(2)
        out_wav.setnchannels(1)
        voice.synthesize(text_to_generate, out_wav)

    elapsed = time.time() - started_at
    print(f"Audio generated and saved to {output_audio_name} in {elapsed:.2f} seconds")

def tts_model(model_link, json_link, text, selected_voice):
    """Turn ``text`` into speech and return the path of the generated WAV.

    When both ``model_link`` and ``json_link`` are given, the custom model and
    config are downloaded into ``custom_model/``; otherwise (including when
    only one of the two links is given) the predefined ``selected_voice`` is
    used.

    Args:
        model_link: URL of a custom ``.onnx`` model, or empty/``None``.
        json_link: URL of the matching JSON config, or empty/``None``.
        text: Text to synthesize.
        selected_voice: Key of a predefined voice, used as the fallback.

    Returns:
        Path of the generated audio file (``"output.wav"``).
    """
    # Normalise once: tolerate None, and download from the same stripped
    # value that is tested, so stray whitespace around a pasted URL does not
    # end up in the request.
    model_url = (model_link or "").strip()
    config_url = (json_link or "").strip()

    # Decide which model and config files to use
    if model_url and config_url:
        # Use custom URLs if provided
        voice_dir = "custom_model"
        os.makedirs(voice_dir, exist_ok=True)
        model_path = os.path.join(voice_dir, "custom_model.onnx")
        config_path = os.path.join(voice_dir, "custom_model.json")
        download_file(model_url, model_path)
        download_file(config_url, config_path)
    else:
        # Use selected voice files if no custom links are given
        model_path, config_path = download_voice_files(selected_voice)

    # Load the model; this sets the module-level voice used by piper_to_tts
    load_piper_tts(model_path, config_path)

    # Generate the audio
    output_file = "output.wav"
    piper_to_tts(text, output_file)

    return output_file

# Gradio interface: the four inputs are passed positionally to tts_model in
# the order listed here (model URL, config URL, text, predefined voice).
iface = gr.Interface(
    fn=tts_model,
    inputs=[
        gr.Textbox(label="Custom Model File URL (Optional)"),
        gr.Textbox(label="Custom Config JSON URL (Optional)"),
        gr.Textbox(label="Enter Text to Convert"),
        gr.Dropdown(
            label="Select Predefined Voice",
            # Iterating the dict yields its keys; the first one is preselected.
            choices=list(voices_data),
            value=list(voices_data)[0],
        ),
    ],
    outputs=gr.Audio(label="Generated Speech"),
)

# Start the web UI only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()