import os
import tempfile

import gradio as gr
import torch
from TTS.api import TTS

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the TTS model a single time at application start-up.
print("Đang tải mô hình TTS...")
tts = TTS("thinhlpg/vixtts-test")
tts = tts.to(device)
print("Đã tải xong mô hình.")

def clone_voice(text, reference_audio):
    """Synthesize ``text`` in the voice of the reference recording.

    Args:
        text: Text to speak. Empty or whitespace-only text counts as missing.
        reference_audio: Filesystem path of the reference voice sample,
            forwarded unchanged as ``speaker_wav``.

    Returns:
        Path of the generated WAV file, or ``None`` when either input is
        missing.

    Raises:
        Exception: Whatever ``tts.tts_to_file`` raises is propagated after
            the temporary output file has been removed.
    """
    # Whitespace-only text has nothing to synthesize, so treat it as missing.
    if not text or not text.strip() or not reference_audio:
        return None

    # Give every request its own output file. A single shared "output.wav"
    # lets concurrent requests overwrite one another's result.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file_path = tmp.name

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=reference_audio,
            language="vi",
            file_path=output_file_path,
        )
    except Exception:
        # Do not leave the placeholder file behind when synthesis fails.
        try:
            os.remove(output_file_path)
        except OSError:
            pass
        raise
    return output_file_path

# Build the Gradio interface: a text box and a reference-audio upload go in,
# the synthesized audio comes out.
text_input = gr.Textbox(label="Văn bản cần chuyển đổi")
reference_input = gr.Audio(type="filepath", label="Tải lên tệp âm thanh mẫu (.wav)")
cloned_output = gr.Audio(label="Giọng nói đã nhân bản")

# Example rows pre-fill the text only; the audio slot is left as None.
example_rows = [
    ["Xin chào, đây là một thử nghiệm về nhân bản giọng nói.", None],
    ["Trí tuệ nhân tạo đang thay đổi thế giới của chúng ta.", None],
]

app = gr.Interface(
    fn=clone_voice,
    inputs=[text_input, reference_input],
    outputs=cloned_output,
    title="👩‍💻 Demo nhân bản giọng nói Tiếng Việt",
    description="Nhập một đoạn văn bản và tải lên một file âm thanh mẫu (dưới 15 giây) để tạo ra giọng nói mới.",
    examples=example_rows,
)

# Start the application.
app.launch()