import os
from transformers import pipeline
import gradio as gr
import numpy as np
import soundfile as sf  # Để đọc âm thanh

# Look up the optional Hugging Face token in the environment.
# NOTE(review): hf_token is only checked for presence here; it is not passed
# to pipeline() below.
hf_token = os.environ.get('HF_TOKEN')

if hf_token is None or hf_token == "":
    print("Warning: No Hugging Face token set. Continuing without token...")

# Load the deepfake audio classification model once, at import time.
pipe = pipeline(
    task="audio-classification",
    model="mo-thecreator/Deepfake-audio-detection",
)

def predict_deepfake(audio):
    """Classify an audio clip with the module-level ``pipe`` and return its label.

    Args:
        audio: Either a path to an audio file (a ``str``) or a NumPy array of
            samples. A 2-D array is treated as ``(frames, channels)``, which
            is the layout ``soundfile.read`` produces, and is averaged down
            to a single channel.

    Returns:
        The ``'label'`` of the first prediction returned by the pipeline, or
        a human-readable string starting with ``"Error"`` when the input is
        invalid, empty, unreadable, or classification fails.
    """
    sampling_rate = None

    if isinstance(audio, str):  # a path to an audio file
        try:
            # Keep the file's sample rate: the pipeline needs it to resample
            # the samples to the rate the model expects.
            audio_data, sampling_rate = sf.read(audio, dtype="float32")
        except (RuntimeError, OSError) as e:
            # soundfile's read errors derive from RuntimeError; report them
            # the same way as every other failure instead of raising.
            return f"Error reading audio file: {str(e)}"
    elif isinstance(audio, np.ndarray):  # samples already decoded
        audio_data = audio
    else:
        return "Error: Invalid input format. Please upload a valid audio file."

    # `size` covers every axis, so an array with zero samples on any axis
    # (not just the first) is rejected.
    if audio_data.size == 0:
        return "Error: Audio data is empty."

    # The pipeline accepts single-channel input only; down-mix multi-channel
    # audio by averaging the channels.
    if audio_data.ndim == 2:
        audio_data = audio_data.mean(axis=1)
    audio_data = np.asarray(audio_data, dtype=np.float32)

    if sampling_rate is not None:
        # Dict form lets the pipeline resample when the rates differ.
        model_input = {"raw": audio_data, "sampling_rate": sampling_rate}
    else:
        # No rate is known for a bare array; the pipeline then assumes the
        # samples are already at the model's sampling rate.
        model_input = audio_data

    # Classify the audio (real/fake)
    try:
        result = pipe(model_input)
        return result[0]['label']  # the 'real' or 'fake' label
    except Exception as e:
        # UI boundary: surface any pipeline failure as text, not a traceback.
        return f"Error during classification: {str(e)}"

# Build the Gradio user interface: one audio input (handed to the callback as
# a file path) and a plain-text output, re-run live when the input changes.
iface = gr.Interface(
    fn=predict_deepfake,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    live=True,
)

# Launch the interface
iface.launch(share=True)