import torch
import torch.nn.functional as F
import librosa
import gradio as gr
from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification

# Load the fine-tuned deepfake-detection checkpoint and its processor
model_name = "Mahmoud59/wav2vec2-fake-audio-detector"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()


def classify_audio(audio_path):
    # Load audio with librosa; sr=16000 resamples to the 16 kHz rate Wav2Vec2 expects
    waveform, _ = librosa.load(audio_path, sr=16000)

    # The processor accepts the raw NumPy waveform directly
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt", padding=True)
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # Run inference
    with torch.no_grad():
        outputs = model(**inputs)

    # Convert logits to probabilities and pick the most likely class
    probs = F.softmax(outputs.logits, dim=-1)
    pred = torch.argmax(probs, dim=-1).item()
    confidence = probs[0, pred].item()

    # Assumes label 1 = fake and label 0 = real for this checkpoint
    label = "Fake" if pred == 1 else "Real"
    return f"{label} (confidence: {confidence:.2%})"


# Build Gradio interface
demo = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Wav2Vec2 Deepfake Detection",
    description="Upload an audio sample to check whether it is real or fake, along with a confidence score.",
)

if __name__ == "__main__":
    demo.launch()