# NOTE: removed Hugging Face file-viewer residue (page chrome, blob hashes,
# line-number gutter) that was captured along with the source and would
# otherwise make this file invalid Python.
import gradio as gr
import torch
import torchaudio
import librosa
import numpy as np
from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
import torch.nn.functional as F
import torchaudio.transforms as T
# Model setup: load the pretrained deepfake-audio classifier once at import
# time so every request reuses the same weights.
model_name = "Mahmoud59/wav2vec2-fake-audio-detector"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Fix: switch to inference mode so dropout (active by default after loading)
# is disabled; without this, predictions are nondeterministic and slightly
# degraded.
model.eval()
def classify_audio(audio_path):
    """Classify an uploaded audio file as real or deepfake.

    Parameters
    ----------
    audio_path : str
        Filesystem path to the audio file (any format librosa can decode).

    Returns
    -------
    str
        "Fake" or "Real" with the model's softmax confidence appended.
        Fix: the Gradio description promises a confidence score, but the
        original returned only the bare label.
    """
    # Load and resample to 16 kHz — the rate the wav2vec2 processor expects.
    waveform, sample_rate = librosa.load(audio_path, sr=16000)  # librosa automatically resamples to 16kHz
    # librosa yields a float numpy array; convert to a torch tensor.
    waveform = torch.tensor(waveform, dtype=torch.float32)
    # Normalize/batch for the model, then move every tensor to its device.
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt", padding=True)
    inputs = {key: val.to(device) for key, val in inputs.items()}
    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        outputs = model(**inputs)
    # Softmax over the logits gives a per-class probability; take the top one.
    probs = F.softmax(outputs.logits, dim=-1)
    confidence, pred = torch.max(probs, dim=-1)
    # NOTE(review): assumes label index 1 == "Fake" per the model card — TODO confirm.
    label = "Fake" if pred.item() == 1 else "Real"
    return f"{label} (confidence: {confidence.item():.2%})"
# Wire the classifier into a simple Gradio UI: one audio upload in,
# one text label out.
interface_config = dict(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Wav2Vec2 Deepfake Detection",
    description="Upload an audio sample to check if it is fake or real, along with confidence.",
)
demo = gr.Interface(**interface_config)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()