Spaces:
Sleeping
Sleeping
import logging
import os
import ssl

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import torch
import torchaudio
import urllib3
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor

from detect import SimpleOfflineAccentClassifier

# SECURITY NOTE(review): the statements below turn off TLS certificate checks
# for the whole process: urllib3's insecure-request warning is silenced, the
# default HTTPS context used by the standard library is replaced with an
# unverified one, and the CA-bundle environment variables are blanked
# (presumably so that requests/curl based downloads skip verification too).
# Any download made by this process can then be intercepted. Prefer pointing
# these variables at a proper CA bundle; keep this only as a last resort.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
ssl._create_default_https_context = ssl._create_unverified_context
os.environ['CURL_CA_BUNDLE'] = ''
os.environ['REQUESTS_CA_BUNDLE'] = ''
class AccentClassifierApp:
    """Gradio front end that sends an uploaded audio file to an accent classifier.

    The classifier is any object with a ``predict_accent(audio_path)`` method
    that returns ``None`` on failure or a mapping with the keys ``accent``,
    ``confidence`` and ``all_probabilities`` (label -> probability).
    """

    _log = logging.getLogger(__name__)

    # Number of block characters drawn for a probability of 1.0.
    _BAR_WIDTH = 20

    # Sample clips offered in the UI; only the ones present on disk are shown.
    _EXAMPLE_FILES = (
        "examples/american_sample.wav",
        "examples/british_sample.wav",
    )

    def __init__(self, classifier=None):
        """Store the classifier used by :meth:`classify_audio`.

        Args:
            classifier: Optional object exposing ``predict_accent(audio_path)``.
                When omitted, a ``SimpleOfflineAccentClassifier`` is created.
        """
        if classifier is None:
            # The previous code instantiated ``HuggingFaceAccentClassifier``,
            # a name this module neither defines nor imports, so construction
            # raised NameError. ``SimpleOfflineAccentClassifier`` is the only
            # classifier imported here.
            # NOTE(review): confirm it provides ``predict_accent(path)`` with
            # the result shape described in the class docstring.
            classifier = SimpleOfflineAccentClassifier()
        self.classifier = classifier

    def classify_audio(self, audio_file):
        """Classify one audio file and return a text report.

        Args:
            audio_file: Path of the uploaded file, or ``None`` when nothing
                was uploaded.

        Returns:
            The formatted report, or a short message when no file was given,
            the classifier returned ``None``, or an exception was raised.
        """
        if audio_file is None:
            return "Please upload an audio file."

        try:
            result = self.classifier.predict_accent(audio_file)
            if result is None:
                return "Audio file processing failed."
            return self._format_result(result)
        except Exception as e:
            # UI boundary: show the message to the user, but keep the
            # traceback in the log instead of discarding it.
            self._log.exception("Accent classification failed for %r", audio_file)
            return f"Error occurred: {e}"

    def _format_result(self, result):
        """Render a classifier result as the multi-line report string."""
        ranked = sorted(
            result['all_probabilities'].items(),
            key=lambda item: item[1],
            reverse=True,
        )
        parts = [
            f"Predicted Accent: {result['accent']}\n",
            f"Confidence Score: {result['confidence']:.2%}\n\n",
            "All Probabilities:\n",
        ]
        for accent, prob in ranked:
            bar = "█" * int(prob * self._BAR_WIDTH)
            parts.append(f"- {accent}: {prob:.2%} {bar}\n")
        return "".join(parts)

    @staticmethod
    def _existing_examples(paths):
        """Return one example row per path in *paths* that is an existing file."""
        return [[path] for path in paths if os.path.isfile(path)]

    def create_interface(self):
        """Build and return the Gradio Blocks UI (upload, button, result pane)."""
        with gr.Blocks(title="Accent Classifier") as interface:
            gr.Markdown(
                "\n"
                "# AI Accent Classifier\n"
                "This application analyzes speech audio files to predict accents.\n"
                "Supported formats: WAV, MP3, FLAC\n"
            )

            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        label="Upload Audio File",
                        type="filepath",
                    )
                    classify_btn = gr.Button(
                        "Analyze Accent",
                        variant="primary",
                    )
                with gr.Column():
                    output_text = gr.Markdown(
                        label="Analysis Results",
                        value="Analysis results will appear here...",
                    )

            # Only advertise examples that are really on disk: checking just
            # for the directory would list missing files, and an empty list
            # would leave a heading with nothing under it.
            example_rows = self._existing_examples(self._EXAMPLE_FILES)
            if example_rows:
                gr.Markdown("### Example Audio Files")
                gr.Examples(examples=example_rows, inputs=audio_input)

            classify_btn.click(
                fn=self.classify_audio,
                inputs=audio_input,
                outputs=output_text,
            )

        return interface

    def _log_fallback(self, feature, audio_path):
        """Record that *feature* could not be computed and defaults are used."""
        self._log.warning(
            "%s extraction failed for %r; using default values",
            feature,
            audio_path,
            exc_info=True,
        )

    def extract_acoustic_features(self, audio_path):
        """Compute summary acoustic statistics for one audio file.

        Up to the first 30 seconds are loaded at 22050 Hz. Clips shorter than
        two seconds are repeated until they are two seconds long. Each feature
        group is computed independently; if one fails, fixed default values
        are stored for it and the failure is logged.

        This method is not called by the other methods of this class.

        Args:
            audio_path: Path of the audio file to analyse.

        Returns:
            A dict with ``mfcc_mean`` and ``mfcc_std`` (length-13 arrays) and
            the floats ``spectral_centroid``, ``spectral_centroid_std``,
            ``pitch_mean``, ``pitch_std``, ``zcr_mean`` and ``zcr_std``;
            ``None`` when the audio is empty or any other step fails.
        """
        try:
            y, sr = librosa.load(audio_path, sr=22050, duration=30)
            if len(y) == 0:
                return None

            # Loop very short clips up to two seconds of samples.
            min_length = sr * 2
            if len(y) < min_length:
                repeat_count = min_length // len(y) + 1
                y = np.tile(y, repeat_count)[:min_length]

            features = {}
            n_fft = min(2048, len(y))
            hop_length = n_fft // 4

            try:
                mfccs = librosa.feature.mfcc(
                    y=y, sr=sr, n_mfcc=13, n_fft=n_fft, hop_length=hop_length
                )
                features['mfcc_mean'] = np.mean(mfccs, axis=1)
                features['mfcc_std'] = np.std(mfccs, axis=1)
            except Exception:
                self._log_fallback("MFCC", audio_path)
                features['mfcc_mean'] = np.zeros(13)
                features['mfcc_std'] = np.zeros(13)

            try:
                spectral_centroids = librosa.feature.spectral_centroid(
                    y=y, sr=sr, n_fft=n_fft, hop_length=hop_length
                )
                features['spectral_centroid'] = float(np.mean(spectral_centroids))
                features['spectral_centroid_std'] = float(np.std(spectral_centroids))
            except Exception:
                self._log_fallback("Spectral centroid", audio_path)
                features['spectral_centroid'] = 1500.0
                features['spectral_centroid_std'] = 100.0

            try:
                pitches, magnitudes = librosa.piptrack(
                    y=y, sr=sr, threshold=0.1, n_fft=n_fft, hop_length=hop_length
                )
                # For every frame keep the pitch of the bin with the largest
                # magnitude, then drop frames where that pitch is not positive.
                strongest_bin = magnitudes.argmax(axis=0)
                frame_pitch = pitches[strongest_bin, np.arange(pitches.shape[1])]
                pitch_values = frame_pitch[frame_pitch > 0]
                if pitch_values.size:
                    features['pitch_mean'] = float(np.mean(pitch_values))
                    features['pitch_std'] = float(np.std(pitch_values))
                else:
                    features['pitch_mean'] = 150.0
                    features['pitch_std'] = 20.0
            except Exception:
                self._log_fallback("Pitch", audio_path)
                features['pitch_mean'] = 150.0
                features['pitch_std'] = 20.0

            try:
                zcr = librosa.feature.zero_crossing_rate(y, hop_length=hop_length)
                features['zcr_mean'] = float(np.mean(zcr))
                features['zcr_std'] = float(np.std(zcr))
            except Exception:
                self._log_fallback("Zero-crossing rate", audio_path)
                features['zcr_mean'] = 0.1
                features['zcr_std'] = 0.05

            return features
        except Exception:
            # Loading or padding failed; callers receive None as before.
            self._log.exception("Acoustic feature extraction failed for %r", audio_path)
            return None
def main(server_name="0.0.0.0", server_port=7860, share=True):
    """Build the accent classifier UI and start the Gradio server.

    The defaults reproduce the previously hard-coded launch settings; pass
    different values to change the listen address or port, or to turn off
    the ``share`` option.

    Args:
        server_name: Address passed to ``launch`` as ``server_name``.
        server_port: Port passed to ``launch`` as ``server_port``.
        share: Value passed to ``launch`` as ``share``.
            NOTE(review): per the Gradio docs this requests a public share
            link - confirm that is wanted for this deployment.
    """
    app = AccentClassifierApp()
    interface = app.create_interface()
    interface.launch(
        server_name=server_name,
        server_port=server_port,
        share=share,
    )


if __name__ == "__main__":
    main()