import gradio as gr
import os
from detect import SimpleOfflineAccentClassifier
import ssl
import urllib3

# Disable TLS verification so model/asset downloads work behind proxies or
# self-signed certificates (insecure; only use in trusted environments).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
ssl._create_default_https_context = ssl._create_unverified_context
os.environ['CURL_CA_BUNDLE'] = ''
os.environ['REQUESTS_CA_BUNDLE'] = ''

import librosa
import numpy as np


class AccentClassifierApp:
    def __init__(self):
        self.classifier = SimpleOfflineAccentClassifier()

    def classify_audio(self, audio_file):
        """Run the classifier on an uploaded file and format the result."""
        if audio_file is None:
            return "Please upload an audio file."

        try:
            result = self.classifier.predict_accent(audio_file)
            if result is None:
                return "Audio file processing failed."

            output = f"Predicted Accent: {result['accent']}\n"
            output += f"Confidence Score: {result['confidence']:.2%}\n\n"
            output += "All Probabilities:\n"

            # List every class, highest probability first, with a text bar chart.
            sorted_probs = sorted(
                result['all_probabilities'].items(),
                key=lambda x: x[1],
                reverse=True
            )
            for accent, prob in sorted_probs:
                bar = "█" * int(prob * 20)
                output += f"- {accent}: {prob:.2%} {bar}\n"

            return output
        except Exception as e:
            return f"Error occurred: {e}"

    def create_interface(self):
        with gr.Blocks(title="Accent Classifier") as interface:
            gr.Markdown("""
            # AI Accent Classifier

            This application analyzes speech audio files to predict accents.

            Supported formats: WAV, MP3, FLAC
            """)

            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        label="Upload Audio File",
                        type="filepath"
                    )
                    classify_btn = gr.Button(
                        "Analyze Accent",
                        variant="primary"
                    )
                with gr.Column():
                    output_text = gr.Markdown(
                        label="Analysis Results",
                        value="Analysis results will appear here..."
                    )

            # Only offer example clips when the bundled files are present.
            if os.path.exists("examples"):
                gr.Markdown("### Example Audio Files")
                gr.Examples(
                    examples=[
                        ["examples/american_sample.wav"],
                        ["examples/british_sample.wav"],
                    ],
                    inputs=audio_input
                )

            classify_btn.click(
                fn=self.classify_audio,
                inputs=audio_input,
                outputs=output_text
            )

        return interface

    def extract_acoustic_features(self, audio_path):
        """Extract MFCC, spectral, pitch, and ZCR features from an audio file."""
        try:
            y, sr = librosa.load(audio_path, sr=22050, duration=30)
            if len(y) == 0:
                return None

            # Loop very short clips so every feature sees at least 2 s of audio.
            min_length = sr * 2
            if len(y) < min_length:
                repeat_count = int(min_length / len(y)) + 1
                y = np.tile(y, repeat_count)[:min_length]

            features = {}
            n_fft = min(2048, len(y))
            hop_length = n_fft // 4

            # MFCCs: 13 coefficients, summarized by mean and std over time.
            try:
                mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13,
                                             n_fft=n_fft, hop_length=hop_length)
                features['mfcc_mean'] = np.mean(mfccs, axis=1)
                features['mfcc_std'] = np.std(mfccs, axis=1)
            except Exception:
                features['mfcc_mean'] = np.zeros(13)
                features['mfcc_std'] = np.zeros(13)

            # Spectral centroid: where the spectrum's "center of mass" sits.
            try:
                spectral_centroids = librosa.feature.spectral_centroid(
                    y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
                features['spectral_centroid'] = float(np.mean(spectral_centroids))
                features['spectral_centroid_std'] = float(np.std(spectral_centroids))
            except Exception:
                features['spectral_centroid'] = 1500.0
                features['spectral_centroid_std'] = 100.0

            # Pitch: keep the strongest pitch candidate in each frame.
            try:
                pitches, magnitudes = librosa.piptrack(y=y, sr=sr, threshold=0.1,
                                                       n_fft=n_fft, hop_length=hop_length)
                pitch_values = []
                for t in range(pitches.shape[1]):
                    index = magnitudes[:, t].argmax()
                    pitch = pitches[index, t]
                    if pitch > 0:
                        pitch_values.append(pitch)

                if pitch_values:
                    features['pitch_mean'] = float(np.mean(pitch_values))
                    features['pitch_std'] = float(np.std(pitch_values))
                else:
                    features['pitch_mean'] = 150.0
                    features['pitch_std'] = 20.0
            except Exception:
                features['pitch_mean'] = 150.0
                features['pitch_std'] = 20.0

            # Zero-crossing rate: a rough proxy for noisiness/fricative content.
            try:
                zcr = librosa.feature.zero_crossing_rate(y, hop_length=hop_length)
                features['zcr_mean'] = float(np.mean(zcr))
                features['zcr_std'] = float(np.std(zcr))
            except Exception:
                features['zcr_mean'] = 0.1
                features['zcr_std'] = 0.05

            return features
        except Exception:
            return None
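# For quick local testing without the real `detect` module, a minimal
# stand-in with the interface this app relies on could look like the
# hypothetical sketch below: `predict_accent` must return a dict with
# 'accent', 'confidence', and 'all_probabilities' (or None on failure).
#
#   class SimpleOfflineAccentClassifier:
#       def predict_accent(self, audio_path):
#           probs = {"American": 0.6, "British": 0.3, "Australian": 0.1}
#           top = max(probs, key=probs.get)
#           return {"accent": top, "confidence": probs[top],
#                   "all_probabilities": probs}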
def main():
    app = AccentClassifierApp()
    interface = app.create_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )


if __name__ == "__main__":
    main()
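# Example of calling the feature extractor directly (file path illustrative):
#
#   app = AccentClassifierApp()
#   feats = app.extract_acoustic_features("examples/american_sample.wav")
#   if feats is not None:
#       print(feats['mfcc_mean'].shape)  # (13,)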