Spaces:

mguven61
/

accent-classifier

Sleeping

App Files Community

mguven61 committed 14 days ago

Commit

0b86de1

verified ·

1 Parent(s): d636467

Upload detect2.py

Browse files

Files changed (1) hide show

detect2.py +172 -0

detect2.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import gradio as gr
+import os
+from detect import SimpleOfflineAccentClassifier
+import ssl
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)  # silences urllib3's warning about unverified HTTPS requests
+ssl._create_default_https_context = ssl._create_unverified_context  # SECURITY: replaces the stdlib default HTTPS context factory with the unverified one, process-wide
+os.environ['CURL_CA_BUNDLE'] = ''  # blanks the CA-bundle variable; presumably meant to bypass certificate checks in HTTP clients - TODO confirm this is still needed
+os.environ['REQUESTS_CA_BUNDLE'] = ''  # same for the requests-specific variable; NOTE(review): set before the imports below, so keep this order if any of them read it
+import torch
+import torchaudio
+import librosa
+import numpy as np
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
+import soundfile as sf
+class AccentClassifierApp:
+    def __init__(self):
+        self.classifier = HuggingFaceAccentClassifier()
+    def classify_audio(self, audio_file):
+        if audio_file is None:
+            return "Please upload an audio file."
+        try:
+            result = self.classifier.predict_accent(audio_file)
+            if result is None:
+                return "Audio file processing failed."
+            output = f"Predicted Accent: {result['accent']}\n"
+            output += f"Confidence Score: {result['confidence']:.2%}\n\n"
+            output += "All Probabilities:\n"
+            sorted_probs = sorted(
+                result['all_probabilities'].items(),
+                key=lambda x: x[1],
+                reverse=True
+            )
+            for accent, prob in sorted_probs:
+                bar = "█" * int(prob * 20)
+                output += f"- {accent}: {prob:.2%} {bar}\n"
+            return output
+        except Exception as e:
+            return f"Error occurred: {str(e)}"
+    def create_interface(self):
+        with gr.Blocks(title="Accent Classifier") as interface:
+            gr.Markdown("""
+            # AI Accent Classifier
+            This application analyzes speech audio files to predict accents.
+            Supported formats: WAV, MP3, FLAC
+            """)
+            with gr.Row():
+                with gr.Column():
+                    audio_input = gr.Audio(
+                        label="Upload Audio File",
+                        type="filepath"
+                    )
+                    classify_btn = gr.Button(
+                        "Analyze Accent",
+                        variant="primary"
+                    )
+                with gr.Column():
+                    output_text = gr.Markdown(
+                        label="Analysis Results",
+                        value="Analysis results will appear here..."
+                    )
+            gr.Markdown("### Example Audio Files")
+            gr.Examples(
+                examples=[
+                    ["examples/american_sample.wav"],
+                    ["examples/british_sample.wav"],
+                ] if os.path.exists("examples") else [],
+                inputs=audio_input
+            )
+            classify_btn.click(
+                fn=self.classify_audio,
+                inputs=audio_input,
+                outputs=output_text
+            )
+        return interface
+    def extract_acoustic_features(self, audio_path):
+        try:
+            y, sr = librosa.load(audio_path, sr=22050, duration=30)
+            if len(y) == 0:
+                return None
+            min_length = sr * 2
+            if len(y) < min_length:
+                repeat_count = int(min_length / len(y)) + 1
+                y = np.tile(y, repeat_count)[:min_length]
+            features = {}
+            n_fft = min(2048, len(y))
+            hop_length = n_fft // 4
+            try:
+                mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=n_fft, hop_length=hop_length)
+                features['mfcc_mean'] = np.mean(mfccs, axis=1)
+                features['mfcc_std'] = np.std(mfccs, axis=1)
+            except Exception as e:
+                features['mfcc_mean'] = np.zeros(13)
+                features['mfcc_std'] = np.zeros(13)
+            try:
+                spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
+                features['spectral_centroid'] = float(np.mean(spectral_centroids))
+                features['spectral_centroid_std'] = float(np.std(spectral_centroids))
+            except Exception as e:
+                features['spectral_centroid'] = 1500.0
+                features['spectral_centroid_std'] = 100.0
+            try:
+                pitches, magnitudes = librosa.piptrack(y=y, sr=sr, threshold=0.1, n_fft=n_fft, hop_length=hop_length)
+                pitch_values = []
+                for t in range(pitches.shape[1]):
+                    index = magnitudes[:, t].argmax()
+                    pitch = pitches[index, t]
+                    if pitch > 0:
+                        pitch_values.append(pitch)
+                if pitch_values:
+                    features['pitch_mean'] = float(np.mean(pitch_values))
+                    features['pitch_std'] = float(np.std(pitch_values))
+                else:
+                    features['pitch_mean'] = 150.0
+                    features['pitch_std'] = 20.0
+            except Exception as e:
+                features['pitch_mean'] = 150.0
+                features['pitch_std'] = 20.0
+            try:
+                zcr = librosa.feature.zero_crossing_rate(y, hop_length=hop_length)
+                features['zcr_mean'] = float(np.mean(zcr))
+                features['zcr_std'] = float(np.std(zcr))
+            except Exception as e:
+                features['zcr_mean'] = 0.1
+                features['zcr_std'] = 0.05
+            return features
+        except Exception as e:
+            return None
+def main():
+    app = AccentClassifierApp()
+    interface = app.create_interface()
+    interface.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True
+    )
+if __name__ == "__main__":  # start the app only when this file is executed as a script, not when it is imported
+    main()