mguven61 committed on
Commit
0b86de1
·
verified ·
1 Parent(s): d636467

Upload detect2.py

Browse files
Files changed (1) hide show
  1. detect2.py +172 -0
detect2.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from detect import SimpleOfflineAccentClassifier
4
+ import ssl
5
+ import urllib3
6
+
7
# SECURITY: the statements below switch off TLS certificate verification for
# the whole process. They are kept because the rest of the module runs after
# them, but they expose every HTTPS request to man-in-the-middle attacks and
# should be removed once a proper CA bundle is available.

# Silence the warning urllib3 emits for unverified HTTPS requests.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Make the stdlib build unverified HTTPS contexts by default.
ssl._create_default_https_context = ssl._create_unverified_context

# Blank out the CA bundle overrides.
# NOTE(review): presumably so requests/curl-based clients skip the custom
# bundle lookup -- confirm this is still needed.
os.environ.update({'CURL_CA_BUNDLE': '', 'REQUESTS_CA_BUNDLE': ''})
11
+
12
+ import torch
13
+ import torchaudio
14
+ import librosa
15
+ import numpy as np
16
+ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
17
+ import soundfile as sf
18
+
19
class AccentClassifierApp:
    """Gradio front end that reports accent predictions for uploaded audio.

    The wrapped classifier must provide ``predict_accent(path)`` returning
    either ``None`` or a dict with the keys ``'accent'``, ``'confidence'``
    and ``'all_probabilities'`` (accent name -> probability); those are the
    fields ``classify_audio`` reads.
    """

    def __init__(self):
        """Create the classifier that backs the UI."""
        # SimpleOfflineAccentClassifier is the classifier this module imports
        # from ``detect``; no other classifier name is defined or imported
        # here, so any other name would fail with NameError on construction.
        # NOTE(review): assumes it exposes predict_accent(path) -- confirm
        # against detect.py.
        self.classifier = SimpleOfflineAccentClassifier()

    def classify_audio(self, audio_file):
        """Return a text report for ``audio_file``.

        Args:
            audio_file: Path of the uploaded audio, or ``None`` when nothing
                was uploaded.

        Returns:
            A user-facing string: the prediction report, or a short message
            when there is no input, the classifier returns ``None``, or any
            exception is raised while classifying/formatting.
        """
        if audio_file is None:
            return "Please upload an audio file."

        # This is the UI boundary: any failure is turned into a message
        # instead of propagating into the Gradio event handler.
        try:
            result = self.classifier.predict_accent(audio_file)
            if result is None:
                return "Audio file processing failed."

            report = [
                f"Predicted Accent: {result['accent']}",
                f"Confidence Score: {result['confidence']:.2%}",
                "",
                "All Probabilities:",
            ]

            # Most likely accent first.
            ranked = sorted(
                result['all_probabilities'].items(),
                key=lambda item: item[1],
                reverse=True,
            )
            for accent, prob in ranked:
                # One block character per 5% of probability (20 = full bar).
                bar = "█" * int(prob * 20)
                report.append(f"- {accent}: {prob:.2%} {bar}")

            # Every line, including the last one, is newline-terminated.
            return "\n".join(report) + "\n"

        except Exception as e:
            return f"Error occurred: {str(e)}"

    def create_interface(self):
        """Build and return the Gradio ``Blocks`` UI.

        The layout is an audio upload plus an "Analyze Accent" button on the
        left and a Markdown result panel on the right; clicking the button
        runs ``classify_audio`` on the uploaded file path.
        """
        with gr.Blocks(title="Accent Classifier") as interface:
            gr.Markdown("""
            # AI Accent Classifier

            This application analyzes speech audio files to predict accents.
            Supported formats: WAV, MP3, FLAC
            """)

            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        label="Upload Audio File",
                        type="filepath"
                    )

                    classify_btn = gr.Button(
                        "Analyze Accent",
                        variant="primary"
                    )

                with gr.Column():
                    output_text = gr.Markdown(
                        label="Analysis Results",
                        value="Analysis results will appear here..."
                    )

            gr.Markdown("### Example Audio Files")
            # Only offer examples whose files are really present; checking
            # just the directory would still hand missing paths to Gradio.
            example_paths = [
                "examples/american_sample.wav",
                "examples/british_sample.wav",
            ]
            gr.Examples(
                examples=[[path] for path in example_paths if os.path.isfile(path)],
                inputs=audio_input
            )

            classify_btn.click(
                fn=self.classify_audio,
                inputs=audio_input,
                outputs=output_text
            )

        return interface

    def extract_acoustic_features(self, audio_path):
        """Compute summary acoustic features for one audio file.

        At most the first 30 seconds are loaded, resampled to 22050 Hz.
        Clips shorter than two seconds are repeated until they reach two
        seconds.

        Args:
            audio_path: Path of the audio file to analyse.

        Returns:
            A dict with ``mfcc_mean``/``mfcc_std`` (arrays of 13 values) and
            float entries ``spectral_centroid``, ``spectral_centroid_std``,
            ``pitch_mean``, ``pitch_std``, ``zcr_mean`` and ``zcr_std``.
            Each feature group falls back to fixed default values when its
            computation fails. Returns ``None`` when the file is empty or
            cannot be loaded/processed at all.
        """
        try:
            y, sr = librosa.load(audio_path, sr=22050, duration=30)

            if len(y) == 0:
                return None

            # Pad very short clips by repetition up to two seconds.
            min_length = sr * 2
            if len(y) < min_length:
                repeat_count = min_length // len(y) + 1
                y = np.tile(y, repeat_count)[:min_length]

            features = {}

            n_fft = min(2048, len(y))
            hop_length = n_fft // 4

            # Each feature group is best-effort: a failure in one of them is
            # replaced by fixed defaults so the others are still returned.
            try:
                mfccs = librosa.feature.mfcc(
                    y=y, sr=sr, n_mfcc=13, n_fft=n_fft, hop_length=hop_length
                )
                features['mfcc_mean'] = np.mean(mfccs, axis=1)
                features['mfcc_std'] = np.std(mfccs, axis=1)
            except Exception:
                features['mfcc_mean'] = np.zeros(13)
                features['mfcc_std'] = np.zeros(13)

            try:
                spectral_centroids = librosa.feature.spectral_centroid(
                    y=y, sr=sr, n_fft=n_fft, hop_length=hop_length
                )
                features['spectral_centroid'] = float(np.mean(spectral_centroids))
                features['spectral_centroid_std'] = float(np.std(spectral_centroids))
            except Exception:
                features['spectral_centroid'] = 1500.0
                features['spectral_centroid_std'] = 100.0

            try:
                pitches, magnitudes = librosa.piptrack(
                    y=y, sr=sr, threshold=0.1, n_fft=n_fft, hop_length=hop_length
                )
                # For every frame take the pitch of the strongest bin, then
                # keep only frames where that pitch is positive.
                strongest_bin = magnitudes.argmax(axis=0)
                frame_pitch = pitches[strongest_bin, np.arange(pitches.shape[1])]
                pitch_values = frame_pitch[frame_pitch > 0]

                if pitch_values.size:
                    features['pitch_mean'] = float(np.mean(pitch_values))
                    features['pitch_std'] = float(np.std(pitch_values))
                else:
                    features['pitch_mean'] = 150.0
                    features['pitch_std'] = 20.0
            except Exception:
                features['pitch_mean'] = 150.0
                features['pitch_std'] = 20.0

            try:
                zcr = librosa.feature.zero_crossing_rate(y, hop_length=hop_length)
                features['zcr_mean'] = float(np.mean(zcr))
                features['zcr_std'] = float(np.std(zcr))
            except Exception:
                features['zcr_mean'] = 0.1
                features['zcr_std'] = 0.05

            return features

        except Exception:
            # Loading or padding failed: signal "no features" to the caller.
            return None
160
+
161
def main():
    """Build the accent classifier UI and serve it with Gradio."""
    launch_options = {
        # Listen on every network interface, on fixed port 7860.
        "server_name": "0.0.0.0",
        "server_port": 7860,
        # Also ask Gradio for a shareable link.
        "share": True,
    }
    AccentClassifierApp().create_interface().launch(**launch_options)


if __name__ == "__main__":
    main()