EdgarDataScientist committed on
Commit db7582c · verified · 1 Parent(s): 7fa7bc0

Update app.py

Files changed (1)
  1. app.py +115 -228
app.py CHANGED
@@ -2,264 +2,151 @@ import gradio as gr
  import os
  import tempfile
  import requests
- from moviepy.editor import VideoFileClip
  import random
- import json
-
- # --- Lightweight AccentAnalyzer class ---
-
  class AccentAnalyzer:
      def __init__(self):
          self.accent_profiles = {
-             "American": {
-                 "features": ["rhotic", "flapped_t", "cot_caught_merger"],
-                 "description": "American English accent with rhotic pronunciation and typical North American features."
-             },
-             "British": {
-                 "features": ["non_rhotic", "t_glottalization", "trap_bath_split"],
-                 "description": "British English accent with non-rhotic pronunciation and typical UK features."
-             },
-             "Australian": {
-                 "features": ["non_rhotic", "flat_a", "high_rising_terminal"],
-                 "description": "Australian English accent with distinctive vowel sounds and intonation patterns."
-             },
-             "Canadian": {
-                 "features": ["rhotic", "canadian_raising", "eh_tag"],
-                 "description": "Canadian English accent with features of both American and British English."
-             },
-             "Indian": {
-                 "features": ["retroflex_consonants", "monophthongization", "syllable_timing"],
-                 "description": "Indian English accent influenced by native Indian languages."
-             },
-             "Irish": {
-                 "features": ["dental_fricatives", "alveolar_l", "soft_consonants"],
-                 "description": "Irish English accent with distinctive rhythm and consonant patterns."
-             },
-             "Scottish": {
-                 "features": ["rolled_r", "monophthongs", "glottal_stops"],
-                 "description": "Scottish English accent with strong consonants and distinctive vowel patterns."
-             },
-             "South African": {
-                 "features": ["non_rhotic", "kit_split", "kw_hw_distinction"],
-                 "description": "South African English accent with influences from Afrikaans and other local languages."
-             }
          }
-         self._load_or_create_accent_data()
-
-     def _load_or_create_accent_data(self):
-         # For demo: just create simulated data in-memory
-         self.accent_data = self._create_simulated_accent_data()

-     def _create_simulated_accent_data(self):
-         accent_data = {}
-         for accent, profile in self.accent_profiles.items():
-             accent_data[accent] = {
                  "primary_features": profile["features"],
-                 "feature_probabilities": {}
              }
-             for feature in profile["features"]:
-                 accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.7, 0.9)
-             all_features = set()
-             for a, p in self.accent_profiles.items():
-                 all_features.update(p["features"])
-             for feature in all_features:
-                 if feature not in profile["features"]:
-                     accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.1, 0.4)
-         return accent_data

-     def _extract_features(self, audio_path):
-         # This is a simulated feature extraction for the demo.
-         # In a real application, this would use SpeechBrain or similar ML models
-         # to extract actual phonetic features from the audio.
-         all_features = set()
-         for accent, profile in self.accent_profiles.items():
-             all_features.update(profile["features"])
-         detected_features = {}
-         for feature in all_features:
-             # Simulate detection of features with varying probabilities
-             detected_features[feature] = random.uniform(0.1, 0.9)
-         return detected_features
-
-     def _calculate_accent_scores(self, detected_features):
-         accent_scores = {}
          for accent, data in self.accent_data.items():
-             score = 0
-             total_weight = 0
-             for feature, probability in detected_features.items():
-                 expected_prob = data["feature_probabilities"].get(feature, 0.1)
-                 weight = 3.0 if feature in data["primary_features"] else 1.0 # Give more weight to primary features
-                 feature_score = probability * expected_prob * weight
-                 score += feature_score
-                 total_weight += weight
-             if total_weight > 0:
-                 accent_scores[accent] = (score / total_weight) * 100
-             else:
-                 accent_scores[accent] = 0
-         return accent_scores
-
-     def _generate_explanation(self, accent_type, confidence):
-         if confidence >= 70:
-             confidence_level = "high confidence"
-             certainty = "is very clear"
-         elif confidence >= 50:
-             confidence_level = "moderate confidence"
-             certainty = "is present"
-         else:
-             confidence_level = "low confidence"
-             certainty = "may be present"
-         description = self.accent_profiles[accent_type]["description"]
-         second_accent = self._get_second_most_likely_accent(accent_type)
-         explanation = f"The speaker has a {confidence_level} {accent_type} English accent. The {accent_type} accent {certainty}, with features of both {accent_type} and {second_accent} English present."
-         return explanation
-
-     def _get_second_most_likely_accent(self, primary_accent):
-         # Simple rule-based selection for demo purposes
-         accent_similarities = {
-             "American": ["Canadian", "British"],
-             "British": ["Australian", "Irish"],
-             "Australian": ["British", "South African"],
-             "Canadian": ["American", "British"],
-             "Indian": ["British", "South African"],
-             "Irish": ["Scottish", "British"],
-             "Scottish": ["Irish", "British"],
-             "South African": ["Australian", "British"]
-         }
-         # Pick a random similar accent from the predefined list
-         return random.choice(accent_similarities[primary_accent])
-
-     def analyze_accent(self, audio_path):
-         """
-         Analyzes the accent from an audio file.
-         In this demo, it simulates feature extraction and accent scoring.
-         """
-         detected_features = self._extract_features(audio_path)
-         accent_scores = self._calculate_accent_scores(detected_features)
-
-         # Find the accent with the highest score
-         accent_type = max(accent_scores, key=accent_scores.get)
-         confidence = accent_scores[accent_type]
-
-         explanation = self._generate_explanation(accent_type, confidence)
-
          return {
-             "accent_type": accent_type,
-             "confidence": confidence,
-             "explanation": explanation,
-             "all_scores": accent_scores # Useful for debugging or more detailed display
          }

- # --- Utility: Download video and extract audio ---

  def download_and_extract_audio(url):
-     """
-     Downloads a video from a URL and extracts its audio to a WAV file.
-     Handles both direct MP4 links and YouTube URLs (using pytubefix).
-     """
      temp_dir = tempfile.mkdtemp()
      video_path = os.path.join(temp_dir, "video.mp4")
      audio_path = os.path.join(temp_dir, "audio.wav")

-     try:
-         # Download video
-         # Check for YouTube URL patterns (simplified for demo)
-         if "youtube.com/" in url or "youtu.be/" in url:
-             try:
-                 from pytubefix import YouTube
-                 yt = YouTube(url)
-                 # Try to get a progressive stream (video + audio)
-                 stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
-                 if not stream:
-                     # Fallback to separate audio stream if progressive not found
-                     stream = yt.streams.filter(only_audio=True).first()
-                     if not stream:
-                         raise RuntimeError("No suitable video or audio stream found for YouTube URL.")
-
-                 # Download the stream
-                 stream.download(output_path=temp_dir, filename="video.mp4")
-             except ImportError:
-                 raise ImportError("pytubefix is not installed. Please install it with 'pip install pytubefix'.")
-             except Exception as e:
-                 # Catch specific YouTube errors, e.g., age restriction, unavailable
-                 raise RuntimeError(f"Error downloading YouTube video: {e}. Try running locally or use a direct MP4 link.")
-         else:
-             # Direct MP4 download
-             response = requests.get(url, stream=True)
-             response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
-             with open(video_path, "wb") as f:
-                 for chunk in response.iter_content(chunk_size=8192):
                      f.write(chunk)
-
-         # Extract audio using moviepy
-         clip = VideoFileClip(video_path)
-         clip.audio.write_audiofile(audio_path, logger=None) # logger=None suppresses moviepy output
-         clip.close()
-
-         return audio_path
-     finally:
-         # Clean up the video file immediately after audio extraction
-         if os.path.exists(video_path):
-             os.remove(video_path)
-         # The temp_dir itself will be handled by Gradio's internal tempfile management,
-         # or you can add os.rmdir(temp_dir) if you manage temp_dir manually.

- # --- Gradio interface ---

- def analyze_from_url(url):
-     """
-     Gradio interface function to analyze accent from a given video URL.
-     """
-     if not url:
-         return "Please enter a video URL.", "N/A", "No URL provided."

      try:
          audio_path = download_and_extract_audio(url)
          analyzer = AccentAnalyzer()
          results = analyzer.analyze_accent(audio_path)
-
-         # Clean up the temporary audio file after analysis
-         if os.path.exists(audio_path):
-             os.remove(audio_path)

-         return (
-             results["accent_type"],
-             f"{results['confidence']:.1f}%",
-             results["explanation"]
-         )
      except Exception as e:
-         # Catch and display any errors during the process
-         return (
-             "Error",
-             "0%",
-             f"Error processing video/audio: {e}. Please ensure the URL is valid and publicly accessible."
-         )

- # Create the Gradio interface
  iface = gr.Interface(
-     fn=analyze_from_url,
-     inputs=gr.Textbox(
-         label="Enter Public Video URL (YouTube or direct MP4)",
-         placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or https://samplelib.com/lib/preview/mp4/sample-5s.mp4"
-     ),
-     outputs=[
-         gr.Textbox(label="Detected Accent"),
-         gr.Textbox(label="Confidence Score"),
-         gr.Textbox(label="Explanation")
-     ],
-     title="English Accent Analyzer (Rule-Based Demo)",
-     description="""
-     Paste a public video URL (YouTube or direct MP4) to detect the English accent and confidence score.
-
-     **Important Notes:**
-     * This is a **DEMO** using a simulated accent analysis model, not a real machine learning model.
-     * It uses `pytubefix` for YouTube links and `requests`/`moviepy` for direct MP4s.
-     * YouTube video extraction can sometimes be temperamental due to YouTube's changing policies or region restrictions. Direct MP4 links are generally more reliable.
-     * **Sample MP4 URL for testing:** `https://samplelib.com/lib/preview/mp4/sample-5s.mp4`
-     """
  )

- # Launch the Gradio interface
- # `share=False` for local deployment (no public link generated)
- # For Hugging Face Spaces, you typically don't need `iface.launch()` as the platform handles it.
- # However, if you're running it locally to test before deployment, keep this block.
- if __name__ == "__main__":
-     iface.launch(debug=True, share=False)
 
  import os
  import tempfile
  import requests
+ import subprocess
  import random
+ import matplotlib.pyplot as plt
+ import torchaudio
+ import torch
+
+ # --- Load SpeechBrain ---
+ try:
+     from speechbrain.inference import EncoderClassifier
+     speechbrain_classifier = EncoderClassifier.from_hparams(
+         source="speechbrain/lang-id-commonlanguage_ecapa",
+         savedir="pretrained_models/lang-id-commonlanguage_ecapa"
+     )
+     SPEECHBRAIN_LOADED = True
+ except Exception as e:
+     print(f"Error loading SpeechBrain model: {e}. Simulated mode ON.")
+     SPEECHBRAIN_LOADED = False
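The model-loading block above is new in this commit. As a side note, a minimal standalone check of the same CommonLanguage model can look like the sketch below; this is not part of the commit, and it assumes the `speechbrain` package and the hosted model weights are reachable and that `classify_file` returns the same tuple as `classify_batch`.

```python
# Sketch only: load the model used by the Space and classify a local WAV file.
from speechbrain.inference import EncoderClassifier

classifier = EncoderClassifier.from_hparams(
    source="speechbrain/lang-id-commonlanguage_ecapa",
    savedir="pretrained_models/lang-id-commonlanguage_ecapa",
)
# "sample.wav" is a placeholder path; classify_file loads and resamples the
# audio itself and returns (out_prob, score, index, text_lab).
out_prob, score, index, text_lab = classifier.classify_file("sample.wav")
print(text_lab, float(score[0]))
```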
+
+ # --- Accent Analyzer Class ---
  class AccentAnalyzer:
      def __init__(self):
          self.accent_profiles = {
+             "American": {"features": ["rhotic", "flapped_t", "cot_caught_merger"]},
+             "British": {"features": ["non_rhotic", "t_glottalization", "trap_bath_split"]},
+             "Australian": {"features": ["non_rhotic", "flat_a", "high_rising_terminal"]},
+             "Canadian": {"features": ["rhotic", "canadian_raising", "eh_tag"]},
+             "Indian": {"features": ["retroflex_consonants", "monophthongization", "syllable_timing"]},
+             "Irish": {"features": ["dental_fricatives", "alveolar_l", "soft_consonants"]},
+             "Scottish": {"features": ["rolled_r", "monophthongs", "glottal_stops"]},
+             "South African": {"features": ["non_rhotic", "kit_split", "kw_hw_distinction"]}
          }
+         self.accent_data = self._simulate_profiles()

+     def _simulate_profiles(self):
+         all_features = set(f for p in self.accent_profiles.values() for f in p["features"])
+         data = {}
+         for name, profile in self.accent_profiles.items():
+             data[name] = {
                  "primary_features": profile["features"],
+                 "feature_probabilities": {
+                     f: random.uniform(0.7, 0.9) if f in profile["features"] else random.uniform(0.1, 0.4)
+                     for f in all_features
+                 }
              }
+         return data
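For readers skimming the diff, this is roughly the per-accent record that `_simulate_profiles` builds. It is a sketch, not part of the commit: the numeric values and the `example_entry` name are placeholders, since the real probabilities are drawn with `random.uniform` at startup.

```python
# Illustrative shape of one accent_data entry, e.g. accent_data["American"].
# Primary features are drawn from uniform(0.7, 0.9), all others from uniform(0.1, 0.4).
example_entry = {
    "primary_features": ["rhotic", "flapped_t", "cot_caught_merger"],
    "feature_probabilities": {
        "rhotic": 0.84,
        "flapped_t": 0.78,
        "cot_caught_merger": 0.81,
        "non_rhotic": 0.22,
        # ... one entry per feature across all eight profiles
    },
}
```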
 
 
 
 
 
 
 
 
+     def _simulate_accent_classification(self, audio_path):
+         all_features = {f for p in self.accent_profiles.values() for f in p["features"]}
+         detected = {f: random.uniform(0.1, 0.9) for f in all_features}
+         scores = {}
          for accent, data in self.accent_data.items():
+             score = sum(
+                 detected[f] * data["feature_probabilities"][f] * (3.0 if f in data["primary_features"] else 1.0)
+                 for f in all_features
+             )
+             scores[accent] = score
+         top = max(scores, key=scores.get)
+         conf = (scores[top] / max(scores.values())) * 100
          return {
+             "accent_type": top,
+             "confidence": conf,
+             "explanation": f"Detected **{top}** accent with {conf:.1f}% confidence.",
+             "all_scores": scores
          }

+     def analyze_accent(self, audio_path):
+         if not SPEECHBRAIN_LOADED:
+             return self._simulate_accent_classification(audio_path)
+         try:
+             signal, sr = torchaudio.load(audio_path)
+             if sr != 16000:
+                 signal = torchaudio.transforms.Resample(sr, 16000)(signal)
+             if signal.shape[0] > 1:
+                 signal = signal.mean(dim=0, keepdim=True)
+             pred = speechbrain_classifier.classify_batch(signal.unsqueeze(0))
+             probs = pred[0].squeeze(0).tolist()
+             labels = pred[1][0]
+             scores = {speechbrain_classifier.hparams.label_encoder.ind2lab[i]: p * 100 for i, p in enumerate(probs)}
+             if labels[0] == 'en':
+                 result = self._simulate_accent_classification(audio_path)
+                 result["all_scores"] = scores
+                 return result
+             return {
+                 "accent_type": labels[0],
+                 "confidence": max(probs) * 100,
+                 "explanation": f"Detected language: **{labels[0]}** ({max(probs)*100:.1f}%)",
+                 "all_scores": scores
+             }
+         except Exception as e:
+             print(f"Fallback to simulation: {e}")
+             return self._simulate_accent_classification(audio_path)
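A hedged sketch of how SpeechBrain's `classify_batch` output is commonly unpacked follows; the names and shapes here are my assumptions about the library, not something this commit asserts. The classifier typically returns a four-element tuple, and `wavs` is expected as a `[batch, time]` tensor, so the mono `[1, time]` signal prepared above can usually be passed as-is.

```python
# Sketch only: typical unpacking of EncoderClassifier.classify_batch on a mono,
# 16 kHz signal of shape [1, time] (as prepared in analyze_accent above).
out_prob, score, index, text_lab = speechbrain_classifier.classify_batch(signal)
predicted_label = text_lab[0]     # e.g. "English" for the CommonLanguage model
top_score = float(score[0])       # score of the predicted class
per_class = out_prob[0].tolist()  # one value per language known to the label encoder
```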

+ # --- Download & Extract Audio ---
  def download_and_extract_audio(url):
      temp_dir = tempfile.mkdtemp()
      video_path = os.path.join(temp_dir, "video.mp4")
      audio_path = os.path.join(temp_dir, "audio.wav")

+     if "youtube.com" in url or "youtu.be" in url:
+         from pytubefix import YouTube
+         yt = YouTube(url)
+         stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
+         stream.download(output_path=temp_dir, filename="video.mp4")
+     else:
+         with requests.get(url, stream=True) as r:
+             r.raise_for_status()
+             with open(video_path, 'wb') as f:
+                 for chunk in r.iter_content(chunk_size=8192):
                      f.write(chunk)

+     # Extract audio using ffmpeg
+     subprocess.run([
+         "ffmpeg", "-i", video_path, "-ar", "16000", "-ac", "1", "-f", "wav", audio_path, "-y"
+     ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

+     return audio_path
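The extraction step now shells out to ffmpeg with both output streams silenced. A small variant with basic error checking is sketched below; the `extract_wav` helper name is mine, and it assumes, as the commit already does, that an `ffmpeg` binary is on the PATH.

```python
import subprocess

def extract_wav(video_path: str, audio_path: str) -> None:
    # Same flags as above: resample to 16 kHz, downmix to mono, write WAV,
    # overwrite any existing output. check=True raises CalledProcessError
    # if ffmpeg exits with a non-zero status instead of silently producing no file.
    subprocess.run(
        ["ffmpeg", "-i", video_path, "-ar", "16000", "-ac", "1",
         "-f", "wav", audio_path, "-y"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=True,
    )
```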
 
 
 
 
 

+ # --- Gradio Function ---
+ def analyze_from_url_gradio(url):
+     if not url:
+         return "Please enter a URL.", plt.figure()
      try:
          audio_path = download_and_extract_audio(url)
          analyzer = AccentAnalyzer()
          results = analyzer.analyze_accent(audio_path)

+         labels, values = zip(*results["all_scores"].items())
+         fig, ax = plt.subplots()
+         ax.bar(labels, values)
+         ax.set_ylabel('Confidence (%)')
+         ax.set_title('Accent/Language Confidence')
+         plt.xticks(rotation=45)
+         plt.tight_layout()
+
+         return results["explanation"], fig
      except Exception as e:
+         return f"Error: {e}", plt.figure()
 
 
 
 
 
142
 
143
+ # --- Gradio Interface ---
144
  iface = gr.Interface(
145
+ fn=analyze_from_url_gradio,
146
+ inputs=gr.Textbox(label="Enter Public Video URL (YouTube or MP4)"),
147
+ outputs=[gr.Textbox(label="Result"), gr.Plot(label="Confidence Plot")],
148
+ title="English Accent or Language Analyzer",
149
+ description="Paste a public video URL. The system will detect the accent or language spoken using SpeechBrain or simulation."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  )
151
 
152
+ iface.launch()
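For a quick local smoke test of the new pipeline without the UI, something like the sketch below can be run. The sample MP4 URL is the one quoted in the previous version's description; the sketch assumes ffmpeg and the Python dependencies imported above are installed and that the URL is still reachable.

```python
# Sketch only: exercise download -> ffmpeg extraction -> classification directly.
if __name__ == "__main__":
    explanation, fig = analyze_from_url_gradio(
        "https://samplelib.com/lib/preview/mp4/sample-5s.mp4"
    )
    print(explanation)
    fig.savefig("confidence.png")  # the bar chart normally rendered by gr.Plot
```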