EdgarDataScientist committed · verified
Commit 6fb1c7d · 1 Parent(s): db77c3e

Create app.py

Files changed (1):
  1. app.py +265 -0

app.py ADDED
import gradio as gr
import os
import tempfile
import requests
from moviepy.editor import VideoFileClip
import random
import json

# --- Lightweight AccentAnalyzer class ---

class AccentAnalyzer:
    def __init__(self):
        self.accent_profiles = {
            "American": {
                "features": ["rhotic", "flapped_t", "cot_caught_merger"],
                "description": "American English accent with rhotic pronunciation and typical North American features."
            },
            "British": {
                "features": ["non_rhotic", "t_glottalization", "trap_bath_split"],
                "description": "British English accent with non-rhotic pronunciation and typical UK features."
            },
            "Australian": {
                "features": ["non_rhotic", "flat_a", "high_rising_terminal"],
                "description": "Australian English accent with distinctive vowel sounds and intonation patterns."
            },
            "Canadian": {
                "features": ["rhotic", "canadian_raising", "eh_tag"],
                "description": "Canadian English accent with features of both American and British English."
            },
            "Indian": {
                "features": ["retroflex_consonants", "monophthongization", "syllable_timing"],
                "description": "Indian English accent influenced by native Indian languages."
            },
            "Irish": {
                "features": ["dental_fricatives", "alveolar_l", "soft_consonants"],
                "description": "Irish English accent with distinctive rhythm and consonant patterns."
            },
            "Scottish": {
                "features": ["rolled_r", "monophthongs", "glottal_stops"],
                "description": "Scottish English accent with strong consonants and distinctive vowel patterns."
            },
            "South African": {
                "features": ["non_rhotic", "kit_split", "kw_hw_distinction"],
                "description": "South African English accent with influences from Afrikaans and other local languages."
            }
        }
        self._load_or_create_accent_data()

    def _load_or_create_accent_data(self):
        # For demo: just create simulated data in-memory
        self.accent_data = self._create_simulated_accent_data()

    def _create_simulated_accent_data(self):
        accent_data = {}
        for accent, profile in self.accent_profiles.items():
            accent_data[accent] = {
                "primary_features": profile["features"],
                "feature_probabilities": {}
            }
            for feature in profile["features"]:
                accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.7, 0.9)
            all_features = set()
            for a, p in self.accent_profiles.items():
                all_features.update(p["features"])
            for feature in all_features:
                if feature not in profile["features"]:
                    accent_data[accent]["feature_probabilities"][feature] = random.uniform(0.1, 0.4)
        return accent_data

    def _extract_features(self, audio_path):
        # This is a simulated feature extraction for the demo.
        # In a real application, this would use SpeechBrain or similar ML models
        # to extract actual phonetic features from the audio.
        all_features = set()
        for accent, profile in self.accent_profiles.items():
            all_features.update(profile["features"])
        detected_features = {}
        for feature in all_features:
            # Simulate detection of features with varying probabilities
            detected_features[feature] = random.uniform(0.1, 0.9)
        return detected_features

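    # Illustrative only -- not part of the original demo and never called here.
    # A minimal sketch of what real acoustic feature extraction could look like,
    # assuming librosa is installed; mapping raw MFCCs to the phonetic features
    # above would still require a trained model such as a SpeechBrain classifier.
    def _extract_mfcc_features_sketch(self, audio_path):
        import librosa  # hypothetical extra dependency, not imported by the demo
        signal, sample_rate = librosa.load(audio_path, sr=16000)
        # 13 MFCC coefficients averaged over time as a crude acoustic summary
        mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
        return mfccs.mean(axis=1)
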
    def _calculate_accent_scores(self, detected_features):
        accent_scores = {}
        for accent, data in self.accent_data.items():
            score = 0
            total_weight = 0
            for feature, probability in detected_features.items():
                expected_prob = data["feature_probabilities"].get(feature, 0.1)
                weight = 3.0 if feature in data["primary_features"] else 1.0  # Give more weight to primary features
                feature_score = probability * expected_prob * weight
                score += feature_score
                total_weight += weight
            if total_weight > 0:
                accent_scores[accent] = (score / total_weight) * 100
            else:
                accent_scores[accent] = 0
        return accent_scores

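    # Worked example for the weighting above (illustrative numbers): a primary
    # feature detected at 0.8 with expected probability 0.85 contributes
    # 0.8 * 0.85 * 3.0 = 2.04 to the score at weight 3.0, while a non-primary
    # feature detected at 0.8 against an expected 0.2 adds only 0.8 * 0.2 * 1.0 = 0.16.
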
    def _generate_explanation(self, accent_type, confidence):
        if confidence >= 70:
            confidence_level = "high confidence"
            certainty = "is very clear"
        elif confidence >= 50:
            confidence_level = "moderate confidence"
            certainty = "is present"
        else:
            confidence_level = "low confidence"
            certainty = "may be present"
        description = self.accent_profiles[accent_type]["description"]
        second_accent = self._get_second_most_likely_accent(accent_type)
        explanation = f"The speaker has a {confidence_level} {accent_type} English accent. The {accent_type} accent {certainty}, with features of both {accent_type} and {second_accent} English present."
        return explanation

    def _get_second_most_likely_accent(self, primary_accent):
        # Simple rule-based selection for demo purposes
        accent_similarities = {
            "American": ["Canadian", "British"],
            "British": ["Australian", "Irish"],
            "Australian": ["British", "South African"],
            "Canadian": ["American", "British"],
            "Indian": ["British", "South African"],
            "Irish": ["Scottish", "British"],
            "Scottish": ["Irish", "British"],
            "South African": ["Australian", "British"]
        }
        # Pick a random similar accent from the predefined list
        return random.choice(accent_similarities[primary_accent])

    def analyze_accent(self, audio_path):
        """
        Analyzes the accent from an audio file.
        In this demo, it simulates feature extraction and accent scoring.
        """
        detected_features = self._extract_features(audio_path)
        accent_scores = self._calculate_accent_scores(detected_features)

        # Find the accent with the highest score
        accent_type = max(accent_scores, key=accent_scores.get)
        confidence = accent_scores[accent_type]

        explanation = self._generate_explanation(accent_type, confidence)

        return {
            "accent_type": accent_type,
            "confidence": confidence,
            "explanation": explanation,
            "all_scores": accent_scores  # Useful for debugging or more detailed display
        }

# --- Utility: Download video and extract audio ---

def download_and_extract_audio(url):
    """
    Downloads a video from a URL and extracts its audio to a WAV file.
    Handles both direct MP4 links and YouTube URLs (using pytubefix).
    """
    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "video.mp4")
    audio_path = os.path.join(temp_dir, "audio.wav")

    try:
        # Download the video.
        # Check for YouTube URL patterns (simplified for demo)
        if "youtube.com/" in url or "youtu.be/" in url:
            try:
                from pytubefix import YouTube
                yt = YouTube(url)
                # Try to get a progressive stream (video + audio)
                stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
                if not stream:
                    # Fall back to a separate audio-only stream if no progressive stream is found
                    stream = yt.streams.filter(only_audio=True).first()
                if not stream:
                    raise RuntimeError("No suitable video or audio stream found for YouTube URL.")

                # Download the stream
                stream.download(output_path=temp_dir, filename="video.mp4")
            except ImportError:
                raise ImportError("pytubefix is not installed. Please install it with 'pip install pytubefix'.")
            except Exception as e:
                # Catch specific YouTube errors, e.g., age restriction or unavailable video
                raise RuntimeError(f"Error downloading YouTube video: {e}. Try running locally or use a direct MP4 link.")
        else:
            # Direct MP4 download
            response = requests.get(url, stream=True)
            response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
            with open(video_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Extract audio using moviepy
        clip = VideoFileClip(video_path)
        clip.audio.write_audiofile(audio_path, logger=None)  # logger=None suppresses moviepy output
        clip.close()

        return audio_path
    finally:
        # Clean up the video file immediately after audio extraction.
        if os.path.exists(video_path):
            os.remove(video_path)
        # The temp_dir itself is not removed here; remove it manually
        # (e.g., with shutil.rmtree) once the audio file is no longer needed.

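# Illustrative usage (not executed anywhere in this file), using the sample
# clip referenced in the interface description below:
#   audio_path = download_and_extract_audio("https://samplelib.com/lib/preview/mp4/sample-5s.mp4")
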
# --- Gradio interface ---

def analyze_from_url(url):
    """
    Gradio interface function to analyze accent from a given video URL.
    """
    if not url:
        return "Please enter a video URL.", "N/A", "No URL provided."

    try:
        audio_path = download_and_extract_audio(url)
        analyzer = AccentAnalyzer()
        results = analyzer.analyze_accent(audio_path)

        # Clean up the temporary audio file after analysis
        if os.path.exists(audio_path):
            os.remove(audio_path)

        return (
            results["accent_type"],
            f"{results['confidence']:.1f}%",
            results["explanation"]
        )
    except Exception as e:
        # Catch and display any errors during the process
        return (
            "Error",
            "0%",
            f"Error processing video/audio: {e}. Please ensure the URL is valid and publicly accessible."
        )

# Create the Gradio interface
iface = gr.Interface(
    fn=analyze_from_url,
    inputs=gr.Textbox(
        label="Enter Public Video URL (YouTube or direct MP4)",
        placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or https://samplelib.com/lib/preview/mp4/sample-5s.mp4"
    ),
    outputs=[
        gr.Textbox(label="Detected Accent"),
        gr.Textbox(label="Confidence Score"),
        gr.Textbox(label="Explanation")
    ],
    title="English Accent Analyzer (Rule-Based Demo)",
    description="""
Paste a public video URL (YouTube or direct MP4) to detect the English accent and confidence score.

**Important Notes:**
* This is a **DEMO** using a simulated accent analysis model, not a real machine learning model.
* It uses `pytubefix` for YouTube links and `requests`/`moviepy` for direct MP4s.
* YouTube video extraction can sometimes be temperamental due to YouTube's changing policies or region restrictions. Direct MP4 links are generally more reliable.
* **Sample MP4 URL for testing:** `https://samplelib.com/lib/preview/mp4/sample-5s.mp4`
"""
)

# Launch the Gradio interface
# `share=False` for local deployment (no public link generated)
# For Hugging Face Spaces, you typically don't need `iface.launch()` as the platform handles it.
# However, if you're running it locally to test before deployment, keep this block.
if __name__ == "__main__":
    iface.launch(debug=True, share=False)
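
# To run this demo locally, the imports above suggest dependencies along these
# lines (versions are not pinned in this commit, so treat this as a sketch;
# pytubefix is only needed for YouTube URLs):
#   pip install gradio moviepy requests pytubefix
# then start the app with: python app.py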