|
import json
import os
import random
import shutil
import tempfile

import gradio as gr
import requests
from moviepy.editor import VideoFileClip
|
|
|
|
|
|
|
class AccentAnalyzer:
    """Simulated, rule-based English accent classifier for the demo UI.

    Nothing in this class inspects audio. Both the per-accent "model"
    (``accent_data``) and the features "detected" in a recording are drawn
    from ``random``, so results differ between runs unless the caller seeds
    the ``random`` module first.
    """

    def __init__(self):
        """Define the supported accent profiles and build the simulated model."""
        # Each profile lists the accent's three primary features and a
        # human-readable description.
        self.accent_profiles = {
            "American": {
                "features": ["rhotic", "flapped_t", "cot_caught_merger"],
                "description": "American English accent with rhotic pronunciation and typical North American features."
            },
            "British": {
                "features": ["non_rhotic", "t_glottalization", "trap_bath_split"],
                "description": "British English accent with non-rhotic pronunciation and typical UK features."
            },
            "Australian": {
                "features": ["non_rhotic", "flat_a", "high_rising_terminal"],
                "description": "Australian English accent with distinctive vowel sounds and intonation patterns."
            },
            "Canadian": {
                "features": ["rhotic", "canadian_raising", "eh_tag"],
                "description": "Canadian English accent with features of both American and British English."
            },
            "Indian": {
                "features": ["retroflex_consonants", "monophthongization", "syllable_timing"],
                "description": "Indian English accent influenced by native Indian languages."
            },
            "Irish": {
                "features": ["dental_fricatives", "alveolar_l", "soft_consonants"],
                "description": "Irish English accent with distinctive rhythm and consonant patterns."
            },
            "Scottish": {
                "features": ["rolled_r", "monophthongs", "glottal_stops"],
                "description": "Scottish English accent with strong consonants and distinctive vowel patterns."
            },
            "South African": {
                "features": ["non_rhotic", "kit_split", "kw_hw_distinction"],
                "description": "South African English accent with influences from Afrikaans and other local languages."
            },
        }
        self._load_or_create_accent_data()

    def _all_features(self):
        """Return every distinct feature named by any profile, sorted.

        Sorting fixes the order in which random values are drawn, so a
        seeded ``random`` module yields reproducible results (plain ``set``
        iteration order for strings varies between interpreter runs).
        """
        distinct = set()
        for profile in self.accent_profiles.values():
            distinct.update(profile["features"])
        return sorted(distinct)

    def _load_or_create_accent_data(self):
        """Populate ``self.accent_data``; currently always simulated data."""
        self.accent_data = self._create_simulated_accent_data()

    def _create_simulated_accent_data(self):
        """Build random per-accent feature expectations.

        Returns:
            A dict mapping accent name to a dict with:
            ``"primary_features"`` - the accent's own feature list, and
            ``"feature_probabilities"`` - a value for every known feature,
            drawn from [0.7, 0.9] for primary features and from [0.1, 0.4]
            for all other features.
        """
        # The feature universe does not depend on the accent, so compute it
        # once instead of once per accent.
        all_features = self._all_features()

        accent_data = {}
        for accent, profile in self.accent_profiles.items():
            primary = profile["features"]
            probabilities = {
                feature: random.uniform(0.7, 0.9) for feature in primary
            }
            for feature in all_features:
                if feature not in probabilities:
                    probabilities[feature] = random.uniform(0.1, 0.4)
            accent_data[accent] = {
                "primary_features": primary,
                "feature_probabilities": probabilities,
            }
        return accent_data

    def _extract_features(self, audio_path):
        """Return a simulated detection strength for every known feature.

        Args:
            audio_path: Path of the audio file. It is accepted for interface
                compatibility but is NOT read; values are random.

        Returns:
            A dict mapping feature name to a value drawn from [0.1, 0.9].
        """
        return {
            feature: random.uniform(0.1, 0.9)
            for feature in self._all_features()
        }

    def _calculate_accent_scores(self, detected_features):
        """Score every accent against the detected feature strengths.

        For each accent the score is the weighted mean of
        ``detected * expected`` over the detected features, scaled by 100.
        An accent's primary features carry weight 3.0, all others 1.0.
        Features missing from an accent's expectations default to 0.1.

        Args:
            detected_features: Mapping of feature name to detected strength.

        Returns:
            A dict mapping accent name to its score; 0 when
            ``detected_features`` is empty.
        """
        accent_scores = {}
        for accent, data in self.accent_data.items():
            expectations = data["feature_probabilities"]
            primary = data["primary_features"]
            score = 0
            total_weight = 0
            for feature, probability in detected_features.items():
                weight = 3.0 if feature in primary else 1.0
                score += probability * expectations.get(feature, 0.1) * weight
                total_weight += weight
            if total_weight > 0:
                accent_scores[accent] = (score / total_weight) * 100
            else:
                accent_scores[accent] = 0
        return accent_scores

    def _generate_explanation(self, accent_type, confidence):
        """Compose the human-readable explanation for a detected accent.

        Args:
            accent_type: Name of the winning accent (a key of the
                similarity table in ``_get_second_most_likely_accent``).
            confidence: Score of that accent; >= 70 reads as high
                confidence, >= 50 as moderate, anything lower as low.

        Returns:
            The explanation sentence pair as a single string.

        Raises:
            KeyError: If ``accent_type`` is not a known accent.
        """
        if confidence >= 70:
            confidence_level = "high confidence"
            certainty = "is very clear"
        elif confidence >= 50:
            confidence_level = "moderate confidence"
            certainty = "is present"
        else:
            confidence_level = "low confidence"
            certainty = "may be present"

        second_accent = self._get_second_most_likely_accent(accent_type)
        return (
            f"The speaker has a {confidence_level} {accent_type} English accent. "
            f"The {accent_type} accent {certainty}, with features of both "
            f"{accent_type} and {second_accent} English present."
        )

    def _get_second_most_likely_accent(self, primary_accent):
        """Pick a related accent to mention alongside ``primary_accent``.

        The choice is random among two hard-coded "similar" accents; it is
        not derived from the computed accent scores.

        Raises:
            KeyError: If ``primary_accent`` is not in the similarity table.
        """
        accent_similarities = {
            "American": ["Canadian", "British"],
            "British": ["Australian", "Irish"],
            "Australian": ["British", "South African"],
            "Canadian": ["American", "British"],
            "Indian": ["British", "South African"],
            "Irish": ["Scottish", "British"],
            "Scottish": ["Irish", "British"],
            "South African": ["Australian", "British"],
        }
        return random.choice(accent_similarities[primary_accent])

    def analyze_accent(self, audio_path):
        """Analyze the accent of an audio file (simulated in this demo).

        Args:
            audio_path: Path of the audio file; forwarded to the simulated
                feature extractor, which does not read it.

        Returns:
            A dict with ``"accent_type"`` (highest-scoring accent),
            ``"confidence"`` (its score), ``"explanation"`` (display text)
            and ``"all_scores"`` (score for every accent).
        """
        detected_features = self._extract_features(audio_path)
        accent_scores = self._calculate_accent_scores(detected_features)

        accent_type = max(accent_scores, key=accent_scores.get)
        confidence = accent_scores[accent_type]
        explanation = self._generate_explanation(accent_type, confidence)

        return {
            "accent_type": accent_type,
            "confidence": confidence,
            "explanation": explanation,
            "all_scores": accent_scores,
        }
|
|
|
|
|
|
|
def _download_youtube_video(url, temp_dir):
    """Download a YouTube video into ``temp_dir`` as ``video.mp4`` via pytubefix."""
    try:
        # Imported lazily so the app still works for direct MP4 links when
        # pytubefix is not installed.
        from pytubefix import YouTube
    except ImportError as err:
        raise ImportError(
            "pytubefix is not installed. Please install it with 'pip install pytubefix'."
        ) from err

    try:
        yt = YouTube(url)
        # Prefer the highest-resolution progressive MP4 stream.
        stream = (
            yt.streams.filter(progressive=True, file_extension='mp4')
            .order_by('resolution')
            .desc()
            .first()
        )
        if not stream:
            # NOTE(review): an audio-only stream is saved as video.mp4 and later
            # opened with VideoFileClip - confirm moviepy accepts such files.
            stream = yt.streams.filter(only_audio=True).first()
        if not stream:
            raise RuntimeError("No suitable video or audio stream found for YouTube URL.")
        stream.download(output_path=temp_dir, filename="video.mp4")
    except Exception as e:
        raise RuntimeError(
            f"Error downloading YouTube video: {e}. Try running locally or use a direct MP4 link."
        ) from e


def _download_direct_video(url, video_path, timeout):
    """Stream the file at ``url`` to ``video_path`` in 8 KiB chunks."""
    # The context manager releases the connection even if writing fails;
    # the timeout prevents an unresponsive server from hanging the request.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(video_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)


def download_and_extract_audio(url, *, timeout=30):
    """Download a video from a URL and extract its audio to a WAV file.

    Handles both direct MP4 links and YouTube URLs (using pytubefix).

    Args:
        url: Public video URL. URLs containing ``youtube.com/`` or
            ``youtu.be/`` are fetched with pytubefix, anything else with
            ``requests``.
        timeout: Timeout in seconds passed to ``requests.get`` for direct
            downloads.

    Returns:
        Path of the extracted ``audio.wav`` inside a freshly created
        temporary directory. The caller owns that file and directory. The
        downloaded video file is removed before returning.

    Raises:
        ImportError: If a YouTube URL is given and pytubefix is missing.
        RuntimeError: If the YouTube download fails or the video has no
            audio track.
        requests.RequestException: If the direct download fails.
    """
    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "video.mp4")
    audio_path = os.path.join(temp_dir, "audio.wav")

    try:
        if "youtube.com/" in url or "youtu.be/" in url:
            _download_youtube_video(url, temp_dir)
        else:
            _download_direct_video(url, video_path, timeout)

        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                raise RuntimeError("The downloaded video does not contain an audio track.")
            clip.audio.write_audiofile(audio_path, logger=None)
        finally:
            # Always release the reader, even when extraction fails.
            clip.close()
    except BaseException:
        # Nothing usable was produced: drop the whole temp directory
        # (video, partial audio) instead of leaking it, then re-raise.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # Success: only the audio file is needed from here on.
    if os.path.exists(video_path):
        os.remove(video_path)
    return audio_path
|
|
|
|
|
|
|
|
|
|
|
def _remove_extracted_audio(audio_path):
    """Best-effort removal of the extracted audio file and its directory."""
    try:
        if os.path.exists(audio_path):
            os.remove(audio_path)
        parent = os.path.dirname(audio_path)
        if parent:
            # os.rmdir only succeeds on an empty directory, so unrelated
            # files are never deleted.
            os.rmdir(parent)
    except OSError:
        # Cleanup is deliberately best-effort: a leftover temp file must
        # not turn a finished analysis into an error for the user.
        pass


def analyze_from_url(url):
    """Gradio interface function: analyze the accent in a video at ``url``.

    Args:
        url: Public video URL entered by the user. ``None``, empty and
            whitespace-only values are treated as "no URL".

    Returns:
        A 3-tuple of strings ``(accent, confidence, explanation)``. On
        missing input or any processing failure the tuple carries a
        user-facing message instead of raising, so the UI always has
        something to display.
    """
    url = (url or "").strip()
    if not url:
        return "Please enter a video URL.", "N/A", "No URL provided."

    audio_path = None
    try:
        audio_path = download_and_extract_audio(url)
        results = AccentAnalyzer().analyze_accent(audio_path)
        return (
            results["accent_type"],
            f"{results['confidence']:.1f}%",
            results["explanation"],
        )
    except Exception as e:
        # Top-level UI boundary: report the failure in the output fields.
        return (
            "Error",
            "0%",
            f"Error processing video/audio: {e}. Please ensure the URL is valid and publicly accessible.",
        )
    finally:
        # Runs on success and on failure, so the audio never leaks when
        # the analysis itself raises.
        if audio_path is not None:
            _remove_extracted_audio(audio_path)
|
|
|
|
|
# --- Gradio UI wiring -------------------------------------------------------
# One text input (the video URL) feeds analyze_from_url; its three returned
# strings are shown in the three output text boxes, in order.
_url_input = gr.Textbox(
    label="Enter Public Video URL (YouTube or direct MP4)",
    placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or https://samplelib.com/lib/preview/mp4/sample-5s.mp4",
)

_result_outputs = [
    gr.Textbox(label="Detected Accent"),
    gr.Textbox(label="Confidence Score"),
    gr.Textbox(label="Explanation"),
]

# Text passed to the interface as its description (uses Markdown markup).
_interface_description = """
Paste a public video URL (YouTube or direct MP4) to detect the English accent and confidence score.

**Important Notes:**
* This is a **DEMO** using a simulated accent analysis model, not a real machine learning model.
* It uses `pytubefix` for YouTube links and `requests`/`moviepy` for direct MP4s.
* YouTube video extraction can sometimes be temperamental due to YouTube's changing policies or region restrictions. Direct MP4 links are generally more reliable.
* **Sample MP4 URL for testing:** `https://samplelib.com/lib/preview/mp4/sample-5s.mp4`
"""

iface = gr.Interface(
    fn=analyze_from_url,
    inputs=_url_input,
    outputs=_result_outputs,
    title="English Accent Analyzer (Rule-Based Demo)",
    description=_interface_description,
)


# Start the local web server only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch(debug=True, share=False)
|
|