Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """Accent.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1yprWdRUXGqD4QIFAZuMwdyTuwA2Hhdvj | |
| """ | |
| # Install needed libraries (run this cell first!) | |
| !pip install --quiet yt-dlp ffmpeg-python torch torchaudio transformers streamlit speechbrain | |
| import os | |
| import subprocess | |
| import torchaudio | |
| import torch | |
| from speechbrain.pretrained import EncoderClassifier | |
| import yt_dlp | |
# Source video for the analysis: a YouTube URL or a direct link to an MP4 file.
# The value below is only an example; paste your own link here.
VIDEO_URL = "https://youtu.be/DDjWTWHHkpk?si=oIj6Fuy8Hg2E8U_l"  # Example: Replace with your actual link!
def download_video(url, out_path="input_video.mp4"):
    """
    Download a video from YouTube or from a direct file link.

    URLs containing "youtube.com" or "youtu.be" are fetched with yt-dlp;
    every other URL is fetched with wget.

    Args:
        url: Address of the video to download.
        out_path: Desired filename for the downloaded video.

    Returns:
        The filename of the downloaded video. This is ``out_path`` when that
        file exists; otherwise the newest file named ``out_path`` plus an
        extra extension (e.g. ``input_video.mp4.webm``) is returned.

    Raises:
        RuntimeError: If wget cannot be started or exits with an error.
        FileNotFoundError: If no downloaded file can be found afterwards.
    """
    import glob

    # If it's a YouTube link, use yt-dlp
    if "youtube.com" in url or "youtu.be" in url:
        ydl_opts = {'outtmpl': out_path}
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    else:
        # Pass an argument list (no shell) so that characters such as
        # '&', ';' or spaces in the URL cannot be interpreted as shell syntax.
        try:
            result = subprocess.run(["wget", "-O", out_path, url])
        except OSError as err:
            raise RuntimeError(f"Could not run wget: {err}") from err
        if result.returncode != 0:
            raise RuntimeError(
                f"wget failed with exit code {result.returncode} for {url!r}"
            )

    if os.path.exists(out_path):
        return out_path

    # The downloader may append the real container extension to the
    # requested name; look for such a file instead of returning a path
    # that does not exist. Partial-download artefacts are skipped.
    candidates = [
        path
        for path in glob.glob(glob.escape(out_path) + ".*")
        if not path.endswith((".part", ".ytdl"))
    ]
    if not candidates:
        raise FileNotFoundError(
            f"Download finished but no file matching '{out_path}' was found."
        )
    return max(candidates, key=os.path.getmtime)
# Download the configured video and report the filename returned by
# download_video().
video_file = download_video(VIDEO_URL)
print(f"Downloaded video: {video_file}")
def extract_audio(video_path, audio_path="audio.wav"):
    """
    Extract the audio track of a video file using ffmpeg.

    The audio is written as a 16 kHz, single-channel file at ``audio_path``.

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the audio file to create. An existing file at
            this path is removed first.

    Returns:
        The filename of the audio file (``audio_path``).

    Raises:
        RuntimeError: If ffmpeg cannot be started, exits with an error, or
            does not produce the output file.
    """
    # Remove any stale output so the existence check below reflects this run
    if os.path.exists(audio_path):
        os.remove(audio_path)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to ffmpeg unchanged.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",
        "-ac", "1",
        "-vn",
        audio_path,
    ]
    try:
        returncode = subprocess.call(cmd)
    except OSError as err:
        raise RuntimeError(f"Could not run ffmpeg: {err}") from err

    if returncode != 0:
        raise RuntimeError(
            f"FFmpeg failed with exit code {returncode} while reading '{video_path}'."
        )
    if not os.path.exists(audio_path):
        raise RuntimeError(
            f"Audio file '{audio_path}' was not created after FFmpeg execution."
        )
    return audio_path
# Extract the audio track of the downloaded video and report the filename
# returned by extract_audio().
audio_file = extract_audio(video_file)
print(f"Extracted audio file: {audio_file}")
def extract_audio(video_path, audio_path="/content/audio.wav"):
    """
    Extract the audio track of a video file using ffmpeg, with diagnostics.

    The audio is written as a 16 kHz, single-channel file at ``audio_path``.
    ffmpeg's captured stdout and stderr are printed so that failures can be
    diagnosed from the notebook output.

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the audio file to create. An existing file at
            this path is removed first.

    Returns:
        The filename of the audio file (``audio_path``).

    Raises:
        RuntimeError: If ffmpeg cannot be started or exits with a non-zero
            status.
        FileNotFoundError: If ffmpeg reports success but the output file
            does not exist.
    """
    # Remove any stale output so the existence check below reflects this run
    if os.path.exists(audio_path):
        os.remove(audio_path)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to ffmpeg unchanged.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",
        "-ac", "1",
        "-vn",
        audio_path,
    ]
    try:
        # Capture the output so it can be printed and the exit code checked
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as err:
        # Without a shell, a missing ffmpeg binary surfaces as OSError; keep
        # reporting it as the RuntimeError used for every ffmpeg failure.
        raise RuntimeError(f"Failed to run FFmpeg: {err}") from err

    if result.returncode != 0:
        print(f"FFmpeg command failed with error code {result.returncode}")
        print("FFmpeg stderr:")
        print(result.stderr)
        raise RuntimeError("Failed to extract audio using FFmpeg. See stderr above.")

    print("FFmpeg stdout:")
    print(result.stdout)
    print("FFmpeg stderr:")
    print(result.stderr)  # ffmpeg often outputs info/warnings to stderr

    # Check if the audio file was actually created
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file '{audio_path}' was not created after FFmpeg execution.")
    return audio_path
# Load the pre-trained SpeechBrain classifier; `savedir` points at the local
# "tmp_accent_model" directory.
# NOTE(review): the model id ("lang-id-commonlanguage_ecapa") reads like a
# spoken-language identification model rather than an English-accent
# classifier. Confirm that its labels are the accent groups this script
# reports.
accent_model = EncoderClassifier.from_hparams(
    source="speechbrain/lang-id-commonlanguage_ecapa",
    savedir="tmp_accent_model"
)
"""Used to Debuging the code"""
import os

# Show the working directory so the downloaded video's real name is visible
print(os.listdir('.'))

"""TO check the debug file path"""
# Re-run the extraction directly against a hard-coded file so that ffmpeg's
# own output appears in the notebook
video_path = "/content/input_video.mp4.webm"  # or whatever your filename is!
audio_path = "audio.wav"
os.system(f"ffmpeg -y -i {video_path} -ar 16000 -ac 1 -vn {audio_path}")

# List the directory again to see whether audio.wav appeared
print(os.listdir('.'))

"""Check the Size of the file"""
# Report whether the audio file exists and, if so, how large it is
audio_exists = os.path.exists(audio_path)
print("audio.wav exists:", audio_exists)
if audio_exists:
    print("audio.wav size (bytes):", os.path.getsize(audio_path))
# Load the audio file (the classifier is fed 16 kHz mono audio)
signal, fs = torchaudio.load(audio_file)

# If stereo, take only the first channel
if signal.shape[0] > 1:
    signal = signal[0].unsqueeze(0)

# Enforce the 16 kHz requirement instead of assuming it: audio produced by
# another tool or with other ffmpeg options would otherwise be classified at
# the wrong sample rate without any warning.
if fs != 16000:
    signal = torchaudio.functional.resample(signal, orig_freq=fs, new_freq=16000)
    fs = 16000

# Run classification
prediction = accent_model.classify_batch(signal)
pred_label = prediction[3][0]   # label of the first (only) item in the batch
pred_scores = prediction[1][0]  # score of the first (only) item in the batch

# Convert score to percentage
# NOTE(review): this treats the model score as a probability in [0, 1].
# Confirm what scale classify_batch() scores are on before presenting the
# value as "percent sure".
confidence = float(pred_scores.max()) * 100

# Display top label and score
print(f"Predicted Accent: {pred_label}")
print(f"Confidence: {confidence:.1f}%")
print("Possible accent labels:", accent_model.hparams.label_encoder.lab2ind.keys())

explanation = f"The speaker's English accent was classified as '{pred_label}' with a confidence score of {confidence:.1f}%. This means the model is {confidence:.0f}% sure the person sounds most similar to this accent group."
print(explanation)
# Save as app.py in Colab for launching a simple web UI.
#
# The generated app takes a URL typed into a web form, so it is treated as
# untrusted input: external tools are started from argument lists (never via
# a shell string), the URL must be http(s), and tool failures are reported
# instead of being ignored. Each request works in its own temporary
# directory so a previous download can never be analysed by mistake, and the
# model is loaded once per server process rather than on every click.
# The file is written as UTF-8 explicitly because the title contains an emoji.
with open("app.py", "w", encoding="utf-8") as f:
    f.write('''
import glob
import os
import subprocess
import tempfile
from urllib.parse import urlparse

import streamlit as st
import torchaudio
from speechbrain.pretrained import EncoderClassifier


@st.cache_resource
def load_model():
    """Load the classifier once and reuse it across reruns."""
    return EncoderClassifier.from_hparams(
        source="speechbrain/lang-id-commonlanguage_ecapa",
        savedir="tmp_accent_model"
    )


def run_tool(args):
    """Run an external program from an argument list; raise if it fails."""
    result = subprocess.run(args, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"{args[0]} failed: {result.stderr[-500:]}")


def is_http_url(url):
    """Return True when url is an absolute http(s) URL."""
    parts = urlparse(url)
    return parts.scheme in ("http", "https") and bool(parts.netloc)


def download_video(url, workdir):
    """Download url into workdir and return the path of the saved file."""
    if "youtube.com" in url or "youtu.be" in url:
        # Let yt-dlp choose the extension, then look the file up below.
        template = os.path.join(workdir, "input_video.%(ext)s")
        run_tool(["yt-dlp", "-o", template, url])
    else:
        run_tool(["wget", "-O", os.path.join(workdir, "input_video.mp4"), url])
    pattern = os.path.join(glob.escape(workdir), "input_video.*")
    downloads = [path for path in glob.glob(pattern) if not path.endswith(".part")]
    if not downloads:
        raise RuntimeError("Download finished but no video file was found.")
    return max(downloads, key=os.path.getsize)


st.title("🗣️ English Accent Classifier (Proof of Concept)")
url = st.text_input("Enter public video URL (YouTube or direct MP4):").strip()

if st.button("Analyze"):
    if not is_http_url(url):
        st.error("Please enter a valid http(s) video URL.")
    else:
        try:
            with tempfile.TemporaryDirectory() as workdir:
                with st.spinner("Downloading video..."):
                    video_path = download_video(url, workdir)
                with st.spinner("Extracting audio..."):
                    audio_path = os.path.join(workdir, "audio.wav")
                    run_tool([
                        "ffmpeg", "-y", "-i", video_path,
                        "-ar", "16000", "-ac", "1", "-vn", audio_path,
                    ])
                with st.spinner("Classifying accent..."):
                    accent_model = load_model()
                    signal, fs = torchaudio.load(audio_path)
                    if signal.shape[0] > 1:
                        signal = signal[0].unsqueeze(0)
                    prediction = accent_model.classify_batch(signal)
                    pred_label = prediction[3][0]
                    pred_scores = prediction[1][0]
                    confidence = float(pred_scores.max()) * 100
        except (RuntimeError, OSError) as err:
            st.error(f"Analysis failed: {err}")
        else:
            st.success(f"Predicted Accent: {pred_label} ({confidence:.1f}%)")
            st.info(f"The model is {confidence:.0f}% confident this is a {pred_label} English accent.")
''')

print("Streamlit app code saved as app.py!")
print("To launch the UI, run: !streamlit run app.py --server.headless true --server.port 8501")