# -*- coding: utf-8 -*-
"""Accent.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1yprWdRUXGqD4QIFAZuMwdyTuwA2Hhdvj
"""

# Install the needed libraries first. "!pip ..." is notebook-only syntax and is
# a SyntaxError in a plain .py file, so it is kept here as a comment. Run it in
# its own notebook cell (with a leading "!") or from a shell:
#   pip install --quiet yt-dlp ffmpeg-python torch torchaudio transformers streamlit speechbrain

import glob
import os
import subprocess

import torch
import torchaudio
import yt_dlp
from speechbrain.pretrained import EncoderClassifier

# Paste your video URL here (YouTube or direct MP4 link).
VIDEO_URL = "https://youtu.be/DDjWTWHHkpk?si=oIj6Fuy8Hg2E8U_l"  # Example: replace with your actual link!


def _download_outputs(out_path):
    """Return the existing files named *out_path* or *out_path*.<something>."""
    matches = set(glob.glob(glob.escape(out_path) + ".*"))
    if os.path.exists(out_path):
        matches.add(out_path)
    return sorted(path for path in matches if os.path.isfile(path))


def _resolve_downloaded_path(out_path):
    """Return the file a finished download actually produced for *out_path*.

    The downloader may append the real container extension to the requested
    name (e.g. ``input_video.mp4.webm``), so the exact name is preferred and
    extension-suffixed siblings are used as a fallback.

    Raises:
        FileNotFoundError: if no finished download can be found.
    """
    finished = [
        path
        for path in _download_outputs(out_path)
        # Skip the downloader's partial/bookkeeping files.
        if not path.endswith((".part", ".ytdl"))
    ]
    if out_path in finished:
        return out_path
    if finished:
        return finished[0]
    raise FileNotFoundError(f"No downloaded file found for '{out_path}'.")


def download_video(url, out_path="input_video.mp4"):
    """Download a video from YouTube or from a direct MP4 link.

    Args:
        url: YouTube URL (handled by yt-dlp) or a direct file URL (handled by
            wget).
        out_path: Requested output filename.

    Returns:
        The path of the file that was actually written. This is ``out_path``
        unless the downloader appended a container extension to it.

    Raises:
        subprocess.CalledProcessError: if wget exits with a non-zero status.
        FileNotFoundError: if no output file exists after the download.
    """
    # Remove leftovers from earlier runs so a stale file is never mistaken for
    # (or reused instead of) the new download.
    for stale in _download_outputs(out_path):
        os.remove(stale)

    if "youtube.com" in url or "youtu.be" in url:
        ydl_opts = {"outtmpl": out_path}
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    else:
        # Argument list (no shell): the URL cannot be interpreted as shell
        # syntax, and check=True surfaces a failed download immediately.
        subprocess.run(["wget", "-O", out_path, url], check=True)

    return _resolve_downloaded_path(out_path)


def extract_audio(video_path, audio_path="audio.wav"):
    """Extract a 16 kHz mono WAV track from a video file using ffmpeg.

    Args:
        video_path: Path of the input video.
        audio_path: Path of the WAV file to create (replaced if it exists).

    Returns:
        ``audio_path``.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status.
        FileNotFoundError: if ffmpeg reports success but the audio file is
            missing (or if the ffmpeg executable itself is not installed).
    """
    # Remove any previous output so the existence check below is meaningful.
    if os.path.exists(audio_path):
        os.remove(audio_path)

    # Argument list (no shell) so paths containing spaces or shell
    # metacharacters are passed through verbatim.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",  # resample to 16 kHz
        "-ac", "1",      # downmix to mono
        "-vn",           # drop the video stream
        audio_path,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        print(f"FFmpeg command failed with error code {result.returncode}")
        print("FFmpeg stderr:")
        print(result.stderr)
        raise RuntimeError("Failed to extract audio using FFmpeg. See stderr above.")

    print("FFmpeg stdout:")
    print(result.stdout)
    print("FFmpeg stderr:")
    print(result.stderr)  # ffmpeg often writes info/warnings to stderr

    # Check that the audio file was actually created.
    if not os.path.exists(audio_path):
        raise FileNotFoundError(
            f"Audio file '{audio_path}' was not created after FFmpeg execution."
        )
    return audio_path


video_file = download_video(VIDEO_URL)
print(f"Downloaded video: {video_file}")

audio_file = extract_audio(video_file)
print(f"Extracted audio file: {audio_file}")

# Download the pre-trained SpeechBrain classifier.
# NOTE(review): the checkpoint name ("lang-id-commonlanguage_ecapa") suggests a
# language-identification model rather than an English-accent model - confirm
# that its labels are what this notebook intends to report.
accent_model = EncoderClassifier.from_hparams(
    source="speechbrain/lang-id-commonlanguage_ecapa",
    savedir="tmp_accent_model",
)

# Debugging aid: list the working directory to see which files were produced.
print(os.listdir("."))

# Debugging aid: the video path used by the manual re-extraction step further
# down. It points at the file the download step really produced instead of a
# hard-coded, Colab-specific location.
video_path = video_file
import os
import subprocess

# Debugging aid: run the extraction once more with ffmpeg's output going
# straight to the console, so that any error message is visible.
audio_path = "audio.wav"
# Argument list (no shell) so a path with spaces or shell metacharacters
# cannot break or alter the command.
debug_extraction = subprocess.run(
    ["ffmpeg", "-y", "-i", video_path, "-ar", "16000", "-ac", "1", "-vn", audio_path],
    check=False,  # this is a diagnostic step: report the failure, don't abort
)
if debug_extraction.returncode != 0:
    print(f"ffmpeg exited with code {debug_extraction.returncode}")

# See if audio.wav was created.
print(os.listdir("."))

# Check whether the file now exists and, if so, how large it is.
audio_exists = os.path.exists(audio_path)
print("audio.wav exists:", audio_exists)
if audio_exists:
    print("audio.wav size (bytes):", os.path.getsize(audio_path))

# Load the audio file (expected to be 16 kHz mono, as produced by ffmpeg above).
signal, fs = torchaudio.load(audio_file)

# If the file has several channels, keep only the first one.
if signal.shape[0] > 1:
    signal = signal[:1]

# Run classification.
# NOTE(review): the indexing below assumes classify_batch returns a tuple whose
# element 1 holds the best score and element 3 the text label, per batch item -
# confirm against the SpeechBrain EncoderClassifier documentation.
prediction = accent_model.classify_batch(signal)
pred_label = prediction[3][0]
pred_scores = prediction[1][0]

# Express the score as a percentage.
# NOTE(review): this treats the raw model score as a probability in [0, 1];
# verify the score's scale before presenting it to users as a confidence.
confidence = float(pred_scores.max()) * 100

# Display the top label and its score.
print(f"Predicted Accent: {pred_label}")
print(f"Confidence: {confidence:.1f}%")

print("Possible accent labels:", accent_model.hparams.label_encoder.lab2ind.keys())

explanation = (
    f"The speaker's English accent was classified as '{pred_label}' "
    f"with a confidence score of {confidence:.1f}%. "
    f"This means the model is {confidence:.0f}% sure the person sounds "
    "most similar to this accent group."
)
print(explanation)

# Source of the small Streamlit web UI that is saved as app.py below.
# Compared with a naive version it:
#   * runs yt-dlp / wget / ffmpeg through argument lists instead of shell
#     strings, so text typed into the URL box cannot inject shell commands;
#   * stops with a visible error when a download or the extraction fails;
#   * removes leftovers from the previous click and looks up the file the
#     downloader really wrote (it may append an extension to the name).
APP_SOURCE = '''import glob
import os
import subprocess

import streamlit as st
import torchaudio
from speechbrain.pretrained import EncoderClassifier

VIDEO_PATH = "input_video.mp4"
AUDIO_PATH = "audio.wav"


def download_outputs(base_path):
    """Return existing files named base_path or base_path.<something>."""
    matches = set(glob.glob(glob.escape(base_path) + ".*"))
    if os.path.exists(base_path):
        matches.add(base_path)
    return sorted(path for path in matches if os.path.isfile(path))


def run_tool(args, failure_message):
    """Run an external tool without a shell; show an error and stop on failure."""
    try:
        result = subprocess.run(args, capture_output=True, text=True)
    except FileNotFoundError:
        st.error(f"{failure_message} The program {args[0]} is not installed.")
        st.stop()
    if result.returncode != 0:
        st.error(failure_message)
        st.code(result.stderr[-2000:] or "(no error output)")
        st.stop()


st.title("🗣️ English Accent Classifier (Proof of Concept)")

url = st.text_input("Enter public video URL (YouTube or direct MP4):").strip()

if st.button("Analyze"):
    if not url:
        st.warning("Please enter a video URL first.")
        st.stop()

    with st.spinner("Downloading video..."):
        # Drop files from an earlier click so they are never analysed again.
        for stale in download_outputs(VIDEO_PATH):
            os.remove(stale)
        # "--" ends option parsing, so the URL is never read as a flag.
        if "youtube.com" in url or "youtu.be" in url:
            run_tool(["yt-dlp", "-o", VIDEO_PATH, "--", url], "Video download failed.")
        else:
            run_tool(["wget", "-O", VIDEO_PATH, "--", url], "Video download failed.")
        finished = [
            path
            for path in download_outputs(VIDEO_PATH)
            if not path.endswith((".part", ".ytdl"))
        ]
        if not finished:
            st.error("Video download failed: no file was produced.")
            st.stop()
        video_file = VIDEO_PATH if VIDEO_PATH in finished else finished[0]

    with st.spinner("Extracting audio..."):
        run_tool(
            ["ffmpeg", "-y", "-i", video_file, "-ar", "16000", "-ac", "1", "-vn", AUDIO_PATH],
            "Audio extraction failed.",
        )

    with st.spinner("Classifying accent..."):
        accent_model = EncoderClassifier.from_hparams(
            source="speechbrain/lang-id-commonlanguage_ecapa",
            savedir="tmp_accent_model",
        )
        signal, fs = torchaudio.load(AUDIO_PATH)
        # Keep only the first channel if the file is not mono.
        if signal.shape[0] > 1:
            signal = signal[0].unsqueeze(0)
        prediction = accent_model.classify_batch(signal)
        pred_label = prediction[3][0]
        pred_scores = prediction[1][0]
        confidence = float(pred_scores.max()) * 100

    st.success(f"Predicted Accent: {pred_label} ({confidence:.1f}%)")
    st.info(f"The model is {confidence:.0f}% confident this is a {pred_label} English accent.")
'''

# Save as app.py for launching a simple web UI. The source contains non-ASCII
# characters (an emoji), so the encoding is set explicitly instead of relying
# on the platform default.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(APP_SOURCE)

print("Streamlit app code saved as app.py!")
print("To launch the UI, run: !streamlit run app.py --server.headless true --server.port 8501")