Spaces:

Sajidahamed
/

AccentClassification

Sleeping

App Files Files Community

AccentClassification / accent.py

Sajidahamed

Upload 3 files

98c4440 verified 7 months ago

raw

history blame contribute delete

6.3 kB

	# -*- coding: utf-8 -*-
	"""Accent.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1yprWdRUXGqD4QIFAZuMwdyTuwA2Hhdvj
	"""

	# Install the needed libraries from a shell BEFORE running this script, e.g.:
	#   pip install --quiet yt-dlp ffmpeg-python torch torchaudio transformers streamlit speechbrain
	# (The notebook form "!pip install ..." is IPython shell magic and is a SyntaxError in a plain .py file.)
	# The code below also invokes the `ffmpeg` and `wget` executables, so both must be on PATH.

	import os
	import subprocess
	import torchaudio
	import torch
	from speechbrain.pretrained import EncoderClassifier
	import yt_dlp

	# Video to analyse. download_video() below fetches URLs containing "youtube.com"/"youtu.be"
	# with yt-dlp and treats anything else as a direct file link.
	VIDEO_URL = "https://youtu.be/DDjWTWHHkpk?si=oIj6Fuy8Hg2E8U_l" # Example: Replace with your actual link!

	def download_video(url, out_path="input_video.mp4"):
	    """Download a video from YouTube or from a direct media link.

	    Args:
	        url: Video URL. URLs containing "youtube.com" or "youtu.be" are
	            fetched with yt-dlp; anything else is fetched with wget.
	        out_path: Requested output filename.

	    Returns:
	        Path of the downloaded file. This is ``out_path`` when that file
	        exists after the download; otherwise the first file named
	        ``out_path`` plus an extra extension.

	    Raises:
	        subprocess.CalledProcessError: If wget exits with a non-zero status.
	        FileNotFoundError: If no downloaded file can be found afterwards
	            (or wget itself is not installed).
	    """
	    import glob  # local import: only needed for the fallback lookup below

	    if "youtube.com" in url or "youtu.be" in url:
	        ydl_opts = {'outtmpl': out_path}
	        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	            ydl.download([url])
	    else:
	        # Argument list, no shell: characters such as "&" or ";" in the URL
	        # are passed to wget verbatim instead of being interpreted.
	        # "--" stops a URL that starts with "-" from being read as an option.
	        subprocess.run(["wget", "-O", out_path, "--", url], check=True)

	    if os.path.exists(out_path):
	        return out_path

	    # The downloader can append a container extension to the requested name
	    # (e.g. "input_video.mp4.webm"); return that file rather than a path
	    # that does not exist.
	    candidates = sorted(glob.glob(glob.escape(out_path) + ".*"))
	    if candidates:
	        return candidates[0]
	    raise FileNotFoundError(f"Download finished but no file matching '{out_path}' was found.")

	# Fetch the configured video and report the path it was saved under
	video_file = download_video(VIDEO_URL)
	print("Downloaded video: {}".format(video_file))

	def extract_audio(video_path, audio_path="audio.wav"):
	    """Extract the audio track of a video as a 16 kHz mono WAV using ffmpeg.

	    Args:
	        video_path: Path of the input video file.
	        audio_path: Path of the WAV file to write. An existing file at this
	            path is deleted first.

	    Returns:
	        ``audio_path``.

	    Raises:
	        RuntimeError: If ffmpeg cannot be started or exits with a non-zero
	            status.
	    """
	    # Drop any stale output so a failed run cannot be mistaken for success
	    try:
	        os.remove(audio_path)
	    except FileNotFoundError:
	        pass

	    # Argument list, no shell: paths containing spaces or shell
	    # metacharacters reach ffmpeg unchanged.
	    cmd = [
	        "ffmpeg", "-y", "-i", video_path,
	        "-ar", "16000", "-ac", "1", "-vn", audio_path,
	    ]
	    try:
	        exit_code = subprocess.call(cmd)
	    except FileNotFoundError as err:
	        raise RuntimeError("Failed to extract audio: the ffmpeg executable was not found.") from err

	    if exit_code != 0:
	        raise RuntimeError(f"Failed to extract audio: ffmpeg exited with code {exit_code}.")
	    return audio_path

	# Pull the audio track out of the downloaded video and report the result path
	audio_file = extract_audio(video_file)
	print("Extracted audio file: {}".format(audio_file))

	def extract_audio(video_path, audio_path="/content/audio.wav"):
	    """Extract the audio track of a video as a 16 kHz mono WAV using ffmpeg.

	    This redefinition replaces the earlier ``extract_audio`` and adds
	    diagnostics: ffmpeg's output is captured and printed, and failures raise.

	    Args:
	        video_path: Path of the input video file.
	        audio_path: Path of the WAV file to write. An existing file at this
	            path is deleted first. The default points into ``/content``.

	    Returns:
	        ``audio_path``.

	    Raises:
	        RuntimeError: If ffmpeg cannot be started or exits with a non-zero
	            status.
	        FileNotFoundError: If ffmpeg reports success but ``audio_path`` does
	            not exist afterwards.
	    """
	    # Drop any stale output so the existence check below is meaningful
	    try:
	        os.remove(audio_path)
	    except FileNotFoundError:
	        pass

	    # Argument list, no shell: paths containing spaces or shell
	    # metacharacters reach ffmpeg unchanged.
	    cmd = [
	        "ffmpeg", "-y", "-i", video_path,
	        "-ar", "16000", "-ac", "1", "-vn", audio_path,
	    ]
	    try:
	        # errors="replace": undecodable bytes in ffmpeg's output must not
	        # abort the extraction with a UnicodeDecodeError.
	        result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
	    except FileNotFoundError as err:
	        raise RuntimeError("Failed to extract audio using FFmpeg: executable not found.") from err

	    if result.returncode != 0:
	        print(f"FFmpeg command failed with error code {result.returncode}")
	        print("FFmpeg stderr:")
	        print(result.stderr)
	        raise RuntimeError("Failed to extract audio using FFmpeg. See stderr above.")

	    print("FFmpeg stdout:")
	    print(result.stdout)
	    print("FFmpeg stderr:")
	    print(result.stderr)  # ffmpeg often outputs info/warnings to stderr

	    # A zero exit status alone does not prove the file was written
	    if not os.path.exists(audio_path):
	        raise FileNotFoundError(f"Audio file '{audio_path}' was not created after FFmpeg execution.")

	    return audio_path

	# Load the pre-trained SpeechBrain classifier used for the prediction below.
	# NOTE(review): the model id reads as a language-identification checkpoint
	# ("lang-id-commonlanguage"), not an English-accent one — confirm this is the intended model.
	accent_model = EncoderClassifier.from_hparams(
	source="speechbrain/lang-id-commonlanguage_ecapa",
	savedir="tmp_accent_model"
	)

	"""Used for debugging the code."""

	# Show the working directory's contents to confirm input_video.mp4 is there
	import os
	print(os.listdir(os.curdir))

	"""Check the path of the debug file."""

	# Re-run the ffmpeg extraction directly so that its console output is visible
	video_path = "/content/input_video.mp4.webm" # or whatever your filename is!
	audio_path = "audio.wav"

	ffmpeg_cmd = "ffmpeg -y -i {} -ar 16000 -ac 1 -vn {}".format(video_path, audio_path)
	os.system(ffmpeg_cmd)

	# List the directory again: audio.wav should now show up
	print(os.listdir('.'))

	"""Check the size of the file."""

	# Report whether the extracted audio exists and, if it does, how large it is
	import os
	audio_found = os.path.exists(audio_path)
	print("audio.wav exists:", audio_found)
	if audio_found:
	    print("audio.wav size (bytes):", os.path.getsize(audio_path))

	# Read the extracted waveform (the extraction step asks ffmpeg for 16 kHz mono)
	signal, fs = torchaudio.load(audio_file)

	# Keep only the first channel when the file holds more than one
	signal = signal[0].unsqueeze(0) if signal.shape[0] > 1 else signal

	# Classify the waveform; entries 1 and 3 of the result are read as score and label.
	# NOTE(review): presumably the result is (out_prob, score, index, text_lab) — confirm against SpeechBrain.
	prediction = accent_model.classify_batch(signal)
	pred_scores = prediction[1][0]
	pred_label = prediction[3][0]

	# Scale the score to a percentage.
	# NOTE(review): this assumes the score is a probability in [0, 1] — confirm for this model.
	confidence = 100 * float(pred_scores.max())

	# Report the top label, its score, and every label the model knows
	print("Predicted Accent: {}".format(pred_label))
	print("Confidence: {:.1f}%".format(confidence))
	print("Possible accent labels:", accent_model.hparams.label_encoder.lab2ind.keys())

	explanation = (
	    f"The speaker's English accent was classified as '{pred_label}' "
	    f"with a confidence score of {confidence:.1f}%. "
	    f"This means the model is {confidence:.0f}% sure the person sounds most similar to this accent group."
	)

	print(explanation)

	# Save as app.py in Colab for launching a simple web UI.
	# The generated app runs the same pipeline as above: download -> ffmpeg -> classify.
	import textwrap  # local to this cell: dedent() strips the literal's common indentation

	app_source = textwrap.dedent('''
	import glob
	import os
	import subprocess
	import tempfile

	import streamlit as st
	import torchaudio
	from speechbrain.pretrained import EncoderClassifier

	st.title("🗣️ English Accent Classifier (Proof of Concept)")


	@st.cache_resource
	def load_model():
	    """Load the classifier once and reuse it across reruns instead of on every click."""
	    return EncoderClassifier.from_hparams(
	        source="speechbrain/lang-id-commonlanguage_ecapa",
	        savedir="tmp_accent_model",
	    )


	def run_tool(args):
	    """Run an external tool without a shell; return True when it exits with status 0."""
	    try:
	        return subprocess.run(args).returncode == 0
	    except FileNotFoundError:
	        return False


	url = st.text_input("Enter public video URL (YouTube or direct MP4):")
	if st.button("Analyze"):
	    url = url.strip()
	    if not url.startswith(("http://", "https://")):
	        st.error("Please enter a valid http(s) video URL.")
	        st.stop()

	    # Per-request scratch directory: files from an earlier analysis can never be reused by mistake.
	    with tempfile.TemporaryDirectory() as workdir:
	        video_path = os.path.join(workdir, "input_video.mp4")
	        audio_path = os.path.join(workdir, "audio.wav")

	        with st.spinner("Downloading video..."):
	            # The URL comes from a web form: pass it as a single argument, never through a shell.
	            if "youtube.com" in url or "youtu.be" in url:
	                downloaded = run_tool(["yt-dlp", "-o", video_path, "--", url])
	            else:
	                downloaded = run_tool(["wget", "-O", video_path, "--", url])
	            # The downloader may append an extension to the requested name, so look it up.
	            video_files = sorted(glob.glob(glob.escape(video_path) + "*"))
	        if not downloaded or not video_files:
	            st.error("Could not download the video.")
	            st.stop()

	        with st.spinner("Extracting audio..."):
	            extracted = run_tool([
	                "ffmpeg", "-y", "-i", video_files[0],
	                "-ar", "16000", "-ac", "1", "-vn", audio_path,
	            ])
	        if not extracted or not os.path.exists(audio_path):
	            st.error("Could not extract audio from the video.")
	            st.stop()

	        with st.spinner("Classifying accent..."):
	            accent_model = load_model()
	            signal, _ = torchaudio.load(audio_path)
	            if signal.shape[0] > 1:
	                signal = signal[0].unsqueeze(0)
	            prediction = accent_model.classify_batch(signal)
	            pred_label = prediction[3][0]
	            pred_scores = prediction[1][0]
	            confidence = float(pred_scores.max()) * 100

	    st.success(f"Predicted Accent: {pred_label} ({confidence:.1f}%)")
	    st.info(f"The model is {confidence:.0f}% confident this is a {pred_label} English accent.")
	''').lstrip("\n")

	# Explicit UTF-8: the source contains an emoji, which a non-UTF-8 default encoding cannot write
	with open("app.py", "w", encoding="utf-8") as f:
	    f.write(app_source)

	print("Streamlit app code saved as app.py!")
	print("To launch the UI, run: !streamlit run app.py --server.headless true --server.port 8501")