Spaces:

tasmimulhuda
/

english-accent-detection-from-video-url

Sleeping

App Files Files Community

english-accent-detection-from-video-url / accent_analysis.py

tasmimulhuda

first

15a1f73 2 months ago

raw

history blame contribute delete

7.2 kB


	import math
	import os
	# SpeechBrain and its dependencies
	from speechbrain.inference.classifiers import EncoderClassifier # Updated import as requested
	import torch
	import torchaudio
	import numpy
	import scipy
	from tqdm import tqdm
	from huggingface_hub import hf_hub_download
	import torch.nn.functional as F

	import warnings
	warnings.filterwarnings("ignore")
	# --- Global variable for the accent classification model ---
	accent_classifier = None

	# Define a temporary directory for Hugging Face cache within the current working directory
	HF_CACHE_DIR = os.path.join(os.getcwd(), '.hf_cache')
	os.makedirs(HF_CACHE_DIR, exist_ok=True) # Create the directory if it doesn't exist
	print(f"Hugging Face cache directory created/ensured at: {HF_CACHE_DIR}")

	# --- Function to load the SpeechBrain Accent Classification Model ---
	def load_accent_model():
	"""
	Loads the SpeechBrain accent classification model from Hugging Face.
	This function should be called once at application startup.
	"""
	global accent_classifier
	if accent_classifier is None:
	try:
	print("Loading SpeechBrain accent classification model... This may take a moment.")

	# Set the HF_HOME environment variable to redirect Hugging Face's cache.
	# This is crucial for resolving WinError 1314 permission issues on Windows,
	# as it ensures model files are downloaded and cached in a user-writable location.
	os.environ['HF_HOME'] = HF_CACHE_DIR
	print(f"HF_HOME environment variable set to: {os.environ['HF_HOME']}")

	# Using the ECAPA-TDNN based model for English accent classification
	accent_classifier = EncoderClassifier.from_hparams(
	source="Jzuluaga/accent-id-commonaccent_ecapa",
	savedir="pretrained_models/accent-id-commonaccent_ecapa" # A distinct directory for this model
	)
	print("SpeechBrain model loaded successfully.")
	except Exception as e:
	print(f"Error loading SpeechBrain model: {e}")
	print("\n--------------------------------------------------------------")
	print("Troubleshooting Steps for Model Loading Errors:")
	print("1. Ensure Python Environment is Clean: If you haven't, create a NEW virtual environment and install dependencies there.")
	print(" Example (in your project directory):")
	# print(" `python -m venv new_accent_env`")
	# print(" `.\new_accent_env\Scripts\activate` (Windows) or `source new_accent_env/bin/activate` (macOS/Linux)")
	print("2. Install/Upgrade ALL Dependencies:")
	print(" `pip uninstall speechbrain transformers torchaudio huggingface_hub numpy scipy tqdm Flask Flask-Executor yt-dlp -y`")
	print(" `pip install --upgrade speechbrain transformers torchaudio huggingface_hub numpy scipy tqdm Flask Flask-Executor`")
	# print("3. Manually Clear Hugging Face Cache: If issues persist, delete the folder: `D:\Accent Detection\rem_waste\.hf_cache`")
	print(" This forces a fresh download. Then try running your app again.")
	print("4. Check FFmpeg Installation: Ensure FFmpeg is installed on your system and its `bin` directory is added to your system's PATH.")
	print("--------------------------------------------------------------\n")
	accent_classifier = None # Set to None if loading fails

	# --- Function to detect accent from an audio file ---
	def detect_accent(audio_path, task_id):
	"""
	Analyzes the speaker's accent from the given audio file using the pre-loaded SpeechBrain model.
	Returns the classified accent, a confidence score, and a summary.
	"""
	if accent_classifier is None:
	return None, None, None, "Accent classification model not loaded. Please ensure the model loads correctly at startup."

	print(f"Task {task_id}: Analyzing accent from {audio_path}...")
	try:
	# The audio_path passed here from video_processing.py should already be an absolute path.
	# Removing os.path.abspath() to prevent potential double-concatenation issues
	# if SpeechBrain's internal file handling implicitly prepends CWD to an already absolute path.
	# processed_audio_path = os.path.abspath(audio_path) # Removed this line
	processed_audio_path = audio_path # Use the path directly as it should be absolute

	print(f"Task {task_id}: Final audio path for SpeechBrain: {processed_audio_path}")

	# Add checks for file existence and size
	if not os.path.exists(processed_audio_path):
	return None, None, None, f"Audio file not found at: {processed_audio_path}"
	if os.path.getsize(processed_audio_path) == 0:
	return None, None, None, f"Audio file is empty at: {processed_audio_path}"

	out_prob, score, index, text_lab = accent_classifier.classify_file(processed_audio_path)

	# Print raw outputs for debugging/understanding
	print(f"out_prob: {out_prob}, score: {score}, index: {index}, text_lab: {text_lab}")

	# Apply softmax to convert logits (out_prob) into probabilities.
	probabilities = F.softmax(out_prob, dim=-1)

	# Get the confidence for the predicted accent
	confidence = probabilities[0, index.item()] * 100

	accent = text_lab[0]
	summary = "Analysis complete. The detected accent is based on the dominant English accent identified in the audio segment provided."

	print(f"Task {task_id}: Accent: {accent}, Confidence: {confidence:.2f}%")
	return accent, confidence, summary, None

	except Exception as e:
	error_message = f"An error occurred during accent detection: {e}"
	print(f"Task {task_id}: {error_message}")
	return None, None, None, error_message



	# Example of how you would integrate this (not runnable on its own, requires an audio file)
	if __name__ == '__main__':
	# This part is for demonstration and would be integrated into your Flask app.
	# You would need an actual audio file (e.g., a .wav file) to test this.
	# For example:
	HF_CACHE_DIR = os.path.join(os.getcwd(), '.hf_cache')
	os.makedirs(HF_CACHE_DIR, exist_ok=True) # Create the directory if it doesn't exist
	print(f"Hugging Face cache directory created/ensured at: {HF_CACHE_DIR}")
	input_file = input("Enter audio: ")
	dummy_audio_path = f"temp_files/{input_file}"
	test_task_id = "test_accent_detection_123"

	# 1. Load the model first (typically done once at app startup)
	load_accent_model()

	# 2. Then call detect_accent with a valid audio path
	if accent_classifier:
	accent, confidence, summary, error = detect_accent(dummy_audio_path, test_task_id)
	if accent:
	print(f"\n--- Detection Result ---")
	print(f"Detected Accent: {accent}")
	print(f"Confidence: {confidence:.2f}%")
	print(f"Summary: {summary}")
	else:
	print(f"\n--- Detection Error ---")
	print(f"Error: {error}")
	else:
	print("Model could not be loaded, skipping accent detection example.")