|
|
|
import math |
|
import os |
|
|
|
from speechbrain.inference.classifiers import EncoderClassifier |
|
import torch |
|
import torchaudio |
|
import numpy |
|
import scipy |
|
from tqdm import tqdm |
|
from huggingface_hub import hf_hub_download |
|
import torch.nn.functional as F |
|
|
|
import warnings |
|
# Blanket-suppress every warning category for the whole process so that the
# console output consists only of this module's own progress messages.
warnings.filterwarnings("ignore")

# Process-wide handle to the classifier. It stays None until
# load_accent_model() succeeds, and is reset to None if loading fails.
accent_classifier = None

# Project-local Hugging Face cache directory, resolved against the working
# directory at import time and created immediately if it does not exist.
HF_CACHE_DIR = os.path.join(os.getcwd(), '.hf_cache')
os.makedirs(HF_CACHE_DIR, exist_ok=True)
print(f"Hugging Face cache directory created/ensured at: {HF_CACHE_DIR}")
|
|
|
|
|
def load_accent_model():
    """Load the SpeechBrain accent classifier into ``accent_classifier``.

    Intended to be called once at application startup. The call is a no-op
    when the module-level ``accent_classifier`` is already populated.

    Any exception raised while loading is caught here: the error and a list
    of troubleshooting steps are printed, and ``accent_classifier`` is reset
    to ``None`` so callers can detect the failure by inspecting the global
    instead of handling an exception.

    Returns:
        None. The loaded model is published through the ``accent_classifier``
        global.
    """
    global accent_classifier
    if accent_classifier is not None:
        return

    model_source = "Jzuluaga/accent-id-commonaccent_ecapa"
    model_savedir = "pretrained_models/accent-id-commonaccent_ecapa"

    try:
        print("Loading SpeechBrain accent classification model... This may take a moment.")

        # Kept for any component that reads HF_HOME lazily. huggingface_hub
        # resolves its cache paths when it is imported, and that import has
        # already happened at the top of this file, so the variable by itself
        # does not redirect downloads.
        os.environ['HF_HOME'] = HF_CACHE_DIR
        print(f"HF_HOME environment variable set to: {os.environ['HF_HOME']}")

        accent_classifier = EncoderClassifier.from_hparams(
            source=model_source,
            savedir=model_savedir,
            # Pass the cache location explicitly so the project-local cache
            # is actually used. "<HF_HOME>/hub" mirrors the layout that
            # huggingface_hub derives from HF_HOME.
            huggingface_cache_dir=os.path.join(HF_CACHE_DIR, "hub"),
        )
        print("SpeechBrain model loaded successfully.")
    except Exception as e:
        # Deliberate best-effort: report and leave the global unset rather
        # than crash the application at startup.
        print(f"Error loading SpeechBrain model: {e}")
        print("\n--------------------------------------------------------------")
        print("Troubleshooting Steps for Model Loading Errors:")
        print("1. **Ensure Python Environment is Clean:** If you haven't, create a NEW virtual environment and install dependencies there.")
        print(" Example (in your project directory):")
        print(" `python -m venv .venv` (then activate it before installing anything)")
        print("2. **Install/Upgrade ALL Dependencies:**")
        print(" `pip uninstall speechbrain transformers torchaudio huggingface_hub numpy scipy tqdm Flask Flask-Executor yt-dlp -y`")
        print(" `pip install --upgrade speechbrain transformers torchaudio huggingface_hub numpy scipy tqdm Flask Flask-Executor yt-dlp`")
        print(f"3. **Clear Cached Model Files:** Delete `{model_savedir}` and `{HF_CACHE_DIR}`.")
        print(" This forces a fresh download. Then try running your app again.")
        print("4. **Check FFmpeg Installation:** Ensure FFmpeg is installed on your system and its `bin` directory is added to your system's PATH.")
        print("--------------------------------------------------------------\n")
        accent_classifier = None
|
|
|
|
|
def detect_accent(audio_path, task_id):
    """Classify the speaker's accent in an audio file with the loaded model.

    Args:
        audio_path: Path of the audio file passed to the classifier.
        task_id: Identifier used only to prefix the progress messages.

    Returns:
        A 4-tuple ``(accent, confidence, summary, error)``.

        On success ``error`` is ``None``; ``accent`` is the first label
        returned by the classifier; ``confidence`` is the softmax probability
        of the predicted class multiplied by 100 (the result of indexing a
        tensor, i.e. a 0-dim torch tensor rather than a Python float); and
        ``summary`` is a fixed explanatory sentence.

        On failure (model not loaded, file missing or empty, or any exception
        raised during classification) the first three items are ``None`` and
        ``error`` holds a human-readable message. No exception propagates.
    """

    def _failed(message):
        # Uniform failure shape: only the error slot is populated.
        return None, None, None, message

    if accent_classifier is None:
        return _failed("Accent classification model not loaded. Please ensure the model loads correctly at startup.")

    print(f"Task {task_id}: Analyzing accent from {audio_path}...")

    try:
        # The input is used as-is; no conversion step is applied here.
        wav_path = audio_path
        print(f"Task {task_id}: Final audio path for SpeechBrain: {wav_path}")

        # Reject missing or zero-byte files before invoking the model.
        if not os.path.exists(wav_path):
            return _failed(f"Audio file not found at: {wav_path}")
        if os.path.getsize(wav_path) == 0:
            return _failed(f"Audio file is empty at: {wav_path}")

        out_prob, score, index, text_lab = accent_classifier.classify_file(wav_path)

        # Raw classifier outputs, dumped for debugging.
        print(f"out_prob: {out_prob}, score: {score}, index: {index}, text_lab: {text_lab}")

        # Normalise the classifier output over the last axis, then read the
        # entry of the predicted class for the first (only) item.
        class_probs = F.softmax(out_prob, dim=-1)
        predicted_class = index.item()
        confidence = class_probs[0, predicted_class] * 100

        accent = text_lab[0]
        summary = "Analysis complete. The detected accent is based on the dominant English accent identified in the audio segment provided."

        print(f"Task {task_id}: Accent: {accent}, Confidence: {confidence:.2f}%")
        return accent, confidence, summary, None

    except Exception as e:
        error_message = f"An error occurred during accent detection: {e}"
        print(f"Task {task_id}: {error_message}")
        return _failed(error_message)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: ask for a file name under temp_files/, load the
    # model, classify that file and print the outcome.

    # Repeats the import-time cache setup, including its message.
    HF_CACHE_DIR = os.path.join(os.getcwd(), '.hf_cache')
    os.makedirs(HF_CACHE_DIR, exist_ok=True)
    print(f"Hugging Face cache directory created/ensured at: {HF_CACHE_DIR}")

    input_file = input("Enter audio: ")
    sample_audio_path = "temp_files/" + input_file
    demo_task_id = "test_accent_detection_123"

    load_accent_model()

    if not accent_classifier:
        print("Model could not be loaded, skipping accent detection example.")
    else:
        accent, confidence, summary, error = detect_accent(sample_audio_path, demo_task_id)
        if accent:
            print("\n--- Detection Result ---")
            print(f"Detected Accent: {accent}")
            print(f"Confidence: {confidence:.2f}%")
            print(f"Summary: {summary}")
        else:
            print("\n--- Detection Error ---")
            print(f"Error: {error}")
|
|