# File size: 7,199 Bytes
# 15a1f73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# (Line-number gutter 1-141 left over from the web file viewer; not part of the program.)

import math
import os
# SpeechBrain and its dependencies
from speechbrain.inference.classifiers import EncoderClassifier # Updated import as requested
import torch
import torchaudio
import numpy
import scipy
from tqdm import tqdm
from huggingface_hub import hf_hub_download
import torch.nn.functional as F

import warnings

# Silence every warning category for the whole process.
warnings.filterwarnings("ignore")

# --- Module-level state ---
# Holds the loaded classifier; stays None until load_accent_model() succeeds.
accent_classifier = None

# Hugging Face cache location: a '.hf_cache' folder under the directory the
# process was started from. It is created at import time when missing.
HF_CACHE_DIR = os.path.join(os.getcwd(), '.hf_cache')
os.makedirs(name=HF_CACHE_DIR, exist_ok=True)
print(f"Hugging Face cache directory created/ensured at: {HF_CACHE_DIR}")

# --- Function to load the SpeechBrain Accent Classification Model ---
def load_accent_model():
    """
    Load the SpeechBrain accent classification model from Hugging Face.

    Intended to be called once at application startup. The loaded model is
    stored in the module-level ``accent_classifier``; calling this again after
    a successful load does nothing.

    Errors are not propagated: when loading fails, the exception text and a
    list of troubleshooting hints are printed and ``accent_classifier`` is
    left as ``None`` so callers can detect the failure.
    """
    global accent_classifier
    if accent_classifier is not None:
        # Already loaded; nothing to do.
        return

    try:
        print("Loading SpeechBrain accent classification model... This may take a moment.")

        # Export HF_HOME so that anything reading it from here on (including
        # child processes) uses the project-local, user-writable cache.
        os.environ['HF_HOME'] = HF_CACHE_DIR
        print(f"HF_HOME environment variable set to: {os.environ['HF_HOME']}")

        # Using the ECAPA-TDNN based model for English accent classification.
        # NOTE(review): huggingface_hub appears to resolve its cache paths when
        # it is first imported, which happens at the top of this file, so the
        # HF_HOME assignment above may come too late for this process. The
        # cache directory is therefore also passed explicitly.
        accent_classifier = EncoderClassifier.from_hparams(
            source="Jzuluaga/accent-id-commonaccent_ecapa",
            savedir="pretrained_models/accent-id-commonaccent_ecapa",  # A distinct directory for this model
            huggingface_cache_dir=HF_CACHE_DIR,
        )
        print("SpeechBrain model loaded successfully.")
    except Exception as e:
        # Best-effort by design: report the problem and leave the model unset
        # instead of crashing the application at startup.
        print(f"Error loading SpeechBrain model: {e}")
        print("\n--------------------------------------------------------------")
        print("Troubleshooting Steps for Model Loading Errors:")
        print("1. **Ensure Python Environment is Clean:** If you haven't, create a NEW virtual environment and install dependencies there.")
        print("   Example (in your project directory):")
        print("   `python -m venv new_accent_env`")
        # Raw string: the Windows path contains backslashes that would
        # otherwise be read as escape sequences (e.g. "\n").
        print(r"   `.\new_accent_env\Scripts\activate` (Windows) or `source new_accent_env/bin/activate` (macOS/Linux)")
        print("2. **Install/Upgrade ALL Dependencies:**")
        print("   `pip uninstall speechbrain transformers torchaudio huggingface_hub numpy scipy tqdm Flask Flask-Executor yt-dlp -y`")
        print("   `pip install --upgrade speechbrain transformers torchaudio huggingface_hub numpy scipy tqdm Flask Flask-Executor`")
        # Point at the cache directory actually in use rather than a
        # machine-specific hard-coded path.
        print(f"3. **Manually Clear Hugging Face Cache:** If issues persist, delete the folder: `{HF_CACHE_DIR}`")
        print("   This forces a fresh download. Then try running your app again.")
        print("4. **Check FFmpeg Installation:** Ensure FFmpeg is installed on your system and its `bin` directory is added to your system's PATH.")
        print("--------------------------------------------------------------\n")
        accent_classifier = None  # Set to None if loading fails

# --- Function to detect accent from an audio file ---
def detect_accent(audio_path, task_id):
    """
    Classify the speaker's accent in an audio file with the pre-loaded model.

    Args:
        audio_path: Path of the audio file to analyse. It is handed to the
            classifier exactly as given; no normalisation happens here.
        task_id: Identifier that prefixes the progress messages printed by
            this function.

    Returns:
        A 4-tuple ``(accent, confidence, summary, error)``.
        On success ``error`` is ``None``; ``accent`` is the first label
        returned by the classifier, ``confidence`` is the softmax value of
        the predicted class multiplied by 100, and ``summary`` is a fixed
        explanatory sentence.
        On failure (model not loaded, file missing or empty, or any exception
        raised during analysis) the first three items are ``None`` and
        ``error`` holds a message describing the problem.
    """
    def _failure(message):
        # Uniform shape for every error return.
        return None, None, None, message

    if accent_classifier is None:
        return _failure("Accent classification model not loaded. Please ensure the model loads correctly at startup.")

    print(f"Task {task_id}: Analyzing accent from {audio_path}...")
    try:
        # The caller is expected to supply a ready-to-use path, so it is
        # deliberately not passed through os.path.abspath() here.
        source_path = audio_path
        print(f"Task {task_id}: Final audio path for SpeechBrain: {source_path}")

        # Reject missing or zero-byte files before invoking the model.
        if not os.path.exists(source_path):
            return _failure(f"Audio file not found at: {source_path}")
        if os.path.getsize(source_path) == 0:
            return _failure(f"Audio file is empty at: {source_path}")

        raw_scores, top_score, top_index, labels = accent_classifier.classify_file(source_path)

        # Dump the raw classifier outputs to aid debugging.
        print(f"out_prob: {raw_scores}, score: {top_score}, index: {top_index}, text_lab: {labels}")

        # Normalise the per-class scores with softmax and read off the entry
        # of the predicted class as a percentage.
        # NOTE(review): this treats the classifier scores as logits - confirm
        # against the model's output layer.
        class_probs = F.softmax(raw_scores, dim=-1)
        predicted_class = top_index.item()
        confidence = class_probs[0, predicted_class] * 100

        accent = labels[0]
        summary = "Analysis complete. The detected accent is based on the dominant English accent identified in the audio segment provided."

        print(f"Task {task_id}: Accent: {accent}, Confidence: {confidence:.2f}%")
        return accent, confidence, summary, None

    except Exception as exc:
        failure_text = f"An error occurred during accent detection: {exc}"
        print(f"Task {task_id}: {failure_text}")
        return _failure(failure_text)



# Manual smoke test: asks for the name of an audio file inside ./temp_files,
# loads the model and prints the classification result for that file.
if __name__ == '__main__':
    # In the Flask app the same two steps apply: load the model once at
    # startup, then call detect_accent() per request.
    # The Hugging Face cache directory is already created by the module-level
    # setup near the top of this file, so it is not set up again here.
    input_file = input("Enter audio: ").strip()

    # detect_accent() documents that it expects an absolute path, so resolve
    # the file name against ./temp_files before handing it over.
    dummy_audio_path = os.path.abspath(os.path.join("temp_files", input_file))
    test_task_id = "test_accent_detection_123"

    # 1. Load the model first (typically done once at app startup)
    load_accent_model()

    # 2. Then call detect_accent with a valid audio path
    if accent_classifier is None:
        print("Model could not be loaded, skipping accent detection example.")
    else:
        accent, confidence, summary, error = detect_accent(dummy_audio_path, test_task_id)
        # The error slot is None exactly when detection succeeded.
        if error is None:
            print("\n--- Detection Result ---")
            print(f"Detected Accent: {accent}")
            print(f"Confidence: {confidence:.2f}%")
            print(f"Summary: {summary}")
        else:
            print("\n--- Detection Error ---")
            print(f"Error: {error}")