# Emotion-triggered alarm system: scream detection (Hugging Face audio
# classification) with a Gradio UI and Salesforce alert forwarding.
import gradio as gr
import subprocess
import os
import threading
import time
import librosa
import requests
import numpy as np # Added for sounddevice callback
from datetime import datetime
from transformers import pipeline
# ποΈ Load detection model
# NOTE: this general non-verbal-vocalization model includes a 'screaming'
# label but may misclassify high-pitched screams as 'crying'; fine-tuning on
# a scream-specific dataset would improve separation of the two.
classifier = None
try:
    print("[INFO] Loading Hugging Face model...")
    classifier = pipeline(
        "audio-classification",
        model="padmalcom/wav2vec2-large-nonverbalvocalization-classification",
    )
    print(f"[INFO] Model labels: {classifier.model.config.id2label.values()}")
except Exception as e:
    # Leave classifier as None so downstream code can degrade gracefully.
    print(f"[ERROR] Failed to load model: {e}")
    classifier = None
# === Audio Conversion ===
def convert_audio(input_path, output_path="input.wav"):
    """
    Convert an arbitrary audio file to standard WAV (16 kHz, mono, 16-bit PCM).

    Every upload is normalized through ffmpeg so the Hugging Face model
    receives a consistent format.

    Args:
        input_path: Path to the source audio file.
        output_path: Destination WAV path (overwritten via ffmpeg's -y flag).

    Returns:
        The path of the converted WAV file (same as output_path).

    Raises:
        RuntimeError: If ffmpeg is missing, the conversion fails, or any
            unexpected error occurs.
    """
    command = [
        "ffmpeg", "-i", input_path,
        "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
        output_path, "-y",
    ]
    try:
        # capture_output keeps ffmpeg's diagnostics available for logging
        proc = subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"[ERROR] ffmpeg conversion failed: {e.stderr.strip()}")
        raise RuntimeError(f"Audio conversion failed: {e.stderr.strip()}")
    except FileNotFoundError:
        print("[ERROR] ffmpeg command not found. Please ensure ffmpeg is installed and in your PATH.")
        raise RuntimeError("ffmpeg not found. Please install it.")
    except Exception as e:
        print(f"[ERROR] Unexpected error during audio conversion: {e}")
        raise RuntimeError(f"Unexpected audio conversion error: {e}")
    print(f"[DEBUG] Audio converted to WAV: {output_path}")
    if proc.stdout:
        print(f"[DEBUG] ffmpeg stdout: {proc.stdout.strip()}")
    if proc.stderr:
        print(f"[DEBUG] ffmpeg stderr: {proc.stderr.strip()}")
    return output_path
# === Scream Detection ===
def detect_scream(audio_path):
    """
    Classify an audio file and return the top non-verbal-vocalization label.

    Args:
        audio_path: Path to an audio file readable by librosa.

    Returns:
        dict with "label" (lowercased top prediction) and "score" (confidence
        as a percentage, 0-100). On failure paths, sentinel labels
        "model_not_loaded", "no_audio_data", "no_detection" or
        "detection_error" are returned with score 0.0.
    """
    if classifier is None:
        return {"label": "model_not_loaded", "score": 0.0}
    try:
        # librosa resamples to the model's expected 16 kHz on load
        samples, rate = librosa.load(audio_path, sr=16000)
        print(f"[DEBUG] Loaded audio: {len(samples)} samples at {rate} Hz")
        if len(samples) == 0:
            print("[WARNING] Empty audio file provided for detection.")
            return {"label": "no_audio_data", "score": 0.0}
        # The pipeline accepts a raw numpy waveform directly
        predictions = classifier(samples)
        print(f"[DEBUG] Model output: {predictions}")
        if not predictions:
            print("[WARNING] Model returned no detection results.")
            return {"label": "no_detection", "score": 0.0}
        best = max(predictions, key=lambda p: p["score"])
        # Lowercase so downstream substring checks are case-insensitive
        return {"label": best["label"].lower(), "score": float(best["score"]) * 100}
    except Exception as e:
        print(f"[ERROR] Detection failed for {audio_path}: {e}")
        return {"label": "detection_error", "score": 0.0}
# === Send Alert to Salesforce ===
def send_salesforce_alert(audio_meta, detection):
    """
    POST an alert payload to the configured Salesforce endpoint.

    Endpoint and credentials come from the SF_ALERT_URL and SF_API_TOKEN
    environment variables.

    Args:
        audio_meta: dict with "filename" (optional), "alert_level", "timestamp".
        detection: dict with "label" and "score".

    Returns:
        The decoded JSON response from Salesforce.

    Raises:
        RuntimeError: If configuration is missing, the request times out, or
            the request/response fails.
    """
    sf_url = os.getenv("SF_ALERT_URL")
    sf_token = os.getenv("SF_API_TOKEN")
    if not sf_url or not sf_token:
        print("[ERROR] Salesforce configuration (SF_ALERT_URL or SF_API_TOKEN) missing.")
        raise RuntimeError("Salesforce configuration missing. Cannot send alert.")
    payload = {
        "AudioName": audio_meta.get("filename", "unknown_audio"),
        "DetectedLabel": detection["label"],
        "Score": round(detection["score"], 2),  # rounded for cleaner records
        "AlertLevel": audio_meta["alert_level"],
        "Timestamp": audio_meta["timestamp"],
    }
    headers = {
        "Authorization": f"Bearer {sf_token}",
        "Content-Type": "application/json",
    }
    print(f"[DEBUG] Sending payload to Salesforce: {payload}")
    try:
        resp = requests.post(sf_url, json=payload, headers=headers, timeout=10)
        resp.raise_for_status()  # surfaces 4xx/5xx as RequestException
        print(f"[INFO] Salesforce alert sent successfully. Response: {resp.json()}")
        return resp.json()
    except requests.exceptions.Timeout:
        print("[ERROR] Salesforce alert request timed out.")
        raise RuntimeError("Salesforce alert timed out.")
    except requests.exceptions.RequestException as e:
        print(f"[ERROR] Error sending Salesforce alert: {e}")
        # Log the response body when available for easier debugging
        if getattr(e, "response", None) is not None:
            print(f"[ERROR] Salesforce response content: {e.response.text}")
        raise RuntimeError(f"Failed to send Salesforce alert: {e}")
# === Main Gradio Function ===
def process_uploaded(audio_file, system_state, high_thresh, med_thresh):
    """
    Gradio entry point: convert the uploaded audio, run scream detection, and
    forward High/Medium-Risk detections to Salesforce.

    Args:
        audio_file: Filepath of the uploaded/recorded audio (Gradio "filepath").
        system_state: "Start" enables processing; anything else short-circuits.
        high_thresh: Confidence (%) at or above which a scream is High-Risk.
        med_thresh: Confidence (%) at or above which a scream is Medium-Risk.

    Returns:
        A human-readable status/result string for the Gradio text output.
    """
    if system_state != "Start":
        return "π System is stopped. Change 'System State' to 'Start' to enable processing."
    if audio_file is None:
        return "Please upload an audio file or record one."
    print(f"[INFO] Processing uploaded audio: {audio_file}")
    try:
        # Normalize to 16 kHz mono WAV for the model
        wav_path = convert_audio(audio_file)
    except RuntimeError as e:
        return f"β Audio conversion error: {e}"
    except Exception as e:
        return f"β An unexpected error occurred during audio conversion: {e}"
    try:
        detection = detect_scream(wav_path)
        label = detection["label"]
        score = detection["score"]
        alert_message = f"π’ Detection: {label} ({score:.1f}%) β "
        level = "None"
        # Substring match: the model may emit 'screaming' or similar variants.
        if "scream" in label:
            if score >= high_thresh:
                level = "High-Risk"
            elif score >= med_thresh:
                level = "Medium-Risk"
        elif "crying" in label and score >= med_thresh:
            # Crying is a known misclassification target for screams; it is
            # deliberately NOT alertable. Set e.g. "Low-Risk" here if crying
            # should also trigger an alert.
            level = "None"
        alert_message += f"Alert Level: {level}"
        audio_meta = {
            "filename": os.path.basename(audio_file),
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "alert_level": level,
        }
        # Only risk-bearing detections are forwarded to Salesforce
        if level in ("High-Risk", "Medium-Risk"):
            try:
                sf_resp = send_salesforce_alert(audio_meta, detection)
                # BUGFIX: this message was previously split across two source
                # lines, leaving an unterminated f-string (SyntaxError).
                alert_message = (
                    f"β Detection: {label} ({score:.1f}%) β {level} "
                    f"β Alert sent to Salesforce (ID: {sf_resp.get('id', 'N/A')})"
                )
            except RuntimeError as e:
                alert_message = f"β οΈ Detection: {label} ({score:.1f}%) β {level} β Salesforce ERROR: {e}"
            except Exception as e:
                alert_message = f"β οΈ Detection: {label} ({score:.1f}%) β {level} β Unexpected Salesforce error: {e}"
    finally:
        # Always remove the temporary WAV, even on unexpected failures.
        if os.path.exists(wav_path):
            os.remove(wav_path)
            print(f"[DEBUG] Cleaned up {wav_path}")
    return alert_message
# === Gradio UI ===
# Ensure the title and description align with the requirements
iface = gr.Interface(
fn=process_uploaded,
inputs=[
gr.Audio(type="filepath", label="Upload Audio (or Record)"),
gr.Radio(["Start", "Stop"], label="System State", value="Start",
info="Set to 'Start' to enable audio processing and alerts."),
gr.Slider(0, 100, value=80, step=1, label="High-Risk Threshold (%)",
info="Confidence score for High-Risk scream detection."),
gr.Slider(0, 100, value=50, step=1, label="Medium-Risk Threshold (%)",
info="Confidence score for Medium-Risk scream detection.")
],
outputs="text",
title="π’ Emotion-Triggered Alarm System",
description="""
π§ Upload or record audio for real-time scream detection.
β οΈ Alerts are sent to Salesforce for High-Risk (confidence > 80%) and Medium-Risk (confidence 50-80%) detections.
The system aims to detect panic-indicating screams.
""",
allow_flagging="never" # As per requirement
)
# === Optional Real-Time Listener (for Raspberry Pi or similar) ===
# This section demonstrates how a real-time listener could be implemented.
# It requires `sounddevice` and `numpy`.
# For actual deployment, environment variables for SF_URL and SF_TOKEN must be set.
# This part is commented out by default as it requires specific hardware/setup.
def pi_listener(high_thresh=80, med_thresh=50, interval=1.0):
    """
    Continuously capture microphone audio and alert on detected screams.

    Intended for always-on devices such as a Raspberry Pi. Requires the
    `sounddevice` and `numpy` packages (plus a working PortAudio setup) and
    the global classifier to be loaded; otherwise it logs an error and
    returns immediately.

    Args:
        high_thresh: Confidence (%) threshold for High-Risk alerts.
        med_thresh: Confidence (%) threshold for Medium-Risk alerts.
        interval: Sleep time (seconds) between keep-alive iterations.
    """
    try:
        import sounddevice as sd
        import numpy as np
    except ImportError:
        print("[ERROR] sounddevice or numpy not found. Real-time listener cannot be started.")
        print("Please install them: pip install sounddevice numpy")
        return
    if classifier is None:
        print("[ERROR] Model not loaded. Real-time listener cannot operate.")
        return

    def callback(indata, frames, time_info, status):
        """Classify one captured audio chunk and alert when a scream is found."""
        if status:
            print(f"[WARNING] Sounddevice status: {status}")
        # Flatten to a mono float32 vector for the classifier
        chunk = indata.squeeze()
        if chunk.ndim > 1:
            chunk = chunk[:, 0]
        chunk = chunk.astype(np.float32)
        if len(chunk) == 0:
            return
        try:
            results = classifier(chunk)
            if not results:
                return
            best = max(results, key=lambda r: r["score"])
            lbl = best["label"].lower()
            sc = float(best["score"]) * 100
            level = "None"
            if "scream" in lbl:
                level = (
                    "High-Risk" if sc >= high_thresh
                    else "Medium-Risk" if sc >= med_thresh
                    else "None"
                )
            if level == "None":
                return
            timestamp = datetime.utcnow().isoformat() + "Z"
            audio_meta = {
                "filename": f"live-stream-{timestamp}",
                "timestamp": timestamp,
                "alert_level": level,
            }
            try:
                send_salesforce_alert(audio_meta, {"label": lbl, "score": sc})
                print(f"[{timestamp}] {level} scream detected ({sc:.1f}%) β alert sent.")
            except RuntimeError as e:
                print(f"[{timestamp}] {level} scream detected ({sc:.1f}%) β Salesforce alert failed: {e}")
            except Exception as e:
                print(f"[{timestamp}] {level} scream detected ({sc:.1f}%) β Unexpected error sending alert: {e}")
        except Exception as e:
            print(f"[ERROR] Error in real-time detection callback: {e}")

    try:
        # 1-second blocks at 16 kHz match the model's sample rate while
        # keeping detection latency low.
        with sd.InputStream(channels=1, samplerate=16000, callback=callback, blocksize=16000):
            print("π Real-time detection started...")
            while True:
                time.sleep(interval)
    except sd.PortAudioError as e:
        print(f"[ERROR] PortAudio error: {e}. Check your audio device setup.")
    except Exception as e:
        print(f"[ERROR] An unexpected error occurred in the real-time listener: {e}")
# === App Entry ===
if __name__ == "__main__":
    # Optional: enable real-time listener for Raspberry Pi or similar.
    # Uncomment the lines below to enable it.
    # Remember to install sounddevice and numpy: pip install sounddevice numpy
    # Also, ensure your system has PortAudio installed for sounddevice to work.
    # pi_thread = threading.Thread(target=pi_listener, daemon=True)
    # pi_thread.start()
    # Bind on all interfaces; port comes from the PORT env var (default 7860),
    # which suits containerized/hosted deployments such as Hugging Face Spaces.
    iface.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))