# voice-sentiment-analysis / voice_sentiment.py
# martinoyovo's picture
# Update
# eb2cf07
"""
Simple Voice Sentiment Analysis System
Wav2Vec 2.0 + BERT Pipeline
"""
import torch
import librosa
import numpy as np
from transformers import (
Wav2Vec2ForCTC,
Wav2Vec2Tokenizer,
pipeline
)
import pandas as pd
import os
class VoiceSentimentAnalyzer:
    """Simple pipeline: audio -> transcription -> sentiment -> satisfaction.

    Speech is transcribed with a Wav2Vec 2.0 CTC model, the transcription is
    scored by a star-rating sentiment model, and the result is mapped onto a
    coarse customer-satisfaction label.
    """

    # Checkpoints loaded by ``__init__``.
    ASR_MODEL_NAME = "facebook/wav2vec2-large-960h-lv60-self"
    SENTIMENT_MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

    # Audio is resampled to this rate on load and declared to the ASR tokenizer.
    SAMPLE_RATE = 16000

    # File extensions (lower-case) that ``analyze_batch`` treats as audio.
    AUDIO_EXTENSIONS = ('.wav', '.mp3', '.m4a')

    # Star-rating labels collapsed to three classes; labels not listed here
    # are passed through unchanged.
    LABEL_MAP = {
        "1 star": "NEGATIVE", "2 stars": "NEGATIVE",
        "3 stars": "NEUTRAL",
        "4 stars": "POSITIVE", "5 stars": "POSITIVE",
    }

    # Keys of the dict returned by ``analyze_call``; used as the CSV header
    # when a batch contains no audio files.
    RESULT_COLUMNS = ("file", "transcription", "sentiment", "score", "satisfaction")

    def __init__(self):
        """Load the speech-to-text and sentiment models."""
        print("Loading models...")

        # ASR model (speech-to-text).
        # NOTE(review): newer transformers releases appear to favour
        # Wav2Vec2Processor for raw-audio input -- confirm against the
        # installed version before upgrading.
        self.asr_tokenizer = Wav2Vec2Tokenizer.from_pretrained(self.ASR_MODEL_NAME)
        self.asr_model = Wav2Vec2ForCTC.from_pretrained(self.ASR_MODEL_NAME)

        # Sentiment model.
        self.sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model=self.SENTIMENT_MODEL_NAME,
        )

        print("Models loaded!")

    def audio_to_text(self, audio_path):
        """Transcribe an audio file.

        Args:
            audio_path: Path of the audio file to load.

        Returns:
            The decoded transcription with surrounding whitespace removed, or
            an empty string when the file contains no samples.
        """
        # Load and resample to the rate passed to the tokenizer below.
        audio, _ = librosa.load(audio_path, sr=self.SAMPLE_RATE)
        if len(audio) == 0:
            # Nothing to transcribe; skip the model instead of feeding it an
            # empty tensor.
            return ""

        input_values = self.asr_tokenizer(
            audio, return_tensors="pt", sampling_rate=self.SAMPLE_RATE
        ).input_values

        # Inference only: no gradients needed.
        with torch.no_grad():
            logits = self.asr_model(input_values).logits

        # Greedy decoding: most likely token at every frame.
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = self.asr_tokenizer.decode(predicted_ids[0])
        return transcription.strip()

    def text_to_sentiment(self, text):
        """Analyze the sentiment of a piece of text.

        Args:
            text: Text to score. ``None``, empty and whitespace-only values
                are treated as "nothing was said".

        Returns:
            A dict with ``"sentiment"`` (``"NEGATIVE"``, ``"NEUTRAL"``,
            ``"POSITIVE"``, or the raw label when it is not in ``LABEL_MAP``)
            and ``"score"`` (the score reported by the sentiment pipeline, or
            ``0.0`` for empty input).
        """
        if not text or not text.strip():
            return {"sentiment": "NEUTRAL", "score": 0.0}

        # Truncate so that long transcriptions do not exceed the model's
        # maximum input length.
        result = self.sentiment_analyzer(text, truncation=True)[0]

        # Convert star labels to the simple three-class format.
        label = result['label']
        return {
            "sentiment": self.LABEL_MAP.get(label, label),
            "score": result['score'],
        }

    def classify_satisfaction(self, sentiment, score, threshold=0.7):
        """Classify customer satisfaction from a sentiment result.

        Args:
            sentiment: Sentiment label, e.g. ``"POSITIVE"`` or ``"NEGATIVE"``.
            score: Score attached to that label.
            threshold: Score that must be strictly exceeded for a positive or
                negative sentiment to count. Defaults to ``0.7``.

        Returns:
            ``"Satisfied"``, ``"Dissatisfied"`` or ``"Neutral"``.
        """
        if score > threshold:
            if sentiment == "POSITIVE":
                return "Satisfied"
            if sentiment == "NEGATIVE":
                return "Dissatisfied"
        return "Neutral"

    def analyze_call(self, audio_path):
        """Run the complete pipeline on one file and print each stage.

        Args:
            audio_path: Path of the audio file to analyze.

        Returns:
            A dict with the keys ``file`` (base name of ``audio_path``),
            ``transcription``, ``sentiment``, ``score`` and ``satisfaction``.
        """
        print(f"Analyzing: {audio_path}")

        # 1. Audio -> text
        transcription = self.audio_to_text(audio_path)
        print(f"Transcription: {transcription}")

        # 2. Text -> sentiment
        sentiment_result = self.text_to_sentiment(transcription)
        print(f"Sentiment: {sentiment_result['sentiment']} (score: {sentiment_result['score']:.2f})")

        # 3. Satisfaction classification
        satisfaction = self.classify_satisfaction(
            sentiment_result['sentiment'], sentiment_result['score']
        )
        print(f"Satisfaction: {satisfaction}")

        return {
            "file": os.path.basename(audio_path),
            "transcription": transcription,
            "sentiment": sentiment_result['sentiment'],
            "score": sentiment_result['score'],
            "satisfaction": satisfaction,
        }

    def analyze_batch(self, audio_folder, output_csv="analysis_results.csv"):
        """Analyze every audio file in a folder and save the results as CSV.

        Files are selected by extension (case-insensitively, see
        ``AUDIO_EXTENSIONS``) and processed in sorted name order so the CSV
        row order is reproducible.

        Args:
            audio_folder: Directory whose entries are scanned (not recursive).
            output_csv: Path of the CSV file to write. Defaults to
                ``"analysis_results.csv"`` in the current working directory.

        Returns:
            A ``pandas.DataFrame`` with one row per analyzed file. When the
            folder contains no audio files the frame is empty but still has
            the ``RESULT_COLUMNS`` columns.
        """
        audio_names = sorted(
            name for name in os.listdir(audio_folder)
            if name.lower().endswith(self.AUDIO_EXTENSIONS)
        )

        results = []
        for name in audio_names:
            results.append(self.analyze_call(os.path.join(audio_folder, name)))
            print("-" * 50)

        # An empty list would yield a frame without columns, which breaks the
        # 'sentiment' lookup below; give it the expected header explicitly.
        if results:
            df = pd.DataFrame(results)
        else:
            df = pd.DataFrame(columns=list(self.RESULT_COLUMNS))

        # Save to CSV
        df.to_csv(output_csv, index=False)
        print(f"Results saved: {output_csv}")

        # Quick statistics
        print("\nSTATISTICS:")
        print(f"Total calls: {len(results)}")
        for sentiment, count in df['sentiment'].value_counts().items():
            print(f"{sentiment}: {count}")

        return df