|
""" |
|
Simple Voice Sentiment Analysis System

Wav2Vec 2.0 + BERT Pipeline
|
""" |
|
|
|
import torch |
|
import librosa |
|
import numpy as np |
|
from transformers import ( |
|
Wav2Vec2ForCTC, |
|
Wav2Vec2Tokenizer, |
|
pipeline |
|
) |
|
import pandas as pd |
|
import os |
|
|
|
class VoiceSentimentAnalyzer:
    """Simple pipeline: Audio -> Transcription -> Sentiment Analysis.

    The analyzer loads a Wav2Vec 2.0 CTC model for speech recognition and a
    text-classification pipeline for sentiment.  The sentiment labels are
    star ratings ("1 star" .. "5 stars") that are collapsed into
    NEGATIVE / NEUTRAL / POSITIVE.
    """

    # Sampling rate (Hz) the audio is resampled to before recognition.
    SAMPLE_RATE = 16000

    # Audio file extensions picked up by analyze_batch (matched
    # case-insensitively).
    AUDIO_EXTENSIONS = ('.wav', '.mp3', '.m4a')

    # Keys of the dict returned by analyze_call; used as the column set of
    # the batch DataFrame when no call was analyzed.
    RESULT_COLUMNS = ("file", "transcription", "sentiment", "score", "satisfaction")

    # Star-rating labels of the sentiment model mapped to coarse classes.
    STAR_TO_SENTIMENT = {
        "1 star": "NEGATIVE", "2 stars": "NEGATIVE",
        "3 stars": "NEUTRAL",
        "4 stars": "POSITIVE", "5 stars": "POSITIVE",
    }

    def __init__(
        self,
        asr_model_name="facebook/wav2vec2-large-960h-lv60-self",
        sentiment_model_name="nlptown/bert-base-multilingual-uncased-sentiment",
    ):
        """Load the speech-recognition and sentiment models.

        Args:
            asr_model_name: Checkpoint name passed to ``from_pretrained``
                for both the ASR tokenizer and the CTC model.
            sentiment_model_name: Checkpoint name passed to the
                ``"sentiment-analysis"`` pipeline.
        """
        print("Loading models...")

        # NOTE(review): Wav2Vec2Tokenizer is reported as deprecated in recent
        # transformers releases in favor of Wav2Vec2Processor -- confirm
        # against the installed version before upgrading.
        self.asr_tokenizer = Wav2Vec2Tokenizer.from_pretrained(asr_model_name)
        self.asr_model = Wav2Vec2ForCTC.from_pretrained(asr_model_name)

        self.sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model=sentiment_model_name,
        )

        print("Models loaded!")

    def audio_to_text(self, audio_path):
        """Convert an audio file to text.

        Args:
            audio_path: Path of the audio file to transcribe.

        Returns:
            The decoded transcription with surrounding whitespace removed,
            or an empty string when the file contains no audio samples.
        """
        audio, _ = librosa.load(audio_path, sr=self.SAMPLE_RATE)

        # Nothing to recognize: skip the model call instead of feeding it an
        # empty tensor.
        if audio.size == 0:
            return ""

        input_values = self.asr_tokenizer(
            audio, return_tensors="pt", sampling_rate=self.SAMPLE_RATE
        ).input_values

        # Inference only, so gradients are not tracked.
        with torch.no_grad():
            logits = self.asr_model(input_values).logits

        # Greedy CTC decoding: most likely token per frame, then let the
        # tokenizer turn the id sequence of the single item into text.
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = self.asr_tokenizer.decode(predicted_ids[0])

        return transcription.strip()

    def text_to_sentiment(self, text):
        """Analyze the sentiment of a text.

        Args:
            text: Text to classify.  Empty, ``None`` or whitespace-only
                input is reported as NEUTRAL with score 0.0 without calling
                the model.

        Returns:
            A dict with keys ``"sentiment"`` (NEGATIVE / NEUTRAL / POSITIVE,
            or the raw model label when it is not a known star rating) and
            ``"score"`` (the score reported by the classifier).
        """
        if not text or not text.strip():
            return {"sentiment": "NEUTRAL", "score": 0.0}

        # truncation=True keeps long call transcripts within the classifier's
        # maximum input length instead of failing on them.
        result = self.sentiment_analyzer(text, truncation=True)[0]

        label = result['label']
        return {
            "sentiment": self.STAR_TO_SENTIMENT.get(label, label),
            "score": result['score'],
        }

    def classify_satisfaction(self, sentiment, score, threshold=0.7):
        """Classify customer satisfaction.

        Args:
            sentiment: Coarse sentiment class, e.g. "POSITIVE".
            score: Classifier score for that sentiment.
            threshold: Score that must be strictly exceeded for a
                POSITIVE / NEGATIVE sentiment to count.

        Returns:
            "Satisfied", "Dissatisfied" or "Neutral".
        """
        if score > threshold:
            if sentiment == "POSITIVE":
                return "Satisfied"
            if sentiment == "NEGATIVE":
                return "Dissatisfied"
        return "Neutral"

    def analyze_call(self, audio_path):
        """Run the complete pipeline: Audio -> Sentiment -> Classification.

        Progress is printed to stdout after every stage.

        Args:
            audio_path: Path of the audio file to analyze.

        Returns:
            A dict with the keys ``file`` (base name of ``audio_path``),
            ``transcription``, ``sentiment``, ``score`` and ``satisfaction``.
        """
        print(f"Analyzing: {audio_path}")

        transcription = self.audio_to_text(audio_path)
        print(f"Transcription: {transcription}")

        sentiment_result = self.text_to_sentiment(transcription)
        sentiment = sentiment_result['sentiment']
        score = sentiment_result['score']
        print(f"Sentiment: {sentiment} (score: {score:.2f})")

        satisfaction = self.classify_satisfaction(sentiment, score)
        print(f"Satisfaction: {satisfaction}")

        return {
            "file": os.path.basename(audio_path),
            "transcription": transcription,
            "sentiment": sentiment,
            "score": score,
            "satisfaction": satisfaction,
        }

    def analyze_batch(self, audio_folder, output_csv="analysis_results.csv"):
        """Analyze every audio file in a folder and save the results as CSV.

        Files are processed in sorted name order and selected by extension
        (see ``AUDIO_EXTENSIONS``), ignoring case.

        Args:
            audio_folder: Directory containing the call recordings.
            output_csv: Path of the CSV file to write.

        Returns:
            A pandas DataFrame with one row per analyzed call.  When no
            audio file is found, the DataFrame is empty but still has the
            ``RESULT_COLUMNS`` columns.
        """
        results = []

        # sorted(): os.listdir returns entries in arbitrary order; sorting
        # makes the CSV row order reproducible.
        for filename in sorted(os.listdir(audio_folder)):
            if not filename.lower().endswith(self.AUDIO_EXTENSIONS):
                continue
            audio_path = os.path.join(audio_folder, filename)
            results.append(self.analyze_call(audio_path))
            print("-" * 50)

        # An empty result list would yield a DataFrame without columns and
        # make the 'sentiment' lookup below fail, so name them explicitly.
        if results:
            df = pd.DataFrame(results)
        else:
            df = pd.DataFrame(columns=list(self.RESULT_COLUMNS))

        df.to_csv(output_csv, index=False)
        print(f"Results saved: {output_csv}")

        print("\nSTATISTICS:")
        print(f"Total calls: {len(results)}")
        for sentiment, count in df['sentiment'].value_counts().items():
            print(f"{sentiment}: {count}")

        return df