|
""" |
|
Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
-------------------------------------------------------------
Features:
• Single text sentiment analysis with language hint.
• Batch analysis from CSV/XLSX file.
• 3-class output (Positive / Neutral / Negative) aggregated from 5-star scores.
• Saves logs to sentiment_logs.xlsx.
|
""" |
|
|
|
import os
from datetime import datetime, timezone

import gradio as gr
import pandas as pd
from transformers import pipeline
|
|
|
|
|
# Hugging Face checkpoint used for scoring; the rest of the file reads its
# output as five star-rating labels ("1 star" ... "5 stars").
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

# Built once at module import so every request handler shares the same pipeline.
clf = pipeline(task="sentiment-analysis", model=MODEL_NAME)
|
|
|
|
|
# Workbook that collects one row per single-text analysis.
LOG_PATH = "sentiment_logs.xlsx"

# On first run, write a header-only sheet so the log file exists before any
# analysis has been recorded.
if not os.path.exists(LOG_PATH):
    pd.DataFrame(
        columns=[
            "timestamp",
            "language_hint",
            "text",
            "predicted_label_3class",
            "confidence_3class",
            "stars_probs",
            "top_star_label",
        ]
    ).to_excel(excel_writer=LOG_PATH, index=False)
|
|
|
|
|
def _aggregate_to_3class(star_scores):
    """Collapse per-star scores into a Negative / Neutral / Positive verdict.

    Args:
        star_scores: Iterable of ``{"label": ..., "score": ...}`` mappings.
            Labels are compared case-insensitively against ``"1 star"``
            through ``"5 stars"``; a star that is not present counts as 0.0.

    Returns:
        A 4-tuple ``(pred_label, confidence, probs3, top_star_label)``:
        the winning 3-class label, its summed score, the dict of all three
        class scores, and the single star label with the highest score.
    """
    star_names = ["1 star", "2 stars", "3 stars", "4 stars", "5 stars"]

    reported = {item["label"].lower(): float(item["score"]) for item in star_scores}
    per_star = {name: reported.get(name, 0.0) for name in star_names}

    # 1-2 stars count as negative, 3 as neutral, 4-5 as positive.
    probs3 = {
        "Negative": per_star["1 star"] + per_star["2 stars"],
        "Neutral": per_star["3 stars"],
        "Positive": per_star["4 stars"] + per_star["5 stars"],
    }

    # max() keeps the first maximum it meets, so ties resolve in listing
    # order: Negative before Neutral before Positive, lower star before higher.
    pred_label = max(probs3, key=probs3.get)
    top_star_label = max(star_names, key=per_star.__getitem__)

    return pred_label, probs3[pred_label], probs3, top_star_label
|
|
|
|
|
def analyze_single(text, lang_hint):
    """Classify one piece of text and append the outcome to the Excel log.

    Args:
        text: Raw user input. ``None``, empty, or whitespace-only input is
            rejected with an error message instead of being classified.
        lang_hint: Language label selected in the UI. It is only written to
            the log; the classifier call does not receive it.

    Returns:
        A 4-tuple ``(sentiment, confidence, polarity, log_path)``. The first
        three are display strings (the second and third are empty when the
        input is rejected); ``log_path`` is always ``LOG_PATH``.
    """
    if not text or not text.strip():
        return "❌ Please enter some text.", "", "", LOG_PATH

    # truncation=True clips over-long input to the model's maximum sequence
    # length instead of letting the pipeline call fail on it.
    star_results = clf(text, return_all_scores=True, truncation=True)[0]
    pred_label, conf, _probs3, top_star = _aggregate_to_3class(star_results)

    polarity = {
        "Positive": "😊 Positive",
        "Neutral": "😐 Neutral",
        "Negative": "☹️ Negative",
    }[pred_label]

    log_columns = [
        "timestamp", "language_hint", "text",
        "predicted_label_3class", "confidence_3class",
        "stars_probs", "top_star_label",
    ]
    try:
        df = pd.read_excel(LOG_PATH)
    except Exception:
        # Deliberate best effort: if the log is missing or cannot be read,
        # start a fresh one rather than failing the user's request.
        df = pd.DataFrame(columns=log_columns)

    new_row = {
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted string is unchanged.
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
        "language_hint": lang_hint,
        "text": text,
        "predicted_label_3class": pred_label,
        "confidence_3class": round(conf, 4),
        "stars_probs": str({d["label"]: round(float(d["score"]), 4) for d in star_results}),
        "top_star_label": top_star,
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    df.to_excel(LOG_PATH, index=False)

    return f"Sentiment: {pred_label}", f"Confidence: {conf:.3f}", f"Polarity: {polarity}", LOG_PATH
|
|
|
|
|
def analyze_batch(file, lang_hint):
    """Classify every row of an uploaded CSV/Excel file and save the results.

    Args:
        file: The upload handed over by the UI: either a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``. ``None`` means nothing was uploaded.
        lang_hint: Language label selected in the UI. It is accepted for the
            handler signature but not used by the analysis.

    Returns:
        A 2-tuple ``(status_message, output_path)``. ``output_path`` is
        ``"batch_results.xlsx"`` on success and ``None`` when the upload is
        missing, has an unsupported extension, or lacks a ``text`` column.
    """
    if file is None:
        return "❌ Please upload a CSV/XLSX file.", None

    # Accept both a bare path and a temp-file wrapper with a .name attribute,
    # so the handler does not break on whichever form the UI delivers.
    path = file if isinstance(file, (str, os.PathLike)) else file.name

    ext = os.path.splitext(path)[-1].lower()
    if ext == ".csv":
        df = pd.read_csv(path)
    elif ext in (".xls", ".xlsx"):
        df = pd.read_excel(path)
    else:
        return "❌ Only CSV or Excel files are supported.", None

    if "text" not in df.columns:
        return "❌ The file must contain a 'text' column.", None

    results = []
    for t in df["text"]:
        # Missing cells (NaN), numbers and blank strings are flagged, not scored.
        if not isinstance(t, str) or not t.strip():
            results.append(("N/A", 0.0, "Invalid text"))
            continue
        # truncation=True clips over-long rows to the model's maximum
        # sequence length so one long row cannot abort the whole batch.
        star_results = clf(t, return_all_scores=True, truncation=True)[0]
        pred_label, conf, _probs3, top_star = _aggregate_to_3class(star_results)
        results.append((pred_label, conf, top_star))

    # Build each column separately: unpacking zip(*results) raises ValueError
    # when the file has a header but no data rows.
    df["predicted_label_3class"] = [row[0] for row in results]
    df["confidence_3class"] = [row[1] for row in results]
    df["top_star_label"] = [row[2] for row in results]

    out_path = "batch_results.xlsx"
    df.to_excel(out_path, index=False)

    return "✅ Batch analysis complete.", out_path
|
|
|
|
|
# Gradio UI: one tab for single-text analysis, one for batch file upload.
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🌍 Multilingual Sentiment Analysis (Positive • Neutral • Negative)\n"
        # The two trailing spaces force a Markdown line break before "Model:".
        "**Languages:** English, Urdu, Roman Urdu  \n"
        "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
    )

    with gr.Tab("🔹 Single Text"):
        user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
        lang_dropdown = gr.Dropdown(["English", "Urdu", "Roman Urdu"], label="Language Hint", value="English")
        btn = gr.Button("Analyze")

        out_sent = gr.Textbox(label="Sentiment")
        out_conf = gr.Textbox(label="Confidence (0–1)")
        out_pol = gr.Textbox(label="Polarity")
        out_file = gr.File(label="Download logs (.xlsx)")

        # Output order matches the 4-tuple returned by analyze_single.
        btn.click(
            analyze_single,
            inputs=[user_text, lang_dropdown],
            outputs=[out_sent, out_conf, out_pol, out_file],
        )

    with gr.Tab("🔹 Batch Upload"):
        gr.Markdown("Upload a CSV/XLSX file with a **'text'** column for batch sentiment analysis.")
        # ".xls" is listed because analyze_batch accepts it alongside ".xlsx".
        file_in = gr.File(label="Upload CSV/XLSX", file_types=[".csv", ".xls", ".xlsx"])
        lang_dropdown_batch = gr.Dropdown(
            ["English", "Urdu", "Roman Urdu"],
            label="Language Hint",
            value="English",
        )
        btn_batch = gr.Button("Analyze Batch")

        batch_status = gr.Textbox(label="Status")
        batch_file = gr.File(label="Download Batch Results")

        # Output order matches the (status, path) pair returned by analyze_batch.
        btn_batch.click(
            analyze_batch,
            inputs=[file_in, lang_dropdown_batch],
            outputs=[batch_status, batch_file],
        )


if __name__ == "__main__":
    demo.launch()
|
|