""" Multilingual Sentiment Analysis (English • Urdu • Roman Urdu) ------------------------------------------------------------- Features: • Single text sentiment analysis with language hint. • Batch analysis from CSV/XLSX file. • 3-class output (Positive / Neutral / Negative) aggregated from 5-star scores. • Saves logs to sentiment_logs.xlsx. """ import os from datetime import datetime import pandas as pd import gradio as gr from transformers import pipeline # -------- Model & Pipeline -------- MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment" clf = pipeline("sentiment-analysis", model=MODEL_NAME) # -------- Logging setup -------- LOG_PATH = "sentiment_logs.xlsx" if not os.path.exists(LOG_PATH): pd.DataFrame(columns=[ "timestamp", "language_hint", "text", "predicted_label_3class", "confidence_3class", "stars_probs", "top_star_label" ]).to_excel(LOG_PATH, index=False) # -------- Helper function: aggregate 5★ → 3-class -------- def _aggregate_to_3class(star_scores): scores = {d["label"].lower(): float(d["score"]) for d in star_scores} s1, s2, s3, s4, s5 = ( scores.get("1 star", 0.0), scores.get("2 stars", 0.0), scores.get("3 stars", 0.0), scores.get("4 stars", 0.0), scores.get("5 stars", 0.0), ) neg, neu, pos = s1 + s2, s3, s4 + s5 probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos} pred_label = max(probs3, key=probs3.get) confidence = probs3[pred_label] top_star_label = max( ["1 star", "2 stars", "3 stars", "4 stars", "5 stars"], key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k] ) return pred_label, confidence, probs3, top_star_label # -------- Single text analysis -------- def analyze_single(text, lang_hint): if not text or not text.strip(): return "❌ Please enter some text.", "", "", LOG_PATH star_results = clf(text, return_all_scores=True)[0] pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results) polarity = { "Positive": "😊 Positive", "Neutral": "😐 Neutral", "Negative": "☹️ Negative", }[pred_label] # Log try: df = pd.read_excel(LOG_PATH) except Exception: df = pd.DataFrame(columns=[ "timestamp", "language_hint", "text", "predicted_label_3class", "confidence_3class", "stars_probs", "top_star_label" ]) new_row = { "timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"), "language_hint": lang_hint, "text": text, "predicted_label_3class": pred_label, "confidence_3class": round(conf, 4), "stars_probs": str({d["label"]: round(float(d["score"]), 4) for d in star_results}), "top_star_label": top_star, } df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True) df.to_excel(LOG_PATH, index=False) return f"Sentiment: {pred_label}", f"Confidence: {conf:.3f}", f"Polarity: {polarity}", LOG_PATH # -------- Batch analysis -------- def analyze_batch(file, lang_hint): if file is None: return "❌ Please upload a CSV/XLSX file.", None ext = os.path.splitext(file.name)[-1].lower() if ext == ".csv": df = pd.read_csv(file.name) elif ext in [".xls", ".xlsx"]: df = pd.read_excel(file.name) else: return "❌ Only CSV or Excel files are supported.", None if "text" not in df.columns: return "❌ The file must contain a 'text' column.", None results = [] for t in df["text"]: if not isinstance(t, str) or not t.strip(): results.append(("N/A", 0.0, "Invalid text")) continue star_results = clf(t, return_all_scores=True)[0] pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results) results.append((pred_label, conf, top_star)) df["predicted_label_3class"], df["confidence_3class"], df["top_star_label"] = zip(*results) out_path = "batch_results.xlsx" df.to_excel(out_path, index=False) return "✅ Batch analysis complete.", out_path # -------- Gradio UI -------- with gr.Blocks() as demo: gr.Markdown( "## 🌍 Multilingual Sentiment Analysis (Positive • Neutral • Negative)\n" "**Languages:** English, Urdu, Roman Urdu \n" "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)" ) with gr.Tab("🔹 Single Text"): user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...") lang_dropdown = gr.Dropdown(["English", "Urdu", "Roman Urdu"], label="Language Hint", value="English") btn = gr.Button("Analyze") out_sent = gr.Textbox(label="Sentiment") out_conf = gr.Textbox(label="Confidence (0–1)") out_pol = gr.Textbox(label="Polarity") out_file = gr.File(label="Download logs (.xlsx)") btn.click(analyze_single, inputs=[user_text, lang_dropdown], outputs=[out_sent, out_conf, out_pol, out_file]) with gr.Tab("🔹 Batch Upload"): gr.Markdown("Upload a CSV/XLSX file with a **'text'** column for batch sentiment analysis.") file_in = gr.File(label="Upload CSV/XLSX", file_types=[".csv", ".xlsx"]) lang_dropdown_batch = gr.Dropdown(["English", "Urdu", "Roman Urdu"], label="Language Hint", value="English") btn_batch = gr.Button("Analyze Batch") batch_status = gr.Textbox(label="Status") batch_file = gr.File(label="Download Batch Results") btn_batch.click(analyze_batch, inputs=[file_in, lang_dropdown_batch], outputs=[batch_status, batch_file]) if __name__ == "__main__": demo.launch()