File size: 5,883 Bytes
133afa1
 
 
0f5bc63
 
 
 
 
133afa1
4f65e98
133afa1
 
 
 
 
 
 
 
 
 
 
 
 
 
0f5bc63
 
133afa1
 
 
0f5bc63
133afa1
 
0f5bc63
 
 
 
 
 
 
133afa1
0f5bc63
133afa1
 
 
 
 
 
 
 
 
 
0f5bc63
 
133afa1
 
 
0f5bc63
133afa1
 
 
 
 
 
 
 
0f5bc63
133afa1
 
 
 
0f5bc63
 
133afa1
 
 
 
 
0f5bc63
133afa1
 
 
 
 
 
 
 
 
0f5bc63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133afa1
 
4f65e98
133afa1
 
 
 
 
4f65e98
0f5bc63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f65e98
0f5bc63
 
4f65e98
0f5bc63
 
4f65e98
 
0f5bc63
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""
Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
-------------------------------------------------------------
Features:
• Single text sentiment analysis with language hint.
• Batch analysis from CSV/XLSX file.
• 3-class output (Positive / Neutral / Negative) aggregated from 5-star scores.
• Saves logs to sentiment_logs.xlsx.
"""

import os
from datetime import datetime
import pandas as pd
import gradio as gr
from transformers import pipeline

# -------- Model & Pipeline --------
# Checkpoint whose labels are star ratings ("1 star" ... "5 stars"); the
# pipeline is built once at import time and shared by every request.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
clf = pipeline(task="sentiment-analysis", model=MODEL_NAME)

# -------- Logging setup --------
# Workbook that collects one row per single-text prediction.
LOG_PATH = "sentiment_logs.xlsx"
if not os.path.exists(LOG_PATH):
    # First run: seed a header-only workbook so later reads find the
    # expected columns.
    pd.DataFrame(
        columns=[
            "timestamp",
            "language_hint",
            "text",
            "predicted_label_3class",
            "confidence_3class",
            "stars_probs",
            "top_star_label",
        ],
    ).to_excel(excel_writer=LOG_PATH, index=False)

# -------- Helper function: aggregate 5★ → 3-class --------
def _aggregate_to_3class(star_scores):
    scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
    s1, s2, s3, s4, s5 = (
        scores.get("1 star", 0.0),
        scores.get("2 stars", 0.0),
        scores.get("3 stars", 0.0),
        scores.get("4 stars", 0.0),
        scores.get("5 stars", 0.0),
    )

    neg, neu, pos = s1 + s2, s3, s4 + s5
    probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
    pred_label = max(probs3, key=probs3.get)
    confidence = probs3[pred_label]

    top_star_label = max(
        ["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
        key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
    )
    return pred_label, confidence, probs3, top_star_label

# -------- Single text analysis --------
def analyze_single(text, lang_hint):
    """Classify one piece of text and append the result to the Excel log.

    Args:
        text: Raw user input. ``None``, empty, or whitespace-only input is
            rejected without calling the model.
        lang_hint: Language selected in the UI. It is only written to the
            log; it does not influence the prediction.

    Returns:
        A 4-tuple ``(sentiment, confidence, polarity, log_path)``. For
        invalid input the first element is an error message and the next
        two are empty strings.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    if not text or not text.strip():
        return "❌ Please enter some text.", "", "", LOG_PATH

    # truncation=True clips inputs that exceed the model's maximum sequence
    # length instead of letting the forward pass raise on long texts.
    star_results = clf(text, return_all_scores=True, truncation=True)[0]
    pred_label, conf, _, top_star = _aggregate_to_3class(star_results)

    polarity = {
        "Positive": "😊 Positive",
        "Neutral": "😐 Neutral",
        "Negative": "☹️ Negative",
    }[pred_label]

    # Log
    log_columns = [
        "timestamp", "language_hint", "text",
        "predicted_label_3class", "confidence_3class",
        "stars_probs", "top_star_label",
    ]
    try:
        log_df = pd.read_excel(LOG_PATH)
    except Exception:
        # Deliberate best-effort: a missing or unreadable workbook must not
        # block the prediction, so start again from an empty log.
        log_df = pd.DataFrame(columns=log_columns)

    new_row = {
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the rendered string is unchanged.
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
        "language_hint": lang_hint,
        "text": text,
        "predicted_label_3class": pred_label,
        "confidence_3class": round(conf, 4),
        "stars_probs": str({d["label"]: round(float(d["score"]), 4) for d in star_results}),
        "top_star_label": top_star,
    }
    log_df = pd.concat([log_df, pd.DataFrame([new_row])], ignore_index=True)
    # The whole workbook is rewritten on every call.
    log_df.to_excel(LOG_PATH, index=False)

    return f"Sentiment: {pred_label}", f"Confidence: {conf:.3f}", f"Polarity: {polarity}", LOG_PATH

# -------- Batch analysis --------
def analyze_batch(file, lang_hint):
    """Run sentiment analysis over the ``text`` column of an uploaded file.

    Args:
        file: The upload handed over by the UI: either a filesystem path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``. ``None`` means nothing was uploaded.
        lang_hint: Accepted because the UI passes it; currently unused.

    Returns:
        ``(status_message, output_path)``. ``output_path`` is ``None`` when
        validation fails, otherwise the path of the written results workbook.
    """
    if file is None:
        return "❌ Please upload a CSV/XLSX file.", None

    # Depending on the Gradio version / ``type`` setting the callback gets a
    # plain path or a temp-file wrapper; accept both instead of assuming
    # ``.name`` exists.
    path = file if isinstance(file, (str, os.PathLike)) else file.name

    ext = os.path.splitext(path)[-1].lower()
    if ext == ".csv":
        df = pd.read_csv(path)
    elif ext in [".xls", ".xlsx"]:
        df = pd.read_excel(path)
    else:
        return "❌ Only CSV or Excel files are supported.", None

    if "text" not in df.columns:
        return "❌ The file must contain a 'text' column.", None

    # Collect one list per output column. Unlike unpacking ``zip(*rows)``,
    # this also works when the file has a header but no data rows.
    labels, confidences, top_stars = [], [], []
    for cell in df["text"]:
        if not isinstance(cell, str) or not cell.strip():
            # Non-string cells (e.g. NaN, numbers) and blank strings are
            # flagged rather than sent to the model.
            labels.append("N/A")
            confidences.append(0.0)
            top_stars.append("Invalid text")
            continue
        # truncation=True clips rows longer than the model's maximum
        # sequence length so one long row cannot abort the whole batch.
        star_results = clf(cell, return_all_scores=True, truncation=True)[0]
        pred_label, conf, _, top_star = _aggregate_to_3class(star_results)
        labels.append(pred_label)
        confidences.append(conf)
        top_stars.append(top_star)

    df["predicted_label_3class"] = labels
    df["confidence_3class"] = confidences
    df["top_star_label"] = top_stars

    out_path = "batch_results.xlsx"
    df.to_excel(out_path, index=False)

    return "✅ Batch analysis complete.", out_path

# -------- Gradio UI --------
# Two-tab UI: one tab for a single text, one for a batch file upload.
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🌍 Multilingual Sentiment Analysis (Positive • Neutral • Negative)\n"
        "**Languages:** English, Urdu, Roman Urdu  \n"
        "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
    )

    with gr.Tab("🔹 Single Text"):
        user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
        lang_dropdown = gr.Dropdown(["English", "Urdu", "Roman Urdu"], label="Language Hint", value="English")
        btn = gr.Button("Analyze")

        out_sent = gr.Textbox(label="Sentiment")
        out_conf = gr.Textbox(label="Confidence (0–1)")
        out_pol  = gr.Textbox(label="Polarity")
        out_file = gr.File(label="Download logs (.xlsx)")

        # analyze_single returns four values, one per output component.
        btn.click(analyze_single, inputs=[user_text, lang_dropdown],
                  outputs=[out_sent, out_conf, out_pol, out_file])

    with gr.Tab("🔹 Batch Upload"):
        gr.Markdown("Upload a CSV/XLSX file with a **'text'** column for batch sentiment analysis.")
        file_in = gr.File(label="Upload CSV/XLSX", file_types=[".csv", ".xlsx"])
        lang_dropdown_batch = gr.Dropdown(["English", "Urdu", "Roman Urdu"],
                                          label="Language Hint", value="English")
        btn_batch = gr.Button("Analyze Batch")

        batch_status = gr.Textbox(label="Status")
        batch_file   = gr.File(label="Download Batch Results")

        # analyze_batch returns (status message, results path or None).
        btn_batch.click(analyze_batch, inputs=[file_in, lang_dropdown_batch],
                        outputs=[batch_status, batch_file])

if __name__ == "__main__":
    demo.launch()