Spaces:

mbalvi
/

Multilingual_Sentiment_Analysis_v01

Sleeping

App Files Files Community

Multilingual_Sentiment_Analysis_v01 / app.py

mbalvi

Update app.py

0f5bc63 verified 9 days ago

raw

history blame contribute delete

5.88 kB

	"""
	Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)
	-------------------------------------------------------------
	Features:
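	• Single-text sentiment analysis; the language hint is recorded in the log only and does not change the prediction.
	• Batch analysis from a CSV/XLS/XLSX file that has a 'text' column; results are written to batch_results.xlsx.
	• 3-class output (Positive / Neutral / Negative) aggregated from 5-star scores.
	• Each single-text analysis is appended to sentiment_logs.xlsx.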
	"""

	import os
	from datetime import datetime
	import pandas as pd
	import gradio as gr
	from transformers import pipeline

	# -------- Model & Pipeline --------
	# Checkpoint name handed to the Hugging Face pipeline; the rest of the file
	# reads its output as "1 star" … "5 stars" labels.
	MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
	clf = pipeline("sentiment-analysis", model=MODEL_NAME)

	# -------- Logging setup --------
	# Workbook that collects one row per single-text analysis.
	LOG_PATH = "sentiment_logs.xlsx"
	_LOG_COLUMNS = [
	    "timestamp",
	    "language_hint",
	    "text",
	    "predicted_label_3class",
	    "confidence_3class",
	    "stars_probs",
	    "top_star_label",
	]
	if not os.path.exists(LOG_PATH):
	    # First run: write a header-only workbook so later reads find the columns.
	    pd.DataFrame(columns=_LOG_COLUMNS).to_excel(LOG_PATH, index=False)

	# -------- Helper function: aggregate 5★ → 3-class --------
	def _aggregate_to_3class(star_scores):
	scores = {d["label"].lower(): float(d["score"]) for d in star_scores}
	s1, s2, s3, s4, s5 = (
	scores.get("1 star", 0.0),
	scores.get("2 stars", 0.0),
	scores.get("3 stars", 0.0),
	scores.get("4 stars", 0.0),
	scores.get("5 stars", 0.0),
	)

	neg, neu, pos = s1 + s2, s3, s4 + s5
	probs3 = {"Negative": neg, "Neutral": neu, "Positive": pos}
	pred_label = max(probs3, key=probs3.get)
	confidence = probs3[pred_label]

	top_star_label = max(
	["1 star", "2 stars", "3 stars", "4 stars", "5 stars"],
	key=lambda k: {"1 star": s1, "2 stars": s2, "3 stars": s3, "4 stars": s4, "5 stars": s5}[k]
	)
	return pred_label, confidence, probs3, top_star_label

	# -------- Single text analysis --------
	def analyze_single(text, lang_hint):
	    """Classify one piece of text and append the outcome to the log workbook.

	    Args:
	        text: Text to classify.
	        lang_hint: Language label chosen by the user. It is only written to
	            the log; it is not passed to the model.

	    Returns:
	        A 4-tuple for the UI: sentiment line, confidence line, polarity
	        line, and ``LOG_PATH``. For empty or whitespace-only input the
	        tuple is an error message, two empty strings, and ``LOG_PATH``.
	    """
	    # Function-scope import: the file's top-level import only brings in
	    # ``datetime`` itself.
	    from datetime import timezone

	    if not text or not text.strip():
	        return "❌ Please enter some text.", "", "", LOG_PATH

	    # truncation=True clips over-long input to the model's maximum sequence
	    # length so that long text is scored rather than failing in the model.
	    star_results = clf(text, return_all_scores=True, truncation=True)[0]
	    pred_label, conf, probs3, top_star = _aggregate_to_3class(star_results)

	    polarity = {
	        "Positive": "😊 Positive",
	        "Neutral": "😐 Neutral",
	        "Negative": "☹️ Negative",
	    }[pred_label]

	    # Log: reload the whole workbook, append one row, write it back.
	    try:
	        df = pd.read_excel(LOG_PATH)
	    except Exception:
	        # Deliberate best effort: a missing or unreadable log must not block
	        # the analysis, so start again from an empty frame.
	        df = pd.DataFrame(columns=[
	            "timestamp", "language_hint", "text",
	            "predicted_label_3class", "confidence_3class",
	            "stars_probs", "top_star_label",
	        ])

	    new_row = {
	        # Timezone-aware replacement for the deprecated datetime.utcnow();
	        # the formatted string is the same.
	        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
	        "language_hint": lang_hint,
	        "text": text,
	        "predicted_label_3class": pred_label,
	        "confidence_3class": round(conf, 4),
	        "stars_probs": str({d["label"]: round(float(d["score"]), 4) for d in star_results}),
	        "top_star_label": top_star,
	    }
	    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
	    df.to_excel(LOG_PATH, index=False)

	    return f"Sentiment: {pred_label}", f"Confidence: {conf:.3f}", f"Polarity: {polarity}", LOG_PATH

	# -------- Batch analysis --------
	def analyze_batch(file, lang_hint):
	    """Classify every row of an uploaded CSV/Excel file.

	    Args:
	        file: The upload. Either an object exposing the file path as
	            ``.name`` or a plain path string; ``None`` means nothing was
	            uploaded.
	        lang_hint: Language label chosen by the user. Currently unused; kept
	            so the signature matches the UI wiring.

	    Returns:
	        ``(status_message, output_path)``. ``output_path`` is
	        ``"batch_results.xlsx"`` on success and ``None`` when the upload is
	        missing, has an unsupported extension, or lacks a ``text`` column.
	    """
	    if file is None:
	        return "❌ Please upload a CSV/XLSX file.", None

	    # Accept both an object carrying the path in .name and a bare path string.
	    path = getattr(file, "name", file)
	    ext = os.path.splitext(path)[-1].lower()
	    if ext == ".csv":
	        df = pd.read_csv(path)
	    elif ext in (".xls", ".xlsx"):
	        df = pd.read_excel(path)
	    else:
	        return "❌ Only CSV or Excel files are supported.", None

	    if "text" not in df.columns:
	        return "❌ The file must contain a 'text' column.", None

	    results = []
	    for t in df["text"]:
	        # Blank cells and non-string values get a placeholder row instead of
	        # being sent to the model.
	        if not isinstance(t, str) or not t.strip():
	            results.append(("N/A", 0.0, "Invalid text"))
	            continue
	        # truncation=True clips over-long rows to the model's maximum sequence
	        # length so one long cell does not abort the whole batch.
	        star_results = clf(t, return_all_scores=True, truncation=True)[0]
	        pred_label, conf, _probs3, top_star = _aggregate_to_3class(star_results)
	        results.append((pred_label, conf, top_star))

	    # Build each column separately: unpacking zip(*results) raises when the
	    # file has a 'text' header but no rows.
	    df["predicted_label_3class"] = [r[0] for r in results]
	    df["confidence_3class"] = [r[1] for r in results]
	    df["top_star_label"] = [r[2] for r in results]

	    out_path = "batch_results.xlsx"
	    df.to_excel(out_path, index=False)

	    return "✅ Batch analysis complete.", out_path

	# -------- Gradio UI --------
	# Two tabs: one wires a text box to analyze_single, the other wires a file
	# upload to analyze_batch. Both offer the same language-hint choices.
	_LANGUAGE_CHOICES = ["English", "Urdu", "Roman Urdu"]

	with gr.Blocks() as demo:
	    gr.Markdown(
	        "## 🌍 Multilingual Sentiment Analysis (Positive • Neutral • Negative)\n"
	        "Languages: English, Urdu, Roman Urdu \n"
	        "Model: `nlptown/bert-base-multilingual-uncased-sentiment` (mapped from 5★ → 3 classes)"
	    )

	    with gr.Tab("🔹 Single Text"):
	        user_text = gr.Textbox(label="Enter text", placeholder="Type in English, Urdu, or Roman Urdu...")
	        lang_dropdown = gr.Dropdown(_LANGUAGE_CHOICES, label="Language Hint", value="English")
	        btn = gr.Button("Analyze")

	        out_sent = gr.Textbox(label="Sentiment")
	        out_conf = gr.Textbox(label="Confidence (0–1)")
	        out_pol = gr.Textbox(label="Polarity")
	        out_file = gr.File(label="Download logs (.xlsx)")

	        # Output order matches the 4-tuple returned by analyze_single.
	        btn.click(
	            analyze_single,
	            inputs=[user_text, lang_dropdown],
	            outputs=[out_sent, out_conf, out_pol, out_file],
	        )

	    with gr.Tab("🔹 Batch Upload"):
	        gr.Markdown("Upload a CSV/XLSX file with a 'text' column for batch sentiment analysis.")
	        # ".xls" is listed because analyze_batch has an explicit branch for it.
	        file_in = gr.File(label="Upload CSV/XLSX", file_types=[".csv", ".xls", ".xlsx"])
	        lang_dropdown_batch = gr.Dropdown(_LANGUAGE_CHOICES, label="Language Hint", value="English")
	        btn_batch = gr.Button("Analyze Batch")

	        batch_status = gr.Textbox(label="Status")
	        batch_file = gr.File(label="Download Batch Results")

	        # Output order matches the (status, path) pair returned by analyze_batch.
	        btn_batch.click(
	            analyze_batch,
	            inputs=[file_in, lang_dropdown_batch],
	            outputs=[batch_status, batch_file],
	        )

	# Start the web server only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()