# Spaces: Sleeping
# (Hugging Face Spaces page-status text captured together with the source; not part of the program.)
# === ClimatePulse: Chatbot Analisis Opini Publik === | |
import html
import os
import re
from datetime import datetime

import altair as alt
import pandas as pd
import pydeck as pdk
import streamlit as st
import torch
from geopy.geocoders import Nominatim
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification, AutoModelForSequenceClassification
# === Page setup ===
st.set_page_config(page_title="ClimatePulse", layout="centered")

# === Load models & pipelines ===
# Device index handed to every pipeline: 0 when CUDA is available, otherwise -1.
device = 0 if torch.cuda.is_available() else -1


@st.cache_resource
def _load_pipelines(device_index):
    """Create the sentiment, emotion and NER pipelines.

    Wrapped in ``st.cache_resource`` so the three models are built once and
    reused across Streamlit reruns instead of being re-created on every
    widget interaction.

    Args:
        device_index: Device value passed to each ``pipeline`` call.

    Returns:
        Tuple ``(sentiment_pipeline, emotion_pipeline, ner_pipeline)``.
    """
    # Sentiment
    sentiment_name = "mdhugol/indonesia-bert-sentiment-classification"
    sentiment = pipeline(
        "sentiment-analysis",
        model=AutoModelForSequenceClassification.from_pretrained(sentiment_name),
        tokenizer=AutoTokenizer.from_pretrained(sentiment_name),
        device=device_index,
    )

    # Emotion
    emotion = pipeline(
        "sentiment-analysis",
        model="azizp128/prediksi-emosi-indobert",
        device=device_index,
    )

    # NER; "simple" aggregation merges word pieces into whole entity spans.
    ner_name = "cahya/bert-base-indonesian-NER"
    entities = pipeline(
        "ner",
        model=AutoModelForTokenClassification.from_pretrained(ner_name),
        tokenizer=AutoTokenizer.from_pretrained(ner_name),
        aggregation_strategy="simple",
        device=device_index,
    )
    return sentiment, emotion, entities


pipe_sent, pipe_emo, pipe_ner = _load_pipelines(device)

# Kept as module-level names for backward compatibility with code that
# referenced the tokenizer/model objects directly.
sent_tokenizer, sent_model = pipe_sent.tokenizer, pipe_sent.model
ner_tokenizer, ner_model = pipe_ner.tokenizer, pipe_ner.model

# Display names for the raw labels emitted by the sentiment model.
label_map = {'LABEL_0': 'Positif', 'LABEL_1': 'Netral', 'LABEL_2': 'Negatif'}
# === Custom dark-mode style ===
# Raw CSS for the page: dark backgrounds for the app container and sidebar,
# green headings and buttons, and dark text inputs. The string is emitted
# verbatim, so every character inside it is part of the page output.
page_bg = '''
<style>
[data-testid="stAppViewContainer"] {
background-color: #0e1525;
color: white;
}
[data-testid="stHeader"] {
background-color: rgba(0,0,0,0);
}
[data-testid="stSidebar"] > div:first-child {
background-color: #1f2937;
}
.block-container {
padding-top: 2rem;
padding-bottom: 2rem;
font-family: "Segoe UI", sans-serif;
}
h1, h2, h3, h4, h5 {
font-family: 'Segoe UI', sans-serif;
color: #10B981;
}
.stButton>button {
background-color: #10B981;
color: white;
border-radius: 8px;
padding: 0.5rem 1rem;
font-size: 1rem;
border: none;
}
.stTextInput>div>div>input, .stTextArea>div>textarea {
background-color: #1f2937;
color: white;
border-radius: 6px;
border: 1px solid #374151;
}
</style>
'''
# unsafe_allow_html lets the <style> element through as markup rather than text.
st.markdown(page_bg, unsafe_allow_html=True)
# === Page title with the logo placed to the left of the text ===
title_html = """
<div style='display: flex; flex-direction: column; justify-content: center;'>
<h4 style='color: #10B981; margin-bottom: 0;'>ClimatePulse - Analisis Opini SDG 13</h4>
<h1 style='color: white; margin-top: 0;'>Perubahan Iklim di Media Sosial</h1>
<p style='color: gray;'>Telusuri opini publik, sentimen, emosi, dan entitas terkait kebijakan dan bencana iklim</p>
</div>
"""

# Two columns in a 1:8 width ratio: narrow logo column, wide heading column.
col1, col2 = st.columns([1, 8])
col1.image("logo.png", width=60)
col2.markdown(title_html, unsafe_allow_html=True)
# === User input form ===
# Widgets inside the form are submitted together when the button is pressed;
# `submit` is the button's return value and `text_input` the text-area content.
with st.form(key="input_form"):
    text_input = st.text_area(
        "Input Teks / Tweet",
        placeholder="Contoh: PLTN dibangun di Papua, saya takut dan kecewa",
        height=120,
    )
    # NOTE(review): the button glyph was mis-encoded in the source ("π");
    # a magnifying glass is assumed here -- confirm against the intended label.
    submit = st.form_submit_button("🔍 ANALISIS")
# === Analysis and visualisation ===
# The single-text analysis, opinion map, sentiment trend and bulk CSV analysis follow below.
# === Shared configuration and helpers for the analysis sections ===

# CSV log that feeds the opinion map and the sentiment trend chart.
log_file = "log_tren.csv"
_LOG_COLUMNS = ["timestamp", "text", "sentimen", "emosi"]

# Lower-case Indonesian place keywords used to spot locations in text.
# Defined once for all sections; dict.fromkeys drops the repeated entries
# (e.g. "papua", "maluku", "palu") while keeping order, so a keyword can
# never be counted twice for the same text.
lokasi_kunci = list(dict.fromkeys([
    # === General regions / islands ===
    "sumatera", "jawa", "kalimantan", "sulawesi", "papua", "maluku", "nusa tenggara", "kepulauan seribu",
    # === Full province names (38) ===
    "aceh", "sumatera utara", "sumatera barat", "riau", "kepulauan riau", "jambi", "bengkulu",
    "sumatera selatan", "bangka belitung", "lampung",
    "banten", "dki jakarta", "jawa barat", "jawa tengah", "daerah istimewa yogyakarta", "jawa timur",
    "bali", "nusa tenggara barat", "nusa tenggara timur",
    "kalimantan barat", "kalimantan tengah", "kalimantan selatan", "kalimantan timur", "kalimantan utara",
    "sulawesi utara", "sulawesi tengah", "sulawesi selatan", "sulawesi tenggara", "gorontalo", "sulawesi barat",
    "maluku", "maluku utara",
    "papua", "papua barat", "papua selatan", "papua tengah", "papua pegunungan", "papua barat daya",
    # === Provincial capitals ===
    "banda aceh", "medan", "padang", "pekanbaru", "tanjungpinang", "jambi", "bengkulu",
    "palembang", "pangkalpinang", "bandar lampung",
    "serang", "jakarta", "bandung", "semarang", "yogyakarta", "surabaya",
    "denpasar", "mataram", "kupang",
    "pontianak", "palangka raya", "banjarmasin", "samarinda", "tarakan",
    "manado", "palu", "makassar", "kendari", "gorontalo", "mamuju",
    "ambon", "ternate",
    "jayapura", "manokwari", "merauke", "nabire", "wamena", "fakfak", "sorong", "timika",
    # === Major or strategic cities / regencies ===
    "bekasi", "bogor", "depok", "tangerang", "cirebon", "tegal", "purwokerto", "solo", "magelang",
    "malang", "kediri", "sidoarjo", "pasuruan", "probolinggo", "lumajang", "blitar", "jember",
    "banyuwangi", "cilacap", "padangsidimpuan", "binjai", "sibolga", "lubuklinggau", "palopo",
    "parepare", "bitung", "tomohon", "kotamobagu", "kotabaru", "pangkalan bun", "ketapang",
    "palu", "baubau", "karangasem", "buleleng", "labuan bajo", "ende", "bima", "dompu",
    # === New / special / authority areas ===
    "nusantara",  # new national capital in East Kalimantan
    "penajam paser utara", "balikpapan", "samarinda", "bontang",  # East Kalimantan area
    "kepri", "ntb", "ntt", "kaltim", "kalteng", "kalsel", "kalbar", "kaltara",  # popular abbreviations
    # === Customary / cultural regions (frequently mentioned) ===
    "minangkabau", "batak", "dayak", "asmat", "ambon", "bugis", "toraja", "sunda", "madura", "tapanuli",
]))

# One whole-word pattern per keyword. Plain substring tests produced false
# hits such as "kembali" -> "bali" or "jawaban" -> "jawa".
_lokasi_patterns = [
    (keyword, re.compile(r"\b" + re.escape(keyword) + r"\b"))
    for keyword in lokasi_kunci
]


def _find_location_keywords(text):
    """Return the capitalised keywords from ``lokasi_kunci`` found in *text* as whole words."""
    lowered = str(text).lower()
    return [keyword.capitalize() for keyword, pattern in _lokasi_patterns if pattern.search(lowered)]


def _extract_locations(ner_entities):
    """Collect location names from aggregated NER output.

    Entities tagged ``LOC`` are taken verbatim; any other entity is scanned
    for known place keywords. Duplicates are removed, first-seen order kept.
    """
    found = []
    for entity in ner_entities:
        if entity['entity_group'] == 'LOC':
            found.append(entity['word'])
        else:
            found.extend(_find_location_keywords(entity['word']))
    return list(dict.fromkeys(found))


def _analyze_text(text):
    """Run sentiment, emotion and NER on *text*.

    Returns:
        Tuple ``(sentiment label, emotion label, entity words, locations)``.
    """
    raw_label = pipe_sent(text)[0]['label']
    sentiment = label_map.get(raw_label, raw_label)
    emotion = pipe_emo(text)[0]['label'].capitalize()
    entities = pipe_ner(text)
    return sentiment, emotion, [e['word'] for e in entities], _extract_locations(entities)


def _append_log(rows):
    """Append analysis rows (dicts keyed by ``_LOG_COLUMNS``) to the trend log.

    Appends in place instead of re-reading and rewriting the whole file; the
    header is written only when the file is new or empty.
    """
    if not rows:
        return
    needs_header = not os.path.exists(log_file) or os.path.getsize(log_file) == 0
    pd.DataFrame(rows, columns=_LOG_COLUMNS).to_csv(
        log_file, mode="a", header=needs_header, index=False
    )


@st.cache_data(show_spinner=False)
def _geocode(place):
    """Return ``(latitude, longitude)`` for *place* via Nominatim, or ``None`` if unresolved.

    Cached so the map does not repeat a network lookup for the same place on
    every rerun. Errors propagate to the caller.
    """
    location = Nominatim(user_agent="climatepulse").geocode(place)
    if location is None:
        return None
    return (location.latitude, location.longitude)


def _geocode_rows(counts):
    """Turn a ``{place: count}`` mapping into map rows, skipping places that cannot be geocoded."""
    rows = []
    for place, count in counts.items():
        try:
            coords = _geocode(place)
        except Exception:
            # Best effort: a failed lookup only drops that marker. Not a bare
            # `except`, so KeyboardInterrupt/SystemExit and other
            # BaseException-based signals still propagate.
            continue
        if coords is not None:
            rows.append({'lokasi': place, 'lat': coords[0], 'lon': coords[1], 'jumlah': count})
    return rows


# === Single-text analysis ===
if submit and text_input.strip():
    with st.spinner("Menganalisis opini publik..."):
        sent_label, emo, ents, locs = _analyze_text(text_input)

    _append_log([{
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "text": text_input,
        "sentimen": sent_label,
        "emosi": emo,
    }])

    # NOTE(review): emoji in this script were mis-encoded in the source. The
    # glyphs for Sedih/Marah/Takut, the map heading and the download icon
    # decode unambiguously; the remaining ones are best guesses -- confirm.
    emoji_map = {
        "Senang": "😊",
        "Sedih": "😢",
        "Marah": "😡",
        "Takut": "😨",
        "Kecewa": "😞",
        "Netral": "😐",
    }

    # Entities and locations come from user-supplied text and are rendered
    # with unsafe_allow_html, so everything interpolated is HTML-escaped.
    lokasi_html = html.escape(', '.join(locs)) or "Tidak ditemukan"
    entitas_html = html.escape(', '.join(ents)) or "Tidak ditemukan"

    # === Show the result card ===
    # Built from unindented pieces so Markdown never treats a line as a code block.
    st.markdown(
        "<div style='background-color: #1f2937; padding: 1rem; border-radius: 10px;'>\n"
        "<h3 style='color: white;'>Hasil Analisis</h3>\n"
        f"<p><b>Sentimen:</b> <span style='color: red;'>{html.escape(sent_label)}</span> &emsp;|&emsp;\n"
        f"<b>Emosi:</b> <span style='color: #facc15;'>{html.escape(emo)} {emoji_map.get(emo, '')}</span></p>\n"
        f"<p><b>📍 Lokasi:</b> {lokasi_html}</p>\n"
        f"<p><b>🔎 Entitas:</b> {entitas_html}</p>\n"
        "</div>\n",
        unsafe_allow_html=True,
    )

# === Opinion map built from the log ===
# Read the log once for both the map and the trend chart.
if os.path.exists(log_file) and os.path.getsize(log_file) > 0:
    df_log = pd.read_csv(log_file)
else:
    df_log = None

if df_log is not None:
    lokasi_counter = {}
    # dropna/astype: read_csv can hand back NaN or numbers for logged text.
    for text in df_log['text'].dropna().astype(str):
        for lokasi in _find_location_keywords(text):
            lokasi_counter[lokasi] = lokasi_counter.get(lokasi, 0) + 1

    geo_locs = _geocode_rows(lokasi_counter)
    if geo_locs:
        map_df = pd.DataFrame(geo_locs)
        st.markdown("### 🗺️ Peta Opini Publik")
        st.pydeck_chart(pdk.Deck(
            map_style=None,
            initial_view_state=pdk.ViewState(latitude=-2.5, longitude=117.0, zoom=4, pitch=0),
            layers=[
                pdk.Layer(
                    "ScatterplotLayer",
                    data=map_df,
                    get_position='[lon, lat]',
                    get_color='[255, 100, 100, 160]',
                    # Marker radius grows with the number of logged opinions.
                    get_radius='jumlah * 10000',
                    pickable=True,
                    auto_highlight=True,
                )
            ],
            tooltip={"text": "{lokasi}: {jumlah} opini"},
        ))
    else:
        st.info("❗ Tidak ada lokasi yang berhasil dipetakan dari histori log.")

# === Sentiment trend over time ===
st.markdown("### 📈 Tren Waktu Sentimen")
if df_log is not None:
    df_log['timestamp'] = pd.to_datetime(df_log['timestamp'])
    df_log['tanggal'] = df_log['timestamp'].dt.date
    # One row per (date, sentiment) with the number of logged texts.
    trend_all = df_log.groupby(['tanggal', 'sentimen']).size().reset_index(name='jumlah')
    chart = alt.Chart(trend_all).mark_line(point=True).encode(
        x='tanggal:T',
        y='jumlah:Q',
        color='sentimen:N'
    ).properties(width=600)
    st.altair_chart(chart, use_container_width=True)

# === Bulk analysis of an uploaded CSV ===
st.markdown("---")
st.markdown("### 📥 Analisis CSV Massal")
uploaded_file = st.file_uploader("Upload file CSV berisi kolom 'text'", type=["csv"])
if uploaded_file is None:
    # Forget the previous result so a re-upload is analysed again.
    st.session_state.pop("_bulk_result", None)
else:
    df_csv = pd.read_csv(uploaded_file)
    st.write("Pratinjau Data:", df_csv.head())
    if "text" not in df_csv.columns:
        st.warning("Kolom 'text' tidak ditemukan pada file CSV.")
    else:
        # The script reruns on every interaction (including the download
        # button). Keying the result on the file content avoids re-running the
        # models and appending the same rows to the log again.
        file_key = hash(uploaded_file.getvalue())
        cached_result = st.session_state.get("_bulk_result")
        if cached_result is None or cached_result[0] != file_key:
            result_data = []
            log_rows = []
            # Missing cells are dropped instead of being analysed as "nan".
            for text in df_csv["text"].dropna().astype(str):
                if not text.strip():
                    continue
                # NOTE(review): the per-row geocoding of locations was dropped;
                # its result (geo_locs) is not read anywhere in this script and
                # cost one network request per location per row. Restore it if
                # code outside this file depends on it.
                sent_label, emo, ents, _ = _analyze_text(text)
                now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                log_rows.append({"timestamp": now, "text": text, "sentimen": sent_label, "emosi": emo})
                result_data.append({
                    "text": text,
                    "sentimen": sent_label,
                    "emosi": emo,
                    "entitas": ", ".join(ents),
                })
            _append_log(log_rows)
            st.session_state["_bulk_result"] = (
                file_key,
                pd.DataFrame(result_data, columns=["text", "sentimen", "emosi", "entitas"]),
            )

        df_result = st.session_state["_bulk_result"][1]
        st.success("Analisis selesai!")
        st.dataframe(df_result)
        csv_download = df_result.to_csv(index=False).encode('utf-8')
        st.download_button("📥 Download Hasil CSV", csv_download, "hasil_analisis.csv", "text/csv")