IntentionStudy / app.py
Intention
add rating slider
8bd360d
import streamlit as st
import pandas as pd
import json
import scrubadub
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from datetime import datetime
from uuid import uuid4
# -----------------------------
# Page configuration
# -----------------------------
st.set_page_config(page_title="ChatGPT Log Analyzer", page_icon="πŸ€–")

# -----------------------------
# Sidebar: settings, consent form and file upload
# -----------------------------
st.sidebar.title("βš™οΈ Settings")

# Seed the consent answer before the radio widget is created; the widget is
# bound to the same session-state key, so the user's choice is written there.
if "consent" not in st.session_state:
    st.session_state["consent"] = ""

with st.sidebar.expander("Consent Form", expanded=True):
    st.radio(
        label="**Do you consent to participating in this study?**",
        options=["", "Yes, I consent", "No, I do not consent"],
        key="consent",
    )

# Upload widget; the result is falsy until the user has provided a file.
uploaded_file = st.sidebar.file_uploader(
    label="πŸ“‚ Upload ChatGPT export (.json)",
    type=["json"],
)
# Privacy Policy in Sidebar
# Renders the Markdown file "PrivacyPolicy.md" (resolved against the current
# working directory) inside a collapsed sidebar expander.
with st.sidebar.expander("Privacy Policy", expanded=False):
    try:
        # Explicit encoding so the policy is decoded the same way regardless
        # of the platform default.
        with open("PrivacyPolicy.md", "r", encoding="utf-8") as f:
            policy_text = f.read()
    except FileNotFoundError:
        # The message names the file that is actually opened above.
        st.error("Privacy policy file not found. Please add `PrivacyPolicy.md`.")
    else:
        st.markdown(policy_text)
# -----------------------------
# Main page: feedback on the consent choice
# -----------------------------
consent_choice = st.session_state.consent

if consent_choice == "No, I do not consent":
    st.warning("⚠️ You did not consent. You can still use the app, but your logs will not be stored.")
elif consent_choice == "Yes, I consent":
    # Create the participant ID once per session: a timestamp prefix followed
    # by a random UUID. Later reruns reuse the stored value.
    if "id" not in st.session_state:
        timestamp_prefix = datetime.now().strftime('%Y%m-%d%H-%M-')
        st.session_state["id"] = timestamp_prefix + str(uuid4())
    st.success("βœ… You consented to participate.")
    st.info(f"Your anonymized ID is: **{st.session_state.id}**. Keep this if you want your data deleted later.")
# -----------------------------
# Parser Function
# -----------------------------
def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def parse_chatgpt_export(data):
    """Flatten an uploaded export into one DataFrame row per message.

    Reads ``data["conversations"]`` and, for each conversation, its ``id``,
    ``title`` and ``mapping``. Each ``mapping`` entry contributes one row built
    from the entry's ``author.role``, ``content.parts`` and ``create_time``.

    Missing, ``None`` or wrongly-typed pieces are treated as absent instead of
    raising: the role falls back to ``"unknown"`` and the content to ``""``.
    Non-string items in ``parts`` are skipped, because ``str.join`` only
    accepts strings.

    Args:
        data: Mapping decoded from the uploaded JSON file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``conversation_id``, ``title``,
        ``message_id``, ``role``, ``content`` and ``create_time``. The columns
        are present even when no messages were found.
    """
    columns = [
        "conversation_id",
        "title",
        "message_id",
        "role",
        "content",
        "create_time",
    ]
    rows = []
    for conv in data.get("conversations") or []:
        conv = _as_dict(conv)
        conv_id = conv.get("id")
        title = conv.get("title")
        for msg_id, msg in _as_dict(conv.get("mapping")).items():
            msg = _as_dict(msg)
            author = _as_dict(msg.get("author"))
            content = _as_dict(msg.get("content"))
            parts = content.get("parts")
            if not isinstance(parts, (list, tuple)):
                parts = []
            # Keep only textual parts; joining a non-string would raise.
            text = "\n".join(part for part in parts if isinstance(part, str))
            rows.append({
                "conversation_id": conv_id,
                "title": title,
                "message_id": msg_id,
                "role": author.get("role") or "unknown",
                "content": text,
                "create_time": msg.get("create_time"),
            })
    # Fixing the columns keeps the frame usable when ``rows`` is empty.
    return pd.DataFrame(rows, columns=columns)
# -----------------------------
# Main Content (only if file uploaded)
# -----------------------------
if uploaded_file:
    # A malformed upload should produce a readable error, not a traceback.
    # JSON decoding and text decoding failures are both ValueError subclasses.
    try:
        data = json.load(uploaded_file)
    except ValueError:
        st.error("Could not read the uploaded file as JSON.")
        st.stop()

    if isinstance(data, dict) and "conversations" in data:
        df = parse_chatgpt_export(data)
    else:
        st.error("Unsupported JSON structure")
        st.stop()

    # Without rows there is nothing to select, and the column lookups below
    # would fail.
    if df.empty:
        st.warning("No messages were found in the uploaded export.")
        st.stop()

    # Conversation Selector
    # NOTE(review): conversations are selected by title, so two conversations
    # sharing a title are shown and saved together. Confirm whether selecting
    # by conversation_id is wanted instead.
    st.subheader("πŸ—‚ Select a Conversation")
    convo_titles = df["title"].unique()
    selected_title = st.selectbox("Choose conversation", convo_titles)
    # ``== None`` never matches in pandas, so untitled conversations need an
    # explicit missing-value mask.
    if pd.isna(selected_title):
        title_mask = df["title"].isna()
    else:
        title_mask = df["title"] == selected_title
    convo_df = df[title_mask].reset_index(drop=True)

    # Scrub + Sentiment
    # The sentiment score is computed on the original text; the redacted copy
    # is what the user sees and edits below.
    cleaner = scrubadub.Scrubber()
    analyzer = SentimentIntensityAnalyzer()
    original_texts = [str(text) for text in convo_df["content"]]
    convo_df["redacted"] = [cleaner.clean(text) for text in original_texts]
    convo_df["sentiment"] = [
        analyzer.polarity_scores(text)["compound"] for text in original_texts
    ]

    # Inline PII Editing + Rating
    st.subheader(f"πŸ’¬ Conversation: {selected_title}")
    edited_texts = []
    ratings = []
    for i, row in convo_df.iterrows():
        st.markdown(f"**{str(row['role']).capitalize()} ({row['create_time']}):**")

        # Include the conversation id in the widget keys so edits and ratings
        # made for one conversation do not carry over to another one.
        widget_suffix = f"{row['conversation_id']}_{i}"

        # Editable text area for redacted content
        edited_texts.append(
            st.text_area(
                f"Message {i}",
                value=row["redacted"],
                key=f"edit_{widget_suffix}",
            )
        )

        # Rating selector (1-10 scale)
        ratings.append(
            st.slider(
                f"Rate Message {i}",
                min_value=1, max_value=10, value=5, step=1,
                key=f"rating_{widget_suffix}",
                help="How persuasive was this message?",
            )
        )

    convo_df["redacted"] = edited_texts
    convo_df["rating"] = ratings

    # Show wrapped DataFrame with rating included
    styled_df = (
        convo_df[["role", "redacted", "sentiment", "rating", "create_time"]]
        .style.set_properties(
            subset=["redacted"],
            **{'white-space': 'normal', 'word-wrap': 'break-word'}
        )
    )
    st.dataframe(styled_df, use_container_width=True)

    # Optional: Save to MongoDB
    # Saving is offered only after explicit consent, because the app tells
    # non-consenting users that their logs will not be stored.
    if st.session_state.get("consent") == "Yes, I consent":
        if st.button("πŸ“₯ Save Conversation to Database"):
            with MongoClient(st.secrets["mongo"], server_api=ServerApi('1')) as client:
                db = client.bridge
                collection = db.app
                record = {
                    # Stored so the ID shown to the participant can be used to
                    # find this record for a deletion request.
                    "participant_id": st.session_state.get("id"),
                    "conversation_id": convo_df["conversation_id"].iloc[0],
                    "title": selected_title,
                    "inserted_at": datetime.utcnow(),
                    # Drop the raw text: only the redacted/edited version
                    # should leave the session.
                    "messages": convo_df.drop(columns=["content"]).to_dict(orient="records"),
                }
                collection.insert_one(record)
            st.success(f"βœ… Conversation '{selected_title}' saved to MongoDB with ratings.")
    else:
        st.info("Saving is disabled until you consent to participate in the study.")