Spaces:

Intention
/

IntentionStudy

Sleeping

Intention

add rating slider

8bd360d 7 months ago

5.78 kB

	import streamlit as st
	import pandas as pd
	import json
	import scrubadub
	from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
	from pymongo.mongo_client import MongoClient
	from pymongo.server_api import ServerApi
	from datetime import datetime
	from uuid import uuid4

	# -----------------------------
	# Page Config
	# -----------------------------
	st.set_page_config(page_title="ChatGPT Log Analyzer", page_icon="🤖")

	# -----------------------------
	# Sidebar: App Navigation & File Upload
	# -----------------------------
	st.sidebar.title("⚙️ Settings")

	# Consent: seed the session key before the radio exists so the blank option
	# is the initial answer and later code can read the key unconditionally.
	if "consent" not in st.session_state:
	    st.session_state.consent = ""

	with st.sidebar.expander("Consent Form", expanded=True):
	    # key="consent" stores the chosen option under st.session_state.consent.
	    st.radio(
	        "Do you consent to participating in this study?",
	        ["", "Yes, I consent", "No, I do not consent"],
	        key="consent",
	    )

	# File Upload (the uploader only accepts .json files)
	uploaded_file = st.sidebar.file_uploader("📂 Upload ChatGPT export (.json)", type=["json"])

	# Privacy Policy in Sidebar
	# A single constant keeps the path that is opened and the path named in the
	# error message in sync (the message used to name a different file).
	PRIVACY_POLICY_PATH = "PrivacyPolicy.md"
	with st.sidebar.expander("Privacy Policy", expanded=False):
	    try:
	        # Explicit encoding so the policy renders the same on every platform.
	        with open(PRIVACY_POLICY_PATH, "r", encoding="utf-8") as f:
	            st.markdown(f.read())
	    except FileNotFoundError:
	        st.error(f"Privacy policy file not found. Please add `{PRIVACY_POLICY_PATH}`.")

	# -----------------------------
	# Consent Messages in Main Page
	# -----------------------------
	# Read the sidebar answer once, then report the outcome on the main page.
	consent_answer = st.session_state.consent

	if consent_answer == "No, I do not consent":
	    st.warning("⚠️ You did not consent. You can still use the app, but your logs will not be stored.")

	elif consent_answer == "Yes, I consent":
	    # Only build an ID when the session has none, so an existing ID is reused.
	    if "id" not in st.session_state:
	        timestamp_prefix = datetime.now().strftime('%Y%m-%d%H-%M-')
	        st.session_state.id = timestamp_prefix + str(uuid4())
	    st.success("✅ You consented to participate.")
	    st.info(f"Your anonymized ID is: {st.session_state.id}. Keep this if you want your data deleted later.")

	# -----------------------------
	# Parser Function
	# -----------------------------
	def parse_chatgpt_export(data):
	    """Flatten an export dict into a DataFrame with one row per message.

	    Args:
	        data: Mapping with a "conversations" list. Each conversation may
	            provide "id", "title" and a "mapping" of message id -> message
	            dict holding "author", "content" and "create_time".

	    Returns:
	        pandas.DataFrame with the columns conversation_id, title,
	        message_id, role, content and create_time. The frame has no
	        columns at all when no messages are found.
	    """
	    rows = []
	    # `or` fallbacks cover keys that are present but null, which a plain
	    # .get(key, default) would pass through as None.
	    for conv in data.get("conversations") or []:
	        conv_id = conv.get("id")
	        title = conv.get("title")

	        for msg_id, msg in (conv.get("mapping") or {}).items():
	            msg = msg or {}
	            author = msg.get("author") or {}
	            content = msg.get("content")
	            parts = content.get("parts") if isinstance(content, dict) else None
	            # Join only string parts: any other part type would make
	            # str.join raise TypeError.
	            text = "\n".join(part for part in parts or [] if isinstance(part, str))

	            rows.append({
	                "conversation_id": conv_id,
	                "title": title,
	                "message_id": msg_id,
	                "role": author.get("role") or "unknown",
	                "content": text,
	                "create_time": msg.get("create_time"),
	            })
	    return pd.DataFrame(rows)

	# -----------------------------
	# Main Content (only if file uploaded)
	# -----------------------------
	if uploaded_file:
	    # A malformed upload should produce a readable message, not a traceback.
	    # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
	    try:
	        data = json.load(uploaded_file)
	    except ValueError:
	        st.error("Could not read the uploaded file as JSON.")
	        st.stop()

	    if isinstance(data, dict) and "conversations" in data:
	        df = parse_chatgpt_export(data)
	    else:
	        st.error("Unsupported JSON structure")
	        st.stop()

	    # An export without messages yields a frame without columns; stop here
	    # instead of failing on the column lookups below.
	    if df.empty:
	        st.warning("No messages were found in the uploaded file.")
	        st.stop()

	    # Conversation Selector
	    st.subheader("🗂 Select a Conversation")
	    # Conversations are identified by (id, title). Titles alone are not
	    # unique, so filtering on the title would merge unrelated conversations,
	    # and a missing title would match no rows at all.
	    message_keys = list(zip(df["conversation_id"], df["title"]))
	    convo_keys = list(dict.fromkeys(message_keys))  # unique, first-seen order
	    convo_labels = [str(title) if title else "(untitled)" for _, title in convo_keys]
	    convo_pos = st.selectbox(
	        "Choose conversation",
	        range(len(convo_keys)),
	        format_func=lambda pos: convo_labels[pos],
	    )
	    selected_key = convo_keys[convo_pos]
	    selected_id, selected_title = selected_key

	    convo_df = df[[key == selected_key for key in message_keys]].copy()
	    convo_df = convo_df.reset_index(drop=True)

	    # Scrub + Sentiment
	    cleaner = scrubadub.Scrubber()
	    analyzer = SentimentIntensityAnalyzer()

	    # Sentiment is scored on the original text; only the redacted text is
	    # shown for editing.
	    original_texts = convo_df["content"].astype(str)
	    convo_df["redacted"] = original_texts.map(cleaner.clean)
	    convo_df["sentiment"] = original_texts.map(
	        lambda text: analyzer.polarity_scores(text)["compound"]
	    )

	    # Inline PII Editing + Rating
	    st.subheader(f"💬 Conversation: {convo_labels[convo_pos]}")
	    edited_texts = []
	    ratings = []
	    for i, row in convo_df.iterrows():
	        st.markdown(f"{str(row['role']).capitalize()} ({row['create_time']}):")

	        # Keys include the conversation position so edits and ratings made
	        # for one conversation cannot carry over to the message at the same
	        # index in another conversation.
	        widget_suffix = f"{convo_pos}_{i}"

	        # Editable text area for redacted content
	        edited_texts.append(
	            st.text_area(
	                f"Message {i}",
	                value=row["redacted"],
	                key=f"edit_{widget_suffix}",
	            )
	        )

	        # Rating selector (1-10 scale)
	        ratings.append(
	            st.slider(
	                f"Rate Message {i}",
	                min_value=1, max_value=10, value=5, step=1,
	                key=f"rating_{widget_suffix}",
	                help="How persuasive was this message?",
	            )
	        )

	    convo_df["redacted"] = edited_texts
	    convo_df["rating"] = ratings

	    # Show wrapped DataFrame with rating included
	    styled_df = (
	        convo_df[["role", "redacted", "sentiment", "rating", "create_time"]]
	        .style.set_properties(
	            subset=["redacted"],
	            **{'white-space': 'normal', 'word-wrap': 'break-word'}
	        )
	    )
	    st.dataframe(styled_df, use_container_width=True)

	    # Optional: Save to MongoDB
	    # Saving is offered only after explicit consent, matching the on-page
	    # statement that logs of non-consenting users are not stored.
	    if st.session_state.get("consent") == "Yes, I consent":
	        if st.button("📥 Save Conversation to Database"):
	            # NOTE(review): the unredacted "content" column is dropped so
	            # that only the scrubbed/edited text is persisted. Confirm no
	            # downstream analysis relies on the raw text being stored.
	            stored_messages = convo_df.drop(columns=["content"]).to_dict(orient="records")
	            record = {
	                # Stored so a later deletion request quoting the ID shown
	                # to the participant can be matched to this record.
	                "participant_id": st.session_state.get("id"),
	                "conversation_id": selected_id,
	                "title": selected_title,
	                "inserted_at": datetime.utcnow(),
	                "messages": stored_messages,  # includes rating
	            }
	            with MongoClient(st.secrets["mongo"], server_api=ServerApi('1')) as client:
	                client.bridge.app.insert_one(record)
	            st.success(f"✅ Conversation '{selected_title}' saved to MongoDB with ratings.")
	    else:
	        st.info("Saving is disabled until you consent to participate in the study.")