"""Streamlit app for reviewing, redacting, rating and storing ChatGPT logs.

Flow: the participant answers the consent form, uploads a JSON export, picks
one conversation, reviews the automatically scrubbed text of every message,
rates each message, and may then save the result to MongoDB. Nothing is
written to the database unless the participant has consented.
"""
import json
import logging
from datetime import datetime, timezone
from uuid import uuid4

import pandas as pd
import scrubadub
import streamlit as st
from pymongo.errors import PyMongoError
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

logger = logging.getLogger(__name__)

CONSENT_YES = "Yes, I consent"
CONSENT_NO = "No, I do not consent"
PRIVACY_POLICY_PATH = "PrivacyPolicy.md"
MESSAGE_COLUMNS = [
    "conversation_id",
    "title",
    "message_id",
    "role",
    "content",
    "create_time",
]

# -----------------------------
# Page Config
# -----------------------------
st.set_page_config(page_title="ChatGPT Log Analyzer", page_icon="🤖")

# -----------------------------
# Sidebar: App Navigation & File Upload
# -----------------------------
st.sidebar.title("⚙️ Settings")

# Consent
if "consent" not in st.session_state:
    st.session_state.consent = ""

with st.sidebar.expander("Consent Form", expanded=True):
    st.radio(
        "**Do you consent to participating in this study?**",
        ["", CONSENT_YES, CONSENT_NO],
        key="consent",
    )

# File Upload
uploaded_file = st.sidebar.file_uploader(
    "📂 Upload ChatGPT export (.json)", type=["json"]
)

# Privacy Policy in Sidebar
with st.sidebar.expander("Privacy Policy", expanded=False):
    try:
        with open(PRIVACY_POLICY_PATH, "r", encoding="utf-8") as f:
            policy_text = f.read()
    except FileNotFoundError:
        # The message names the file that is actually opened above.
        st.error(
            f"Privacy policy file not found. Please add `{PRIVACY_POLICY_PATH}`."
        )
    else:
        st.markdown(policy_text)

# -----------------------------
# Consent Messages in Main Page
# -----------------------------
has_consented = st.session_state.consent == CONSENT_YES

if has_consented:
    if "id" not in st.session_state:
        st.session_state.id = (
            datetime.now().strftime('%Y%m-%d%H-%M-') + str(uuid4())
        )
    st.success("✅ You consented to participate.")
    st.info(
        f"Your anonymized ID is: **{st.session_state.id}**. "
        "Keep this if you want your data deleted later."
    )
elif st.session_state.consent == CONSENT_NO:
    st.warning(
        "⚠️ You did not consent. "
        "You can still use the app, but your logs will not be stored."
    )


# -----------------------------
# Parser Function
# -----------------------------
def parse_chatgpt_export(data):
    """Flatten an export dict into one DataFrame row per message.

    Args:
        data: Mapping with a ``"conversations"`` list. Each conversation is
            read for ``"id"``, ``"title"`` and ``"mapping"``; each mapping
            value is read for ``"author"`` (``"role"``), ``"content"``
            (``"parts"``) and ``"create_time"``.

    Returns:
        A ``pandas.DataFrame`` with the columns listed in
        ``MESSAGE_COLUMNS``. The columns are present even when the export
        contains no messages, so callers can index them unconditionally.
    """
    rows = []
    for conv in data.get("conversations") or []:
        if not isinstance(conv, dict):
            continue
        conv_id = conv.get("id")
        title = conv.get("title")
        for msg_id, msg in (conv.get("mapping") or {}).items():
            if not isinstance(msg, dict):
                continue
            # `or {}` / `or []` also cover keys that are present but null.
            author = msg.get("author") or {}
            content = msg.get("content") or {}
            parts = content.get("parts") or []
            # Non-text parts cannot be joined into a string; keep only the
            # textual ones instead of raising TypeError.
            text = "\n".join(part for part in parts if isinstance(part, str))
            rows.append({
                "conversation_id": conv_id,
                "title": title,
                "message_id": msg_id,
                "role": author.get("role") or "unknown",
                "content": text,
                "create_time": msg.get("create_time"),
            })
    return pd.DataFrame(rows, columns=MESSAGE_COLUMNS)


# -----------------------------
# Main Content (only if file uploaded)
# -----------------------------
if uploaded_file:
    try:
        data = json.load(uploaded_file)
    except (json.JSONDecodeError, UnicodeDecodeError):
        st.error("The uploaded file is not valid JSON.")
        st.stop()

    if isinstance(data, dict) and "conversations" in data:
        df = parse_chatgpt_export(data)
    else:
        st.error("Unsupported JSON structure")
        st.stop()

    if df.empty:
        st.warning("No messages were found in the uploaded export.")
        st.stop()

    # Conversation Selector
    st.subheader("🗂 Select a Conversation")
    # Select by conversation id rather than title: titles can repeat or be
    # missing, which would merge conversations or select nothing at all.
    title_by_id = {}
    for conv_id, title in zip(df["conversation_id"], df["title"]):
        title_by_id.setdefault(conv_id, title)
    labels = {
        conv_id: f"{title or 'Untitled'} · {str(conv_id)[:8]}"
        for conv_id, title in title_by_id.items()
    }
    selected_id = st.selectbox(
        "Choose conversation", list(title_by_id), format_func=labels.get
    )
    selected_title = title_by_id[selected_id]

    if pd.isna(selected_id):
        mask = df["conversation_id"].isna()
    else:
        mask = df["conversation_id"] == selected_id
    convo_df = df[mask]

    # Scrub + Sentiment
    cleaner = scrubadub.Scrubber()
    analyzer = SentimentIntensityAnalyzer()

    redacted_rows = []
    for row in convo_df.to_dict(orient="records"):
        original_text = str(row["content"])
        redacted_rows.append({
            **row,
            "redacted": cleaner.clean(original_text),
            # Sentiment is scored on the original text, before redaction.
            "sentiment": analyzer.polarity_scores(original_text)["compound"],
        })

    # Inline PII Editing + Rating
    st.subheader(f"💬 Conversation: {selected_title}")

    edited_rows = []
    for i, row in enumerate(redacted_rows):
        st.markdown(
            f"**{str(row['role']).capitalize()} ({row['create_time']}):**"
        )

        # Keys include the conversation id so that widgets belonging to
        # different conversations never share a session-state entry.
        widget_suffix = f"{selected_id}_{i}"

        # Editable text area for redacted content
        edited_text = st.text_area(
            f"Message {i}",
            value=row["redacted"],
            key=f"edit_{widget_suffix}",
        )

        # Rating selector (1-10 scale)
        rating = st.slider(
            f"Rate Message {i}",
            min_value=1,
            max_value=10,
            value=5,
            step=1,
            key=f"rating_{widget_suffix}",
            help="How persuasive was this message?",
        )

        edited_rows.append({
            **row,
            "redacted": edited_text,
            "rating": rating,
        })

    convo_df = pd.DataFrame(edited_rows)

    # Show wrapped DataFrame with rating included
    styled_df = (
        convo_df[["role", "redacted", "sentiment", "rating", "create_time"]]
        .style.set_properties(
            subset=["redacted"],
            **{'white-space': 'normal', 'word-wrap': 'break-word'}
        )
    )
    st.dataframe(styled_df, use_container_width=True)

    # Optional: Save to MongoDB
    if st.button("📥 Save Conversation to Database"):
        if not has_consented:
            # Honour the promise made in the consent warning: without
            # consent nothing is written to the database.
            st.warning(
                "⚠️ Nothing was saved: logs are only stored for participants "
                "who have consented."
            )
        else:
            record = {
                # Stored so the participant's data can be found again when
                # they ask for deletion using their anonymized ID.
                "participant_id": st.session_state.id,
                "conversation_id": selected_id,
                "title": selected_title,
                "inserted_at": datetime.now(timezone.utc),
                # Persist only the reviewed, redacted text; the raw
                # `content` field may still contain personal data.
                "messages": [
                    {k: v for k, v in row.items() if k != "content"}
                    for row in edited_rows
                ],
            }
            try:
                with MongoClient(
                    st.secrets["mongo"], server_api=ServerApi('1')
                ) as client:
                    client.bridge.app.insert_one(record)
            except PyMongoError:
                # Log the details server-side; the UI message stays generic
                # so connection details are not shown to participants.
                logger.exception("Failed to save conversation to MongoDB")
                st.error(
                    "❌ Could not save the conversation. Please try again later."
                )
            else:
                st.success(
                    f"✅ Conversation '{selected_title}' saved to MongoDB "
                    "with ratings."
                )