Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import json | |
| import scrubadub | |
| from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | |
| from pymongo.mongo_client import MongoClient | |
| from pymongo.server_api import ServerApi | |
| from datetime import datetime | |
| from uuid import uuid4 | |
# -----------------------------
# Page Config
# -----------------------------
# The garbled icon text decoded back to the robot emoji (bytes F0 9F A4 96).
st.set_page_config(page_title="ChatGPT Log Analyzer", page_icon="🤖")

# -----------------------------
# Sidebar: App Navigation & File Upload
# -----------------------------
st.sidebar.title("⚙️ Settings")

# Consent: seed the session key that the radio below is bound to.
if "consent" not in st.session_state:
    st.session_state.consent = ""
with st.sidebar.expander("Consent Form", expanded=True):
    st.radio(
        "**Do you consent to participating in this study?**",
        ["", "Yes, I consent", "No, I do not consent"],
        key="consent",
    )

# File Upload
# NOTE(review): the leading "π" is what is left of a mis-decoded emoji; the
# remaining bytes were lost, so the intended icon cannot be recovered here.
uploaded_file = st.sidebar.file_uploader("π Upload ChatGPT export (.json)", type=["json"])

# Privacy Policy in Sidebar
privacy_policy_path = "PrivacyPolicy.md"
with st.sidebar.expander("Privacy Policy", expanded=False):
    try:
        # Keep the try body limited to the read so only a missing file is caught.
        with open(privacy_policy_path, "r", encoding="utf-8") as f:
            policy_text = f.read()
    except FileNotFoundError:
        # Name the file that is actually opened (the old message pointed at
        # a different file name than the one the code reads).
        st.error(f"Privacy policy file not found. Please add `{privacy_policy_path}`.")
    else:
        st.markdown(policy_text)

# -----------------------------
# Consent Messages in Main Page
# -----------------------------
if st.session_state.consent == "Yes, I consent":
    # Create the participant ID once per session: a timestamp prefix plus a UUID4.
    if "id" not in st.session_state:
        st.session_state.id = datetime.now().strftime('%Y%m-%d%H-%M-') + str(uuid4())
    # NOTE(review): the leading "β" is a truncated mis-decoded emoji; confirm
    # the intended icon against the original source.
    st.success("β You consented to participate.")
    st.info(f"Your anonymized ID is: **{st.session_state.id}**. Keep this if you want your data deleted later.")
elif st.session_state.consent == "No, I do not consent":
    st.warning("⚠️ You did not consent. You can still use the app, but your logs will not be stored.")
# -----------------------------
# Parser Function
# -----------------------------
def parse_chatgpt_export(data):
    """Flatten an export dict into a DataFrame with one row per message.

    Args:
        data: Parsed JSON object with a top-level ``"conversations"`` list.
            Each conversation may carry ``"id"``, ``"title"`` and a
            ``"mapping"`` of message id -> message dict. A message dict may
            carry ``"author"`` (``{"role": ...}``), ``"content"``
            (``{"parts": [...]}``) and ``"create_time"``.

    Returns:
        A DataFrame with the columns ``conversation_id``, ``title``,
        ``message_id``, ``role``, ``content`` and ``create_time``. The
        columns are present even when the export contains no messages.
    """
    columns = [
        "conversation_id",
        "title",
        "message_id",
        "role",
        "content",
        "create_time",
    ]
    rows = []
    for conv in data.get("conversations") or []:
        if not isinstance(conv, dict):
            continue  # tolerate null / malformed conversation entries
        conv_id = conv.get("id")
        title = conv.get("title")
        # ``or {}`` also covers keys that are present but set to JSON null,
        # which ``.get(key, {})`` would pass through as None.
        for msg_id, msg in (conv.get("mapping") or {}).items():
            msg = msg or {}
            author = msg.get("author") or {}
            content = msg.get("content") or {}
            parts = content.get("parts") or []
            # Join text parts only: a non-string part would make str.join
            # raise TypeError, so such parts are left out of the text.
            text = "\n".join(part for part in parts if isinstance(part, str))
            rows.append({
                "conversation_id": conv_id,
                "title": title,
                "message_id": msg_id,
                "role": author.get("role") or "unknown",
                "content": text,
                "create_time": msg.get("create_time"),
            })
    # Explicit columns keep the schema stable for an empty export, so callers
    # can index by column name without a KeyError.
    return pd.DataFrame(rows, columns=columns)
# -----------------------------
# Main Content (only if file uploaded)
# -----------------------------
if uploaded_file:
    # Imported here because only this section uses them.
    from datetime import timezone
    from pymongo.errors import PyMongoError

    try:
        data = json.load(uploaded_file)
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        st.error("The uploaded file is not valid JSON.")
        st.stop()

    if isinstance(data, dict) and "conversations" in data:
        df = parse_chatgpt_export(data)
    else:
        st.error("Unsupported JSON structure")
        st.stop()

    if df.empty:
        # Nothing to select, scrub or rate; stop before indexing into the frame.
        st.warning("No messages were found in the uploaded export.")
        st.stop()

    # Conversation Selector
    # Select by conversation id rather than title: titles can repeat (which
    # would merge distinct conversations) or be missing (which would select
    # no rows at all). The title is only used as the display label.
    conv_keys = df["conversation_id"].astype(str)
    titles_by_key = dict(zip(conv_keys, df["title"]))

    def _display_title(key):
        """Return the title shown for a conversation key, or a placeholder."""
        title = titles_by_key[key]
        return title if isinstance(title, str) and title else "(untitled)"

    # NOTE(review): the leading "π" is a truncated mis-decoded emoji whose
    # remaining bytes were lost; confirm the intended icon.
    st.subheader("π Select a Conversation")
    selected_key = st.selectbox(
        "Choose conversation",
        list(titles_by_key),
        format_func=_display_title,
    )
    selected_title = titles_by_key[selected_key]
    display_title = _display_title(selected_key)
    convo_df = df[conv_keys == selected_key]

    # Scrub + Sentiment
    cleaner = scrubadub.Scrubber()
    analyzer = SentimentIntensityAnalyzer()
    messages = []
    for message in convo_df.to_dict(orient="records"):
        original_text = str(message["content"])
        messages.append({
            **message,
            "redacted": cleaner.clean(original_text),
            # Sentiment is scored on the original text, before redaction.
            "sentiment": analyzer.polarity_scores(original_text)["compound"],
        })

    # Inline PII Editing + Rating
    st.subheader(f"💬 Conversation: {display_title}")
    rated_messages = []
    for i, message in enumerate(messages):
        st.markdown(f"**{str(message['role']).capitalize()} ({message['create_time']}):**")
        # Scope widget keys to the conversation and message so edits and
        # ratings made in one conversation cannot be picked up by the widget
        # at the same position in another conversation.
        widget_id = f"{selected_key}_{message['message_id']}"
        # Editable text area for redacted content
        edited_text = st.text_area(
            f"Message {i}",
            value=message["redacted"],
            key=f"edit_{widget_id}",
        )
        # Rating selector (1-10 scale)
        rating = st.slider(
            f"Rate Message {i}",
            min_value=1, max_value=10, value=5, step=1,
            key=f"rating_{widget_id}",
            help="How persuasive was this message?",
        )
        rated_messages.append({
            **message,
            "redacted": edited_text,
            "rating": rating,
        })
    convo_df = pd.DataFrame(rated_messages)

    # Show wrapped DataFrame with rating included
    styled_df = (
        convo_df[["role", "redacted", "sentiment", "rating", "create_time"]]
        .style.set_properties(
            subset=["redacted"],
            **{'white-space': 'normal', 'word-wrap': 'break-word'}
        )
    )
    st.dataframe(styled_df, use_container_width=True)

    # Optional: Save to MongoDB
    # Saving is offered only after explicit consent, matching the notice that
    # logs are not stored for users who did not consent.
    if st.session_state.get("consent") == "Yes, I consent":
        if st.button("📥 Save Conversation to Database"):
            # Persist the redacted (and user-edited) text only; the raw
            # "content" field may still contain the PII that was scrubbed.
            stored_messages = [
                {field: value for field, value in message.items() if field != "content"}
                for message in rated_messages
            ]
            record = {
                # Stored so the participant's data can be located later via
                # the ID shown to them at consent time.
                "participant_id": st.session_state.get("id"),
                "conversation_id": rated_messages[0]["conversation_id"],
                "title": selected_title,
                # Timezone-aware replacement for the deprecated utcnow().
                "inserted_at": datetime.now(timezone.utc),
                "messages": stored_messages,
            }
            try:
                with MongoClient(st.secrets["mongo"], server_api=ServerApi('1')) as client:
                    client.bridge.app.insert_one(record)
            except PyMongoError as exc:
                st.error(
                    "Could not save the conversation to the database "
                    f"({type(exc).__name__}). Please try again later."
                )
            else:
                # NOTE(review): the leading "β" is a truncated mis-decoded
                # emoji; confirm the intended icon.
                st.success(f"β Conversation '{display_title}' saved to MongoDB with ratings.")
    else:
        st.info("Saving is disabled because you have not consented to participate.")