{
"cells": [
{
"cell_type": "markdown",
"id": "78a5a468-2d75-4a6b-a352-e0318bc2d091",
"metadata": {},
"source": [
"# Spike Detection Algorithm Analysis\n",
"\n",
"Can we automatically detect when there's lots of positive or negative sentiment discussion in a subreddit on a particular day?\n",
"\n",
"The goal of this notebook is to identify \"spikes\" in the daily average sentiment score\n",
"- We care about both positive and negative spikes\n",
"- We want the \"spike\" to be significant relative to historical values\n",
"\n",
"This notebook\n",
"- Walks through importing data from the HuggingFace dataset hub\n",
"- Compares a few different spike definitions based on rolling statistics\n",
"- Selects the best spike definition based on sanity checking against historical data"
]
},
{
"cell_type": "markdown",
"id": "453c6fa8-2c87-4bc1-96c1-a2505885e36a",
"metadata": {},
"source": [
"## Imports and Definitions"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "a334cc8c-9260-44cc-a2bc-a51860aa207a",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:55:16.089718Z",
"iopub.status.busy": "2025-06-25T22:55:16.089718Z",
"iopub.status.idle": "2025-06-25T22:55:17.267352Z",
"shell.execute_reply": "2025-06-25T22:55:17.267065Z",
"shell.execute_reply.started": "2025-06-25T22:55:16.089718Z"
}
},
"outputs": [],
"source": [
"import os\n",
"import glob\n",
"import datetime\n",
"from pathlib import Path\n",
"from dotenv import load_dotenv\n",
"import pandas as pd\n",
"import numpy as np\n",
"import pyarrow\n",
"\n",
"from huggingface_hub import HfApi\n",
"from huggingface_hub.utils import disable_progress_bars\n",
"\n",
"disable_progress_bars() # turns off all HF progress bars"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "393c87bf-19ce-41c6-a19d-f0469d080024",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:55:17.796155Z",
"iopub.status.busy": "2025-06-25T22:55:17.796155Z",
"iopub.status.idle": "2025-06-25T22:55:17.802963Z",
"shell.execute_reply": "2025-06-25T22:55:17.802963Z",
"shell.execute_reply.started": "2025-06-25T22:55:17.796155Z"
}
},
"outputs": [],
"source": [
"def load_reddit_data(folder='data_scored', repo_id=\"hblim/top_reddit_posts_daily\"):\n",
"    \"\"\"Download every parquet shard under `folder` of a Hub dataset repo and stack them.\n",
"\n",
"    Args:\n",
"        folder: Path prefix inside the dataset repo; only files whose path starts\n",
"            with it and ends in `.parquet` are loaded.\n",
"        repo_id: HuggingFace dataset repository to read from.\n",
"\n",
"    Returns:\n",
"        One DataFrame with the rows of all shards (fresh 0..n-1 index) plus a\n",
"        `filedate` column holding the shard's file name without its extension.\n",
"\n",
"    Raises:\n",
"        ValueError: if no parquet file in the repo matches `folder`.\n",
"    \"\"\"\n",
"    api = HfApi()\n",
"    all_files = api.list_repo_files(repo_id, repo_type=\"dataset\")\n",
"    parquet_files = sorted(f for f in all_files if f.startswith(folder) and f.endswith(\".parquet\"))\n",
"    if not parquet_files:\n",
"        # pd.concat([]) would otherwise fail with an opaque \"No objects to concatenate\".\n",
"        raise ValueError(f\"No .parquet files found under '{folder}' in dataset repo '{repo_id}'\")\n",
"\n",
"    frames = []\n",
"    for shard in parquet_files:\n",
"        local_path = api.hf_hub_download(repo_id=repo_id, filename=shard, repo_type=\"dataset\")\n",
"        file_date = os.path.splitext(os.path.basename(local_path))[0]\n",
"        frames.append(pd.read_parquet(local_path).assign(filedate=file_date))\n",
"\n",
"    df = pd.concat(frames, ignore_index=True)\n",
"    print(f\"Total records across {df.filedate.nunique()} days: {len(df)}\")\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "23cdc75b-55cf-4bcb-970e-c0339f1a60cb",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:55:18.545488Z",
"iopub.status.busy": "2025-06-25T22:55:18.545488Z",
"iopub.status.idle": "2025-06-25T22:55:18.552202Z",
"shell.execute_reply": "2025-06-25T22:55:18.552202Z",
"shell.execute_reply.started": "2025-06-25T22:55:18.545488Z"
}
},
"outputs": [],
"source": [
"def summary_from_df(df: pd.DataFrame, gamma_post: float = 0.3) -> pd.DataFrame:\n",
"    \"\"\"\n",
"    Return one engagement-weighted sentiment value per (date, subreddit).\n",
"\n",
"    Expects columns:\n",
"        retrieved_at - timestamp or ISO-date string (parsed with pd.to_datetime)\n",
"        subreddit    - subreddit name\n",
"        sentiment    - numeric score; assumed to lie in [0, 1], since the result\n",
"                       is rescaled with 2 * x - 1\n",
"        score        - post/comment score used for weighting\n",
"                       (non-numeric -> 0, negatives clipped to 0)\n",
"        type         - optional; rows equal to \"post\" have their weight\n",
"                       multiplied by gamma_post\n",
"\n",
"    Args:\n",
"        df: Row-level records. Not modified.\n",
"        gamma_post: Weight multiplier applied to rows whose `type` is \"post\".\n",
"\n",
"    Output columns:\n",
"        date (datetime.date)\n",
"        subreddit\n",
"        community_weighted_sentiment - 2 * weighted_mean(sentiment) - 1, where each\n",
"            row's weight is (1 + log1p(score)) * (gamma_post for posts, else 1)\n",
"    \"\"\"\n",
"    # Per-row engagement weight.\n",
"    score = pd.to_numeric(df[\"score\"], errors=\"coerce\").fillna(0).clip(lower=0)\n",
"    weight = 1 + np.log1p(score)\n",
"    if \"type\" in df.columns:\n",
"        weight = weight * np.where(df[\"type\"] == \"post\", gamma_post, 1.0)\n",
"\n",
"    # Build the working frame *before* grouping, so the aggregation never relies on\n",
"    # columns being added to a frame after its GroupBy object was created.\n",
"    rows = pd.DataFrame(\n",
"        {\n",
"            \"date\": pd.to_datetime(df[\"retrieved_at\"]).dt.date,\n",
"            \"subreddit\": df[\"subreddit\"],\n",
"            \"weight\": weight,\n",
"            \"weighted_sentiment\": weight * df[\"sentiment\"],\n",
"        }\n",
"    )\n",
"    totals = (\n",
"        rows.groupby([\"date\", \"subreddit\"])[[\"weighted_sentiment\", \"weight\"]]\n",
"        .sum()\n",
"        .reset_index()\n",
"    )\n",
"\n",
"    # Weighted mean per group; a group whose total weight is 0 falls back to a raw\n",
"    # value of 0 (i.e. -1 after rescaling) instead of dividing by zero.\n",
"    has_weight = totals[\"weight\"] > 0\n",
"    raw = (totals[\"weighted_sentiment\"] / totals[\"weight\"]).where(has_weight, 0.0)\n",
"    totals[\"community_weighted_sentiment\"] = 2 * raw - 1\n",
"\n",
"    return totals[[\"date\", \"subreddit\", \"community_weighted_sentiment\"]]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1fa7ab09-eee2-435a-87b8-38acd91c0a4e",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:55:22.130355Z",
"iopub.status.busy": "2025-06-25T22:55:22.130355Z",
"iopub.status.idle": "2025-06-25T22:55:42.914076Z",
"shell.execute_reply": "2025-06-25T22:55:42.914076Z",
"shell.execute_reply.started": "2025-06-25T22:55:22.130355Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total records across 258 days: 82180\n"
]
}
],
"source": [
"# Load every parquet shard from the default folder of the dataset repo into one DataFrame.\n",
"df = load_reddit_data()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "0597fb74-e300-4a7e-ae63-5732cb22ed59",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:55:42.915081Z",
"iopub.status.busy": "2025-06-25T22:55:42.915081Z",
"iopub.status.idle": "2025-06-25T22:55:43.068733Z",
"shell.execute_reply": "2025-06-25T22:55:43.068733Z",
"shell.execute_reply.started": "2025-06-25T22:55:42.915081Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" subreddit \n",
" community_weighted_sentiment \n",
" \n",
" \n",
" date \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 2025-05-01 \n",
" LocalLLaMA \n",
" -0.477880 \n",
" \n",
" \n",
" 2025-05-01 \n",
" OpenAI \n",
" -0.516990 \n",
" \n",
" \n",
" 2025-05-01 \n",
" artificial \n",
" -0.506502 \n",
" \n",
" \n",
" 2025-05-01 \n",
" singularity \n",
" -0.435683 \n",
" \n",
" \n",
" 2025-05-02 \n",
" LocalLLaMA \n",
" -0.434680 \n",
" \n",
" \n",
" 2025-05-02 \n",
" OpenAI \n",
" -0.487859 \n",
" \n",
" \n",
" 2025-05-02 \n",
" artificial \n",
" -0.504200 \n",
" \n",
" \n",
" 2025-05-02 \n",
" singularity \n",
" -0.415097 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subreddit community_weighted_sentiment\n",
"date \n",
"2025-05-01 LocalLLaMA -0.477880\n",
"2025-05-01 OpenAI -0.516990\n",
"2025-05-01 artificial -0.506502\n",
"2025-05-01 singularity -0.435683\n",
"2025-05-02 LocalLLaMA -0.434680\n",
"2025-05-02 OpenAI -0.487859\n",
"2025-05-02 artificial -0.504200\n",
"2025-05-02 singularity -0.415097"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Aggregate to one weighted-sentiment row per (date, subreddit), indexed by date.\n",
"df_summary = summary_from_df(df).set_index('date',drop=True)\n",
"df_summary.head(8)"
]
},
{
"cell_type": "markdown",
"id": "cd6d07cd-a56d-409e-a16b-2c565ee088b6",
"metadata": {},
"source": [
"## Analysis to Determine Spike Metric"
]
},
{
"cell_type": "markdown",
"id": "91bfb2e1-a11a-48b6-9f0a-2e50d60f1630",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"### Method 1: Calculate Z-score based on mean and std of prior week"
]
},
{
"cell_type": "markdown",
"id": "16f54df0-db57-4b75-8fa4-2f7fd9c81f9f",
"metadata": {},
"source": [
"Use prior week mean and std"
]
},
{
"cell_type": "code",
"execution_count": 184,
"id": "2024fb08-fbf5-4d8c-b56a-bae1d5e05730",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-24T21:58:50.873829Z",
"iopub.status.busy": "2025-06-24T21:58:50.873829Z",
"iopub.status.idle": "2025-06-24T21:58:50.886826Z",
"shell.execute_reply": "2025-06-24T21:58:50.886826Z",
"shell.execute_reply.started": "2025-06-24T21:58:50.873829Z"
}
},
"outputs": [],
"source": [
"# Trailing 7-row window per subreddit over the daily sentiment series.\n",
"rolling = df_summary.groupby('subreddit')[['community_weighted_sentiment']].rolling(7)\n",
"\n",
"# shift(1) within each subreddit so a row only sees statistics of the rows before it.\n",
"mean_cws, std_cws = (\n",
"    stat.groupby('subreddit').shift(1).reset_index().rename(columns={'community_weighted_sentiment': label})\n",
"    for stat, label in ((rolling.mean(), 'mean_cws'), (rolling.std(), 'std_cws'))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 185,
"id": "a88046cc-0361-47f8-8aa4-813506afdecb",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-24T21:58:51.299169Z",
"iopub.status.busy": "2025-06-24T21:58:51.299169Z",
"iopub.status.idle": "2025-06-24T21:58:51.310232Z",
"shell.execute_reply": "2025-06-24T21:58:51.310232Z",
"shell.execute_reply.started": "2025-06-24T21:58:51.299169Z"
}
},
"outputs": [],
"source": [
"# Attach the lagged rolling mean/std to each (date, subreddit) row, then standardise.\n",
"df_history = (\n",
"    df_summary\n",
"    .merge(mean_cws, how='outer', on=['date', 'subreddit'])\n",
"    .merge(std_cws, how='outer', on=['date', 'subreddit'])\n",
"    .assign(z_score=lambda d: (d['community_weighted_sentiment'] - d['mean_cws']) / d['std_cws'])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 186,
"id": "ce9f0566-6014-412b-ab91-677f24189245",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-24T21:58:51.497646Z",
"iopub.status.busy": "2025-06-24T21:58:51.497646Z",
"iopub.status.idle": "2025-06-24T21:58:51.514844Z",
"shell.execute_reply": "2025-06-24T21:58:51.514844Z",
"shell.execute_reply.started": "2025-06-24T21:58:51.497646Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" subreddit \n",
" LocalLLaMA \n",
" OpenAI \n",
" artificial \n",
" singularity \n",
" \n",
" \n",
" date \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 2025-05-08 \n",
" -0.044194 \n",
" -0.119832 \n",
" -0.871183 \n",
" -1.053798 \n",
" \n",
" \n",
" 2025-05-09 \n",
" 0.403591 \n",
" 1.229599 \n",
" -1.078368 \n",
" -1.174753 \n",
" \n",
" \n",
" 2025-05-10 \n",
" 0.502283 \n",
" -1.493683 \n",
" -0.093901 \n",
" -0.676206 \n",
" \n",
" \n",
" 2025-05-11 \n",
" -1.258698 \n",
" 0.342304 \n",
" -0.902464 \n",
" 1.518415 \n",
" \n",
" \n",
" 2025-05-12 \n",
" -2.167469 \n",
" -0.389167 \n",
" -0.588013 \n",
" -1.014135 \n",
" \n",
" \n",
" 2025-05-13 \n",
" 0.332501 \n",
" -3.434951 \n",
" 2.930380 \n",
" 0.205499 \n",
" \n",
" \n",
" 2025-05-14 \n",
" 3.364345 \n",
" -0.956420 \n",
" -0.526388 \n",
" 1.531968 \n",
" \n",
" \n",
" 2025-05-15 \n",
" -0.725389 \n",
" 0.081388 \n",
" -2.332290 \n",
" -0.305970 \n",
" \n",
" \n",
" 2025-05-16 \n",
" -0.653573 \n",
" -0.582556 \n",
" -0.770156 \n",
" 0.037852 \n",
" \n",
" \n",
" 2025-05-17 \n",
" -0.804320 \n",
" 0.412928 \n",
" 0.356351 \n",
" -1.132476 \n",
" \n",
" \n",
" 2025-05-18 \n",
" 0.042610 \n",
" 1.183579 \n",
" 0.572702 \n",
" -1.373600 \n",
" \n",
" \n",
" 2025-05-19 \n",
" 0.045067 \n",
" -2.333859 \n",
" -0.364828 \n",
" 1.027555 \n",
" \n",
" \n",
" 2025-05-20 \n",
" 0.091186 \n",
" 1.945140 \n",
" 1.578278 \n",
" 1.261514 \n",
" \n",
" \n",
" 2025-05-21 \n",
" -0.533151 \n",
" 0.225542 \n",
" -1.494234 \n",
" 1.073047 \n",
" \n",
" \n",
" 2025-05-22 \n",
" 0.955171 \n",
" -0.053678 \n",
" -1.962915 \n",
" -1.005345 \n",
" \n",
" \n",
" 2025-05-23 \n",
" -2.340671 \n",
" 0.380027 \n",
" 2.630616 \n",
" 0.696121 \n",
" \n",
" \n",
" 2025-05-24 \n",
" 1.211108 \n",
" 2.414997 \n",
" 1.516051 \n",
" -0.078288 \n",
" \n",
" \n",
" 2025-05-25 \n",
" -1.167484 \n",
" 0.952176 \n",
" 0.798131 \n",
" 0.313335 \n",
" \n",
" \n",
" 2025-05-26 \n",
" 0.362807 \n",
" -0.814531 \n",
" 1.366979 \n",
" -0.162863 \n",
" \n",
" \n",
" 2025-05-27 \n",
" -2.376909 \n",
" -0.327798 \n",
" -1.540509 \n",
" 1.831157 \n",
" \n",
" \n",
" 2025-05-28 \n",
" 0.506768 \n",
" -0.099355 \n",
" -0.040696 \n",
" -0.281792 \n",
" \n",
" \n",
" 2025-05-29 \n",
" 1.429324 \n",
" -0.846192 \n",
" 2.571573 \n",
" -0.281824 \n",
" \n",
" \n",
" 2025-05-30 \n",
" 1.015244 \n",
" 0.004725 \n",
" -1.024469 \n",
" -0.622153 \n",
" \n",
" \n",
" 2025-05-31 \n",
" -2.224623 \n",
" 0.047550 \n",
" 0.600559 \n",
" -0.899762 \n",
" \n",
" \n",
" 2025-06-01 \n",
" 1.052451 \n",
" 1.328365 \n",
" 0.741410 \n",
" -1.496282 \n",
" \n",
" \n",
" 2025-06-02 \n",
" 0.520514 \n",
" -0.370123 \n",
" -1.168059 \n",
" -1.622483 \n",
" \n",
" \n",
" 2025-06-03 \n",
" 2.091989 \n",
" -2.331258 \n",
" -0.880932 \n",
" -1.191772 \n",
" \n",
" \n",
" 2025-06-04 \n",
" -0.333966 \n",
" -1.151395 \n",
" -0.870942 \n",
" 3.864479 \n",
" \n",
" \n",
" 2025-06-05 \n",
" -0.134243 \n",
" 1.060434 \n",
" -1.354283 \n",
" 1.134746 \n",
" \n",
" \n",
" 2025-06-06 \n",
" 1.412021 \n",
" -0.210666 \n",
" -0.126506 \n",
" 1.131646 \n",
" \n",
" \n",
" 2025-06-07 \n",
" 0.005485 \n",
" 0.743742 \n",
" 0.393000 \n",
" 0.812612 \n",
" \n",
" \n",
" 2025-06-08 \n",
" -1.177690 \n",
" 0.077874 \n",
" -0.769660 \n",
" -0.886763 \n",
" \n",
" \n",
" 2025-06-09 \n",
" 0.099286 \n",
" 1.456970 \n",
" 1.462335 \n",
" -0.743442 \n",
" \n",
" \n",
" 2025-06-10 \n",
" 2.093233 \n",
" -0.617740 \n",
" 0.492822 \n",
" -0.101424 \n",
" \n",
" \n",
" 2025-06-11 \n",
" 0.600059 \n",
" 2.094294 \n",
" 0.037819 \n",
" 0.461712 \n",
" \n",
" \n",
" 2025-06-12 \n",
" 0.177291 \n",
" -2.477452 \n",
" 1.112029 \n",
" 1.461535 \n",
" \n",
" \n",
" 2025-06-13 \n",
" 1.369635 \n",
" 1.055051 \n",
" 1.789994 \n",
" 1.882643 \n",
" \n",
" \n",
" 2025-06-14 \n",
" -2.056550 \n",
" 0.503585 \n",
" 0.529743 \n",
" 2.430223 \n",
" \n",
" \n",
" 2025-06-15 \n",
" 0.033870 \n",
" -0.739333 \n",
" -0.770163 \n",
" -0.307101 \n",
" \n",
" \n",
" 2025-06-16 \n",
" -1.762794 \n",
" -1.198593 \n",
" -1.290733 \n",
" -0.460972 \n",
" \n",
" \n",
" 2025-06-17 \n",
" -0.768915 \n",
" 0.528013 \n",
" 1.871727 \n",
" -1.186198 \n",
" \n",
" \n",
" 2025-06-18 \n",
" 0.279047 \n",
" -0.375068 \n",
" -1.127651 \n",
" -1.097628 \n",
" \n",
" \n",
" 2025-06-19 \n",
" -0.648869 \n",
" 1.761289 \n",
" -1.395471 \n",
" -0.876198 \n",
" \n",
" \n",
" 2025-06-20 \n",
" 1.499789 \n",
" -3.225222 \n",
" -0.894687 \n",
" -0.752625 \n",
" \n",
" \n",
" 2025-06-21 \n",
" -1.023443 \n",
" -1.240655 \n",
" -0.932564 \n",
" -0.348584 \n",
" \n",
" \n",
" 2025-06-22 \n",
" 0.969063 \n",
" 1.197812 \n",
" 1.602163 \n",
" 8.705383 \n",
" \n",
" \n",
" 2025-06-23 \n",
" nan \n",
" 0.383280 \n",
" 0.779473 \n",
" -2.011176 \n",
" \n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 186,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def highlight_abs_gt_3(val):\n",
"    \"\"\"Return a red-background CSS rule when |val| > 3, otherwise an empty string.\"\"\"\n",
"    if abs(val) > 3:\n",
"        return 'background-color: red'\n",
"    return ''\n",
"\n",
"# One row per date, one column per subreddit, holding that day's z-score.\n",
"df_zscores = pd.pivot_table(df_history, index='date', columns='subreddit', values='z_score', aggfunc=\"max\")\n",
"\n",
"styled = df_zscores.style.map(highlight_abs_gt_3)\n",
"styled"
]
},
{
"cell_type": "markdown",
"id": "45cf9a6d-532a-4083-9781-81ff8bdce488",
"metadata": {},
"source": [
"Looking at the June 22 spike, it is suspicious that we found an 8.7-sigma spike\n",
"- The prior week (6/15 -- 6/21) had an unusually small std: it suddenly dropped to 0.019 from roughly 0.1\n",
"- This made the current day's -0.25 score look extremely spiky\n",
"\n",
"However, looking at the data beyond just the prior week, the -0.25 score is not that significant: there are plenty of values near -0.2 before that week\n",
"\n",
"Therefore this is probably not a huge spike to alert on, so we need a more stable std calculation."
]
},
{
"cell_type": "code",
"execution_count": 187,
"id": "f27992da-d1ec-4087-8100-513df36e8246",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-24T21:58:51.834680Z",
"iopub.status.busy": "2025-06-24T21:58:51.832679Z",
"iopub.status.idle": "2025-06-24T21:58:51.844699Z",
"shell.execute_reply": "2025-06-24T21:58:51.844699Z",
"shell.execute_reply.started": "2025-06-24T21:58:51.834680Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" date \n",
" subreddit \n",
" community_weighted_sentiment \n",
" mean_cws \n",
" std_cws \n",
" z_score \n",
" \n",
" \n",
" \n",
" \n",
" 151 \n",
" 2025-06-07 \n",
" singularity \n",
" -0.352707 \n",
" -0.425644 \n",
" 0.089756 \n",
" 0.812612 \n",
" \n",
" \n",
" 155 \n",
" 2025-06-08 \n",
" singularity \n",
" -0.493835 \n",
" -0.411582 \n",
" 0.092756 \n",
" -0.886763 \n",
" \n",
" \n",
" 159 \n",
" 2025-06-09 \n",
" singularity \n",
" -0.482524 \n",
" -0.412734 \n",
" 0.093874 \n",
" -0.743442 \n",
" \n",
" \n",
" 163 \n",
" 2025-06-10 \n",
" singularity \n",
" -0.416534 \n",
" -0.407614 \n",
" 0.087944 \n",
" -0.101424 \n",
" \n",
" \n",
" 167 \n",
" 2025-06-11 \n",
" singularity \n",
" -0.358953 \n",
" -0.392952 \n",
" 0.073637 \n",
" 0.461712 \n",
" \n",
" \n",
" 171 \n",
" 2025-06-12 \n",
" singularity \n",
" -0.308392 \n",
" -0.401512 \n",
" 0.063714 \n",
" 1.461535 \n",
" \n",
" \n",
" 175 \n",
" 2025-06-13 \n",
" singularity \n",
" -0.258518 \n",
" -0.393909 \n",
" 0.071916 \n",
" 1.882643 \n",
" \n",
" \n",
" 179 \n",
" 2025-06-14 \n",
" singularity \n",
" -0.169175 \n",
" -0.381637 \n",
" 0.087425 \n",
" 2.430223 \n",
" \n",
" \n",
" 183 \n",
" 2025-06-15 \n",
" singularity \n",
" -0.392046 \n",
" -0.355419 \n",
" 0.119269 \n",
" -0.307101 \n",
" \n",
" \n",
" 187 \n",
" 2025-06-16 \n",
" singularity \n",
" -0.389244 \n",
" -0.340877 \n",
" 0.104923 \n",
" -0.460972 \n",
" \n",
" \n",
" 191 \n",
" 2025-06-17 \n",
" singularity \n",
" -0.432632 \n",
" -0.327552 \n",
" 0.088586 \n",
" -1.186198 \n",
" \n",
" \n",
" 195 \n",
" 2025-06-18 \n",
" singularity \n",
" -0.430223 \n",
" -0.329851 \n",
" 0.091444 \n",
" -1.097628 \n",
" \n",
" \n",
" 199 \n",
" 2025-06-19 \n",
" singularity \n",
" -0.426679 \n",
" -0.340033 \n",
" 0.098889 \n",
" -0.876198 \n",
" \n",
" \n",
" 203 \n",
" 2025-06-20 \n",
" singularity \n",
" -0.434163 \n",
" -0.356931 \n",
" 0.102617 \n",
" -0.752625 \n",
" \n",
" \n",
" 207 \n",
" 2025-06-21 \n",
" singularity \n",
" -0.415414 \n",
" -0.382023 \n",
" 0.095790 \n",
" -0.348584 \n",
" \n",
" \n",
" 211 \n",
" 2025-06-22 \n",
" singularity \n",
" -0.250397 \n",
" -0.417200 \n",
" 0.019161 \n",
" 8.705383 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date subreddit community_weighted_sentiment mean_cws \\\n",
"151 2025-06-07 singularity -0.352707 -0.425644 \n",
"155 2025-06-08 singularity -0.493835 -0.411582 \n",
"159 2025-06-09 singularity -0.482524 -0.412734 \n",
"163 2025-06-10 singularity -0.416534 -0.407614 \n",
"167 2025-06-11 singularity -0.358953 -0.392952 \n",
"171 2025-06-12 singularity -0.308392 -0.401512 \n",
"175 2025-06-13 singularity -0.258518 -0.393909 \n",
"179 2025-06-14 singularity -0.169175 -0.381637 \n",
"183 2025-06-15 singularity -0.392046 -0.355419 \n",
"187 2025-06-16 singularity -0.389244 -0.340877 \n",
"191 2025-06-17 singularity -0.432632 -0.327552 \n",
"195 2025-06-18 singularity -0.430223 -0.329851 \n",
"199 2025-06-19 singularity -0.426679 -0.340033 \n",
"203 2025-06-20 singularity -0.434163 -0.356931 \n",
"207 2025-06-21 singularity -0.415414 -0.382023 \n",
"211 2025-06-22 singularity -0.250397 -0.417200 \n",
"\n",
" std_cws z_score \n",
"151 0.089756 0.812612 \n",
"155 0.092756 -0.886763 \n",
"159 0.093874 -0.743442 \n",
"163 0.087944 -0.101424 \n",
"167 0.073637 0.461712 \n",
"171 0.063714 1.461535 \n",
"175 0.071916 1.882643 \n",
"179 0.087425 2.430223 \n",
"183 0.119269 -0.307101 \n",
"187 0.104923 -0.460972 \n",
"191 0.088586 -1.186198 \n",
"195 0.091444 -1.097628 \n",
"199 0.098889 -0.876198 \n",
"203 0.102617 -0.752625 \n",
"207 0.095790 -0.348584 \n",
"211 0.019161 8.705383 "
]
},
"execution_count": 187,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"date = \"2025-06-22\"\n",
"subreddit = \"singularity\"\n",
"\n",
"# Window from 15 days before the flagged date through the flagged date itself.\n",
"rightwindow = datetime.datetime.strptime(date, \"%Y-%m-%d\").date()\n",
"leftwindow = rightwindow - datetime.timedelta(days=15)\n",
"\n",
"\n",
"def fil(x):\n",
"    \"\"\"Boolean mask: rows of `x` for `subreddit` dated between leftwindow and rightwindow.\"\"\"\n",
"    in_window = x.date.between(leftwindow, rightwindow)\n",
"    return in_window & (x.subreddit == subreddit)\n",
"\n",
"\n",
"df_history[fil]"
]
},
{
"cell_type": "code",
"execution_count": 188,
"id": "1cbd75a7-530b-47cd-b327-eee0477b06ba",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-24T21:58:52.302354Z",
"iopub.status.busy": "2025-06-24T21:58:52.298239Z",
"iopub.status.idle": "2025-06-24T21:58:52.436682Z",
"shell.execute_reply": "2025-06-24T21:58:52.436682Z",
"shell.execute_reply.started": "2025-06-24T21:58:52.302354Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 188,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot the daily sentiment of the selected subreddit over the inspection window.\n",
"(\n",
"    pd.pivot_table(\n",
"        df_history[fil],\n",
"        index='date',\n",
"        values='community_weighted_sentiment',\n",
"        columns=['subreddit'],\n",
"        aggfunc='max',\n",
"    )\n",
"    .plot(rot=45, ylabel='sentiment score')\n",
")"
]
},
{
"cell_type": "markdown",
"id": "9ada3fbb-d889-420b-888d-de7d86f5693e",
"metadata": {},
"source": [
"### Method 2: Calculate z-score based on the mean and std of all of May's data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "343165d0-7b6e-4ed4-bf8d-1ab4ca0537db",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:56:35.838802Z",
"iopub.status.busy": "2025-06-25T22:56:35.838802Z",
"iopub.status.idle": "2025-06-25T22:56:35.861459Z",
"shell.execute_reply": "2025-06-25T22:56:35.861459Z",
"shell.execute_reply.started": "2025-06-25T22:56:35.838802Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" mean_cws \n",
" std_cws \n",
" \n",
" \n",
" subreddit \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" LocalLLaMA \n",
" -0.485437 \n",
" 0.069627 \n",
" \n",
" \n",
" OpenAI \n",
" -0.493694 \n",
" 0.071395 \n",
" \n",
" \n",
" artificial \n",
" -0.473540 \n",
" 0.138909 \n",
" \n",
" \n",
" singularity \n",
" -0.429887 \n",
" 0.081146 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mean_cws std_cws\n",
"subreddit \n",
"LocalLLaMA -0.485437 0.069627\n",
"OpenAI -0.493694 0.071395\n",
"artificial -0.473540 0.138909\n",
"singularity -0.429887 0.081146"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Baseline statistics per subreddit, computed from May 2025 only.\n",
"# The year is pinned as well as the month: filtering on month == 5 alone would\n",
"# silently pool May of every year once the dataset spans more than twelve months.\n",
"summary_dates = pd.to_datetime(df_summary.index)\n",
"in_baseline = (summary_dates.year == 2025) & (summary_dates.month == 5)\n",
"\n",
"global_stats = (\n",
"    df_summary[in_baseline]\n",
"    .groupby('subreddit')\n",
"    .agg(\n",
"        mean_cws=('community_weighted_sentiment', 'mean'),\n",
"        std_cws=('community_weighted_sentiment', 'std'),\n",
"    )\n",
")\n",
"global_stats"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ba4f287b-24f3-4976-86af-a8377140d724",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:56:37.731634Z",
"iopub.status.busy": "2025-06-25T22:56:37.731634Z",
"iopub.status.idle": "2025-06-25T22:56:37.741555Z",
"shell.execute_reply": "2025-06-25T22:56:37.741555Z",
"shell.execute_reply.started": "2025-06-25T22:56:37.731634Z"
}
},
"outputs": [],
"source": [
"# Broadcast each subreddit's baseline mean/std onto its daily rows.\n",
"# NOTE: this adds mean_cws/std_cws to df_summary itself, not only to df_history.\n",
"df_summary[['mean_cws', 'std_cws']] = global_stats.loc[df_summary['subreddit'], ['mean_cws', 'std_cws']].to_numpy()\n",
"\n",
"df_history = df_summary.copy().reset_index()\n",
"df_history['z_score'] = (\n",
"    df_history['community_weighted_sentiment'].sub(df_history['mean_cws']).div(df_history['std_cws'])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "2d2bc20a-0a08-4deb-9206-f9e026c0f3d2",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:56:38.204475Z",
"iopub.status.busy": "2025-06-25T22:56:38.204475Z",
"iopub.status.idle": "2025-06-25T22:56:38.824278Z",
"shell.execute_reply": "2025-06-25T22:56:38.824278Z",
"shell.execute_reply.started": "2025-06-25T22:56:38.204475Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" subreddit \n",
" LocalLLaMA \n",
" OpenAI \n",
" artificial \n",
" singularity \n",
" \n",
" \n",
" date \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 2025-05-01 \n",
" 0.108533 \n",
" -0.326294 \n",
" -0.237290 \n",
" -0.071433 \n",
" \n",
" \n",
" 2025-05-02 \n",
" 0.728985 \n",
" 0.081725 \n",
" -0.220718 \n",
" 0.182259 \n",
" \n",
" \n",
" 2025-05-03 \n",
" -0.584901 \n",
" 0.547064 \n",
" 0.779239 \n",
" 0.443977 \n",
" \n",
" \n",
" 2025-05-04 \n",
" 2.589418 \n",
" 0.675299 \n",
" -0.491293 \n",
" -2.161081 \n",
" \n",
" \n",
" 2025-05-05 \n",
" 0.505834 \n",
" 0.947750 \n",
" 0.752050 \n",
" 1.201308 \n",
" \n",
" \n",
" 2025-05-06 \n",
" -0.679105 \n",
" -0.867288 \n",
" 0.108398 \n",
" 0.487051 \n",
" \n",
" \n",
" 2025-05-07 \n",
" 0.080169 \n",
" 0.488811 \n",
" -0.327867 \n",
" 0.892507 \n",
" \n",
" \n",
" 2025-05-08 \n",
" 0.344177 \n",
" 0.144971 \n",
" -0.400844 \n",
" -1.018908 \n",
" \n",
" \n",
" 2025-05-09 \n",
" 0.866881 \n",
" 1.014110 \n",
" -0.551819 \n",
" -1.387403 \n",
" \n",
" \n",
" 2025-05-10 \n",
" 0.998114 \n",
" -0.533001 \n",
" -0.073038 \n",
" -1.091921 \n",
" \n",
" \n",
" 2025-05-11 \n",
" -0.600219 \n",
" 0.516398 \n",
" -0.553907 \n",
" 1.515454 \n",
" \n",
" \n",
" 2025-05-12 \n",
" -1.216182 \n",
" -0.033883 \n",
" -0.423863 \n",
" -1.148153 \n",
" \n",
" \n",
" 2025-05-13 \n",
" 0.247646 \n",
" -2.120170 \n",
" 0.410438 \n",
" -0.007542 \n",
" \n",
" \n",
" 2025-05-14 \n",
" 2.744581 \n",
" -1.054436 \n",
" -0.454956 \n",
" 1.431027 \n",
" \n",
" \n",
" 2025-05-15 \n",
" -0.436973 \n",
" -0.209793 \n",
" -1.107901 \n",
" -0.626926 \n",
" \n",
" \n",
" 2025-05-16 \n",
" -0.488635 \n",
" -0.947210 \n",
" -0.754111 \n",
" -0.141756 \n",
" \n",
" \n",
" 2025-05-17 \n",
" -0.892587 \n",
" -0.274142 \n",
" -0.249486 \n",
" -1.256339 \n",
" \n",
" \n",
" 2025-05-18 \n",
" -0.035129 \n",
" 0.431687 \n",
" -0.179333 \n",
" -1.584113 \n",
" \n",
" \n",
" 2025-05-19 \n",
" 0.048008 \n",
" -2.572587 \n",
" -0.567740 \n",
" 0.574945 \n",
" \n",
" \n",
" 2025-05-20 \n",
" 0.278872 \n",
" 1.125961 \n",
" 0.343371 \n",
" 1.083809 \n",
" \n",
" \n",
" 2025-05-21 \n",
" -0.465335 \n",
" -0.232416 \n",
" -1.113788 \n",
" 1.166050 \n",
" \n",
" \n",
" 2025-05-22 \n",
" 0.097036 \n",
" -0.445080 \n",
" -1.560167 \n",
" -1.219291 \n",
" \n",
" \n",
" 2025-05-23 \n",
" -1.182355 \n",
" 0.024858 \n",
" 1.080172 \n",
" 0.616565 \n",
" \n",
" \n",
" 2025-05-24 \n",
" 0.363446 \n",
" 2.486987 \n",
" 1.014332 \n",
" -0.182961 \n",
" \n",
" \n",
" 2025-05-25 \n",
" -0.753511 \n",
" 1.592893 \n",
" 0.671174 \n",
" 0.409314 \n",
" \n",
" \n",
" 2025-05-26 \n",
" -0.019423 \n",
" -1.059621 \n",
" 1.431861 \n",
" 0.215536 \n",
" \n",
" \n",
" 2025-05-27 \n",
" -1.611847 \n",
" 0.083969 \n",
" -1.511113 \n",
" 1.798263 \n",
" \n",
" \n",
" 2025-05-28 \n",
" -0.146052 \n",
" 0.226888 \n",
" -0.052449 \n",
" 0.128626 \n",
" \n",
" \n",
" 2025-05-29 \n",
" 0.581694 \n",
" -0.614289 \n",
" 3.346702 \n",
" -0.002884 \n",
" \n",
" \n",
" 2025-05-30 \n",
" 0.429497 \n",
" 0.397523 \n",
" -0.657833 \n",
" 0.015947 \n",
" \n",
" \n",
" 2025-05-31 \n",
" -1.900638 \n",
" 0.503316 \n",
" 1.551778 \n",
" -0.261925 \n",
" \n",
" \n",
" 2025-06-01 \n",
" 0.532828 \n",
" 1.289294 \n",
" 1.876977 \n",
" -0.688700 \n",
" \n",
" \n",
" 2025-06-02 \n",
" 0.232248 \n",
" -0.165925 \n",
" -1.098352 \n",
" -1.090313 \n",
" \n",
" \n",
" 2025-06-03 \n",
" 1.922932 \n",
" -1.137965 \n",
" -1.088363 \n",
" -1.100269 \n",
" \n",
" \n",
" 2025-06-04 \n",
" -0.144756 \n",
" -0.843765 \n",
" -0.946424 \n",
" 1.612607 \n",
" \n",
" \n",
" 2025-06-05 \n",
" 0.083187 \n",
" 0.830400 \n",
" -2.020017 \n",
" 0.841401 \n",
" \n",
" \n",
" 2025-06-06 \n",
" 1.761480 \n",
" -0.061533 \n",
" -0.525959 \n",
" 1.053218 \n",
" \n",
" \n",
" 2025-06-07 \n",
" 0.362370 \n",
" 0.711673 \n",
" 0.253683 \n",
" 0.951126 \n",
" \n",
" \n",
" 2025-06-08 \n",
" -0.291809 \n",
" 0.158834 \n",
" -1.471934 \n",
" -0.788063 \n",
" \n",
" \n",
" 2025-06-09 \n",
" 0.650506 \n",
" 0.995156 \n",
" 0.062120 \n",
" -0.648674 \n",
" \n",
" \n",
" 2025-06-10 \n",
" 2.487398 \n",
" -0.421202 \n",
" -0.418658 \n",
" 0.164558 \n",
" \n",
" \n",
" 2025-06-11 \n",
" 1.326513 \n",
" 1.635695 \n",
" -0.693025 \n",
" 0.874158 \n",
" \n",
" \n",
" 2025-06-12 \n",
" 1.086915 \n",
" -1.187856 \n",
" 0.213115 \n",
" 1.497238 \n",
" \n",
" \n",
" 2025-06-13 \n",
" 2.315385 \n",
" 1.254158 \n",
" 0.730157 \n",
" 2.111864 \n",
" \n",
" \n",
" 2025-06-14 \n",
" -0.944600 \n",
" 0.950747 \n",
" 0.198753 \n",
" 3.212883 \n",
" \n",
" \n",
" 2025-06-15 \n",
" 0.990051 \n",
" -0.262975 \n",
" -0.757067 \n",
" 0.466329 \n",
" \n",
" \n",
" 2025-06-16 \n",
" -0.884938 \n",
" -0.828498 \n",
" -0.799452 \n",
" 0.500864 \n",
" \n",
" \n",
" 2025-06-17 \n",
" -0.145928 \n",
" 0.745703 \n",
" 0.903225 \n",
" -0.033837 \n",
" \n",
" \n",
" 2025-06-18 \n",
" 0.876097 \n",
" -0.078857 \n",
" -0.842406 \n",
" -0.004147 \n",
" \n",
" \n",
" 2025-06-19 \n",
" -0.299047 \n",
" 1.717280 \n",
" -1.091362 \n",
" 0.039526 \n",
" \n",
" \n",
" 2025-06-20 \n",
" 2.044398 \n",
" -2.443435 \n",
" -0.977243 \n",
" -0.052705 \n",
" \n",
" \n",
" 2025-06-21 \n",
" -0.897563 \n",
" -1.718745 \n",
" -1.172944 \n",
" 0.178352 \n",
" \n",
" \n",
" 2025-06-22 \n",
" 1.303621 \n",
" 1.285091 \n",
" 0.466241 \n",
" 2.211939 \n",
" \n",
" \n",
" 2025-06-23 \n",
" nan \n",
" 0.407550 \n",
" 0.145478 \n",
" -1.242271 \n",
" \n",
" \n",
" 2025-06-24 \n",
" 2.788611 \n",
" -0.252713 \n",
" -0.483795 \n",
" 0.065892 \n",
" \n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def highlight_abs_gt_3(val):\n",
"    \"\"\"Return a red-background CSS rule when |val| > 3, otherwise an empty string.\"\"\"\n",
"    return 'background-color: red' if not abs(val) <= 3 and abs(val) > 3 else ''\n",
"\n",
"# Date x subreddit table of z-scores measured against the fixed baseline.\n",
"df_zscores = pd.pivot_table(df_history, index='date', columns='subreddit', values='z_score', aggfunc=\"max\")\n",
"\n",
"styled = df_zscores.style.map(highlight_abs_gt_3)\n",
"styled"
]
},
{
"cell_type": "markdown",
"id": "fb2921e5-caf9-4060-8979-70270102efa4",
"metadata": {},
"source": [
"With the more stable mean and std calculation, it seems like we've found an actual spike"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e05b3b07-865e-4546-a5ce-0ac9128db685",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:56:40.673195Z",
"iopub.status.busy": "2025-06-25T22:56:40.673195Z",
"iopub.status.idle": "2025-06-25T22:56:40.690169Z",
"shell.execute_reply": "2025-06-25T22:56:40.690169Z",
"shell.execute_reply.started": "2025-06-25T22:56:40.673195Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" date \n",
" subreddit \n",
" community_weighted_sentiment \n",
" mean_cws \n",
" std_cws \n",
" z_score \n",
" \n",
" \n",
" \n",
" \n",
" 54 \n",
" 2025-05-14 \n",
" artificial \n",
" -0.536738 \n",
" -0.47354 \n",
" 0.138909 \n",
" -0.454956 \n",
" \n",
" \n",
" 58 \n",
" 2025-05-15 \n",
" artificial \n",
" -0.627437 \n",
" -0.47354 \n",
" 0.138909 \n",
" -1.107901 \n",
" \n",
" \n",
" 62 \n",
" 2025-05-16 \n",
" artificial \n",
" -0.578293 \n",
" -0.47354 \n",
" 0.138909 \n",
" -0.754111 \n",
" \n",
" \n",
" 66 \n",
" 2025-05-17 \n",
" artificial \n",
" -0.508196 \n",
" -0.47354 \n",
" 0.138909 \n",
" -0.249486 \n",
" \n",
" \n",
" 70 \n",
" 2025-05-18 \n",
" artificial \n",
" -0.498451 \n",
" -0.47354 \n",
" 0.138909 \n",
" -0.179333 \n",
" \n",
" \n",
" 74 \n",
" 2025-05-19 \n",
" artificial \n",
" -0.552404 \n",
" -0.47354 \n",
" 0.138909 \n",
" -0.567740 \n",
" \n",
" \n",
" 78 \n",
" 2025-05-20 \n",
" artificial \n",
" -0.425843 \n",
" -0.47354 \n",
" 0.138909 \n",
" 0.343371 \n",
" \n",
" \n",
" 82 \n",
" 2025-05-21 \n",
" artificial \n",
" -0.628255 \n",
" -0.47354 \n",
" 0.138909 \n",
" -1.113788 \n",
" \n",
" \n",
" 86 \n",
" 2025-05-22 \n",
" artificial \n",
" -0.690261 \n",
" -0.47354 \n",
" 0.138909 \n",
" -1.560167 \n",
" \n",
" \n",
" 90 \n",
" 2025-05-23 \n",
" artificial \n",
" -0.323495 \n",
" -0.47354 \n",
" 0.138909 \n",
" 1.080172 \n",
" \n",
" \n",
" 94 \n",
" 2025-05-24 \n",
" artificial \n",
" -0.332641 \n",
" -0.47354 \n",
" 0.138909 \n",
" 1.014332 \n",
" \n",
" \n",
" 98 \n",
" 2025-05-25 \n",
" artificial \n",
" -0.380308 \n",
" -0.47354 \n",
" 0.138909 \n",
" 0.671174 \n",
" \n",
" \n",
" 102 \n",
" 2025-05-26 \n",
" artificial \n",
" -0.274642 \n",
" -0.47354 \n",
" 0.138909 \n",
" 1.431861 \n",
" \n",
" \n",
" 106 \n",
" 2025-05-27 \n",
" artificial \n",
" -0.683447 \n",
" -0.47354 \n",
" 0.138909 \n",
" -1.511113 \n",
" \n",
" \n",
" 110 \n",
" 2025-05-28 \n",
" artificial \n",
" -0.480826 \n",
" -0.47354 \n",
" 0.138909 \n",
" -0.052449 \n",
" \n",
" \n",
" 114 \n",
" 2025-05-29 \n",
" artificial \n",
" -0.008654 \n",
" -0.47354 \n",
" 0.138909 \n",
" 3.346702 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date subreddit community_weighted_sentiment mean_cws std_cws \\\n",
"54 2025-05-14 artificial -0.536738 -0.47354 0.138909 \n",
"58 2025-05-15 artificial -0.627437 -0.47354 0.138909 \n",
"62 2025-05-16 artificial -0.578293 -0.47354 0.138909 \n",
"66 2025-05-17 artificial -0.508196 -0.47354 0.138909 \n",
"70 2025-05-18 artificial -0.498451 -0.47354 0.138909 \n",
"74 2025-05-19 artificial -0.552404 -0.47354 0.138909 \n",
"78 2025-05-20 artificial -0.425843 -0.47354 0.138909 \n",
"82 2025-05-21 artificial -0.628255 -0.47354 0.138909 \n",
"86 2025-05-22 artificial -0.690261 -0.47354 0.138909 \n",
"90 2025-05-23 artificial -0.323495 -0.47354 0.138909 \n",
"94 2025-05-24 artificial -0.332641 -0.47354 0.138909 \n",
"98 2025-05-25 artificial -0.380308 -0.47354 0.138909 \n",
"102 2025-05-26 artificial -0.274642 -0.47354 0.138909 \n",
"106 2025-05-27 artificial -0.683447 -0.47354 0.138909 \n",
"110 2025-05-28 artificial -0.480826 -0.47354 0.138909 \n",
"114 2025-05-29 artificial -0.008654 -0.47354 0.138909 \n",
"\n",
" z_score \n",
"54 -0.454956 \n",
"58 -1.107901 \n",
"62 -0.754111 \n",
"66 -0.249486 \n",
"70 -0.179333 \n",
"74 -0.567740 \n",
"78 0.343371 \n",
"82 -1.113788 \n",
"86 -1.560167 \n",
"90 1.080172 \n",
"94 1.014332 \n",
"98 0.671174 \n",
"102 1.431861 \n",
"106 -1.511113 \n",
"110 -0.052449 \n",
"114 3.346702 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"date = \"2025-05-29\"\n",
"subreddit = \"artificial\"\n",
"\n",
"rightwindow = datetime.datetime.strptime(date,\"%Y-%m-%d\").date()\n",
"leftwindow = rightwindow - datetime.timedelta(days=15)\n",
"\n",
"\n",
"fil = lambda x: x.date.between(leftwindow,rightwindow) & (x.subreddit == subreddit)\n",
"\n",
"df_history[fil]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "eea2124a-27da-4505-b35e-b17d00ca6096",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-25T22:56:48.802366Z",
"iopub.status.busy": "2025-06-25T22:56:48.802366Z",
"iopub.status.idle": "2025-06-25T22:56:48.998555Z",
"shell.execute_reply": "2025-06-25T22:56:48.998555Z",
"shell.execute_reply.started": "2025-06-25T22:56:48.802366Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df_history[fil].pipe(pd.pivot_table,index='date',values='community_weighted_sentiment',columns=['subreddit'],aggfunc='max').plot(rot=45,ylabel='sentiment score')"
]
},
{
"cell_type": "markdown",
"id": "c2d0d05e-883c-461f-b9e0-bb7ade102360",
"metadata": {},
"source": [
"## Export Spike Metric to Production"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "e039e7fc-24a2-4318-b099-58d952059cd8",
"metadata": {
"execution": {
"iopub.execute_input": "2025-06-26T02:32:57.348241Z",
"iopub.status.busy": "2025-06-26T02:32:57.348241Z",
"iopub.status.idle": "2025-06-26T02:32:57.354108Z",
"shell.execute_reply": "2025-06-26T02:32:57.354108Z",
"shell.execute_reply.started": "2025-06-26T02:32:57.348241Z"
}
},
"outputs": [],
"source": [
"import yaml\n",
"\n",
"params = {\n",
" sub: {\n",
" \"mean\": round(float(row[\"mean_cws\"]), 4), # float() strips NumPy dtype\n",
" \"std\": round(float(row[\"std_cws\"]), 4),\n",
" }\n",
" for sub, row in global_stats.iterrows()\n",
"}\n",
"\n",
"with open(\"../spike_params.yaml\", \"w\", encoding=\"utf-8\") as f:\n",
" f.write(\"# spike_params.yaml\\n\")\n",
" yaml.safe_dump(params, f, sort_keys=False, default_flow_style=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41b315d2-4939-47a3-a12a-7312bc15d3c2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:reddit_streamlit]",
"language": "python",
"name": "conda-env-reddit_streamlit-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}