# FinBERT Market Evaluation - Main Streamlit Application
"""
A confidence-aware, volatility-adjusted post-market evaluator for FinBERT
sentiment predictions against actual stock market movements.

The page collects a news text, a ticker and a publication date, runs the
sentiment analyzer, fetches market data, evaluates the prediction and renders
the result together with a per-session history of earlier evaluations.
"""

import logging
import time
from datetime import datetime, timedelta, date

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# Import our custom modules
from sentiment_analyzer import FinBERTAnalyzer
from market_data import MarketDataService
from evaluation import EvaluationEngine

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Minimum number of seconds between two evaluation requests in one session.
RATE_LIMIT_SECONDS = 30

# Emoji shown next to each sentiment label; unknown labels fall back to "❓".
_SENTIMENT_ICONS = {"positive": "📈", "negative": "📉", "neutral": "➡️"}

# Page configuration
st.set_page_config(
    page_title="FinBERT Market Evaluation",
    page_icon="🚀",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Initialize session state for rate limiting
if 'last_request_time' not in st.session_state:
    st.session_state.last_request_time = 0
if 'evaluation_history' not in st.session_state:
    st.session_state.evaluation_history = []


# Initialize services
@st.cache_resource
def initialize_services():
    """Create the analyzer, market-data service and evaluation engine.

    Decorated with ``st.cache_resource`` so the three objects are built once
    and reused on later calls.

    Returns:
        Tuple ``(analyzer, market_service, evaluation_engine)``.
    """
    analyzer = FinBERTAnalyzer()
    market_service = MarketDataService()
    evaluation_engine = EvaluationEngine()
    return analyzer, market_service, evaluation_engine


def check_rate_limit() -> bool:
    """Return True when at least RATE_LIMIT_SECONDS passed since the last request."""
    elapsed = time.time() - st.session_state.last_request_time
    return elapsed >= RATE_LIMIT_SECONDS


def update_rate_limit() -> None:
    """Record the current time as the moment of the last request."""
    st.session_state.last_request_time = time.time()


def create_das_chart(das_score: float, confidence: float, impact: float):
    """Create horizontal bar chart for DAS, confidence, and impact.

    Args:
        das_score: Directional Alignment Score, plotted as given.
        confidence: Model confidence, plotted as given.
        impact: Price impact in percent; divided by 5 and capped at 1.0 so it
            shares an axis with the other two bars.

    Returns:
        A ``plotly.graph_objects.Figure`` with one horizontal bar per metric.
    """
    fig = go.Figure()

    metrics = ['DAS Score', 'Confidence', 'Impact (scaled)']
    values = [das_score, confidence, min(impact / 5.0, 1.0)]  # Scale impact to 0-1
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c']

    fig.add_trace(go.Bar(
        y=metrics,
        x=values,
        orientation='h',
        marker_color=colors,
        text=[f'{v:.3f}' for v in values],
        textposition='inside'
    ))

    fig.update_layout(
        title="Evaluation Metrics",
        xaxis_title="Score",
        height=200,
        margin=dict(l=100, r=50, t=50, b=50)
    )

    return fig


def _format_signed_percent(value) -> str:
    """Format numbers as a signed percentage; pass anything else through ``str``."""
    if isinstance(value, (int, float)):
        return f"{value:+.2f}%"
    return str(value)


def display_evaluation_result(result: dict) -> None:
    """Display comprehensive evaluation results.

    If ``result`` has an ``"error"`` key only that error is shown. Otherwise
    the function reads ``evaluation_summary``, ``is_correct``,
    ``volatility_14d``, ``impact``, ``threshold``, ``das_score``,
    ``sentiment``, ``confidence``, ``return_24h``, ``ticker``, ``news_date``
    and ``wat_weight``, plus the optional ``return_next_24h``, ``alpha_24h``,
    ``alpha_adjusted`` and ``market_return``.

    Args:
        result: Evaluation result dictionary as stored in the session history.
    """
    if "error" in result:
        st.error(f"Evaluation Error: {result['error']}")
        return

    # Prominent evaluation summary first
    st.markdown(f"### {result['evaluation_summary']}")

    # Key insights in a highlighted box
    alignment_color = "green" if result['is_correct'] else "red"
    volatility = result['volatility_14d']
    if volatility > 100:
        volatility_note = "🔥 Extremely High"
    elif volatility > 50:
        volatility_note = "📊 High"
    else:
        volatility_note = "📈 Normal"

    # Calculate if movement was significant
    movement_significant = result['impact'] > result['threshold']
    significance_text = "exceeded" if movement_significant else "was below"

    # Only numbers and fixed strings are interpolated into the raw HTML, so no
    # user-supplied text reaches the unsafe_allow_html rendering path.
    insight_html = (
        f'<div style="border-left: 4px solid {alignment_color}; '
        'padding: 0.5rem 1rem; margin: 0.5rem 0;">'
        '<strong>📊 Volatility-Aware Analysis:</strong>'
        '<ul>'
        f'<li>14-day volatility: {volatility:.2f}% ({volatility_note})</li>'
        f"<li>Same-day movement of {result['impact']:.2f}% {significance_text} "
        f"the {result['threshold']:.2f}% significance threshold</li>"
        '</ul>'
        '</div>'
    )
    st.markdown(insight_html, unsafe_allow_html=True)

    # Main metrics in columns
    col1, col2, col3, col4 = st.columns(4)

    with col1:
        st.metric("DAS Score", f"{result['das_score']:.3f}",
                  help="Directional Alignment Score (0-1, higher is better)")

    with col2:
        icon = _SENTIMENT_ICONS.get(result['sentiment'], '❓')
        st.metric("Sentiment", f"{icon} {result['sentiment'].title()}")

    with col3:
        st.metric("Confidence", f"{result['confidence']:.1%}")

    with col4:
        st.metric("Same-Day Return", f"{result['return_24h']:+.2f}%",
                  delta=f"vs {result['threshold']:.1f}% threshold")

    # Additional metrics for 24h return if available
    next_return = result.get('return_next_24h')
    alpha_24h = result.get('alpha_24h')

    if next_return is not None:
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric("Next 24h Return", f"{next_return:+.2f}%",
                      help="Return from close of news day to close of next trading day")

        with col2:
            if alpha_24h is not None:
                st.metric("24h Alpha", f"{alpha_24h:+.2f}%",
                          help="24h return vs market performance")

        with col3:
            # Show combined impact
            combined_impact = abs(result['return_24h']) + abs(next_return)
            st.metric("Combined Impact", f"{combined_impact:.2f}%",
                      help="Total magnitude of price movement")

        with col4:
            # Show follow-through consistency
            same_direction = (result['return_24h'] * next_return) > 0
            consistency = "✅ Consistent" if same_direction else "🔄 Reversal"
            st.metric("Follow-through", consistency,
                      help="Whether 24h movement continued same direction")

    # Visualization
    chart = create_das_chart(result['das_score'], result['confidence'], result['impact'])

    # A session-wide counter gives every rendered chart a distinct element
    # key, since the same result can be drawn more than once per run.
    if 'chart_counter' not in st.session_state:
        st.session_state.chart_counter = 0
    st.session_state.chart_counter += 1
    chart_key = f"chart_{st.session_state.chart_counter}"

    st.plotly_chart(chart, use_container_width=True, key=chart_key)

    # Technical metrics (always visible)
    st.subheader("📊 Technical Metrics")
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Ticker", result['ticker'])
        st.metric("News Date", result['news_date'])
        st.metric("14-day Volatility", f"{result['volatility_14d']:.2f}%")
        st.metric("Significance Threshold", f"{result['threshold']:.2f}%")

    with col2:
        st.metric("Same-Day Impact", f"{result['impact']:.2f}%")
        if next_return is not None:
            st.metric("24h Impact", f"{abs(next_return):.2f}%")
        st.metric("WAT Weight", f"{result['wat_weight']:.3f}")
        alignment_text = "✅ Yes" if result['is_correct'] else "❌ No"
        st.metric("Alignment", alignment_text)

    with col3:
        st.metric("Same-Day Alpha",
                  _format_signed_percent(result.get('alpha_adjusted', 'N/A')))
        if alpha_24h is not None:
            st.metric("24h Alpha", f"{alpha_24h:+.2f}%")

        # Market context
        st.metric("Market Return",
                  _format_signed_percent(result.get('market_return', 'N/A')))


def _handle_submission(news_text, ticker, news_date,
                       analyzer, market_service, evaluation_engine) -> None:
    """Validate one form submission, run the evaluation and render the result.

    Validation and rate-limit failures show a message and return without
    touching the history, so the caller can keep rendering the rest of the
    page. Successful results are appended to
    ``st.session_state.evaluation_history``.
    """
    if not news_text.strip():
        st.error("Please enter some news text to analyze.")
        return

    if not ticker:
        st.error("Please enter a stock ticker symbol.")
        return

    # Rate limiting check
    if not check_rate_limit():
        elapsed = time.time() - st.session_state.last_request_time
        remaining_time = RATE_LIMIT_SECONDS - elapsed
        st.warning(f"Rate limit: Please wait {remaining_time:.0f} more seconds before next request.")
        return

    # Update rate limit
    update_rate_limit()

    # Show progress
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        # Step 1: Sentiment Analysis
        status_text.text("🤖 Analyzing sentiment with FinBERT...")
        progress_bar.progress(25)
        sentiment_result = analyzer.analyze_sentiment(news_text)

        # Step 2: Market Data
        status_text.text("📊 Fetching market data...")
        progress_bar.progress(50)
        news_datetime = datetime.combine(news_date, datetime.min.time())
        market_result = market_service.get_stock_evaluation_data(ticker, news_datetime)

        # Step 3: Evaluation
        status_text.text("⚖️ Evaluating prediction...")
        progress_bar.progress(75)
        evaluation_result = evaluation_engine.evaluate_prediction(
            sentiment_result, market_result, news_datetime
        )

        # Step 4: Display Results
        status_text.text("✅ Evaluation complete!")
        progress_bar.progress(100)

        # Clear progress indicators (brief pause so the final state is visible)
        time.sleep(0.5)
        progress_bar.empty()
        status_text.empty()

        # Display results
        st.header("📊 Evaluation Results")
        display_evaluation_result(evaluation_result)

        # Add to history
        if "error" not in evaluation_result:
            st.session_state.evaluation_history.append(evaluation_result)

    except Exception as e:
        # Top-level UI boundary: report to the user and log with traceback.
        progress_bar.empty()
        status_text.empty()
        st.error(f"Evaluation failed: {str(e)}")
        logger.exception("Evaluation error: %s", e)


def main():
    """Main application function.

    Renders the header, sidebar and input form, processes a submitted form,
    then shows the per-session evaluation history and the footer.
    """
    # Header
    st.title("🚀 FinBERT Market Evaluation")
    st.markdown("""
    A confidence-aware, volatility-adjusted post-market evaluator for FinBERT sentiment predictions.
    Evaluate how well FinBERT's financial news sentiment aligns with actual stock market movements.
    """)

    # Sidebar info (no user configuration needed)
    st.sidebar.header("📊 Evaluation Framework")
    st.sidebar.markdown("""
    **Dual-Period Analysis:**
    - **Same-Day**: Intraday return (Close - Open)
    - **Next 24h**: Close-to-close follow-through
    - **Combined**: Complete market reaction picture

    **Volatility-Aware Evaluation:**
    - Uses each stock's 14-day volatility
    - Threshold = 1.0 × volatility (k=1.0)
    - Adapts to stock movement patterns

    **Directional Alignment Score:**
    - Graded 0-1 score (not binary)
    - Based on same-day return vs threshold
    - Higher = better alignment

    **Alpha Analysis:**
    - Stock return vs market performance
    - Isolates stock-specific impact
    - Available for both time periods
    """)

    # Fixed research parameters (not user-configurable)
    volatility_multiplier = 1.0  # k = 1.0 as per your framework
    confidence_threshold = 0.7  # Reasonable default

    # Initialize services
    try:
        analyzer, market_service, evaluation_engine = initialize_services()
        evaluation_engine.volatility_multiplier = volatility_multiplier
        evaluation_engine.confidence_threshold = confidence_threshold
    except Exception as e:
        st.error(f"Failed to initialize services: {str(e)}")
        st.stop()

    # Main input form
    st.header("📰 News Analysis")

    with st.form("evaluation_form"):
        # News text input
        news_text = st.text_area(
            "Financial News Text",
            height=150,
            placeholder="Enter financial news headline or summary here...",
            help="Paste the financial news text you want to analyze"
        )

        col1, col2 = st.columns(2)

        with col1:
            # Strip surrounding whitespace so a padded or blank entry is not
            # treated as a valid symbol.
            ticker = st.text_input(
                "Stock Ticker",
                placeholder="e.g., TSLA, AAPL, MSFT",
                help="Enter the stock ticker symbol"
            ).strip().upper()

        with col2:
            yesterday = date.today() - timedelta(days=1)
            news_date = st.date_input(
                "News Publication Date",
                value=yesterday,
                max_value=yesterday,
                help="Date when the news was published (must be at least 1 day ago)"
            )

        submitted = st.form_submit_button("🔍 Evaluate Prediction")

    # Process evaluation; failures inside the helper do not abort the page,
    # so the history and footer below are still rendered.
    if submitted:
        _handle_submission(news_text, ticker, news_date,
                           analyzer, market_service, evaluation_engine)

    # Evaluation History Section
    if st.session_state.evaluation_history:
        st.header("📋 Previous Evaluations")

        # Show most recent evaluations first (reverse chronological),
        # each in an expandable card.
        for i, result in enumerate(reversed(st.session_state.evaluation_history)):
            # Create a concise title for each evaluation
            alignment_icon = "✅" if result['is_correct'] else "❌"
            sentiment_icon = _SENTIMENT_ICONS.get(result['sentiment'], "❓")

            title = f"{alignment_icon} {result['ticker']} ({result['news_date']}) - {sentiment_icon} {result['sentiment'].title()} → {result['return_24h']:+.1f}% | DAS: {result['das_score']:.3f}"

            with st.expander(title, expanded=(i == 0)):  # Expand the most recent one
                display_evaluation_result(result)

        # Simple action buttons
        st.markdown("---")
        col1, col2 = st.columns([1, 3])

        with col1:
            if st.button("🗑️ Clear All History"):
                st.session_state.evaluation_history = []
                st.rerun()

        with col2:
            st.caption(f"📊 {len(st.session_state.evaluation_history)} evaluation(s) completed")

    # Footer
    st.markdown("---")
    st.caption(f"🚀 **FinBERT Market Evaluation** | Rate limit: {RATE_LIMIT_SECONDS}s | Model: ProsusAI/finbert | Data: Yahoo Finance")


if __name__ == "__main__":
    main()