# Visualization components for FinBERT Market Evaluation
"""
This module provides additional visualization components, including
calibration plots, performance-over-time charts, sentiment distributions,
and confidence-vs-impact scatter plots.
"""

import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
from typing import List, Dict
import streamlit as st


def create_calibration_plot(evaluations: List[Dict]) -> go.Figure:
    """
    Create a calibration plot showing confidence vs actual accuracy.

    Args:
        evaluations: List of evaluation results

    Returns:
        Plotly figure for calibration plot
    """
    if not evaluations:
        return go.Figure()

    # Extract confidence and correctness
    confidences = [e['confidence'] for e in evaluations if 'confidence' in e]
    correctness = [e['is_correct'] for e in evaluations if 'is_correct' in e]

    if len(confidences) != len(correctness) or len(confidences) < 5:
        return go.Figure()

    # Create confidence bins
    bins = np.linspace(0.5, 1.0, 6)  # 5 bins from 0.5 to 1.0
    bin_centers = (bins[:-1] + bins[1:]) / 2

    # Calculate accuracy for each bin
    bin_accuracies = []
    bin_counts = []

    for i in range(len(bins) - 1):
        mask = (np.array(confidences) >= bins[i]) & (np.array(confidences) < bins[i + 1])
        if i == len(bins) - 2:  # Last bin includes upper bound
            mask = (np.array(confidences) >= bins[i]) & (np.array(confidences) <= bins[i + 1])

        bin_correct = np.array(correctness)[mask]
        if len(bin_correct) > 0:
            bin_accuracies.append(np.mean(bin_correct))
            bin_counts.append(len(bin_correct))
        else:
            bin_accuracies.append(0)
            bin_counts.append(0)

    # Create figure
    fig = go.Figure()

    # Perfect calibration line
    fig.add_trace(go.Scatter(
        x=[0.5, 1.0],
        y=[0.5, 1.0],
        mode='lines',
        name='Perfect Calibration',
        line=dict(dash='dash', color='gray')
    ))

    # Actual calibration
    fig.add_trace(go.Scatter(
        x=bin_centers,
        y=bin_accuracies,
        mode='markers+lines',
        name='Actual Calibration',
        marker=dict(size=[c / 2 + 5 for c in bin_counts]),  # Size by count
        text=[f'Count: {c}' for c in bin_counts],
        hovertemplate='Confidence: %{x:.2f}<br>Accuracy: %{y:.2f}<br>%{text}'
    ))

    fig.update_layout(
        title='Calibration Plot: Confidence vs Accuracy',
        xaxis_title='Predicted Confidence',
        yaxis_title='Actual Accuracy',
        xaxis=dict(range=[0.5, 1.0]),
        yaxis=dict(range=[0.0, 1.0]),
        height=400
    )

    return fig


def create_performance_over_time(evaluations: List[Dict]) -> go.Figure:
    """
    Create a time series plot of performance metrics.

    Args:
        evaluations: List of evaluation results

    Returns:
        Plotly figure for performance over time
    """
    if not evaluations:
        return go.Figure()

    # Convert to DataFrame and sort chronologically
    df = pd.DataFrame(evaluations)
    df['news_date'] = pd.to_datetime(df['news_date'])
    df = df.sort_values('news_date')

    # Calculate rolling metrics over a window of up to 5 evaluations
    window = min(5, len(df))
    df['rolling_das'] = df['das_score'].rolling(window=window, min_periods=1).mean()
    df['rolling_accuracy'] = df['is_correct'].rolling(window=window, min_periods=1).mean()

    fig = go.Figure()

    # DAS Score over time
    fig.add_trace(go.Scatter(
        x=df['news_date'],
        y=df['rolling_das'],
        mode='lines+markers',
        name='Rolling DAS Score',
        line=dict(color='blue'),
        yaxis='y'
    ))

    # Accuracy over time
    fig.add_trace(go.Scatter(
        x=df['news_date'],
        y=df['rolling_accuracy'],
        mode='lines+markers',
        name='Rolling Accuracy',
        line=dict(color='red'),
        yaxis='y2'
    ))

    fig.update_layout(
        title=f'Performance Over Time (Rolling {window}-Evaluation Average)',
        xaxis_title='Date',
        yaxis=dict(
            title='DAS Score',
            side='left',
            range=[0, 1]
        ),
        yaxis2=dict(
            title='Accuracy',
            side='right',
            overlaying='y',
            range=[0, 1]
        ),
        height=400,
        hovermode='x unified'
    )

    return fig


def create_sentiment_distribution(evaluations: List[Dict]) -> go.Figure:
    """
    Create a distribution plot of sentiments and their performance.

    Args:
        evaluations: List of evaluation results

    Returns:
        Plotly figure for sentiment distribution
    """
    if not evaluations:
        return go.Figure()

    df = pd.DataFrame(evaluations)

    # Group by sentiment and aggregate performance metrics
    sentiment_stats = df.groupby('sentiment').agg({
        'das_score': ['mean', 'count'],
        'is_correct': 'mean',
        'confidence': 'mean'
    }).round(3)
    sentiment_stats.columns = ['avg_das', 'count', 'accuracy', 'avg_confidence']
    sentiment_stats = sentiment_stats.reset_index()

    # Create a combined bar/line figure with two y-axes
    fig = go.Figure()

    # Bar chart for counts
    fig.add_trace(go.Bar(
        x=sentiment_stats['sentiment'],
        y=sentiment_stats['count'],
        name='Count',
        marker_color='lightblue',
        yaxis='y',
        text=sentiment_stats['count'],
        textposition='auto'
    ))

    # Line chart for accuracy
    fig.add_trace(go.Scatter(
        x=sentiment_stats['sentiment'],
        y=sentiment_stats['accuracy'],
        mode='lines+markers',
        name='Accuracy',
        line=dict(color='red'),
        yaxis='y2',
        marker=dict(size=10)
    ))

    fig.update_layout(
        title='Sentiment Distribution and Performance',
        xaxis_title='Sentiment',
        yaxis=dict(
            title='Count',
            side='left'
        ),
        yaxis2=dict(
            title='Accuracy',
            side='right',
            overlaying='y',
            range=[0, 1]
        ),
        height=400
    )

    return fig


def create_confidence_impact_scatter(evaluations: List[Dict]) -> go.Figure:
    """
    Create a scatter plot of confidence vs impact with DAS score coloring.
    Args:
        evaluations: List of evaluation results

    Returns:
        Plotly figure for confidence-impact scatter
    """
    if not evaluations:
        return go.Figure()

    df = pd.DataFrame(evaluations)

    # Create scatter plot
    fig = px.scatter(
        df,
        x='confidence',
        y='impact',
        color='das_score',
        size='wat_weight',
        hover_data=['ticker', 'sentiment', 'return_24h'],
        color_continuous_scale='RdYlBu_r',
        title='Confidence vs Impact (colored by DAS Score)'
    )

    fig.update_layout(
        xaxis_title='Confidence',
        yaxis_title='Impact (|Return %|)',
        height=400
    )

    return fig


def display_advanced_visualizations(evaluations: List[Dict]):
    """
    Display advanced visualization components in Streamlit.

    Args:
        evaluations: List of evaluation results
    """
    if len(evaluations) < 3:
        st.info("Need at least 3 evaluations for advanced visualizations.")
        return

    st.subheader("📊 Advanced Analytics")

    # Create tabs for different visualizations
    tab1, tab2, tab3, tab4 = st.tabs([
        "Calibration",
        "Performance Over Time",
        "Sentiment Analysis",
        "Confidence vs Impact"
    ])

    with tab1:
        st.plotly_chart(
            create_calibration_plot(evaluations),
            use_container_width=True
        )
        st.caption(
            "Shows how well confidence scores align with actual accuracy. "
            "Points closer to the diagonal line indicate better calibration."
        )

    with tab2:
        st.plotly_chart(
            create_performance_over_time(evaluations),
            use_container_width=True
        )
        st.caption("Rolling average of DAS scores and accuracy over time.")

    with tab3:
        st.plotly_chart(
            create_sentiment_distribution(evaluations),
            use_container_width=True
        )
        st.caption("Distribution of sentiment predictions and their respective performance.")

    with tab4:
        st.plotly_chart(
            create_confidence_impact_scatter(evaluations),
            use_container_width=True
        )
        st.caption("Relationship between model confidence and market impact, colored by DAS score.")
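

# ---------------------------------------------------------------------------
# Example usage (illustrative sketch only, not part of the module's API).
#
# The block below builds a small synthetic list of evaluation dicts with the
# fields the functions above expect ('confidence', 'is_correct', 'news_date',
# 'das_score', 'sentiment', 'impact', 'wat_weight', 'ticker', 'return_24h')
# and renders the dashboard. All values are made up for demonstration; the
# file name in the command is an assumption, so substitute the actual module
# path. Run with:
#     streamlit run visualizations.py
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    demo_evaluations = []
    for i in range(12):
        confidence = float(rng.uniform(0.5, 1.0))
        ret = float(rng.normal(0, 2))  # synthetic 24h return, in percent
        demo_evaluations.append({
            'ticker': rng.choice(['AAPL', 'MSFT', 'NVDA']).item(),
            'news_date': f'2024-01-{i + 1:02d}',
            'sentiment': rng.choice(['positive', 'negative', 'neutral']).item(),
            'confidence': confidence,
            'return_24h': ret,
            'impact': abs(ret),
            'is_correct': bool(rng.random() < confidence),
            'das_score': float(rng.uniform(0, 1)),
            'wat_weight': float(rng.uniform(0.1, 1.0)),
        })

    # Renders all four tabs with the synthetic data when run via Streamlit.
    display_advanced_visualizations(demo_evaluations)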