# Market data fetching service using yfinance
"""
This module handles fetching historical stock price data, calculating returns,
volatility, and market index comparisons for evaluation purposes.
"""

import logging
from bisect import bisect_left
from datetime import date, datetime, timedelta
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import streamlit as st
import yfinance as yf

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class MarketDataService:
    """
    Service for fetching and processing market data for evaluation.

    Price history is fetched through yfinance; returns are expressed in
    percent. Calculation methods return None (and log) instead of raising,
    so callers must check for None before using a result.
    """

    def __init__(self, market_index: str = "^GSPC"):
        """
        Initialize the market data service.

        Args:
            market_index: The market index ticker for macro adjustments (default: S&P 500)
        """
        self.market_index = market_index

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_date(moment) -> date:
        """Return the calendar date of a datetime; pass plain dates through unchanged."""
        return moment.date() if isinstance(moment, datetime) else moment

    @staticmethod
    def _locate_trading_date(data: pd.DataFrame, news_day: date) -> Tuple[List[date], Optional[int]]:
        """
        Find the first trading date on or after news_day.

        Returns:
            (trading_dates, position): the sorted unique dates found in the
            index of data, and the position within that list of the first
            date >= news_day (None when no such date exists).
        """
        trading_dates = sorted(set(data.index.date))
        position = bisect_left(trading_dates, news_day)
        if position >= len(trading_dates):
            return trading_dates, None
        return trading_dates, position

    @staticmethod
    def _percent_change(start_price, end_price) -> Optional[float]:
        """Return (end - start) / start * 100, or None if a price is NaN/inf or start is zero."""
        start = float(start_price)
        end = float(end_price)
        if not (np.isfinite(start) and np.isfinite(end)) or start == 0:
            return None
        return (end - start) / start * 100

    def _fetch_market_data(self, news_date: datetime) -> Optional[pd.DataFrame]:
        """Fetch index prices for a +/- 5 day window around news_date (buffer for weekends)."""
        start_date = news_date - timedelta(days=5)
        end_date = news_date + timedelta(days=5)
        return self.fetch_stock_data(self.market_index, start_date, end_date)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    # The leading underscore on `_self` tells Streamlit not to hash the
    # instance when building the cache key.
    @st.cache_data(ttl=3600)  # Cache for 1 hour
    def fetch_stock_data(_self, ticker: str, start_date: datetime, end_date: datetime) -> Optional[pd.DataFrame]:
        """
        Fetch historical stock data for a given ticker and date range.

        Args:
            ticker: Stock ticker symbol
            start_date: Start date for data fetch
            end_date: End date for data fetch

        Returns:
            DataFrame with stock price data or None if failed
        """
        try:
            logger.info(f"Fetching data for {ticker} from {start_date} to {end_date}")
            stock = yf.Ticker(ticker)
            data = stock.history(start=start_date, end=end_date)

            if data.empty:
                logger.warning(f"No data found for ticker {ticker}")
                return None

            return data
        except Exception as e:
            # Best-effort boundary: any fetch failure is reported as "no data".
            logger.error(f"Error fetching data for {ticker}: {str(e)}")
            return None

    def calculate_same_day_return(self, data: pd.DataFrame, news_date: datetime) -> Optional[float]:
        """
        Calculate stock return on the same day the news was published (intraday).

        If news_date is not present in the data, the next available trading
        day is used instead.

        Args:
            data: Stock price DataFrame (datetime index, 'Open' and 'Close' columns)
            news_date: Date when news was published

        Returns:
            Intraday return percentage or None if calculation fails
        """
        try:
            # Compare on calendar dates only
            news_date_only = self._to_date(news_date)

            trading_dates, position = self._locate_trading_date(data, news_date_only)
            if position is None:
                logger.warning(f"No trading data available for or after {news_date_only}")
                return None

            trading_date = trading_dates[position]
            if trading_date != news_date_only:
                logger.info(f"News date {news_date_only} was not a trading day, using next trading day: {trading_date}")

            # A day may span several rows; take the first open and the last close.
            day_data = data[data.index.date == trading_date]
            open_price = day_data['Open'].iloc[0]
            close_price = day_data['Close'].iloc[-1]

            # Intraday return: (Close - Open) / Open * 100
            return_pct = self._percent_change(open_price, close_price)
            if return_pct is None:
                logger.warning(f"Invalid prices for same-day return on {trading_date} (Open: {open_price}, Close: {close_price})")
                return None

            logger.info(f"Calculated same-day return for {trading_date}: {return_pct:.2f}% (Open: {open_price:.2f}, Close: {close_price:.2f})")
            return float(return_pct)

        except Exception as e:
            logger.error(f"Error calculating same-day return: {str(e)}")
            return None

    def calculate_next_24h_return(self, data: pd.DataFrame, news_date: datetime) -> Optional[float]:
        """
        Calculate stock return over the next 24 hours after news publication.

        The return is measured close-to-close: from the close of the news
        day (or the next trading day if the news day has no data) to the
        close of the trading day that follows it.

        Args:
            data: Stock price DataFrame (datetime index, 'Close' column)
            news_date: Date when news was published

        Returns:
            24-hour return percentage or None if calculation fails
        """
        try:
            # Compare on calendar dates only
            news_date_only = self._to_date(news_date)

            trading_dates, position = self._locate_trading_date(data, news_date_only)
            if position is None:
                logger.warning(f"No trading data available for or after {news_date_only}")
                return None

            start_trading_date = trading_dates[position]

            # The following trading day is the end of the 24h window
            if position + 1 >= len(trading_dates):
                logger.warning(f"No next trading day available after {start_trading_date}")
                return None
            end_trading_date = trading_dates[position + 1]

            # Use close of start day and close of next day
            start_price = data[data.index.date == start_trading_date]['Close'].iloc[-1]
            end_price = data[data.index.date == end_trading_date]['Close'].iloc[-1]

            return_pct = self._percent_change(start_price, end_price)
            if return_pct is None:
                logger.warning(f"Invalid prices for 24h return from {start_trading_date} to {end_trading_date}")
                return None

            logger.info(f"Calculated 24h return from {start_trading_date} to {end_trading_date}: {return_pct:.2f}%")
            return float(return_pct)

        except Exception as e:
            logger.error(f"Error calculating 24h return: {str(e)}")
            return None

    def calculate_return(self, data: pd.DataFrame, news_date: datetime, hours: int = 24) -> Optional[float]:
        """
        Legacy method - now returns same-day return for compatibility.

        The hours argument is accepted but ignored. Use
        calculate_same_day_return() or calculate_next_24h_return() for
        specific needs.
        """
        return self.calculate_same_day_return(data, news_date)

    def calculate_volatility(self, data: pd.DataFrame, days: int = 14) -> Optional[float]:
        """
        Calculate rolling volatility for the stock.

        The value is the standard deviation of daily close-to-close returns
        over the most recent `days` returns in data, annualized with
        sqrt(252) and expressed in percent. The input frame is not modified.

        Args:
            data: Stock price DataFrame ('Close' column)
            days: Number of days for volatility calculation

        Returns:
            Volatility percentage or None if calculation fails
        """
        try:
            # `days` returns require `days + 1` closing prices, because the
            # first row has no previous close to compare against.
            if len(data) < days + 1:
                logger.warning(f"Insufficient data for {days}-day volatility calculation")
                return None

            # Work on a local Series so the caller's DataFrame is left untouched
            daily_returns = data['Close'].pct_change()

            # Rolling volatility (annualized)
            rolling_volatility = daily_returns.rolling(window=days).std() * np.sqrt(252) * 100

            valid_volatility = rolling_volatility.dropna()
            if valid_volatility.empty:
                # e.g. gaps in the price series, or a window too small for std()
                logger.warning(f"Insufficient data for {days}-day volatility calculation")
                return None

            # Return the most recent volatility
            recent_volatility = valid_volatility.iloc[-1]

            logger.info(f"Calculated {days}-day volatility: {recent_volatility:.2f}%")
            return float(recent_volatility)

        except Exception as e:
            logger.error(f"Error calculating volatility: {str(e)}")
            return None

    def get_market_return(self, news_date: datetime, hours: int = 24) -> Optional[float]:
        """
        Get market index return for the same day as news publication.

        Args:
            news_date: Date when news was published
            hours: Deprecated parameter (kept for compatibility); it does not
                change the result, which is always the same-day return

        Returns:
            Market return percentage for the news day or None if calculation fails
        """
        try:
            market_data = self._fetch_market_data(news_date)

            if market_data is None:
                return None

            return self.calculate_return(market_data, news_date, hours)

        except Exception as e:
            logger.error(f"Error getting market return: {str(e)}")
            return None

    def get_stock_evaluation_data(self, ticker: str, news_date: datetime) -> Dict:
        """
        Get comprehensive stock data for evaluation including both same-day and 24h returns.

        Args:
            ticker: Stock ticker symbol
            news_date: Date when news was published

        Returns:
            Dictionary containing all relevant market data, or a dictionary
            with a single "error" key if the stock data could not be
            fetched or an unexpected error occurred. Individual metrics are
            None when they could not be calculated.
        """
        try:
            # Define date range (get extra days for volatility calculation)
            start_date = news_date - timedelta(days=30)
            end_date = news_date + timedelta(days=5)

            # Fetch stock data
            stock_data = self.fetch_stock_data(ticker, start_date, end_date)

            if stock_data is None:
                return {"error": f"Could not fetch data for ticker {ticker}"}

            # Calculate both same-day and 24h returns
            same_day_return = self.calculate_same_day_return(stock_data, news_date)
            next_24h_return = self.calculate_next_24h_return(stock_data, news_date)
            volatility_14d = self.calculate_volatility(stock_data, 14)

            # Market returns must be measured over the same windows as the
            # stock returns they are compared with: open->close for the
            # same-day figure and close->close for the 24h figure.
            market_same_day = None
            market_24h = None
            market_data = self._fetch_market_data(news_date)
            if market_data is not None:
                market_same_day = self.calculate_same_day_return(market_data, news_date)
                market_24h = self.calculate_next_24h_return(market_data, news_date)

            # Calculate alpha-adjusted returns
            alpha_same_day = None
            alpha_24h = None

            if same_day_return is not None and market_same_day is not None:
                alpha_same_day = same_day_return - market_same_day

            if next_24h_return is not None and market_24h is not None:
                alpha_24h = next_24h_return - market_24h

            return {
                "ticker": ticker,
                "return_same_day": same_day_return,
                "return_next_24h": next_24h_return,
                "return_24h": same_day_return,  # Keep for compatibility with existing code
                "volatility_14d": volatility_14d,
                "market_return_same_day": market_same_day,
                "market_return_24h": market_24h,
                "market_return": market_same_day,  # Keep for compatibility
                "alpha_same_day": alpha_same_day,
                "alpha_24h": alpha_24h,
                "alpha_adjusted": alpha_same_day,  # Keep for compatibility
                "data_points": len(stock_data),
                "date_range": {
                    "start": stock_data.index[0].strftime("%Y-%m-%d"),
                    "end": stock_data.index[-1].strftime("%Y-%m-%d"),
                },
            }

        except Exception as e:
            logger.error(f"Error getting evaluation data: {str(e)}")
            return {"error": str(e)}