"""
Configuration utilities for Reddit analysis tools.
Handles loading of config from YAML and secrets from environment or Streamlit.
"""
import os
from pathlib import Path

import yaml

# Determine if Streamlit is available
try:
    import streamlit as st
    HAS_STREAMLIT = True
except ImportError:
    HAS_STREAMLIT = False

# Project root: the parent of the directory that contains this file
ROOT = Path(__file__).resolve().parent.parent


def is_running_streamlit():
    """Return True when the STREAMLIT_SERVER_PORT environment variable is set.

    The presence of that variable is used as the signal that the code is
    executing inside a Streamlit app.
    """
    return os.getenv("STREAMLIT_SERVER_PORT") is not None


def load_environment():
    """Load environment variables from ROOT/.env unless running as a Streamlit app."""
    if not is_running_streamlit():
        # Imported lazily so python-dotenv is only needed outside Streamlit.
        from dotenv import load_dotenv
        load_dotenv(dotenv_path=ROOT / '.env')


def get_secret(key, default=None):
    """Get a secret from environment variables or Streamlit secrets.

    Lookup order: the process environment first, then ``st.secrets`` (only
    when Streamlit is importable and the app is running under Streamlit),
    then ``default``.

    Args:
        key: Name of the secret / environment variable.
        default: Value to return when the secret is not found anywhere.
            ``None`` means the secret is required.

    Returns:
        The secret value, or ``default`` when the secret is missing and a
        non-None default was supplied.

    Raises:
        ValueError: If the secret is not found and ``default`` is None.
    """
    value = os.getenv(key)
    if value is None and HAS_STREAMLIT and is_running_streamlit():
        value = st.secrets.get(key)
    if value is not None:
        return value
    if default is None:
        raise ValueError(f"Required secret {key} not found in environment or Streamlit secrets")
    # Fall back to the caller's default on every path, not only when the
    # Streamlit lookup happened to run.
    return default


def load_config(config_path=None):
    """Load configuration from a YAML file.

    Args:
        config_path: Path to the YAML file. Defaults to ``ROOT / "config.yaml"``.

    Returns:
        The parsed YAML content. An empty file yields an empty dict so that
        callers can use ``.get(...)`` without a None check.
    """
    if config_path is None:
        config_path = ROOT / "config.yaml"
    else:
        config_path = Path(config_path)

    # Explicit encoding so parsing does not depend on the platform locale.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # yaml.safe_load returns None for an empty document.
    return {} if config is None else config


def get_project_root():
    """Return the project root directory."""
    return ROOT


def setup_config():
    """
    Set up and return configuration and commonly used values.

    Returns:
        A dictionary containing configuration and common values:
        - config: The parsed YAML config
        - secrets: A dictionary of secrets; HF_TOKEN is always present,
          the Replicate and Reddit credentials only when they are set
        - paths: Common file paths (local ones are anchored at the project
          root; the hf_* entries are kept as given in the config)

    Raises:
        ValueError: If the required HF_TOKEN secret cannot be found.
    """
    # Load environment variables
    load_environment()

    # Load config
    config = load_config()

    # Required secrets: a missing HF_TOKEN raises ValueError here
    secrets = {
        'HF_TOKEN': get_secret('HF_TOKEN'),
    }

    # Get directory paths from config or use defaults
    raw_dir = config.get('raw_dir', 'data_raw')
    scored_dir = config.get('scored_dir', 'data_scored')
    logs_dir = config.get('logs_dir', 'logs')

    # Get HF repository directories (paths within the HF repo)
    hf_raw_dir = config.get('hf_raw_dir', 'data_raw')
    hf_scored_dir = config.get('hf_scored_dir', 'data_scored')

    # Common paths and constants
    paths = {
        'root': ROOT,
        'raw_dir': ROOT / raw_dir,
        'scored_dir': ROOT / scored_dir,
        'logs_dir': ROOT / logs_dir,
        'summary_file': ROOT / config.get('summary_file', 'subreddit_daily_summary.csv'),
        'hf_raw_dir': hf_raw_dir,
        'hf_scored_dir': hf_scored_dir,
    }

    # Optional secrets: REPLICATE_API_TOKEN is optional for scrape.py, and the
    # Reddit credentials are required by scrape.py but validated there, so a
    # missing value is simply left out of the dictionary.
    optional_keys = (
        'REPLICATE_API_TOKEN',
        'REDDIT_CLIENT_ID',
        'REDDIT_CLIENT_SECRET',
        'REDDIT_USER_AGENT',
    )
    for key in optional_keys:
        try:
            secrets[key] = get_secret(key)
        except ValueError:
            pass

    return {
        'config': config,
        'secrets': secrets,
        'paths': paths,
    }