File size: 3,619 Bytes
a6576f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
Configuration utilities for Reddit analysis tools.
Handles loading of config from YAML and secrets from environment or Streamlit.
"""
import os
from pathlib import Path
import yaml

# Streamlit is an optional dependency: record whether it can be imported so
# that get_secret() only touches st.secrets when the package is present.
try:
    import streamlit as st
    HAS_STREAMLIT = True
except ImportError:
    HAS_STREAMLIT = False

# Project root: the parent of the directory that contains this file.
ROOT = Path(__file__).resolve().parent.parent

def is_running_streamlit():
    """Return True when the STREAMLIT_SERVER_PORT environment variable is set.

    The presence of that variable (even with an empty value) is used as the
    signal that the code is executing inside a Streamlit app.
    """
    return "STREAMLIT_SERVER_PORT" in os.environ

def load_environment():
    """Populate environment variables from the project's .env file.

    Nothing is loaded when the code is running as a Streamlit app.
    """
    if is_running_streamlit():
        return

    # Imported lazily so python-dotenv is only needed outside Streamlit.
    from dotenv import load_dotenv

    env_file = ROOT / '.env'
    load_dotenv(dotenv_path=env_file)

def get_secret(key, default=None):
    """Get a secret from environment variables or Streamlit secrets.

    Lookup order: the process environment first, then ``st.secrets`` (only
    when Streamlit is importable and the app is running under Streamlit),
    and finally ``default``.

    Args:
        key: Name of the secret to look up.
        default: Value returned when the secret is found in neither source.
            ``None`` (the default) marks the secret as required.

    Returns:
        The secret value, or ``default`` when the secret is missing.

    Raises:
        ValueError: If the secret is missing and no default was supplied.
    """
    value = os.getenv(key)
    if value is None and HAS_STREAMLIT and is_running_streamlit():
        value = st.secrets.get(key)

    if value is not None:
        return value

    if default is None:
        raise ValueError(f"Required secret {key} not found in environment or Streamlit secrets")

    # Previously the default was only honoured on the Streamlit path; outside
    # Streamlit a missing secret silently produced None instead.
    return default
    
def load_config(config_path=None):
    """Load configuration from YAML file.

    Args:
        config_path: Path to the YAML file. When ``None``, ``config.yaml``
            in the project root (``ROOT``) is used.

    Returns:
        The parsed YAML document. An empty file yields an empty dict so
        callers can use ``.get`` without checking for ``None``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    path = ROOT / "config.yaml" if config_path is None else Path(config_path)

    # Explicit encoding: without it the platform's locale encoding is used,
    # which can mis-decode non-ASCII values in the config.
    with open(path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    # yaml.safe_load returns None for an empty document.
    return {} if config is None else config

def get_project_root():
    """Return the module-level ROOT path (the project root directory)."""
    project_root = ROOT
    return project_root

def setup_config():
    """
    Set up and return configuration and commonly used values.

    Loads environment variables via load_environment(), parses the YAML
    config, collects secrets, and resolves the commonly used paths.

    Returns:
        A dictionary containing configuration and common values:
        - config: The parsed YAML config (an empty dict if the file is empty)
        - secrets: A dictionary of secrets. HF_TOKEN is always present;
          REPLICATE_API_TOKEN and the Reddit credentials are included only
          when they can be found.
        - paths: Common file paths. Local paths are joined onto the project
          root; hf_raw_dir / hf_scored_dir are kept exactly as configured.

    Raises:
        ValueError: If the required HF_TOKEN secret cannot be found.
    """
    # Load environment variables
    load_environment()

    # Load config; an empty config file parses to None, so fall back to an
    # empty mapping and let every setting below use its default.
    config = load_config()
    if config is None:
        config = {}

    # Required secret: get_secret raises ValueError when it is missing.
    secrets = {
        'HF_TOKEN': get_secret('HF_TOKEN')
    }

    # REPLICATE_API_TOKEN is optional for scrape.py; the Reddit credentials
    # are required by scrape.py, but that is checked there. Either way a
    # missing value is simply left out of the secrets dict here.
    optional_keys = (
        'REPLICATE_API_TOKEN',
        'REDDIT_CLIENT_ID',
        'REDDIT_CLIENT_SECRET',
        'REDDIT_USER_AGENT',
    )
    for key in optional_keys:
        try:
            secrets[key] = get_secret(key)
        except ValueError:
            continue

    # Common paths: local directories/files are joined onto the project root,
    # while the hf_* entries are paths within the HF repo and stay as given.
    paths = {
        'root': ROOT,
        'raw_dir': ROOT / config.get('raw_dir', 'data_raw'),
        'scored_dir': ROOT / config.get('scored_dir', 'data_scored'),
        'logs_dir': ROOT / config.get('logs_dir', 'logs'),
        'summary_file': ROOT / config.get('summary_file', 'subreddit_daily_summary.csv'),
        'hf_raw_dir': config.get('hf_raw_dir', 'data_raw'),
        'hf_scored_dir': config.get('hf_scored_dir', 'data_scored'),
    }

    return {
        'config': config,
        'secrets': secrets,
        'paths': paths
    }