Spaces:
Running
Running
"""
Configuration utilities for the Reddit analysis tools.
Handles loading the configuration from YAML and secrets from environment
variables or Streamlit secrets.
"""
import os | |
from pathlib import Path | |
import yaml | |
# Streamlit is treated as an optional dependency: remember whether the import
# succeeded so later code can decide if `st` is usable.
try:
    import streamlit as st
except ImportError:
    HAS_STREAMLIT = False
else:
    HAS_STREAMLIT = True
# Project root: the directory one level above the folder that holds this
# module, as an absolute, symlink-resolved path.
_MODULE_DIR = Path(__file__).resolve().parent
ROOT = _MODULE_DIR.parent
def is_running_streamlit():
    """Return True when the STREAMLIT_SERVER_PORT environment variable is set.

    The presence of that variable (with any value, even an empty one) is the
    signal this module uses to decide it is executing inside a Streamlit app.
    """
    return "STREAMLIT_SERVER_PORT" in os.environ
def load_environment():
    """Read the project's ``.env`` file into the process environment.

    Nothing is loaded when the code is running as a Streamlit app.
    """
    if is_running_streamlit():
        return

    # Imported lazily: dotenv is only required on the non-Streamlit path.
    from dotenv import load_dotenv

    env_file = ROOT / '.env'
    load_dotenv(dotenv_path=env_file)
def get_secret(key, default=None):
    """Look up a secret in the environment, then in Streamlit secrets.

    Args:
        key: Name of the environment variable / Streamlit secret.
        default: Value to return when the secret is found in neither place.
            Leaving it as ``None`` marks the secret as required.

    Returns:
        The environment variable's value if it is set; otherwise the
        Streamlit secret when Streamlit is importable and the app is running
        under Streamlit; otherwise ``default``.

    Raises:
        ValueError: If the secret is not found and no default was supplied.
    """
    value = os.getenv(key)
    if value is None and HAS_STREAMLIT and is_running_streamlit():
        # NOTE(review): accessing st.secrets may raise when no secrets file
        # exists -- confirm against the Streamlit version in use.
        value = st.secrets.get(key, default)
    if value is None:
        if default is None:
            raise ValueError(f"Required secret {key} not found in environment or Streamlit secrets")
        # Honour the caller's default on every path; it used to be applied
        # only inside Streamlit, so other callers silently received None.
        value = default
    return value
def load_config(config_path=None):
    """Load configuration from a YAML file.

    Args:
        config_path: Location of the YAML file. When omitted, ``config.yaml``
            in the project root (``ROOT``) is used.

    Returns:
        The parsed YAML content, or an empty dict when the file contains no
        document at all.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    path = ROOT / "config.yaml" if config_path is None else Path(config_path)
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default text encoding.
    with open(path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)
    # safe_load yields None for an empty file; hand back an empty mapping so
    # callers can use .get() without a None check.
    if config is None:
        return {}
    return config
def get_project_root():
    """Expose the module-level ``ROOT`` path to callers."""
    project_root = ROOT
    return project_root
def setup_config():
    """
    Load the environment and YAML config and bundle commonly used values.

    Returns:
        A dictionary containing configuration and common values:
        - config: The parsed YAML config (an empty dict if the file is empty)
        - secrets: The secrets that were found. HF_TOKEN is always present;
          REPLICATE_API_TOKEN and the REDDIT_* credentials are included only
          when they are available.
        - paths: Common locations. Local entries are joined onto the project
          root; hf_raw_dir / hf_scored_dir are kept as configured because
          they are paths within the HF repo.

    Raises:
        ValueError: If the required HF_TOKEN secret cannot be found.
    """
    # Load environment variables
    load_environment()

    # Load config; an empty YAML file parses to None, so normalise it to an
    # empty mapping before the .get() lookups below.
    config = load_config()
    if config is None:
        config = {}

    # Required secrets (get_secret raises ValueError when one is missing)
    secrets = {
        'HF_TOKEN': get_secret('HF_TOKEN'),
    }

    # Get directory paths from config or use defaults
    raw_dir = config.get('raw_dir', 'data_raw')
    scored_dir = config.get('scored_dir', 'data_scored')
    logs_dir = config.get('logs_dir', 'logs')

    # Get HF repository directories (paths within the HF repo)
    hf_raw_dir = config.get('hf_raw_dir', 'data_raw')
    hf_scored_dir = config.get('hf_scored_dir', 'data_scored')

    # Common paths and constants (local paths are relative to project root)
    paths = {
        'root': ROOT,
        'raw_dir': ROOT / raw_dir,
        'scored_dir': ROOT / scored_dir,
        'logs_dir': ROOT / logs_dir,
        'summary_file': ROOT / config.get('summary_file', 'subreddit_daily_summary.csv'),
        'hf_raw_dir': hf_raw_dir,
        'hf_scored_dir': hf_scored_dir,
    }

    # Secrets that are added only when available. REPLICATE_API_TOKEN is
    # optional for scrape.py; the Reddit credentials are required by
    # scrape.py, which performs its own check.
    optional_keys = (
        'REPLICATE_API_TOKEN',
        'REDDIT_CLIENT_ID',
        'REDDIT_CLIENT_SECRET',
        'REDDIT_USER_AGENT',
    )
    for key in optional_keys:
        try:
            secrets[key] = get_secret(key)
        except ValueError:
            # Missing optional secret: leave it out of the result.
            continue

    return {
        'config': config,
        'secrets': secrets,
        'paths': paths,
    }