# hblim's picture
# Clean codebase for HF Space (drop Prometheus binary data)
# a6576f0
"""
Configuration utilities for Reddit analysis tools.
Handles loading of config from YAML and secrets from environment or Streamlit.
"""
import os
from pathlib import Path
import yaml
# Streamlit is treated as optional: HAS_STREAMLIT records whether the import
# succeeded, and `st` is only bound when it did.
try:
    import streamlit as st
except ImportError:
    HAS_STREAMLIT = False
else:
    HAS_STREAMLIT = True
# Project root: two directory levels above this file's resolved location,
# i.e. the parent of the directory that contains this module.
ROOT: Path = Path(__file__).resolve().parent.parent
def is_running_streamlit():
    """Report whether the ``STREAMLIT_SERVER_PORT`` environment variable is set.

    The mere presence of that variable (with any value, including an empty
    string) is what this module treats as "running inside a Streamlit app".

    Returns:
        ``True`` if the variable exists in the environment, else ``False``.
    """
    return "STREAMLIT_SERVER_PORT" in os.environ
def load_environment():
    """Populate the environment from the project's ``.env`` file.

    Does nothing when ``is_running_streamlit()`` reports a Streamlit run.
    Otherwise the ``.env`` file located directly under ``ROOT`` is handed
    to ``dotenv.load_dotenv``.
    """
    if is_running_streamlit():
        return

    # Imported lazily so python-dotenv is only needed on this code path.
    from dotenv import load_dotenv

    env_file = ROOT / '.env'
    load_dotenv(dotenv_path=env_file)
def get_secret(key, default=None):
    """Get a secret from environment variables or Streamlit secrets.

    Lookup order:
      1. the process environment (``os.getenv``);
      2. ``st.secrets`` - only when Streamlit could be imported and
         ``is_running_streamlit()`` is true;
      3. ``default``.

    Args:
        key: Name of the secret / environment variable.
        default: Value returned when the secret is found nowhere. Leaving it
            as ``None`` marks the secret as required.

    Returns:
        The first value found following the lookup order above.

    Raises:
        ValueError: If the secret is found nowhere and no default was given.
    """
    value = os.getenv(key)

    if value is None and HAS_STREAMLIT and is_running_streamlit():
        try:
            value = st.secrets.get(key)
        except FileNotFoundError:
            # NOTE(review): Streamlit appears to raise a FileNotFoundError
            # (or a subclass of it) when no secrets.toml exists - confirm
            # against the installed version. Treat that as "not found" so a
            # missing optional secret falls through to the default instead
            # of crashing.
            value = None

    # Apply the default on every path. Previously it was only consulted
    # inside the Streamlit branch, so callers outside Streamlit got None
    # back even though they had supplied a default.
    if value is None:
        value = default

    if value is None:
        raise ValueError(f"Required secret {key} not found in environment or Streamlit secrets")
    return value
def load_config(config_path=None):
    """Load configuration from a YAML file.

    Args:
        config_path: Location of the YAML file (string or path-like).
            When ``None``, ``config.yaml`` directly under ``ROOT`` is used.

    Returns:
        The parsed YAML content. An empty file, for which
        ``yaml.safe_load`` yields ``None``, is returned as an empty dict so
        callers can use ``.get`` on the result.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        Errors raised by ``yaml.safe_load`` for malformed input propagate
        unchanged.
    """
    if config_path is None:
        config_path = ROOT / "config.yaml"
    else:
        config_path = Path(config_path)

    # Decode explicitly as UTF-8 instead of relying on the platform's
    # default text encoding, which differs between systems.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = yaml.safe_load(f)

    return {} if config is None else config
def get_project_root() -> Path:
    """Return the project root directory.

    Returns:
        The module-level ``ROOT`` path object, unchanged.
    """
    return ROOT
def setup_config():
    """
    Set up and return configuration and commonly used values.

    Loads the ``.env`` file (via ``load_environment``), parses the default
    YAML config, and gathers secrets and paths.

    Returns:
        A dictionary containing configuration and common values:
        - config: The parsed YAML config (an empty dict if the file is empty)
        - secrets: ``HF_TOKEN`` (required) plus whichever of
          ``REPLICATE_API_TOKEN``, ``REDDIT_CLIENT_ID``,
          ``REDDIT_CLIENT_SECRET`` and ``REDDIT_USER_AGENT`` could be found
        - paths: ``root``, ``raw_dir``, ``scored_dir``, ``logs_dir`` and
          ``summary_file`` joined onto the project root; ``hf_raw_dir`` and
          ``hf_scored_dir`` are passed through as configured (paths within
          the HF repo, not joined to the project root)

    Raises:
        ValueError: If the required ``HF_TOKEN`` secret cannot be found.
    """
    # Load environment variables
    load_environment()

    # An empty config file parses to None; fall back to an empty mapping so
    # the defaults below apply instead of failing on ``None.get``.
    config = load_config() or {}

    # HF_TOKEN is mandatory: get_secret raises ValueError when it is missing.
    secrets = {'HF_TOKEN': get_secret('HF_TOKEN')}

    # Local directories come from the config, with defaults, and are anchored
    # at the project root. The hf_* entries are left exactly as configured.
    paths = {
        'root': ROOT,
        'raw_dir': ROOT / config.get('raw_dir', 'data_raw'),
        'scored_dir': ROOT / config.get('scored_dir', 'data_scored'),
        'logs_dir': ROOT / config.get('logs_dir', 'logs'),
        'summary_file': ROOT / config.get('summary_file', 'subreddit_daily_summary.csv'),
        'hf_raw_dir': config.get('hf_raw_dir', 'data_raw'),
        'hf_scored_dir': config.get('hf_scored_dir', 'data_scored'),
    }

    # Optional secrets: included only when available. REPLICATE_API_TOKEN is
    # optional for scrape.py; the Reddit credentials are required by
    # scrape.py, which is expected to check for them itself.
    optional_keys = (
        'REPLICATE_API_TOKEN',
        'REDDIT_CLIENT_ID',
        'REDDIT_CLIENT_SECRET',
        'REDDIT_USER_AGENT',
    )
    for key in optional_keys:
        try:
            secrets[key] = get_secret(key)
        except ValueError:
            # Deliberate best-effort: a missing optional secret is simply
            # left out of the result.
            continue

    return {
        'config': config,
        'secrets': secrets,
        'paths': paths,
    }