Spaces:

hblim
/

reddit_sentiment_tracker

Running

App Files Files Community

reddit_sentiment_tracker / reddit_analysis /config_utils.py

hblim

Clean codebase for HF Space (drop Prometheus binary data)

a6576f0 3 months ago

raw

history blame contribute delete

3.62 kB

	"""
	Configuration utilities for Reddit analysis tools.
	Handles loading of config from YAML and secrets from environment or Streamlit.
	"""
	import os
	from pathlib import Path
	import yaml

	# Streamlit is an optional dependency: record whether the import succeeded
	# so later code can decide whether `st` is usable.
	try:
	    import streamlit as st
	except ImportError:
	    HAS_STREAMLIT = False
	else:
	    HAS_STREAMLIT = True

	# Project root: two directory levels up from this file's resolved location.
	ROOT = Path(__file__).resolve().parent.parent

	def is_running_streamlit():
	    """Return True when the STREAMLIT_SERVER_PORT environment variable is set."""
	    # Presence of the variable (even if empty) is the signal used here.
	    return "STREAMLIT_SERVER_PORT" in os.environ

	def load_environment():
	    """Load variables from the project's .env file, unless running under Streamlit."""
	    if is_running_streamlit():
	        return

	    # Imported lazily: dotenv is only needed on the non-Streamlit path.
	    from dotenv import load_dotenv

	    env_file = ROOT / '.env'
	    load_dotenv(dotenv_path=env_file)

	def get_secret(key, default=None):
	    """Get a secret from environment variables or Streamlit secrets.

	    Lookup order: the process environment, then Streamlit secrets (only when
	    Streamlit is importable and the app is running under it), then ``default``.

	    Args:
	        key: Name of the secret / environment variable.
	        default: Value to return when the secret is not found anywhere.

	    Returns:
	        The secret's value, or ``default`` when the secret is not set.

	    Raises:
	        ValueError: If the secret is not found and no default was supplied.
	    """
	    value = os.getenv(key)
	    if value is None and HAS_STREAMLIT and is_running_streamlit():
	        value = st.secrets.get(key, default)
	    if value is None:
	        # The default used to be applied only on the Streamlit path, so a
	        # caller-supplied default was silently lost (None returned) elsewhere.
	        value = default
	    if value is None:
	        raise ValueError(f"Required secret {key} not found in environment or Streamlit secrets")
	    return value

	def load_config(config_path=None):
	    """Load configuration from a YAML file.

	    Args:
	        config_path: Path to the YAML file. When None, ``config.yaml`` in the
	            project root (ROOT) is used.

	    Returns:
	        The parsed YAML content, or an empty dict when the file contains no
	        document.
	    """
	    config_path = ROOT / "config.yaml" if config_path is None else Path(config_path)

	    # Explicit encoding so decoding does not depend on the platform locale.
	    with open(config_path, 'r', encoding='utf-8') as f:
	        config = yaml.safe_load(f)

	    # safe_load gives None for an empty file; return {} so callers that use
	    # dict methods such as .get() keep working.
	    if config is None:
	        return {}
	    return config

	def get_project_root():
	    """Give callers the module-level ROOT path without importing the constant."""
	    return ROOT

	def setup_config():
	    """
	    Set up and return configuration and commonly used values.

	    Loads the .env file (when applicable), parses the YAML config, then
	    collects secrets and paths.

	    Returns:
	        A dictionary containing configuration and common values:
	        - config: The parsed YAML config (an empty dict if the file is empty)
	        - secrets: Secrets that were found; HF_TOKEN is always present, the
	          Replicate and Reddit entries only when they are set
	        - paths: Local paths joined onto the project root, plus the
	          hf_raw_dir / hf_scored_dir values taken as-is from the config

	    Raises:
	        ValueError: If HF_TOKEN cannot be found.
	    """
	    # Load environment variables
	    load_environment()

	    # Load config; an empty config.yaml parses to None, so fall back to an
	    # empty dict and let the defaults below apply instead of crashing on .get
	    config = load_config()
	    if config is None:
	        config = {}

	    # Required secret: get_secret raises ValueError when it is missing
	    secrets = {
	        'HF_TOKEN': get_secret('HF_TOKEN'),
	    }

	    # Get directory paths from config or use defaults
	    raw_dir = config.get('raw_dir', 'data_raw')
	    scored_dir = config.get('scored_dir', 'data_scored')
	    logs_dir = config.get('logs_dir', 'logs')

	    # Get HF repository directories (paths within the HF repo)
	    hf_raw_dir = config.get('hf_raw_dir', 'data_raw')
	    hf_scored_dir = config.get('hf_scored_dir', 'data_scored')

	    # Common paths and constants (local paths are joined onto the project root)
	    paths = {
	        'root': ROOT,
	        'raw_dir': ROOT / raw_dir,
	        'scored_dir': ROOT / scored_dir,
	        'logs_dir': ROOT / logs_dir,
	        'summary_file': ROOT / config.get('summary_file', 'subreddit_daily_summary.csv'),
	        'hf_raw_dir': hf_raw_dir,
	        'hf_scored_dir': hf_scored_dir,
	    }

	    # Secrets that are added only when available. REPLICATE_API_TOKEN is
	    # optional for scrape.py; the Reddit credentials are required by scrape.py
	    # but are checked there, so a missing value is ignored here.
	    optional_keys = (
	        'REPLICATE_API_TOKEN',
	        'REDDIT_CLIENT_ID',
	        'REDDIT_CLIENT_SECRET',
	        'REDDIT_USER_AGENT',
	    )
	    for key in optional_keys:
	        try:
	            secrets[key] = get_secret(key)
	        except ValueError:
	            # Not set anywhere: leave the key out of the secrets dict
	            continue

	    return {
	        'config': config,
	        'secrets': secrets,
	        'paths': paths,
	    }