File size: 1,107 Bytes
6441bc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""
Configuration for database to HuggingFace pipeline.
Update these settings to match your setup.
"""

import os

# Database Configuration
# Note: Database connection is handled by future_bench.database.get_session()
# The script uses the same database connection as the main FutureBench app


# HuggingFace Configuration
# Access token plus the repository names used for data, results and requests.
HF_CONFIG = dict(
    # Taken from the HF_TOKEN environment variable; None when it is not set.
    token=os.environ.get("HF_TOKEN"),
    data_repo="futurebench/data",
    results_repo="futurebench/results",
    # Optional: for model submissions.
    requests_repo="futurebench/requests",
)

# Data Processing Settings
# Filters that decide which records end up in the exported dataset.
PROCESSING_CONFIG = dict(
    # How many days of data to include.
    days_history=180,
    # Minimum predictions per model to include.
    min_predictions=5,
    # Which event types to include.
    event_types=["news", "polymarket", "sports"],
    # Models to exclude from the public dataset.
    exclude_models=["test", "debug"],
)

# Note: Schema mapping is not needed because we use SQLAlchemy ORM models
# The script uses the same models as convert_to_csv.py:
# - EventBase (events table)
# - Prediction (predictions table)