File size: 3,024 Bytes
a33458e
9f0d171
a33458e
 
 
 
 
 
 
 
 
2a735cc
a33458e
 
f8ed285
 
a33458e
 
 
9f0d171
 
 
 
 
 
 
 
 
 
 
 
 
 
a33458e
 
 
 
9f0d171
 
 
a33458e
2a735cc
 
 
 
 
 
 
 
 
 
 
 
 
 
a33458e
 
 
 
 
 
2a735cc
a33458e
 
f8ed285
a33458e
 
 
 
 
 
 
 
9f0d171
 
 
2a735cc
 
 
 
 
 
 
 
 
a33458e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import random
from dotenv import load_dotenv
from pathlib import Path

# Read key/value pairs from a local ".env" file into the process
# environment so the os.environ.get() lookups below can see them.
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

# API Keys (empty string when not configured)
HF_API_KEY = os.environ.get('HF_API_KEY', '')
TELEGRAM_BOT_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN', '')

# LLM Configuration
# Defaults are models that are freely accessible without authentication.
LLM_MODEL = os.environ.get('LLM_MODEL', 'gpt2')
EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'sentence-transformers/all-MiniLM-L6-v2')

# Vector Database
# The Docker image ships several DB directories; the presence of the first
# one tells us we are running inside the container rather than on a dev box.
# NOTE(review): the existence check uses the absolute /app/... path while the
# candidate paths are relative — this assumes the container's working
# directory is /app; confirm against the Dockerfile.
if os.path.exists("/app/data/vector_db_1"):
    # Pick one of the container's DB directories at random so that multiple
    # instances are less likely to collide on the same store. An explicit
    # VECTOR_DB_PATH in the environment always wins.
    _db_candidates = (
        './data/vector_db_1',
        './data/vector_db_2',
        './data/vector_db_3',
    )
    VECTOR_DB_PATH = os.getenv('VECTOR_DB_PATH', random.choice(_db_candidates))
else:
    # Local development: single well-known location.
    VECTOR_DB_PATH = os.getenv('VECTOR_DB_PATH', './data/vector_db')

COLLECTION_NAME = os.getenv('COLLECTION_NAME', 'personal_assistant')

# Application Settings
DEFAULT_TEMPERATURE = float(os.getenv('DEFAULT_TEMPERATURE', 0.7))
CHUNK_SIZE = int(os.getenv('CHUNK_SIZE', 512))
CHUNK_OVERLAP = int(os.getenv('CHUNK_OVERLAP', 128))
MAX_TOKENS = int(os.getenv('MAX_TOKENS', 256))

# Telegram Bot Settings
TELEGRAM_ENABLED = os.getenv('TELEGRAM_ENABLED', 'false').lower() == 'true'
TELEGRAM_ALLOWED_USERS = os.getenv('TELEGRAM_ALLOWED_USERS', '')
if TELEGRAM_ALLOWED_USERS:
    TELEGRAM_ALLOWED_USERS = [int(user_id.strip()) for user_id in TELEGRAM_ALLOWED_USERS.split(',')]
else:
    TELEGRAM_ALLOWED_USERS = []

# Hugging Face Dataset Settings for Chat History
HF_DATASET_NAME = os.getenv('HF_DATASET_NAME', '')  # Format: username/repo-name
CHAT_HISTORY_DIR = os.getenv('CHAT_HISTORY_DIR', './data/chat_history')
# How often to sync chat history to HF Hub (in minutes)
SYNC_INTERVAL = int(os.getenv('SYNC_INTERVAL', 60))

# Create a template .env file if it doesn't exist
def create_env_example():
    """Write a template ``.env.example`` to the current directory.

    If the file already exists it is left untouched. Uses ``open(..., 'x')``
    so create-if-absent is a single atomic operation instead of a racy
    exists-then-write check, and writes with an explicit UTF-8 encoding so
    the output does not depend on the platform's default codec.
    """
    try:
        with open('.env.example', 'x', encoding='utf-8') as f:
            f.write("""# API Keys
HF_API_KEY=your_huggingface_api_key_here
TELEGRAM_BOT_TOKEN=your_telegram_bot_token_here

# LLM Configuration
LLM_MODEL=gpt2  # Use small model for Hugging Face Spaces
EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2

# Vector Database
VECTOR_DB_PATH=./data/vector_db
COLLECTION_NAME=personal_assistant

# Application Settings
DEFAULT_TEMPERATURE=0.7
CHUNK_SIZE=512
CHUNK_OVERLAP=128
MAX_TOKENS=256

# Telegram Bot Settings
TELEGRAM_ENABLED=false
TELEGRAM_ALLOWED_USERS=  # Comma-separated list of Telegram user IDs

# Hugging Face Dataset Settings
HF_DATASET_NAME=username/second-brain-history  # Your username/dataset-name
CHAT_HISTORY_DIR=./data/chat_history
SYNC_INTERVAL=60  # How often to sync history to HF (minutes)
""")
    except FileExistsError:
        pass  # template already present; keep the existing copy