File size: 7,000 Bytes
85c8c23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de36c63
 
 
 
 
 
 
85c8c23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""
Configuration module for ResearchMate
Provides backward compatibility with new settings system
"""

import os
from pathlib import Path
from typing import Optional
from ..settings import get_settings

# Settings object from the package-level settings module. The Config class
# body below reads from it once, while the class is being defined (i.e. at
# import time of this module).
settings = get_settings()

class Config:
    """Configuration settings for ResearchMate - Legacy compatibility wrapper.

    Every attribute is computed once, while this class body executes (i.e.
    at import time), from the module-level ``settings`` object and from
    environment variables. Later changes to either are not picked up, except
    that ``create_directories`` may repoint the path attributes to a
    fallback location.
    """

    # Application settings
    APP_NAME: str = "ResearchMate"
    VERSION: str = "2.0.0"
    DEBUG: bool = settings.server.debug
    HOST: str = settings.server.host
    PORT: int = settings.server.port

    # API Keys
    GROQ_API_KEY: Optional[str] = settings.get_groq_api_key()

    # Groq Llama 3.3 70B settings
    LLAMA_MODEL: str = settings.ai_model.model_name
    # NOTE(review): input and output limits both read the same max_tokens
    # setting -- confirm this is intended.
    MAX_INPUT_TOKENS: int = settings.ai_model.max_tokens
    MAX_OUTPUT_TOKENS: int = settings.ai_model.max_tokens
    TEMPERATURE: float = settings.ai_model.temperature
    TOP_P: float = settings.ai_model.top_p

    # Embeddings and chunking
    EMBEDDING_MODEL: str = settings.database.embedding_model
    CHUNK_SIZE: int = settings.search.chunk_size
    CHUNK_OVERLAP: int = settings.search.chunk_overlap

    # Database settings - Use environment variables for Docker compatibility
    # BASE_DIR is three levels above the directory containing this file.
    BASE_DIR: Path = Path(__file__).parent.parent.parent

    # Use environment variables if available, otherwise fall back to paths
    # under BASE_DIR.
    CHROMA_DB_PATH: str = os.getenv('CHROMA_DB_DIR', str(BASE_DIR / "chroma_db"))
    COLLECTION_NAME: str = settings.database.collection_name
    # The configured persist dir is joined onto BASE_DIR with pathlib, which
    # collapses a leading "./" by itself. str.lstrip('./') must not be used
    # here: it strips a *set of characters*, so ".chroma" or "../chroma"
    # would silently lose their leading dots.
    PERSIST_DIRECTORY: str = os.getenv(
        'CHROMA_DIR',
        str(BASE_DIR / settings.database.chroma_persist_dir),
    )

    # Upload settings
    UPLOAD_DIRECTORY: str = os.getenv('UPLOADS_DIR', settings.get_upload_dir())
    MAX_FILE_SIZE: int = settings.upload.max_file_size
    # Extensions are stored without their leading dot (".pdf" -> "pdf").
    ALLOWED_EXTENSIONS: set = {
        ext.lstrip('.') for ext in settings.upload.allowed_extensions
    }

    # Search settings
    TOP_K_SIMILAR: int = settings.search.max_results
    MAX_PAPER_LENGTH: int = 100000  # Keep existing default
    MAX_SUMMARY_LENGTH: int = 2000  # Keep existing default

    # Rate limiting
    RATE_LIMIT_ENABLED: bool = os.getenv("RATE_LIMIT_ENABLED", "true").lower() == "true"
    RATE_LIMIT_REQUESTS: int = int(os.getenv("RATE_LIMIT_REQUESTS", "100"))
    RATE_LIMIT_WINDOW: int = int(os.getenv("RATE_LIMIT_WINDOW", "3600"))

    # Security
    SECRET_KEY: str = os.getenv("SECRET_KEY", "your-secret-key-change-in-production")
    # Comma-separated list; whitespace around entries is ignored and empty
    # entries are dropped, so "a, b," yields ["a", "b"].
    ALLOWED_HOSTS: list = [
        host.strip()
        for host in os.getenv("ALLOWED_HOSTS", "localhost,127.0.0.1").split(",")
        if host.strip()
    ]

    # Logging - Use environment variable for logs directory
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: str = os.getenv("LOG_FILE", str(Path(os.getenv('LOGS_DIR', str(BASE_DIR / "logs"))) / "app.log"))

    # External APIs
    ARXIV_API_BASE_URL: str = os.getenv("ARXIV_API_BASE_URL", "http://export.arxiv.org/api/query")
    SEMANTIC_SCHOLAR_API_URL: str = os.getenv("SEMANTIC_SCHOLAR_API_URL", "https://api.semanticscholar.org/graph/v1/paper/search")
    SEMANTIC_SCHOLAR_API_KEY: Optional[str] = os.getenv("SEMANTIC_SCHOLAR_API_KEY")

    @classmethod
    def create_directories(cls):
        """Create the ChromaDB, persist, upload and log directories.

        Each directory is created with ``mkdir(parents=True, exist_ok=True)``.
        If that raises ``PermissionError``, a fallback directory with the same
        final path component is created under ``/data/researchmate`` (when the
        ``HF_SPACE``/``SPACE_ID`` environment variables indicate a Hugging Face
        Space) or ``./tmp/researchmate`` (otherwise), and the owning class
        attribute is repointed to it. A failure to create the fallback is
        printed and skipped; the attribute then keeps its original value.
        """
        # Pair every directory with the attribute that owns it, so a fallback
        # updates exactly that attribute. Guessing the owner from substrings
        # of the path breaks as soon as a path is overridden by an environment
        # variable or a parent directory happens to contain "log"/"upload".
        targets = [
            ("CHROMA_DB_PATH", cls.CHROMA_DB_PATH),
            ("PERSIST_DIRECTORY", cls.PERSIST_DIRECTORY),
            ("UPLOAD_DIRECTORY", cls.UPLOAD_DIRECTORY),
            ("LOG_FILE", str(Path(cls.LOG_FILE).parent)),
        ]

        for attr_name, directory in targets:
            try:
                Path(directory).mkdir(parents=True, exist_ok=True)
            except PermissionError as e:
                print(f"Permission error creating {directory}: {e}")
            else:
                print(f"Created/verified directory: {directory}")
                continue

            # Try to create in /data as fallback on Hugging Face, else ./tmp
            running_on_hf = os.environ.get("HF_SPACE") == "1" or os.environ.get("SPACE_ID")
            if running_on_hf:
                fallback_dir = f"/data/researchmate/{Path(directory).name}"
            else:
                fallback_dir = f"./tmp/researchmate/{Path(directory).name}"

            try:
                Path(fallback_dir).mkdir(parents=True, exist_ok=True)
            except Exception as fallback_error:
                # Deliberately broad: this runs at import time and is
                # best-effort, so report and continue with other directories.
                print(f"Failed to create fallback directory {fallback_dir}: {fallback_error}")
                continue

            print(f"Created fallback directory: {fallback_dir}")
            # Update the class attribute to use the fallback
            if attr_name == "LOG_FILE":
                # LOG_FILE is a file path: keep the configured file name and
                # only move it into the fallback directory.
                cls.LOG_FILE = str(Path(fallback_dir) / Path(cls.LOG_FILE).name)
            else:
                setattr(cls, attr_name, fallback_dir)

    @classmethod
    def validate_config(cls):
        """Validate configuration settings.

        Raises:
            ValueError: if ``GROQ_API_KEY`` is missing and the ``DOCKER_ENV``
                environment variable is unset or empty, if ``MAX_FILE_SIZE``
                exceeds 50MB, or if ``CHUNK_SIZE`` is below 100.
        """
        if not cls.GROQ_API_KEY:
            print("Warning: GROQ_API_KEY environment variable is not set")
            # Don't raise error in Docker environment, just warn
            if not os.getenv('DOCKER_ENV'):
                raise ValueError("GROQ_API_KEY environment variable is required")

        if cls.MAX_FILE_SIZE > 50 * 1024 * 1024:  # 50MB limit
            raise ValueError("MAX_FILE_SIZE cannot exceed 50MB")

        if cls.CHUNK_SIZE < 100:
            raise ValueError("CHUNK_SIZE must be at least 100 characters")

    @classmethod
    def get_summary(cls) -> dict:
        """Return a dict of selected settings.

        Secrets (``GROQ_API_KEY``, ``SECRET_KEY``, the Semantic Scholar key)
        are not included. Path values reflect any fallback applied by
        ``create_directories``.
        """
        return {
            "app_name": cls.APP_NAME,
            "version": cls.VERSION,
            "debug": cls.DEBUG,
            "host": cls.HOST,
            "port": cls.PORT,
            "llama_model": cls.LLAMA_MODEL,
            "embedding_model": cls.EMBEDDING_MODEL,
            "chunk_size": cls.CHUNK_SIZE,
            "max_file_size": cls.MAX_FILE_SIZE,
            "rate_limit_enabled": cls.RATE_LIMIT_ENABLED,
            "chroma_db_path": cls.CHROMA_DB_PATH,
            "persist_directory": cls.PERSIST_DIRECTORY,
            "upload_directory": cls.UPLOAD_DIRECTORY,
            "log_file": cls.LOG_FILE,
        }

# Initialize configuration
# NOTE: all settings are class attributes, so this instance holds no state of
# its own; it gives importers a ready-made `config` object to read from.
config = Config()

# Create directories before validation
# NOTE: this runs at import time. create_directories() may repoint the path
# attributes to a fallback location, so it must run before the paths are
# printed below.
config.create_directories()

# Validate configuration on import
# A ValueError raised inside the try block is always printed; it is re-raised
# (aborting the import) unless config.DEBUG is truthy.
try:
    config.validate_config()
    print("Configuration validated successfully")
    print(f"Using ChromaDB path: {config.CHROMA_DB_PATH}")
    print(f"Using persist directory: {config.PERSIST_DIRECTORY}")
    print(f"Using upload directory: {config.UPLOAD_DIRECTORY}")
except ValueError as e:
    print(f"Configuration error: {e}")
    if not config.DEBUG:
        raise