"""
Configuration settings for running llama.cpp in a Hugging Face Space.
"""

import os

# Model configuration
MODEL_CONFIG = {
    "n_ctx": 2048,           # Context window size
    "n_threads": 2,          # Number of threads (conservative for HF Spaces)
    "n_batch": 8,            # Batch size for prompt processing
    "use_mmap": True,        # Use memory mapping for model files
    "use_mlock": False,      # Don't lock model in memory (saves RAM)
    "verbose": False,        # Reduce logging in production
}
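
# Usage sketch (assuming the llama-cpp-python bindings, which this config
# appears to target): these keys map directly onto llama_cpp.Llama keyword
# arguments, e.g.:
#
#     from llama_cpp import Llama
#     llm = Llama(model_path="./models/model.gguf", **MODEL_CONFIG)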

# Generation defaults
GENERATION_CONFIG = {
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40,
    "repeat_penalty": 1.1,
    "stop": ["```", "\n\n\n", "Human:", "Assistant:"],
}
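
# Usage sketch: these are per-request sampling parameters, passed straight
# through to the model call (llama-cpp-python assumed, as above), e.g.:
#
#     output = llm(prompt, max_tokens=256, **GENERATION_CONFIG)
#     text = output["choices"][0]["text"]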

# Hugging Face Space specific settings
HF_SPACE_CONFIG = {
    "max_memory_usage": "2GB",    # Conservative memory usage
    "timeout_seconds": 30,        # Request timeout
    "enable_cpu_only": True,      # Force CPU inference
}
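
# Note: these are application-level limits enforced by the Space code rather
# than llama.cpp parameters; CPU-only inference, for instance, corresponds to
# constructing Llama with n_gpu_layers=0 (the default in CPU-only builds).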

# Model download settings
MODEL_DOWNLOAD_CONFIG = {
    "cache_dir": "./models",
    "use_auth_token": os.getenv("HF_TOKEN", None),
    "resume_download": True,
}

# Recommended small GGUF models for demonstration
RECOMMENDED_MODELS = [
    {
        "name": "Osmosis-Structure-0.6B",
        "repo_id": "osmosis-ai/Osmosis-Structure-0.6B",
        "filename": "Osmosis-Structure-0.6B-BF16.gguf",
        "size": "~1.2GB",
        "description": "Osmosis AI structure-focused model for JSON generation"
    },
    {
        "name": "TinyLlama-1.1B-Chat-v1.0-GGUF",
        "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "filename": "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
        "size": "~700MB",
        "description": "Small, fast model good for testing"
    }
]
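
def download_recommended_model():
    """Sketch: fetch the default GGUF file via huggingface_hub (assumed dependency).

    The import is deferred so this config module stays importable without it.
    Recent huggingface_hub releases take `token=` instead of the older
    `use_auth_token=` keyword, so the config key is remapped here; resuming
    interrupted downloads is implicit in those releases.
    """
    from huggingface_hub import hf_hub_download

    model = RECOMMENDED_MODELS[0]
    return hf_hub_download(
        repo_id=model["repo_id"],
        filename=model["filename"],
        cache_dir=MODEL_DOWNLOAD_CONFIG["cache_dir"],
        token=MODEL_DOWNLOAD_CONFIG["use_auth_token"],
    )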

def get_model_config():
    """Return a copy of the model configuration tuned for HF Spaces."""
    return MODEL_CONFIG.copy()

def get_generation_config():
    """Return a copy of the default generation parameters."""
    return GENERATION_CONFIG.copy()

def get_recommended_model():
    """Return the default recommended model entry."""
    return RECOMMENDED_MODELS[0]  # Osmosis-Structure-0.6B is the default
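
if __name__ == "__main__":
    # Quick sanity check: dump the resolved configuration when run directly.
    import json

    print(json.dumps(get_model_config(), indent=2))
    print(json.dumps(get_generation_config(), indent=2))
    print("Default model:", get_recommended_model()["repo_id"])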