"""
Configuration settings for llama.cpp in Hugging Face Space
"""
import os

# Model configuration
MODEL_CONFIG = {
    "n_ctx": 2048,       # Context window size
    "n_threads": 2,      # Number of threads (conservative for HF Spaces)
    "n_batch": 8,        # Batch size for prompt processing
    "use_mmap": True,    # Use memory mapping for model files
    "use_mlock": False,  # Don't lock model in memory (saves RAM)
    "verbose": False,    # Reduce logging in production
}
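
# Usage sketch (illustrative, not part of the original module): MODEL_CONFIG
# is shaped to be unpacked into llama-cpp-python's Llama constructor; the
# model path below is a placeholder.
#
#   from llama_cpp import Llama
#   llm = Llama(model_path="./models/model.gguf", **MODEL_CONFIG)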

# Generation defaults
GENERATION_CONFIG = {
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40,
    "repeat_penalty": 1.1,
    "stop": ["```", "\n\n\n", "Human:", "Assistant:"],
}
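
# Usage sketch (illustrative): unpack into a llama-cpp-python completion call;
# max_tokens is left to the caller and is not one of these defaults.
#
#   output = llm("Generate a JSON object:", max_tokens=256, **GENERATION_CONFIG)
#   text = output["choices"][0]["text"]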

# Hugging Face Space specific settings
HF_SPACE_CONFIG = {
    "max_memory_usage": "2GB",  # Conservative memory usage
    "timeout_seconds": 30,      # Request timeout
    "enable_cpu_only": True,    # Force CPU inference
}

# Model download settings
MODEL_DOWNLOAD_CONFIG = {
    "cache_dir": "./models",
    "use_auth_token": os.getenv("HF_TOKEN"),  # None when unset
    "resume_download": True,
}
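
# Usage sketch (assumption: these keys target huggingface_hub.hf_hub_download;
# note that use_auth_token and resume_download are deprecated in newer
# huggingface_hub releases, which take token= and resume automatically):
#
#   from huggingface_hub import hf_hub_download
#   model = get_recommended_model()  # defined below
#   path = hf_hub_download(model["repo_id"], model["filename"],
#                          **MODEL_DOWNLOAD_CONFIG)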

# Recommended small GGUF models for demonstration
RECOMMENDED_MODELS = [
    {
        "name": "Osmosis-Structure-0.6B",
        "repo_id": "osmosis-ai/Osmosis-Structure-0.6B",
        "filename": "Osmosis-Structure-0.6B-BF16.gguf",
        "size": "~1.2GB",
        "description": "Osmosis AI structure-focused model for JSON generation",
    },
    {
        "name": "TinyLlama-1.1B-Chat-v1.0-GGUF",
        "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        # TheBloke's GGUF filenames use uppercase quant suffixes
        "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
        "size": "~700MB",
        "description": "Small, fast model good for testing",
    },
]

def get_model_config():
    """Get model configuration optimized for HF Spaces"""
    return MODEL_CONFIG.copy()

def get_generation_config():
    """Get generation configuration"""
    return GENERATION_CONFIG.copy()

def get_recommended_model():
    """Get the default recommended model for this Space"""
    return RECOMMENDED_MODELS[0]  # Osmosis-Structure-0.6B, the first entry
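
if __name__ == "__main__":
    # Minimal smoke check (added for illustration): run this module directly
    # to print the resolved settings without loading any model.
    print("Model config:", get_model_config())
    print("Generation config:", get_generation_config())
    print("Default model:", get_recommended_model()["repo_id"])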