"""
Configuration settings for llama.cpp in Hugging Face Space
"""
import os
# Model configuration passed to the llama.cpp loader.
MODEL_CONFIG = {
    "n_ctx": 2048,        # Context window size (tokens)
    "n_threads": 2,       # Number of threads (conservative for HF Spaces)
    "n_batch": 8,         # Batch size for prompt processing
    "use_mmap": True,     # Use memory mapping for model files
    "use_mlock": False,   # Don't lock model in memory (saves RAM)
    "verbose": False,     # Reduce logging in production
}
# Default sampling parameters for text generation.
GENERATION_CONFIG = {
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 40,
    "repeat_penalty": 1.1,
    # Stop sequences: code fences, long blank runs, and chat-role markers.
    "stop": ["```", "\n\n\n", "Human:", "Assistant:"],
}
# Hugging Face Space specific settings.
HF_SPACE_CONFIG = {
    "max_memory_usage": "2GB",  # Conservative memory usage
    "timeout_seconds": 30,      # Request timeout
    "enable_cpu_only": True,    # Force CPU inference
}
# Model download settings (consumed by the Hugging Face Hub download call).
# NOTE(review): `use_auth_token` and `resume_download` are deprecated in recent
# huggingface_hub releases (superseded by `token`; resume is now automatic) —
# keys kept unchanged here because callers may rely on them; verify against
# the hub version pinned by this Space.
MODEL_DOWNLOAD_CONFIG = {
    "cache_dir": "./models",
    "use_auth_token": os.getenv("HF_TOKEN", None),  # None when no token is set
    "resume_download": True,
}
# Recommended small GGUF models for demonstration.
# The first entry is the default returned by get_recommended_model().
RECOMMENDED_MODELS = [
    {
        "name": "Osmosis-Structure-0.6B",
        "repo_id": "osmosis-ai/Osmosis-Structure-0.6B",
        "filename": "Osmosis-Structure-0.6B-BF16.gguf",
        "size": "~1.2GB",
        "description": "Osmosis AI structure-focused model for JSON generation"
    },
    {
        "name": "TinyLlama-1.1B-Chat-v1.0-GGUF",
        "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "filename": "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
        "size": "~700MB",
        "description": "Small, fast model good for testing"
    }
]
def get_model_config():
    """Return a copy of the llama.cpp model configuration.

    A copy is returned so callers can tweak settings without
    mutating the shared module-level MODEL_CONFIG dict.
    """
    return MODEL_CONFIG.copy()
def get_generation_config():
    """Return a copy of the default generation (sampling) configuration.

    A copy is returned so callers can tweak settings without
    mutating the shared module-level GENERATION_CONFIG dict.
    """
    return GENERATION_CONFIG.copy()
def get_recommended_model():
    """Return the default recommended model entry for this Space.

    Returns the first entry of RECOMMENDED_MODELS — currently the
    Osmosis-Structure-0.6B model. (The previous inline comment claimed
    TinyLlama was the default, which did not match index 0.)
    """
    return RECOMMENDED_MODELS[0]