"""
Configuration settings for llama.cpp in Hugging Face Space
"""
import os

# Model configuration
MODEL_CONFIG = {
"n_ctx": 2048, # Context window size
"n_threads": 2, # Number of threads (conservative for HF Spaces)
"n_batch": 8, # Batch size for prompt processing
"use_mmap": True, # Use memory mapping for model files
"use_mlock": False, # Don't lock model in memory (saves RAM)
"verbose": False, # Reduce logging in production
}
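
# A minimal loading sketch showing how these settings are typically consumed
# (assumes llama-cpp-python is installed; the model path below is a placeholder):
#
#     from llama_cpp import Llama
#     llm = Llama(model_path="./models/model.gguf", **MODEL_CONFIG)
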
# Generation defaults
GENERATION_CONFIG = {
    "temperature": 0.7,       # Sampling temperature (lower = more deterministic)
    "top_p": 0.9,             # Nucleus sampling threshold
    "top_k": 40,              # Top-k sampling cutoff
    "repeat_penalty": 1.1,    # Penalty for repeated tokens
    "stop": ["```", "\n\n\n", "Human:", "Assistant:"],  # Stop sequences
}
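
# These keys map directly onto llama-cpp-python's completion parameters
# (sketch; `llm` comes from the loading example above, and max_tokens is an
# extra argument, not part of this config):
#
#     output = llm("Generate a JSON object:", max_tokens=256, **GENERATION_CONFIG)
#     text = output["choices"][0]["text"]
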
# Hugging Face Space specific settings
HF_SPACE_CONFIG = {
"max_memory_usage": "2GB", # Conservative memory usage
"timeout_seconds": 30, # Request timeout
"enable_cpu_only": True, # Force CPU inference
}
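
# llama.cpp does not read these values itself; the Space's application code is
# expected to enforce them. A hypothetical timeout guard around an already
# loaded `llm` callable:
#
#     from concurrent.futures import ThreadPoolExecutor, TimeoutError
#     with ThreadPoolExecutor(max_workers=1) as pool:
#         future = pool.submit(llm, prompt, **GENERATION_CONFIG)
#         try:
#             result = future.result(timeout=HF_SPACE_CONFIG["timeout_seconds"])
#         except TimeoutError:
#             result = None  # request exceeded the time budget
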
# Model download settings
MODEL_DOWNLOAD_CONFIG = {
"cache_dir": "./models",
"use_auth_token": os.getenv("HF_TOKEN", None),
"resume_download": True,
}

# Recommended small GGUF models for demonstration
RECOMMENDED_MODELS = [
{
"name": "Osmosis-Structure-0.6B",
"repo_id": "osmosis-ai/Osmosis-Structure-0.6B",
"filename": "Osmosis-Structure-0.6B-BF16.gguf",
"size": "~1.2GB",
"description": "Osmosis AI structure-focused model for JSON generation"
},
{
"name": "TinyLlama-1.1B-Chat-v1.0-GGUF",
"repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
"filename": "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
"size": "~700MB",
"description": "Small, fast model good for testing"
}
]
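
# Download sketch for the default model using the settings above (note:
# recent huggingface_hub releases expect `token=` instead of the deprecated
# `use_auth_token=`, and resuming interrupted downloads is now the default):
#
#     from huggingface_hub import hf_hub_download
#     entry = RECOMMENDED_MODELS[0]
#     model_path = hf_hub_download(
#         repo_id=entry["repo_id"],
#         filename=entry["filename"],
#         cache_dir=MODEL_DOWNLOAD_CONFIG["cache_dir"],
#         token=MODEL_DOWNLOAD_CONFIG["use_auth_token"],
#     )
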
def get_model_config():
    """Get the model configuration optimized for HF Spaces."""
    return MODEL_CONFIG.copy()


def get_generation_config():
    """Get the default generation configuration."""
    return GENERATION_CONFIG.copy()


def get_recommended_model():
    """Get the default recommended model for this Space."""
    return RECOMMENDED_MODELS[0]  # Osmosis-Structure-0.6B is first in the list
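

if __name__ == "__main__":
    # Smoke test (a sketch, not part of the Space runtime): downloads the
    # default model on first run, loads it, and generates a short completion.
    # Assumes llama-cpp-python and huggingface_hub are installed.
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    model = get_recommended_model()
    model_path = hf_hub_download(
        repo_id=model["repo_id"],
        filename=model["filename"],
        cache_dir=MODEL_DOWNLOAD_CONFIG["cache_dir"],
    )
    llm = Llama(model_path=model_path, **get_model_config())
    output = llm(
        "Return a JSON object with a single 'status' key:",
        max_tokens=64,
        **get_generation_config(),
    )
    print(output["choices"][0]["text"])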