"""
Configuration settings for llama.cpp in Hugging Face Space
"""
import os

# Model configuration
MODEL_CONFIG = {
"n_ctx": 2048, # Context window size
"n_threads": 2, # Number of threads (conservative for HF Spaces)
"n_batch": 8, # Batch size for prompt processing
"use_mmap": True, # Use memory mapping for model files
"use_mlock": False, # Don't lock model in memory (saves RAM)
"verbose": False, # Reduce logging in production
}
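
# A minimal loading sketch showing how these settings are typically consumed
# (assumes llama-cpp-python is installed; the model path below is a placeholder):
#
#     from llama_cpp import Llama
#     llm = Llama(model_path="./models/model.gguf", **MODEL_CONFIG)
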
# Generation defaults
GENERATION_CONFIG = {
    "temperature": 0.7,       # Sampling temperature (lower = more deterministic)
    "top_p": 0.9,             # Nucleus sampling threshold
    "top_k": 40,              # Top-k sampling cutoff
    "repeat_penalty": 1.1,    # Penalty for repeated tokens
    "stop": ["```", "\n\n\n", "Human:", "Assistant:"],  # Stop sequences
}
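
# These keys map directly onto llama-cpp-python's completion parameters
# (sketch; `llm` comes from the loading example above, and max_tokens is an
# extra argument, not part of this config):
#
#     output = llm("Generate a JSON object:", max_tokens=256, **GENERATION_CONFIG)
#     text = output["choices"][0]["text"]
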
# Hugging Face Space specific settings
HF_SPACE_CONFIG = {
"max_memory_usage": "2GB", # Conservative memory usage
"timeout_seconds": 30, # Request timeout
"enable_cpu_only": True, # Force CPU inference
}
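
# llama.cpp does not read these values itself; the Space's application code is
# expected to enforce them. A hypothetical timeout guard around an already
# loaded `llm` callable:
#
#     from concurrent.futures import ThreadPoolExecutor, TimeoutError
#     with ThreadPoolExecutor(max_workers=1) as pool:
#         future = pool.submit(llm, prompt, **GENERATION_CONFIG)
#         try:
#             result = future.result(timeout=HF_SPACE_CONFIG["timeout_seconds"])
#         except TimeoutError:
#             result = None  # request exceeded the time budget
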
# Model download settings
MODEL_DOWNLOAD_CONFIG = {
"cache_dir": "./models",
"use_auth_token": os.getenv("HF_TOKEN", None),
"resume_download": True,
}

# Recommended small GGUF models for demonstration
RECOMMENDED_MODELS = [
{
"name": "Osmosis-Structure-0.6B",
"repo_id": "osmosis-ai/Osmosis-Structure-0.6B",
"filename": "Osmosis-Structure-0.6B-BF16.gguf",
"size": "~1.2GB",
"description": "Osmosis AI structure-focused model for JSON generation"
},
{
"name": "TinyLlama-1.1B-Chat-v1.0-GGUF",
"repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
"filename": "tinyllama-1.1b-chat-v1.0.q4_k_m.gguf",
"size": "~700MB",
"description": "Small, fast model good for testing"
}
]
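
# Download sketch for the default model using the settings above (note:
# recent huggingface_hub releases expect `token=` instead of the deprecated
# `use_auth_token=`, and resuming interrupted downloads is now the default):
#
#     from huggingface_hub import hf_hub_download
#     entry = RECOMMENDED_MODELS[0]
#     model_path = hf_hub_download(
#         repo_id=entry["repo_id"],
#         filename=entry["filename"],
#         cache_dir=MODEL_DOWNLOAD_CONFIG["cache_dir"],
#         token=MODEL_DOWNLOAD_CONFIG["use_auth_token"],
#     )
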
def get_model_config():
    """Get the model configuration optimized for HF Spaces."""
    return MODEL_CONFIG.copy()


def get_generation_config():
    """Get the default generation configuration."""
    return GENERATION_CONFIG.copy()


def get_recommended_model():
    """Get the default recommended model for this Space."""
    return RECOMMENDED_MODELS[0]  # Osmosis-Structure-0.6B is first in the list
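

if __name__ == "__main__":
    # Smoke test (a sketch, not part of the Space runtime): downloads the
    # default model on first run, loads it, and generates a short completion.
    # Assumes llama-cpp-python and huggingface_hub are installed.
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    model = get_recommended_model()
    model_path = hf_hub_download(
        repo_id=model["repo_id"],
        filename=model["filename"],
        cache_dir=MODEL_DOWNLOAD_CONFIG["cache_dir"],
    )
    llm = Llama(model_path=model_path, **get_model_config())
    output = llm(
        "Return a JSON object with a single 'status' key:",
        max_tokens=64,
        **get_generation_config(),
    )
    print(output["choices"][0]["text"])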