#!/usr/bin/env python3
"""
Embedded Configuration for LinguaCustodia API

Fallback configuration used when the clean-architecture imports fail.
Updated for the LinguaCustodia Pro Finance Suite models.
"""
import os
import torch
import gc
import logging
from pydantic import BaseModel, Field, field_validator, ConfigDict
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Dict, List, Optional, Any, Literal
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import login

logger = logging.getLogger(__name__)

# Model type definition for the Pro Finance Suite
ModelType = Literal[
    "pro-finance-large", "pro-finance-medium", "pro-finance-small",
    "pro-finance-mini", "llama-pro-finance-mini", "fin-pythia-1.4b"
]

class TokenizerConfig(BaseModel):
    """Tokenizer configuration for LinguaCustodia models."""
    model_config = ConfigDict(protected_namespaces=())  # allow the model_* field name below

    eos_token: str = Field(..., description="End-of-sequence token")
    bos_token: Optional[str] = Field(None, description="Beginning-of-sequence token")
    pad_token: Optional[str] = Field(None, description="Padding token")
    unk_token: Optional[str] = Field(None, description="Unknown token")
    eos_token_id: int = Field(..., description="EOS token ID")
    bos_token_id: Optional[int] = Field(None, description="BOS token ID")
    pad_token_id: Optional[int] = Field(None, description="Pad token ID")
    vocab_size: int = Field(..., description="Vocabulary size")
    model_max_length: int = Field(131072, description="Maximum sequence length")

class GenerationConfig(BaseModel):
    """Generation configuration for LinguaCustodia models."""
    eos_tokens: List[int] = Field(..., description="List of EOS token IDs")
    bos_token_id: Optional[int] = Field(None, description="BOS token ID")
    temperature: float = Field(0.6, description="Sampling temperature")
    top_p: float = Field(0.9, description="Top-p (nucleus) sampling parameter")
    max_new_tokens: int = Field(150, description="Maximum new tokens to generate")
    repetition_penalty: float = Field(1.05, description="Repetition penalty")
    no_repeat_ngram_size: int = Field(2, description="No-repeat n-gram size")
    early_stopping: bool = Field(False, description="Enable early stopping")
    min_length: int = Field(50, description="Minimum response length")
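
# transformers' generate() has no `eos_tokens` argument; it expects
# `eos_token_id`, which may be a list of IDs. The helper below performs that
# mapping. It is a minimal sketch (the name `generation_kwargs` is ours, not
# part of the original API) and assumes the remaining fields pass through
# to generate() unchanged.
def generation_kwargs(config: GenerationConfig) -> Dict[str, Any]:
    """Convert a GenerationConfig into keyword arguments for generate()."""
    kwargs = config.model_dump()
    kwargs["eos_token_id"] = kwargs.pop("eos_tokens")
    kwargs["do_sample"] = True  # temperature/top_p only take effect when sampling
    return kwargs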

class ModelInfo(BaseModel):
    """Model information for LinguaCustodia models."""
    model_config = ConfigDict(protected_namespaces=())  # allow the model_id field name

    model_id: str = Field(..., description="HuggingFace model identifier")
    display_name: str = Field(..., description="Human-readable model name")
    architecture: str = Field(..., description="Model architecture class")
    parameters: str = Field(..., description="Model parameter count")
    memory_gb: int = Field(..., description="Required RAM in GB")
    vram_gb: int = Field(..., description="Required VRAM in GB")
    tokenizer: TokenizerConfig = Field(..., description="Tokenizer configuration")
    generation: GenerationConfig = Field(..., description="Generation configuration")

class AppSettings(BaseSettings):
    """Application settings."""
    model_name: ModelType = Field(default="pro-finance-small", description="Model to load")
    hf_token_lc: Optional[str] = Field(default=None, description="HuggingFace token for LinguaCustodia")
    hf_token: Optional[str] = Field(default=None, description="HuggingFace token")
    app_port: int = Field(default=7860, description="Application port")
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
        protected_namespaces=()
    )

    @field_validator("model_name")
    @classmethod
    def validate_model_name(cls, v):
        valid_models = [
            "pro-finance-large", "pro-finance-medium", "pro-finance-small",
            "pro-finance-mini", "llama-pro-finance-mini", "fin-pythia-1.4b"
        ]
        if v not in valid_models:
            raise ValueError(f"Model name must be one of: {valid_models}")
        return v

# LinguaCustodia Pro Finance Suite model configurations
LINGUACUSTODIA_MODELS = {
    "pro-finance-large": ModelInfo(
        model_id="LinguaCustodia/Llama-Pro-Finance-Large",
        display_name="Llama Pro Finance Large",
        architecture="LlamaForCausalLM",
        parameters="70B",
        memory_gb=140,
        vram_gb=80,
        tokenizer=TokenizerConfig(
            eos_token="<|eot_id|>",
            bos_token="<|begin_of_text|>",
            pad_token="<|eot_id|>",
            unk_token=None,
            eos_token_id=128009,
            bos_token_id=128000,
            pad_token_id=128009,
            vocab_size=128000,
            model_max_length=131072
        ),
        generation=GenerationConfig(
            eos_tokens=[128001, 128008, 128009],
            bos_token_id=128000
        )
    ),
    "pro-finance-medium": ModelInfo(
        model_id="LinguaCustodia/LLM-Pro-Finance-Medium",
        display_name="LLM Pro Finance Medium",
        architecture="LlamaForCausalLM",
        parameters="32B",
        memory_gb=64,
        vram_gb=32,
        tokenizer=TokenizerConfig(
            eos_token="<|eot_id|>",
            bos_token="<|begin_of_text|>",
            pad_token="<|eot_id|>",
            unk_token=None,
            eos_token_id=128009,
            bos_token_id=128000,
            pad_token_id=128009,
            vocab_size=128000,
            model_max_length=131072
        ),
        generation=GenerationConfig(
            eos_tokens=[128001, 128008, 128009],
            bos_token_id=128000
        )
    ),
    "pro-finance-small": ModelInfo(
        model_id="LinguaCustodia/LLM-Pro-Finance-Small",
        display_name="LLM Pro Finance Small",
        architecture="LlamaForCausalLM",
        parameters="8B",
        memory_gb=16,
        vram_gb=8,
        tokenizer=TokenizerConfig(
            eos_token="<|eot_id|>",
            bos_token="<|begin_of_text|>",
            pad_token="<|eot_id|>",
            unk_token=None,
            eos_token_id=128009,
            bos_token_id=128000,
            pad_token_id=128009,
            vocab_size=128000,
            model_max_length=131072
        ),
        generation=GenerationConfig(
            eos_tokens=[128001, 128008, 128009],
            bos_token_id=128000
        )
    ),
    "pro-finance-mini": ModelInfo(
        model_id="LinguaCustodia/LLM-Pro-Finance-Mini",
        display_name="LLM Pro Finance Mini",
        architecture="LlamaForCausalLM",
        parameters="3B",
        memory_gb=6,
        vram_gb=3,
        tokenizer=TokenizerConfig(
            eos_token="<|eot_id|>",
            bos_token="<|begin_of_text|>",
            pad_token="<|eot_id|>",
            unk_token=None,
            eos_token_id=128009,
            bos_token_id=128000,
            pad_token_id=128009,
            vocab_size=128000,
            model_max_length=131072
        ),
        generation=GenerationConfig(
            eos_tokens=[128001, 128008, 128009],
            bos_token_id=128000
        )
    ),
    "llama-pro-finance-mini": ModelInfo(
        model_id="LinguaCustodia/Llama-Pro-Finance-Mini",
        display_name="Llama Pro Finance Mini",
        architecture="LlamaForCausalLM",
        parameters="1B",
        memory_gb=3,
        vram_gb=2,
        tokenizer=TokenizerConfig(
            eos_token="<|eot_id|>",
            bos_token="<|begin_of_text|>",
            pad_token="<|eot_id|>",
            unk_token=None,
            eos_token_id=128009,
            bos_token_id=128000,
            pad_token_id=128009,
            vocab_size=128000,
            model_max_length=131072
        ),
        generation=GenerationConfig(
            eos_tokens=[128001, 128008, 128009],
            bos_token_id=128000
        )
    ),
    "fin-pythia-1.4b": ModelInfo(
        model_id="LinguaCustodia/fin-pythia-1.4b",
        display_name="Fin-Pythia 1.4B Financial",
        architecture="GPTNeoXForCausalLM",
        parameters="1.4B",
        memory_gb=3,
        vram_gb=2,
        tokenizer=TokenizerConfig(
            eos_token="<|endoftext|>",
            bos_token="<|endoftext|>",
            pad_token=None,
            unk_token="<|endoftext|>",
            eos_token_id=0,
            bos_token_id=0,
            pad_token_id=None,
            vocab_size=50304,
            model_max_length=2048
        ),
        generation=GenerationConfig(
            eos_tokens=[0],
            bos_token_id=0
        )
    )
}
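
# Note: memory_gb roughly tracks bf16 weights (~2 bytes/parameter, e.g.
# 70B -> ~140 GB); the lower vram_gb figures appear to assume offloading or
# quantization. Treat both as planning estimates, not hard limits.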

# Default model configuration
DEFAULT_MODEL = "pro-finance-small"


def get_model_config(model_name: str) -> ModelInfo:
    """Get a model configuration by name."""
    if model_name not in LINGUACUSTODIA_MODELS:
        raise ValueError(
            f"Model '{model_name}' not found. "
            f"Available models: {list(LINGUACUSTODIA_MODELS.keys())}"
        )
    return LINGUACUSTODIA_MODELS[model_name]
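
# Illustrative lookup (values taken from the table above):
#   info = get_model_config("fin-pythia-1.4b")
#   info.model_id                 # "LinguaCustodia/fin-pythia-1.4b"
#   info.tokenizer.eos_token_id   # 0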

def get_app_settings() -> AppSettings:
    """Get application settings."""
    return AppSettings()
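
# Settings resolve from the environment or a .env file (case-insensitively),
# e.g., with a placeholder token and an illustrative entry-point name:
#   MODEL_NAME=pro-finance-mini HF_TOKEN_LC=hf_xxx APP_PORT=7860 python app.py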

def authenticate_huggingface(token: str) -> bool:
    """Authenticate with HuggingFace."""
    try:
        login(token=token, add_to_git_credential=False)
        logger.info("Successfully authenticated with HuggingFace")
        return True
    except Exception as e:
        logger.error(f"HuggingFace authentication failed: {e}")
        return False

def setup_gpu_environment():
    """Set up the GPU environment."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()
        logger.info(f"GPU available: {torch.cuda.get_device_name(0)}")
        logger.info(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
        return True
    else:
        logger.warning("No GPU available, using CPU")
        return False

def load_model_and_tokenizer(model_info: ModelInfo, use_auth_token: Optional[str] = None):
    """Load a model and tokenizer with the proper configuration."""
    try:
        logger.info(f"Loading model: {model_info.model_id}")

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            model_info.model_id,
            token=use_auth_token,
            trust_remote_code=True
        )

        # Configure special tokens
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # Load model
        model = AutoModelForCausalLM.from_pretrained(
            model_info.model_id,
            token=use_auth_token,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            device_map="auto"
        )

        logger.info(f"Model loaded successfully: {model_info.display_name}")
        return model, tokenizer
    except Exception as e:
        logger.error(f"Failed to load model {model_info.model_id}: {e}")
        raise

def create_pipeline(model, tokenizer, model_info: ModelInfo):
    """Create an inference pipeline."""
    try:
        # The model is already dtype-cast and device-mapped by
        # load_model_and_tokenizer(), so torch_dtype/device_map are not
        # repeated here; generation defaults are remapped to the names
        # generate() expects (see generation_kwargs above).
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            **generation_kwargs(model_info.generation)
        )
        logger.info("Pipeline created successfully")
        return pipe
    except Exception as e:
        logger.error(f"Failed to create pipeline: {e}")
        raise
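
if __name__ == "__main__":
    # Minimal end-to-end smoke test -- a sketch, assuming HF_TOKEN_LC (or
    # HF_TOKEN) is set and the selected model fits the available hardware.
    logging.basicConfig(level=logging.INFO)
    settings = get_app_settings()
    token = settings.hf_token_lc or settings.hf_token
    if token:
        authenticate_huggingface(token)
    setup_gpu_environment()
    info = get_model_config(settings.model_name)
    model, tokenizer = load_model_and_tokenizer(info, use_auth_token=token)
    pipe = create_pipeline(model, tokenizer, info)
    print(pipe("What drives a bank's net interest margin?")[0]["generated_text"])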