Spaces:
Running
Running
File size: 8,400 Bytes
11d9dfb 83abca7 11d9dfb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
"""
Comprehensive error handling and logging system for the RAG application.
"""
import logging
import logging.handlers
import sys
import traceback
from pathlib import Path
from typing import Any, Dict, Optional, Type, Union
from functools import wraps
import json
class RAGError(Exception):
    """Base exception class for RAG system errors.

    Attributes:
        message: Human-readable description of the failure (also passed to
            ``Exception.__init__`` so ``str(error)`` returns it).
        error_code: Identifier for the error. Falls back to the name of the
            concrete exception class when no (or an empty) code is given.
        details: Extra structured data about the failure. Falls back to an
            empty dict when nothing (or an empty mapping) is given.
    """

    def __init__(
        self,
        message: str,
        error_code: Optional[str] = None,
        details: Optional[Dict[str, Any]] = None,
    ):
        super().__init__(message)
        self.message = message
        # Using the runtime class name means subclasses get a distinct code
        # without having to override __init__.
        self.error_code = error_code or self.__class__.__name__
        self.details = details or {}
class DocumentProcessingError(RAGError):
    """Signals that a document could not be processed."""
class EmbeddingError(RAGError):
    """Signals that generating embeddings failed."""
class SearchError(RAGError):
    """Signals that a search operation failed."""
class ConfigurationError(RAGError):
    """Signals that the supplied configuration is invalid."""
class ResourceError(RAGError):
    """Signals that system resources are insufficient for the request."""
class ErrorHandler:
    """Centralized error handling and logging system.

    Configures the shared ``"rag_system"`` logger from the ``"logging"``
    section of *config* and provides helpers for logging exceptions and for
    converting them into user-facing messages.

    Keys read from ``config["logging"]`` (all optional): ``level``,
    ``format``, ``file``, ``max_size`` and ``backup_count``.
    """

    def __init__(self, config: Dict[str, Any]):
        self.config = config
        self.logger = self._setup_logging()

    def _setup_logging(self) -> logging.Logger:
        """Configure and return the ``"rag_system"`` logger.

        Any handlers already attached to the logger are removed and closed,
        then a stdout handler and, when ``file`` is configured, a rotating
        file handler are attached.

        Raises:
            ValueError: If the configured level name is not a known level.
        """
        log_config = self.config.get("logging") or {}
        logger = logging.getLogger("rag_system")

        # Accept level names case-insensitively ("info" == "INFO"); numeric
        # levels are passed through unchanged.
        level = log_config.get("level", "INFO")
        logger.setLevel(level.upper() if isinstance(level, str) else level)

        # Remove *and close* previous handlers: merely dropping them would
        # leave an earlier log file open each time a handler is re-created.
        for stale_handler in list(logger.handlers):
            logger.removeHandler(stale_handler)
            stale_handler.close()

        formatter = logging.Formatter(
            log_config.get(
                "format",
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            )
        )

        # Console handler
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

        # File handler (if specified)
        log_file = log_config.get("file")
        if log_file:
            Path(log_file).parent.mkdir(parents=True, exist_ok=True)
            file_handler = logging.handlers.RotatingFileHandler(
                log_file,
                maxBytes=self._parse_size(log_config.get("max_size", "10MB")),
                backupCount=log_config.get("backup_count", 5),
                # Explicit encoding so non-ASCII messages are written the
                # same way regardless of the platform default.
                encoding="utf-8",
            )
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

        return logger

    def _parse_size(self, size_str: Union[str, int]) -> int:
        """Parse a size such as ``'10MB'`` into a number of bytes.

        Accepts a plain number (already in bytes) or a string with an
        optional, case-insensitive ``B``/``KB``/``MB``/``GB`` suffix.
        Whitespace around the number and fractional values (``'1.5MB'``) are
        tolerated; fractional results are truncated to an int.

        Raises:
            ValueError: If the numeric part cannot be parsed.
        """
        if isinstance(size_str, (int, float)):
            return int(size_str)

        text = str(size_str).strip().upper()
        number, factor = text, 1
        # Two-letter suffixes must be tried before the bare "B" suffix,
        # because every one of them also ends in "B".
        for suffix, multiplier in (
            ("KB", 1024),
            ("MB", 1024 ** 2),
            ("GB", 1024 ** 3),
            ("B", 1),
        ):
            if text.endswith(suffix):
                number, factor = text[: -len(suffix)].strip(), multiplier
                break

        try:
            return int(number) * factor
        except ValueError:
            # Not an integer literal: allow decimals such as "1.5".
            return int(float(number) * factor)

    def log_error(self, error: Exception, context: Optional[Dict[str, Any]] = None) -> str:
        """Log an error with context and return a user-friendly message.

        The traceback is taken from *error* itself, so this also works when
        called outside an ``except`` block, and it is passed as ``exc_info``
        so that the configured formatter actually writes it to the log.

        Args:
            error: The exception to log.
            context: Optional extra data attached to the log record.

        Returns:
            The user-facing message for *error*.
        """
        error_id = f"ERR_{id(error)}"
        context = context or {}
        exc_info = (type(error), error, error.__traceback__)

        # Log detailed error for debugging
        self.logger.error(
            f"Error {error_id}: {str(error)}",
            exc_info=exc_info,
            extra={
                "error_type": type(error).__name__,
                "error_id": error_id,
                "context": context,
                "traceback": "".join(traceback.format_exception(*exc_info)),
            },
        )

        # Return user-friendly message
        return self._get_user_friendly_message(error)

    def _get_user_friendly_message(self, error: Exception) -> str:
        """Convert a technical error into a user-friendly message."""
        if isinstance(error, DocumentProcessingError):
            return f"Document processing failed: {error.message}"
        elif isinstance(error, EmbeddingError):
            return "Failed to generate document embeddings. Please try again."
        elif isinstance(error, SearchError):
            return "Search operation failed. Please try again with a different query."
        elif isinstance(error, ConfigurationError):
            return f"Configuration error: {error.message}"
        elif isinstance(error, ResourceError):
            return "System resources are insufficient. Please try with smaller documents."
        elif isinstance(error, FileNotFoundError):
            return "The requested file could not be found."
        elif isinstance(error, PermissionError):
            return "Permission denied accessing the file."
        elif isinstance(error, MemoryError):
            return "Not enough memory to process the request. Please try with smaller documents."
        else:
            return "An unexpected error occurred. Please try again."

    def handle_exception(self, func):
        """Decorator that logs any exception raised by *func*.

        Every ``Exception`` raised by the wrapped function is logged through
        :meth:`log_error` and re-raised as a ``RAGError`` carrying the
        user-friendly message, with the original exception chained as its
        cause.
        """
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                error_message = self.log_error(e, {
                    "function": func.__name__,
                    "args": str(args)[:200],  # Limit size
                    "kwargs": str(kwargs)[:200],
                })
                raise RAGError(error_message) from e
        return wrapper
def validate_file_upload(file_path: str, max_size: int, allowed_extensions: list) -> None:
    """Validate an uploaded file.

    Args:
        file_path: Path of the uploaded file.
        max_size: Maximum allowed file size in bytes (inclusive).
        allowed_extensions: Accepted extensions. Matching is
            case-insensitive and the leading dot is optional, so ``".pdf"``,
            ``"pdf"`` and ``"PDF"`` are all equivalent.

    Raises:
        DocumentProcessingError: If the path is empty, does not point to a
            regular file, the file is larger than *max_size*, or its
            extension is not allowed.
    """
    if not file_path:
        raise DocumentProcessingError("File not found or invalid path")

    path = Path(file_path)
    # is_file() rather than exists(): a directory is not a valid upload.
    if not path.is_file():
        raise DocumentProcessingError("File not found or invalid path")

    # Check file size
    if path.stat().st_size > max_size:
        size_mb = max_size / (1024 * 1024)
        raise DocumentProcessingError(f"File size exceeds maximum allowed size of {size_mb:.1f}MB")

    # Check file extension. Entries are normalised to lower case with a
    # leading dot; an empty entry is kept as-is so that "no extension" can
    # still be allowed explicitly.
    normalized_extensions = set()
    for extension in allowed_extensions:
        extension = extension.lower()
        if extension and not extension.startswith("."):
            extension = "." + extension
        normalized_extensions.add(extension)

    if path.suffix.lower() not in normalized_extensions:
        raise DocumentProcessingError(
            f"File type not supported. Allowed types: {', '.join(allowed_extensions)}"
        )
def validate_config(config: Dict[str, Any]) -> None:
    """Validate the configuration dictionary.

    Checks that the ``app``, ``models``, ``processing`` and ``search``
    sections exist, that ``models`` contains ``embedding``, that
    ``processing.chunk_size`` is positive and that
    ``processing.chunk_overlap`` is present and non-negative.

    Malformed values (for example a ``None`` section or a non-numeric chunk
    size) are reported as ``ConfigurationError`` rather than leaking a
    ``TypeError``/``AttributeError`` from the checks themselves.

    Raises:
        ConfigurationError: If any of the checks above fails.
    """
    required_sections = ("app", "models", "processing", "search")
    for section in required_sections:
        if section not in config:
            raise ConfigurationError(f"Missing required configuration section: {section}")

    # Validate model configurations
    try:
        has_embedding = "embedding" in config["models"]
    except TypeError:
        # e.g. the section is None because it was left empty.
        has_embedding = False
    if not has_embedding:
        raise ConfigurationError("Missing embedding model configuration")

    # Validate processing parameters
    processing = config["processing"]
    if not callable(getattr(processing, "get", None)):
        raise ConfigurationError("processing section must be a mapping")

    try:
        bad_chunk_size = processing.get("chunk_size", 0) <= 0
    except TypeError:
        # Value cannot be compared with a number (None, str, ...).
        bad_chunk_size = True
    if bad_chunk_size:
        raise ConfigurationError("chunk_size must be positive")

    try:
        # The -1 default makes a missing chunk_overlap fail the check.
        bad_chunk_overlap = processing.get("chunk_overlap", -1) < 0
    except TypeError:
        bad_chunk_overlap = True
    if bad_chunk_overlap:
        raise ConfigurationError("chunk_overlap must be non-negative")
class ContextualLogger:
    """Wrap a logger so that every record carries a fixed set of extra fields.

    Keyword arguments given to a logging call are merged over the stored
    context (per-call values win on key clashes) and passed as ``extra``.
    """

    def __init__(self, logger: logging.Logger, context: Dict[str, Any] = None):
        self.logger = logger
        self.context = context if context else {}

    def info(self, message: str, **kwargs):
        """Log *message* at INFO level with the merged context."""
        fields = dict(self.context)
        fields.update(kwargs)
        self.logger.info(message, extra=fields)

    def warning(self, message: str, **kwargs):
        """Log *message* at WARNING level with the merged context."""
        fields = dict(self.context)
        fields.update(kwargs)
        self.logger.warning(message, extra=fields)

    def error(self, message: str, **kwargs):
        """Log *message* at ERROR level with the merged context."""
        fields = dict(self.context)
        fields.update(kwargs)
        self.logger.error(message, extra=fields)

    def debug(self, message: str, **kwargs):
        """Log *message* at DEBUG level with the merged context."""
        fields = dict(self.context)
        fields.update(kwargs)
        self.logger.debug(message, extra=fields)
def create_error_response(error: Exception, request_id: str = None) -> Dict[str, Any]:
    """Build the standard failure payload for *error*.

    The payload has ``success`` set to ``False``, ``data`` set to ``None``
    and an ``error`` entry holding the exception's class name, its string
    form and the given *request_id*.
    """
    error_info = dict(
        type=type(error).__name__,
        message=str(error),
        request_id=request_id,
    )
    return dict(success=False, error=error_info, data=None)
def create_success_response(data: Any, request_id: str = None) -> Dict[str, Any]:
    """Build the standard success payload wrapping *data*.

    The payload has ``success`` set to ``True``, ``error`` set to ``None``
    and carries *data* and *request_id* at the top level.
    """
    response: Dict[str, Any] = dict(success=True, error=None)
    response["data"] = data
    response["request_id"] = request_id
    return response