""" Standard Response Assembler Implementation. This module provides minimal overhead response assembly for performance-critical applications where basic Answer objects are sufficient. Features: - Minimal metadata overhead - Fast assembly performance - Essential source information only - Lightweight configuration """ import logging from typing import Dict, Any, List, Optional from pathlib import Path import sys # Add project paths for imports project_root = Path(__file__).parent.parent.parent.parent.parent sys.path.append(str(project_root)) from ..base import ContextSelection, QueryAnalysis from .base_assembler import BaseResponseAssembler from src.core.interfaces import Answer, Document logger = logging.getLogger(__name__) class StandardAssembler(BaseResponseAssembler): """ Standard response assembler with minimal overhead. This assembler creates Answer objects with essential information only, optimized for performance-critical applications where detailed metadata is not required. Configuration Options: - minimal_metadata: Use absolute minimum metadata (default: False) - include_basic_stats: Include basic statistics (default: True) - strip_large_sources: Remove large document content from sources (default: True) """ def __init__(self, config: Optional[Dict[str, Any]] = None): """ Initialize standard assembler with configuration. Args: config: Configuration dictionary """ # Initialize attributes first before calling super().__init__ config_dict = config or {} self._minimal_metadata = config_dict.get('minimal_metadata', False) self._include_basic_stats = config_dict.get('include_basic_stats', True) self._strip_large_sources = config_dict.get('strip_large_sources', True) super().__init__(config) # Override base settings for performance if self._minimal_metadata: self._include_metadata = False self._include_sources = True # Keep sources but strip content logger.debug(f"Initialized StandardAssembler with minimal_metadata={self._minimal_metadata}") def _assemble_answer( self, query: str, answer_text: str, context: ContextSelection, confidence: float, query_analysis: Optional[QueryAnalysis] = None, generation_metadata: Optional[Dict[str, Any]] = None ) -> Answer: """ Assemble Answer object with minimal overhead. Args: query: Validated query string answer_text: Validated answer text context: Context selection confidence: Validated confidence score query_analysis: Optional query analysis generation_metadata: Optional generation metadata Returns: Answer object with minimal metadata """ # Simple text formatting formatted_text = answer_text.strip() # Create sources list (potentially stripped) sources = self._create_minimal_sources_list(context) # Create minimal metadata metadata = self._create_minimal_metadata(query, context, generation_metadata) return Answer( text=formatted_text, sources=sources, confidence=confidence, metadata=metadata ) def _create_minimal_sources_list(self, context: ContextSelection) -> List[Document]: """ Create minimal sources list for performance. Args: context: Context selection with documents Returns: List of minimal source documents """ if not self._include_sources or not context.selected_documents: return [] sources = [] for doc in context.selected_documents: if self._strip_large_sources: # Create minimal document with just essential information minimal_metadata = { 'original_length': len(doc.content), 'content_stripped': True } if doc.metadata: minimal_metadata.update(doc.metadata) # Add source and chunk_id to metadata if hasattr(doc, 'source'): minimal_metadata['source'] = doc.source elif 'source' not in minimal_metadata: minimal_metadata['source'] = minimal_metadata.get('source', 'unknown') if hasattr(doc, 'chunk_id'): minimal_metadata['chunk_id'] = doc.chunk_id elif 'chunk_id' not in minimal_metadata: minimal_metadata['chunk_id'] = minimal_metadata.get('chunk_id', 'unknown') minimal_doc = Document( content="[Content stripped for performance]", # Document content cannot be empty metadata=minimal_metadata, embedding=None # Remove embedding ) sources.append(minimal_doc) else: # Keep full content but remove embedding clean_metadata = doc.metadata.copy() if doc.metadata else {} # Add source and chunk_id to metadata if hasattr(doc, 'source'): clean_metadata['source'] = doc.source elif 'source' not in clean_metadata: clean_metadata['source'] = clean_metadata.get('source', 'unknown') if hasattr(doc, 'chunk_id'): clean_metadata['chunk_id'] = doc.chunk_id elif 'chunk_id' not in clean_metadata: clean_metadata['chunk_id'] = clean_metadata.get('chunk_id', 'unknown') clean_doc = Document( content=doc.content, metadata=clean_metadata, embedding=None ) sources.append(clean_doc) return sources def _create_minimal_metadata( self, query: str, context: ContextSelection, generation_metadata: Optional[Dict[str, Any]] = None ) -> Dict[str, Any]: """ Create minimal metadata for performance. Args: query: Original query context: Context selection generation_metadata: Optional generation metadata Returns: Minimal metadata dictionary """ if self._minimal_metadata: # Absolute minimum metadata return { 'assembler_type': 'standard', 'source_count': len(context.selected_documents) } metadata = { 'assembler_type': 'standard', 'query': query, 'retrieved_docs': len(context.selected_documents), 'total_tokens': context.total_tokens, 'selection_strategy': context.selection_strategy } # Add basic statistics if enabled if self._include_basic_stats: metadata.update({ 'query_length': len(query), 'answer_length': 0, # Will be updated after answer is created 'source_count': len(context.selected_documents) }) # Include minimal generation information if generation_metadata: # Only include essential generation metadata essential_fields = ['model', 'generation_time'] for field in essential_fields: if field in generation_metadata: metadata[field] = generation_metadata[field] return metadata def get_supported_formats(self) -> List[str]: """ Return list of formats this standard assembler supports. Returns: List of format names """ base_formats = super().get_supported_formats() standard_formats = [ 'minimal', 'fast', 'lightweight', 'performance' ] return base_formats + standard_formats def configure(self, config: Dict[str, Any]) -> None: """ Configure the standard assembler with provided settings. Args: config: Configuration dictionary """ super().configure(config) # Update standard assembler specific configuration self._minimal_metadata = config.get('minimal_metadata', self._minimal_metadata) self._include_basic_stats = config.get('include_basic_stats', self._include_basic_stats) self._strip_large_sources = config.get('strip_large_sources', self._strip_large_sources) # Apply minimal metadata setting if self._minimal_metadata: self._include_metadata = False self._include_sources = True