# Page-scrape residue (not part of the module): Spaces status "Sleeping"; file size 9,198 bytes; revision 5e1a30c.
"""
Standard Response Assembler Implementation.
This module provides minimal overhead response assembly for performance-critical
applications where basic Answer objects are sufficient.
Features:
- Minimal metadata overhead
- Fast assembly performance
- Essential source information only
- Lightweight configuration
"""
import logging
from typing import Dict, Any, List, Optional
from pathlib import Path
import sys
# Add project paths for imports.
# project_root is the fifth-level parent of this file's path; it is appended
# to sys.path as an import-time side effect (presumably so the absolute
# ``src.core.interfaces`` import below resolves -- TODO confirm repo layout).
project_root = Path(__file__).parent.parent.parent.parent.parent
sys.path.append(str(project_root))
from ..base import ContextSelection, QueryAnalysis
from .base_assembler import BaseResponseAssembler
from src.core.interfaces import Answer, Document
logger = logging.getLogger(__name__)
class StandardAssembler(BaseResponseAssembler):
    """
    Standard response assembler with minimal overhead.

    This assembler creates Answer objects with essential information only,
    optimized for performance-critical applications where detailed metadata
    is not required.

    Configuration Options:
        - minimal_metadata: Use absolute minimum metadata (default: False)
        - include_basic_stats: Include basic statistics (default: True)
        - strip_large_sources: Replace document content in the returned
          sources with a short placeholder (default: True)
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize standard assembler with configuration.

        Args:
            config: Configuration dictionary. ``None`` is treated as an empty
                dictionary when reading this class's own options; the value
                is forwarded unchanged to the base initializer.
        """
        # These attributes are set before super().__init__ on purpose
        # (presumably the base initializer calls configure(), which reads
        # them as fallback defaults -- confirm against BaseResponseAssembler).
        config_dict = config or {}
        self._minimal_metadata = config_dict.get('minimal_metadata', False)
        self._include_basic_stats = config_dict.get('include_basic_stats', True)
        self._strip_large_sources = config_dict.get('strip_large_sources', True)

        super().__init__(config)

        # Override base settings for performance
        self._apply_minimal_metadata_overrides()

        # Lazy %-formatting: the message is only built if DEBUG is enabled.
        logger.debug(
            "Initialized StandardAssembler with minimal_metadata=%s",
            self._minimal_metadata,
        )

    def _apply_minimal_metadata_overrides(self) -> None:
        """
        Apply the flag overrides that minimal-metadata mode requires.

        When ``minimal_metadata`` is enabled, ``_include_metadata`` is forced
        to False and ``_include_sources`` is forced to True. When it is
        disabled, both flags are left untouched.
        """
        if self._minimal_metadata:
            self._include_metadata = False
            self._include_sources = True  # Sources are still returned

    def _assemble_answer(
        self,
        query: str,
        answer_text: str,
        context: ContextSelection,
        confidence: float,
        query_analysis: Optional[QueryAnalysis] = None,
        generation_metadata: Optional[Dict[str, Any]] = None
    ) -> Answer:
        """
        Assemble Answer object with minimal overhead.

        Args:
            query: Validated query string
            answer_text: Validated answer text
            context: Context selection
            confidence: Validated confidence score
            query_analysis: Optional query analysis (not used by this
                assembler)
            generation_metadata: Optional generation metadata

        Returns:
            Answer object with minimal metadata
        """
        # Simple text formatting
        formatted_text = answer_text.strip()

        # Create sources list (potentially stripped)
        sources = self._create_minimal_sources_list(context)

        # Create minimal metadata
        metadata = self._create_minimal_metadata(query, context, generation_metadata)

        # The metadata builder never sees the answer text and only emits a
        # placeholder for 'answer_length'; fill in the real value here. The
        # key is absent when basic stats are disabled or in minimal mode.
        if 'answer_length' in metadata:
            metadata['answer_length'] = len(formatted_text)

        return Answer(
            text=formatted_text,
            sources=sources,
            confidence=confidence,
            metadata=metadata
        )

    @staticmethod
    def _add_source_identifiers(doc: Document, metadata: Dict[str, Any]) -> None:
        """
        Ensure ``metadata`` carries 'source' and 'chunk_id' entries.

        For each key, an attribute of that name on ``doc`` takes precedence;
        otherwise an existing metadata value is kept, and 'unknown' is used
        only when neither is available. ``metadata`` is modified in place.

        Args:
            doc: Document the identifiers are read from
            metadata: Metadata dictionary to update
        """
        for key in ('source', 'chunk_id'):
            if hasattr(doc, key):
                metadata[key] = getattr(doc, key)
            else:
                metadata.setdefault(key, 'unknown')

    def _create_minimal_sources_list(self, context: ContextSelection) -> List[Document]:
        """
        Create minimal sources list for performance.

        Every returned document is a new Document without an embedding. With
        ``strip_large_sources`` enabled its content is replaced by a
        placeholder and the original content length is recorded in metadata.

        Args:
            context: Context selection with documents

        Returns:
            List of minimal source documents; empty when sources are disabled
            or no documents were selected
        """
        if not self._include_sources or not context.selected_documents:
            return []

        sources = []
        for doc in context.selected_documents:
            if self._strip_large_sources:
                # Stripping markers come first; the document's own metadata
                # is merged afterwards and wins on key collisions.
                source_metadata = {
                    'original_length': len(doc.content),
                    'content_stripped': True
                }
                if doc.metadata:
                    source_metadata.update(doc.metadata)
                # Document content cannot be empty, hence the placeholder
                content = "[Content stripped for performance]"
            else:
                # Keep full content; copy metadata so the input is not mutated
                source_metadata = doc.metadata.copy() if doc.metadata else {}
                content = doc.content

            # Add source and chunk_id to metadata
            self._add_source_identifiers(doc, source_metadata)

            sources.append(
                Document(
                    content=content,
                    metadata=source_metadata,
                    embedding=None  # Remove embedding
                )
            )
        return sources

    def _create_minimal_metadata(
        self,
        query: str,
        context: ContextSelection,
        generation_metadata: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Create minimal metadata for performance.

        Args:
            query: Original query
            context: Context selection
            generation_metadata: Optional generation metadata; only the
                'model' and 'generation_time' entries are copied

        Returns:
            Minimal metadata dictionary. When basic stats are enabled it
            contains an 'answer_length' placeholder of 0, because the answer
            text is not available to this method.
        """
        # Tolerate a missing selection, as _create_minimal_sources_list does.
        doc_count = len(context.selected_documents or [])

        if self._minimal_metadata:
            # Absolute minimum metadata
            return {
                'assembler_type': 'standard',
                'source_count': doc_count
            }

        metadata = {
            'assembler_type': 'standard',
            'query': query,
            'retrieved_docs': doc_count,
            'total_tokens': context.total_tokens,
            'selection_strategy': context.selection_strategy
        }

        # Add basic statistics if enabled
        if self._include_basic_stats:
            metadata.update({
                'query_length': len(query),
                'answer_length': 0,  # Placeholder; see Returns above
                'source_count': doc_count
            })

        # Include minimal generation information
        if generation_metadata:
            # Only include essential generation metadata
            for field in ('model', 'generation_time'):
                if field in generation_metadata:
                    metadata[field] = generation_metadata[field]

        return metadata

    def get_supported_formats(self) -> List[str]:
        """
        Return list of formats this standard assembler supports.

        Returns:
            The base class's format names followed by the names specific to
            this assembler
        """
        standard_formats = [
            'minimal',
            'fast',
            'lightweight',
            'performance'
        ]
        return super().get_supported_formats() + standard_formats

    def configure(self, config: Dict[str, Any]) -> None:
        """
        Configure the standard assembler with provided settings.

        Options missing from ``config`` keep their current values.

        Args:
            config: Configuration dictionary
        """
        super().configure(config)

        # Update standard assembler specific configuration
        self._minimal_metadata = config.get('minimal_metadata', self._minimal_metadata)
        self._include_basic_stats = config.get('include_basic_stats', self._include_basic_stats)
        self._strip_large_sources = config.get('strip_large_sources', self._strip_large_sources)

        # Apply minimal metadata setting
        self._apply_minimal_metadata_overrides()