# File size: 8,045 Bytes
# 5e1a30c (source revision)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
# Epic 2 Configuration with HuggingFace API Integration
# This configuration preserves all Epic 2 features (neural reranking, graph enhancement, analytics)
# while using HuggingFace API for both LLM generation and neural reranking

# Document processor for handling input files.
# Parses source PDFs and splits them into overlapping chunks for indexing.
document_processor:
  type: "hybrid_pdf"
  config:
    chunk_size: 1024     # size per chunk — unit (chars vs tokens) defined by the processor impl; TODO confirm
    chunk_overlap: 128   # overlap between consecutive chunks, preserves context across chunk boundaries

# Embedding generator for converting text to vectors
embedder:
  type: "modular"
  config:
    model:
      type: "sentence_transformer"
      config:
        model_name: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
        device: "auto"                # device auto-selection — semantics defined by the embedder impl
        normalize_embeddings: true    # unit-length vectors; pairs with the IndexFlatIP/cosine FAISS backend below
    batch_processor:
      type: "dynamic"
      config:
        initial_batch_size: 64
        max_batch_size: 256           # upper bound for dynamic batch growth
        optimize_for_memory: false    # favor throughput over memory footprint
    cache:
      type: "memory"
      config:
        max_entries: 100000           # embedding cache capacity (entries)
        max_memory_mb: 1024           # memory cap for the cache

# EPIC 2 ADVANCED RETRIEVER WITH API RERANKING
retriever:
  type: "modular_unified"
  config:
    # Composite filtering configuration (NEW - replaces semantic gap detection)
    composite_filtering:
      enabled: true
      fusion_weight: 0.7          # α - weight for fusion score importance
      semantic_weight: 0.3        # β - weight for semantic similarity (α + β = 1.0)
      min_composite_score: 0.4    # threshold for document inclusion
      max_candidates: 15          # reduce from k*2 to k*1.5 for efficiency

    # Legacy semantic gap detection (DEPRECATED - use composite_filtering).
    # Retained so older consumers that still read this key keep working.
    min_semantic_alignment: 0.3  # Minimum query-document semantic similarity

    # Backend Configuration
    backends:
      primary_backend: "faiss"
      # FIX: this was `true` while fallback_backend is null — no fallback can
      # ever engage without a configured backend, so state the effective
      # behavior explicitly instead of advertising a fallback that cannot run.
      fallback_enabled: false
      fallback_backend: null

      # Hot-swapping configuration (inert while enable_hot_swap is false)
      enable_hot_swap: false
      health_check_interval_seconds: 30
      switch_threshold_error_rate: 0.1   # error rate that would trigger a backend switch

      # FAISS backend settings
      faiss:
        index_type: "IndexFlatIP"        # exact inner-product search
        normalize_embeddings: true       # with unit vectors, inner product equals cosine similarity
        metric: "cosine"

      # Weaviate backend settings (disabled for testing)
      weaviate: null

    # BM25 Sparse Retrieval Configuration
    sparse:
      type: "bm25"
      config:
        k1: 1.2                  # BM25 term-frequency saturation parameter
        b: 0.75                  # BM25 document-length normalization parameter
        lowercase: true
        preserve_technical_terms: true
        filter_stop_words: true  # Enable stop word filtering
        custom_stop_words: []    # Additional stop words if needed
        min_score: 0.1           # Minimum normalized score threshold

    # Score-Aware Fusion (preserves semantic relevance)
    fusion:
      type: "score_aware"  # Use ScoreAwareFusion
      config:
        score_weight: 0.9       # α - semantic score importance (very high)
        rank_weight: 0.1        # β - rank stability factor (minimal)
        overlap_weight: 0.0     # γ - both-retriever bonus (disabled)
        normalize_scores: false # Score normalization disabled
        k: 60                   # RRF constant for rank component

    # Hybrid Search Configuration
    hybrid_search:
      enabled: true

      # Strategy weights (must sum to 1.0) - Semantic-focused configuration
      dense_weight: 0.8
      sparse_weight: 0.2
      graph_weight: 0.0   # graph strategy off — see graph_retrieval.enabled below

      # Fusion method
      fusion_method: "score_aware"
      rrf_k: 60

      # Advanced fusion parameters
      adaptive_weights: false
      query_dependent_weighting: false
      normalization_method: "min_max"

      # Performance optimization
      max_candidates_per_strategy: 200
      early_termination_threshold: 0.95

    # Reranker Configuration - HuggingFace API Backend
    reranker:
      type: "neural"
      config:
        enabled: true

        # Model configuration with HuggingFace API
        model_name: "cross-encoder/ms-marco-MiniLM-L6-v2"
        model_type: "cross_encoder"

        # Reranking parameters
        max_candidates: 100
        batch_size: 32
        max_length: 512    # max token length per query/document pair

        # Performance thresholds
        max_latency_ms: 5000
        fallback_to_fast_reranker: true
        fast_reranker_threshold: 100

        # HuggingFace API specific model configuration
        models:
          default_model:
            name: "cross-encoder/ms-marco-MiniLM-L6-v2"
            backend: "huggingface_api"
            api_token: "${HF_TOKEN}"   # injected from environment; never commit a literal token
            batch_size: 32
            max_length: 512
            timeout: 10                # seconds per API call
            fallback_to_local: true    # run the cross-encoder locally if the API is unavailable
            max_candidates: 100
            score_threshold: 0.0
        # Which entry in `models` above to use by default
        default_model: "default_model"

    # Graph Retrieval Configuration (Epic 2) — currently disabled; the
    # remaining keys in this section are inert until `enabled` is true.
    graph_retrieval:
      enabled: false
      enable_entity_linking: true
      enable_cross_references: true
      similarity_threshold: 0.65
      max_connections_per_document: 15
      use_pagerank: true
      pagerank_damping: 0.85
      use_community_detection: false
      community_algorithm: "louvain"
      max_graph_hops: 3
      graph_weight_decay: 0.5
      combine_with_vector_search: true

    # Analytics Configuration
    analytics:
      enabled: true
      collect_query_metrics: true
      collect_performance_metrics: true
      collect_quality_metrics: true
      dashboard_enabled: false          # dashboard_* keys below are inert while this is false
      dashboard_port: 8050
      dashboard_host: "localhost"
      auto_refresh_seconds: 5
      metrics_retention_days: 30
      detailed_logs_retention_days: 7

# Answer generator - HuggingFace API integration
answer_generator:
  type: "adaptive_modular"
  config:
    llm_client:
      type: "huggingface"
      config:
        # NOTE(review): DialoGPT-medium is a conversational (non-instruct)
        # model, while use_chat_completion: true and the instruct-tuned
        # fallbacks below suggest an instruct model was intended — confirm.
        model_name: "microsoft/DialoGPT-medium"
        api_token: "${HF_TOKEN}"    # injected from environment; never commit a literal token
        timeout: 30                 # seconds per API call
        use_chat_completion: true
        # Tried in order when the primary model fails or is unavailable.
        fallback_models:
          - "google/gemma-2-2b-it"
          - "google/flan-t5-small"
        max_tokens: 512
        temperature: 0.1            # low temperature for near-deterministic technical answers
        top_p: 0.9
        stop_sequences: []          # no custom stop sequences
    
    prompt_builder:
      type: "simple"
      config:
        max_context_length: 12000
        include_instructions: true
        citation_style: "inline"
        template: |
          You are an expert technical assistant specializing in RISC-V architecture and computer systems.
          
          Context Documents:
          {context}
          
          Question: {query}
          
          Instructions:
          - Provide a comprehensive, detailed technical answer based ONLY on the provided context
          - Include technical specifications, encoding details, and implementation information when available
          - Explain concepts step-by-step with technical depth appropriate for engineers
          - Cover related concepts and connections mentioned in the context
          - Include specific examples, instruction formats, or implementation details when present
          - ALWAYS include citations in your answer using the format [Document X] where X is the document number
          - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
          - Multiple citations can be combined like [Document 1, Document 2]
          - If the answer is not fully covered by the context, clearly state what information is missing
          
          Answer:
        
    response_parser:
      type: "markdown"
      config:
        extract_citations: true  # parse the [Document X] markers requested by the prompt template

    confidence_scorer:
      type: "semantic"
      config:
        min_answer_length: 20
        max_answer_length: 1000
        # Component weights (sum to 1.0)
        relevance_weight: 0.4
        grounding_weight: 0.4
        quality_weight: 0.2
        low_retrieval_penalty: 0.3  # Penalty when few documents retrieved
        min_context_documents: 3    # Minimum documents for full confidence

# Global settings optimized for HuggingFace API usage
global_settings:
  environment: "hf_api"
  log_level: "debug"                    # verbose; consider "info" for production deployments
  max_workers: 4
  enable_performance_monitoring: true
  enable_cost_monitoring: true          # cost tracking for metered API usage — semantics defined by consumer

  # API-specific settings
  api_retry_attempts: 3
  api_retry_delay: 1.0                  # seconds between retries
  api_timeout: 30                       # seconds

  # Memory optimization for API usage
  enable_memory_optimization: true
  unload_unused_models: true            # free local models when not in use
  model_cache_size: 2                   # max local models kept resident