---
# Epic 2 Configuration with HuggingFace API Integration
# This configuration preserves all Epic 2 features (neural reranking, graph enhancement, analytics)
# while using HuggingFace API for both LLM generation and neural reranking
# Document processor for handling input files
document_processor:
  type: "hybrid_pdf"
  config:
    chunk_size: 1024    # characters per chunk
    chunk_overlap: 128  # overlap between consecutive chunks
# Embedding generator for converting text to vectors
embedder:
  type: "modular"
  config:
    # Dense embedding model
    model:
      type: "sentence_transformer"
      config:
        model_name: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
        device: "auto"  # pick GPU/MPS/CPU automatically
        normalize_embeddings: true
    # Dynamic batching for throughput
    batch_processor:
      type: "dynamic"
      config:
        initial_batch_size: 64
        max_batch_size: 256
        optimize_for_memory: false
    # In-memory embedding cache
    cache:
      type: "memory"
      config:
        max_entries: 100000
        max_memory_mb: 1024
# EPIC 2 ADVANCED RETRIEVER WITH API RERANKING
retriever:
  type: "modular_unified"
  config:
    # Composite filtering configuration (NEW - replaces semantic gap detection)
    composite_filtering:
      enabled: true
      fusion_weight: 0.7        # α - weight for fusion score importance
      semantic_weight: 0.3      # β - weight for semantic similarity (α + β = 1.0)
      min_composite_score: 0.4  # threshold for document inclusion
      max_candidates: 15        # reduce from k*2 to k*1.5 for efficiency

    # Legacy semantic gap detection (DEPRECATED - use composite_filtering)
    min_semantic_alignment: 0.3  # Minimum query-document semantic similarity

    # Backend Configuration
    backends:
      primary_backend: "faiss"
      fallback_enabled: true
      fallback_backend: null
      # Hot-swapping configuration
      enable_hot_swap: false
      health_check_interval_seconds: 30
      switch_threshold_error_rate: 0.1
      # FAISS backend settings
      faiss:
        index_type: "IndexFlatIP"  # inner product; cosine via normalized vectors
        normalize_embeddings: true
        metric: "cosine"
      # Weaviate backend settings (disabled for testing)
      weaviate: null

    # BM25 Sparse Retrieval Configuration
    sparse:
      type: "bm25"
      config:
        k1: 1.2  # term-frequency saturation
        b: 0.75  # length normalization
        lowercase: true
        preserve_technical_terms: true
        filter_stop_words: true  # Enable stop word filtering
        custom_stop_words: []    # Additional stop words if needed
        min_score: 0.1           # Minimum normalized score threshold

    # Score-Aware Fusion (preserves semantic relevance)
    fusion:
      type: "score_aware"  # Use ScoreAwareFusion
      config:
        score_weight: 0.9        # α - semantic score importance (very high)
        rank_weight: 0.1         # β - rank stability factor (minimal)
        overlap_weight: 0.0      # γ - both-retriever bonus (disabled)
        normalize_scores: false  # Score normalization disabled
        k: 60                    # RRF constant for rank component

    # Hybrid Search Configuration
    hybrid_search:
      enabled: true
      # Strategy weights (must sum to 1.0) - Semantic-focused configuration
      dense_weight: 0.8
      sparse_weight: 0.2
      graph_weight: 0.0
      # Fusion method
      fusion_method: "score_aware"
      rrf_k: 60
      # Advanced fusion parameters
      adaptive_weights: false
      query_dependent_weighting: false
      normalization_method: "min_max"
      # Performance optimization
      max_candidates_per_strategy: 200
      early_termination_threshold: 0.95
# Reranker Configuration - HuggingFace API Backend
reranker:
  type: "neural"
  config:
    enabled: true
    # Model configuration with HuggingFace API
    model_name: "cross-encoder/ms-marco-MiniLM-L6-v2"
    model_type: "cross_encoder"
    # Reranking parameters
    # NOTE(review): `max_candidates: 100` was declared twice at this level;
    # duplicate keys are invalid YAML (last-wins in most parsers), so the
    # second occurrence was removed. Values were identical, so behavior is
    # unchanged.
    max_candidates: 100
    batch_size: 32
    max_length: 512
    # Performance thresholds
    max_latency_ms: 5000
    fallback_to_fast_reranker: true
    fast_reranker_threshold: 100
    # HuggingFace API specific model configuration
    models:
      default_model:
        name: "cross-encoder/ms-marco-MiniLM-L6-v2"
        backend: "huggingface_api"
        api_token: "${HF_TOKEN}"  # resolved from environment; do not inline secrets
        batch_size: 32
        max_length: 512
        timeout: 10
        fallback_to_local: true
    score_threshold: 0.0
    default_model: "default_model"  # selects the entry under `models` above
# Graph Retrieval Configuration (Epic 2)
graph_retrieval:
  enabled: false  # feature preserved but switched off in this profile
  enable_entity_linking: true
  enable_cross_references: true
  similarity_threshold: 0.65
  max_connections_per_document: 15
  use_pagerank: true
  pagerank_damping: 0.85  # standard PageRank damping factor
  use_community_detection: false
  community_algorithm: "louvain"
  max_graph_hops: 3
  graph_weight_decay: 0.5  # score decay per hop
  combine_with_vector_search: true
# Analytics Configuration
analytics:
  enabled: true
  collect_query_metrics: true
  collect_performance_metrics: true
  collect_quality_metrics: true
  # Dashboard (disabled by default)
  dashboard_enabled: false
  dashboard_port: 8050
  dashboard_host: "localhost"
  auto_refresh_seconds: 5
  # Retention policy
  metrics_retention_days: 30
  detailed_logs_retention_days: 7
# Answer generator - HuggingFace API integration
answer_generator:
  type: "adaptive_modular"
  config:
    # LLM backend (HuggingFace Inference API)
    llm_client:
      type: "huggingface"
      config:
        model_name: "microsoft/DialoGPT-medium"
        api_token: "${HF_TOKEN}"  # resolved from environment; do not inline secrets
        timeout: 30
        use_chat_completion: true
        # Tried in order if the primary model fails
        fallback_models:
          - "google/gemma-2-2b-it"
          - "google/flan-t5-small"
        max_tokens: 512
        temperature: 0.1  # near-deterministic output
        top_p: 0.9
        stop_sequences: []
    # Prompt construction; {context} and {query} are filled at runtime
    prompt_builder:
      type: "simple"
      config:
        max_context_length: 12000
        include_instructions: true
        citation_style: "inline"
        template: |
          You are an expert technical assistant specializing in RISC-V architecture and computer systems.
          Context Documents:
          {context}
          Question: {query}
          Instructions:
          - Provide a comprehensive, detailed technical answer based ONLY on the provided context
          - Include technical specifications, encoding details, and implementation information when available
          - Explain concepts step-by-step with technical depth appropriate for engineers
          - Cover related concepts and connections mentioned in the context
          - Include specific examples, instruction formats, or implementation details when present
          - ALWAYS include citations in your answer using the format [Document X] where X is the document number
          - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
          - Multiple citations can be combined like [Document 1, Document 2]
          - If the answer is not fully covered by the context, clearly state what information is missing
          Answer:
    # Parses model output and pulls out citations
    response_parser:
      type: "markdown"
      config:
        extract_citations: true
    # Scores answer confidence from relevance/grounding/quality
    confidence_scorer:
      type: "semantic"
      config:
        min_answer_length: 20
        max_answer_length: 1000
        relevance_weight: 0.4
        grounding_weight: 0.4
        quality_weight: 0.2
        low_retrieval_penalty: 0.3  # Penalty when few documents retrieved
        min_context_documents: 3    # Minimum documents for full confidence
# Global settings optimized for HuggingFace API usage
global_settings:
  environment: "hf_api"
  log_level: "debug"
  max_workers: 4
  enable_performance_monitoring: true
  enable_cost_monitoring: true
  # API-specific settings
  api_retry_attempts: 3
  api_retry_delay: 1.0  # seconds between retries
  api_timeout: 30
  # Memory optimization for API usage
  enable_memory_optimization: true
  unload_unused_models: true
  model_cache_size: 2