# File: enhanced-rag-demo/config/epic2_hf_api.yaml
# Last commit: Arthur Passuello - "initial commit" (5e1a30c)
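# Epic 2 configuration with HuggingFace API integration.
# Keeps the Epic 2 feature sections (neural reranking, graph retrieval, analytics)
# while using the HuggingFace API for both LLM generation and neural reranking.
# Note: graph retrieval is configured but switched off in this file
# (graph_retrieval.enabled is false and hybrid_search.graph_weight is 0.0).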
# Document processor for handling input files.
document_processor:
  type: "hybrid_pdf"
  config:
    # NOTE(review): the unit of the two sizes below (characters vs. tokens) is
    # not stated in this file - confirm against the processor implementation.
    chunk_size: 1024
    chunk_overlap: 128
# Embedding generator for converting text to vectors.
# Three sub-components are configured, each as a type/config pair:
# the embedding model, the batch processor, and the embedding cache.
embedder:
  type: "modular"
  config:
    # Embedding model.
    model:
      type: "sentence_transformer"
      config:
        model_name: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
        device: "auto"
        # Same flag as retriever backends.faiss.normalize_embeddings (also true).
        normalize_embeddings: true

    # Batch sizing for embedding calls.
    batch_processor:
      type: "dynamic"
      config:
        initial_batch_size: 64
        max_batch_size: 256
        optimize_for_memory: false

    # In-memory embedding cache, bounded by entry count and by size in MB.
    cache:
      type: "memory"
      config:
        max_entries: 100000
        max_memory_mb: 1024
# EPIC 2 ADVANCED RETRIEVER WITH API RERANKING
# NOTE(review): the nesting in this section was rebuilt from a copy whose
# indentation had been stripped. Repeated keys (enabled, max_candidates,
# batch_size, max_length, default_model) pin most mapping boundaries, but
# confirm the placement of graph_retrieval and analytics under config against
# the loader's schema.
retriever:
  type: "modular_unified"
  config:
    # Composite filtering configuration (NEW - replaces semantic gap detection)
    # fusion_weight + semantic_weight = 1.0
    composite_filtering:
      enabled: true
      fusion_weight: 0.7  # α - weight for fusion score importance
      semantic_weight: 0.3  # β - weight for semantic similarity
      min_composite_score: 0.4  # threshold for document inclusion
      max_candidates: 15  # reduce from k*2 to k*1.5 for efficiency

    # Legacy semantic gap detection (DEPRECATED - use composite_filtering)
    min_semantic_alignment: 0.3  # Minimum query-document semantic similarity

    # Backend Configuration
    backends:
      primary_backend: "faiss"
      # NOTE(review): fallback is enabled but no fallback backend is named
      # (fallback_backend is null, weaviate is null) - confirm this is intended.
      fallback_enabled: true
      fallback_backend: null

      # Hot-swapping configuration
      enable_hot_swap: false
      health_check_interval_seconds: 30
      switch_threshold_error_rate: 0.1

      # FAISS backend settings
      # Inner product over normalized vectors equals cosine similarity, so
      # index_type, normalize_embeddings and metric agree with each other.
      faiss:
        index_type: "IndexFlatIP"
        normalize_embeddings: true
        metric: "cosine"

      # Weaviate backend settings (disabled for testing)
      weaviate: null

    # BM25 Sparse Retrieval Configuration
    sparse:
      type: "bm25"
      config:
        k1: 1.2
        b: 0.75
        lowercase: true
        preserve_technical_terms: true
        filter_stop_words: true  # Enable stop word filtering
        custom_stop_words: []  # Additional stop words if needed
        min_score: 0.1  # Minimum normalized score threshold

    # Score-Aware Fusion (preserves semantic relevance)
    # score_weight + rank_weight + overlap_weight = 1.0
    fusion:
      type: "score_aware"  # Use ScoreAwareFusion
      config:
        score_weight: 0.9  # α - semantic score importance (very high)
        rank_weight: 0.1  # β - rank stability factor (minimal)
        overlap_weight: 0.0  # γ - both-retriever bonus (disabled)
        normalize_scores: false  # Score normalization disabled
        k: 60  # RRF constant for rank component

    # Hybrid Search Configuration
    hybrid_search:
      enabled: true

      # Strategy weights (must sum to 1.0) - Semantic-focused configuration
      # 0.8 + 0.2 + 0.0 = 1.0; graph_weight is 0.0 while graph_retrieval is disabled.
      dense_weight: 0.8
      sparse_weight: 0.2
      graph_weight: 0.0

      # Fusion method (same method and constant as the fusion section above)
      fusion_method: "score_aware"
      rrf_k: 60

      # Advanced fusion parameters
      adaptive_weights: false
      query_dependent_weighting: false
      normalization_method: "min_max"

      # Performance optimization
      max_candidates_per_strategy: 200
      early_termination_threshold: 0.95

    # Reranker Configuration - HuggingFace API Backend
    reranker:
      type: "neural"
      config:
        enabled: true

        # Model configuration with HuggingFace API
        model_name: "cross-encoder/ms-marco-MiniLM-L6-v2"
        model_type: "cross_encoder"

        # Reranking parameters
        max_candidates: 100
        batch_size: 32
        max_length: 512

        # Performance thresholds
        max_latency_ms: 5000
        fallback_to_fast_reranker: true
        fast_reranker_threshold: 100

        # HuggingFace API specific model configuration
        models:
          default_model:
            name: "cross-encoder/ms-marco-MiniLM-L6-v2"
            backend: "huggingface_api"
            # Placeholder, not a literal token; presumably substituted from the
            # HF_TOKEN environment variable by the config loader - confirm.
            api_token: "${HF_TOKEN}"
            batch_size: 32
            max_length: 512
            # NOTE(review): if timeout is in seconds, 10 s exceeds
            # max_latency_ms (5000) above - confirm units and intent.
            timeout: 10
            fallback_to_local: true
            max_candidates: 100
            score_threshold: 0.0
        # Name of the entry under "models" to use by default.
        default_model: "default_model"

    # Graph Retrieval Configuration (Epic 2)
    # Disabled in this file; the remaining keys are kept so it can be re-enabled.
    graph_retrieval:
      enabled: false
      enable_entity_linking: true
      enable_cross_references: true
      similarity_threshold: 0.65
      max_connections_per_document: 15
      use_pagerank: true
      pagerank_damping: 0.85
      use_community_detection: false
      community_algorithm: "louvain"
      max_graph_hops: 3
      graph_weight_decay: 0.5
      combine_with_vector_search: true

    # Analytics Configuration
    # Metric collection is on; the dashboard itself is off.
    analytics:
      enabled: true
      collect_query_metrics: true
      collect_performance_metrics: true
      collect_quality_metrics: true
      dashboard_enabled: false
      dashboard_port: 8050
      dashboard_host: "localhost"
      auto_refresh_seconds: 5
      metrics_retention_days: 30
      detailed_logs_retention_days: 7
# Answer generator - HuggingFace API integration
# Four sub-components, each a type/config pair: LLM client, prompt builder,
# response parser, and confidence scorer.
answer_generator:
  type: "adaptive_modular"
  config:
    llm_client:
      type: "huggingface"
      config:
        # NOTE(review): use_chat_completion is true - confirm that the primary
        # model below is actually served through the chat-completion endpoint.
        model_name: "microsoft/DialoGPT-medium"
        # Placeholder, not a literal token; presumably substituted from the
        # HF_TOKEN environment variable by the config loader - confirm.
        api_token: "${HF_TOKEN}"
        timeout: 30
        use_chat_completion: true
        # Presumably tried in order when the primary model fails - confirm.
        fallback_models:
          - "google/gemma-2-2b-it"
          - "google/flan-t5-small"
        max_tokens: 512
        temperature: 0.1
        top_p: 0.9
        stop_sequences: []

    prompt_builder:
      type: "simple"
      config:
        max_context_length: 12000
        include_instructions: true
        citation_style: "inline"
        # Literal block scalar: line breaks are preserved as written.
        # {context} and {query} are placeholders left for the prompt builder.
        template: |
          You are an expert technical assistant specializing in RISC-V architecture and computer systems.
          Context Documents:
          {context}
          Question: {query}
          Instructions:
          - Provide a comprehensive, detailed technical answer based ONLY on the provided context
          - Include technical specifications, encoding details, and implementation information when available
          - Explain concepts step-by-step with technical depth appropriate for engineers
          - Cover related concepts and connections mentioned in the context
          - Include specific examples, instruction formats, or implementation details when present
          - ALWAYS include citations in your answer using the format [Document X] where X is the document number
          - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
          - Multiple citations can be combined like [Document 1, Document 2]
          - If the answer is not fully covered by the context, clearly state what information is missing
          Answer:

    response_parser:
      type: "markdown"
      config:
        extract_citations: true

    confidence_scorer:
      type: "semantic"
      config:
        min_answer_length: 20
        max_answer_length: 1000
        # relevance_weight + grounding_weight + quality_weight = 1.0
        relevance_weight: 0.4
        grounding_weight: 0.4
        quality_weight: 0.2
        low_retrieval_penalty: 0.3  # Penalty when few documents retrieved
        min_context_documents: 3  # Minimum documents for full confidence
# Global settings optimized for HuggingFace API usage
global_settings:
  environment: "hf_api"
  # NOTE(review): "debug" is verbose - confirm it is intended outside development.
  log_level: "debug"
  max_workers: 4
  enable_performance_monitoring: true
  enable_cost_monitoring: true

  # API-specific settings
  api_retry_attempts: 3
  api_retry_delay: 1.0  # presumably seconds - confirm
  api_timeout: 30  # same value as the answer generator's llm_client timeout

  # Memory optimization for API usage
  enable_memory_optimization: true
  unload_unused_models: true
  model_cache_size: 2