# Arthur Passuello - initial commit (5e1a30c)
# Advanced Retriever Configuration
# This configuration enables Epic 2 features including multi-backend support,
# hybrid search strategies, neural reranking, and analytics.
# Backend Configuration
# Vector-store backend selection plus per-backend settings.
# NOTE(review): the nesting in this section was reconstructed from key names and
# the section comments after indentation was lost - confirm against the schema
# the config loader expects.
backends:
  primary_backend: "weaviate"  # "faiss" or "weaviate"
  fallback_enabled: true
  fallback_backend: "faiss"

  # Hot-swapping configuration
  enable_hot_swap: true
  health_check_interval_seconds: 30
  switch_threshold_error_rate: 0.1

  # FAISS backend settings
  faiss:
    # IndexFlatIP scores by inner product; on normalized embeddings that is
    # equivalent to the cosine metric declared below.
    index_type: "IndexFlatIP"
    normalize_embeddings: true
    metric: "cosine"

  # Weaviate backend settings
  weaviate:
    connection:
      url: "http://localhost:8080"
      timeout: 30
      startup_period: 5

    schema:
      class_name: "TechnicalDocument"
      description: "Technical documentation chunks with embeddings"
      # NOTE(review): assumed to belong to `schema` - TODO confirm.
      vector_index_config:
        distance: "cosine"
        ef: 64
        efConstruction: 128
        maxConnections: 64

    search:
      hybrid_search_enabled: true
      alpha: 0.7  # Balance between vector (1.0) and keyword (0.0) search
      fusion_type: "rankedFusion"
      limit: 100
      certainty_threshold: 0.7
      autocut: 1

    batch:
      batch_size: 100
      num_workers: 1
      dynamic_batch_size: true
      min_batch_size: 10
      max_batch_size: 1000

    auto_create_schema: true
    enable_backup: true
    backup_interval_hours: 24
    max_retries: 3
    retry_delay_seconds: 1.0
# Hybrid Search Configuration
# Settings for fusing results from the dense, sparse and graph strategies.
hybrid_search:
  enabled: true

  # Strategy weights (must sum to 1.0)
  dense_weight: 0.7
  sparse_weight: 0.3
  # NOTE(review): the graph_retrieval section of this file sets `enabled: true`
  # while this weight is 0.0 - confirm whether graph results are meant to
  # contribute to the fused ranking.
  graph_weight: 0.0  # Reserved for future graph-based retrieval

  # Fusion method
  fusion_method: "rrf"  # "rrf", "weighted", "learned"
  rrf_k: 60

  # Advanced fusion parameters
  adaptive_weights: false
  query_dependent_weighting: false
  normalization_method: "min_max"  # "min_max", "z_score", "softmax"

  # Performance optimization
  max_candidates_per_strategy: 100
  early_termination_threshold: 0.95
# Neural Reranking Configuration (Epic 2 enabled)
# Cross-encoder reranking settings.
neural_reranking:
  enabled: true  # ENABLED for Epic 2

  # Model configuration
  model_name: "cross-encoder/ms-marco-MiniLM-L6-v2"
  model_type: "cross_encoder"
  device: "auto"  # "auto", "cpu", "cuda", "mps"

  # Reranking parameters
  max_candidates: 50
  batch_size: 32
  max_length: 512

  # Performance thresholds
  max_latency_ms: 200
  fallback_to_fast_reranker: true
  # NOTE(review): this threshold (100) is larger than max_candidates (50);
  # confirm what quantity it is compared against.
  fast_reranker_threshold: 100
# Graph Retrieval Configuration (Epic 2 enabled)
# Document-graph construction, graph algorithms and graph-based retrieval settings.
graph_retrieval:
  enabled: true  # ENABLED for Epic 2

  # Graph construction
  enable_entity_linking: true
  enable_cross_references: true
  similarity_threshold: 0.8
  max_connections_per_document: 10

  # Graph algorithms
  use_pagerank: true
  pagerank_damping: 0.85
  # community_algorithm is still set although community detection is disabled.
  use_community_detection: false
  community_algorithm: "louvain"

  # Retrieval strategies
  max_graph_hops: 2
  graph_weight_decay: 0.5
  combine_with_vector_search: true
# Analytics Configuration
# Metrics collection, dashboard, retention and visualization settings.
analytics:
  enabled: true

  # Metrics collection
  collect_query_metrics: true
  collect_performance_metrics: true
  collect_quality_metrics: true

  # Dashboard configuration (for future Plotly dashboard)
  # NOTE(review): dashboard_enabled is false here, while
  # feature_flags.analytics_dashboard in this file is true - confirm which
  # setting takes precedence.
  dashboard_enabled: false
  dashboard_port: 8050
  dashboard_host: "localhost"
  auto_refresh_seconds: 5

  # Data retention
  metrics_retention_days: 30
  detailed_logs_retention_days: 7

  # Visualization options
  enable_real_time_plots: true
  enable_query_analysis: true
  enable_performance_heatmaps: true
# A/B Testing Configuration (framework is in place; implementation is still pending)
# Experiment assignment, statistical parameters and monitoring settings.
experiments:
  enabled: false  # Set to true when implementation is complete

  # Assignment strategy
  assignment_method: "deterministic"  # "random", "deterministic", "contextual"
  assignment_key_field: "query_hash"

  # Statistical parameters
  min_sample_size: 100
  confidence_level: 0.95
  effect_size_threshold: 0.05

  # Monitoring
  auto_winner_detection: true
  max_experiment_duration_days: 30
  early_stopping_enabled: true
# Legacy and Compatibility
legacy_mode: false
legacy_fallback: true

# Performance Settings
# P95 latency target including all processing
max_total_latency_ms: 700
enable_caching: true
cache_size: 1000
# Feature Flags (Epic 2 enabled)
enable_all_features: true  # ENABLED for Epic 2

# Per-feature switches. They are nested under feature_flags so that the
# neural_reranking / graph_retrieval flags do not collide with the top-level
# sections of the same name.
feature_flags:
  weaviate_backend: true  # Enable Weaviate as alternative to FAISS
  neural_reranking: true  # ENABLED for Epic 2
  graph_retrieval: true  # ENABLED for Epic 2
  analytics_dashboard: true  # ENABLED for Epic 2
  ab_testing: false  # Enable A/B testing framework (future)