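# NOTE(review): the original first three lines read "Spaces: Running Running";
# they appear to be web-page residue rather than configuration, so they are
# kept here only as a comment.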
# Epic 2 Configuration with HuggingFace API Integration
# This configuration preserves all Epic 2 features (neural reranking, graph enhancement, analytics)
# while using the HuggingFace API for both LLM generation and neural reranking.
# Note: graph retrieval is configured but currently disabled (graph_retrieval.enabled: false).
# Document processor for handling input files.
# Selects the "hybrid_pdf" processor and sets its chunking parameters.
document_processor:
  type: "hybrid_pdf"
  config:
    # NOTE(review): units (characters vs. tokens) are not stated in this file;
    # confirm against the processor implementation.
    chunk_size: 1024
    chunk_overlap: 128
# Embedding generator for converting text to vectors.
# The "modular" embedder is composed of three sub-components, each with its
# own type/config pair: model, batch_processor and cache.
embedder:
  type: "modular"
  config:
    # Embedding model
    model:
      type: "sentence_transformer"
      config:
        model_name: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
        device: "auto"
        normalize_embeddings: true

    # Batching strategy
    batch_processor:
      type: "dynamic"
      config:
        initial_batch_size: 64
        max_batch_size: 256
        optimize_for_memory: false

    # Embedding cache
    cache:
      type: "memory"
      config:
        max_entries: 100000
        max_memory_mb: 1024
# EPIC 2 ADVANCED RETRIEVER WITH API RERANKING
# Configures the "modular_unified" retriever: candidate filtering, vector
# backends, BM25 sparse retrieval, score fusion, hybrid-search weights,
# neural reranking through the HuggingFace API, graph retrieval and analytics.
retriever:
  type: "modular_unified"
  config:
    # Composite filtering configuration (NEW - replaces semantic gap detection)
    composite_filtering:
      enabled: true
      fusion_weight: 0.7  # α - weight for fusion score importance
      semantic_weight: 0.3  # β - weight for semantic similarity
      min_composite_score: 0.4  # threshold for document inclusion
      max_candidates: 15  # reduce from k*2 to k*1.5 for efficiency

    # Legacy semantic gap detection (DEPRECATED - use composite_filtering)
    min_semantic_alignment: 0.3  # Minimum query-document semantic similarity

    # Backend Configuration
    backends:
      primary_backend: "faiss"
      # NOTE(review): fallback is enabled but no fallback backend is named;
      # confirm this combination is intended.
      fallback_enabled: true
      fallback_backend: null

      # Hot-swapping configuration
      enable_hot_swap: false
      health_check_interval_seconds: 30
      switch_threshold_error_rate: 0.1

      # FAISS backend settings
      faiss:
        index_type: "IndexFlatIP"
        normalize_embeddings: true
        metric: "cosine"

      # Weaviate backend settings (disabled for testing)
      weaviate: null

    # BM25 Sparse Retrieval Configuration
    sparse:
      type: "bm25"
      config:
        k1: 1.2
        b: 0.75
        lowercase: true
        preserve_technical_terms: true
        filter_stop_words: true  # Enable stop word filtering
        custom_stop_words: []  # Additional stop words if needed
        min_score: 0.1  # Minimum normalized score threshold

    # Score-Aware Fusion (preserves semantic relevance)
    fusion:
      type: "score_aware"  # Use ScoreAwareFusion
      config:
        score_weight: 0.9  # α - semantic score importance (very high)
        rank_weight: 0.1  # β - rank stability factor (minimal)
        overlap_weight: 0.0  # γ - both-retriever bonus (disabled)
        normalize_scores: false  # Score normalization disabled
        k: 60  # RRF constant for rank component

    # Hybrid Search Configuration
    hybrid_search:
      enabled: true

      # Strategy weights (must sum to 1.0) - Semantic-focused configuration
      dense_weight: 0.8
      sparse_weight: 0.2
      graph_weight: 0.0

      # Fusion method
      fusion_method: "score_aware"
      rrf_k: 60

      # Advanced fusion parameters
      adaptive_weights: false
      query_dependent_weighting: false
      normalization_method: "min_max"

      # Performance optimization
      max_candidates_per_strategy: 200
      early_termination_threshold: 0.95

    # Reranker Configuration - HuggingFace API Backend
    reranker:
      type: "neural"
      config:
        enabled: true

        # Model configuration with HuggingFace API
        model_name: "cross-encoder/ms-marco-MiniLM-L6-v2"
        model_type: "cross_encoder"

        # Reranking parameters
        max_candidates: 100
        batch_size: 32
        max_length: 512

        # Performance thresholds
        max_latency_ms: 5000
        fallback_to_fast_reranker: true
        fast_reranker_threshold: 100

        # HuggingFace API specific model configuration
        models:
          default_model:
            name: "cross-encoder/ms-marco-MiniLM-L6-v2"
            backend: "huggingface_api"
            # Token is read from the HF_TOKEN placeholder; never hard-code it.
            api_token: "${HF_TOKEN}"
            batch_size: 32
            max_length: 512
            timeout: 10
            fallback_to_local: true
            max_candidates: 100
            score_threshold: 0.0
        # Names the entry under `models` to use by default.
        default_model: "default_model"

    # Graph Retrieval Configuration (Epic 2)
    # NOTE(review): disabled here (and graph_weight is 0.0 in hybrid_search);
    # the remaining settings presumably only apply once enabled - confirm.
    graph_retrieval:
      enabled: false
      enable_entity_linking: true
      enable_cross_references: true
      similarity_threshold: 0.65
      max_connections_per_document: 15
      use_pagerank: true
      pagerank_damping: 0.85
      use_community_detection: false
      community_algorithm: "louvain"
      max_graph_hops: 3
      graph_weight_decay: 0.5
      combine_with_vector_search: true

    # Analytics Configuration
    analytics:
      enabled: true
      collect_query_metrics: true
      collect_performance_metrics: true
      collect_quality_metrics: true
      dashboard_enabled: false
      dashboard_port: 8050
      dashboard_host: "localhost"
      auto_refresh_seconds: 5
      metrics_retention_days: 30
      detailed_logs_retention_days: 7
# Answer generator - HuggingFace API integration.
# The "adaptive_modular" generator is composed of four sub-components, each
# with its own type/config pair: llm_client, prompt_builder, response_parser
# and confidence_scorer.
answer_generator:
  type: "adaptive_modular"
  config:
    # LLM client backed by the HuggingFace API
    llm_client:
      type: "huggingface"
      config:
        model_name: "microsoft/DialoGPT-medium"
        # Token is read from the HF_TOKEN placeholder; never hard-code it.
        api_token: "${HF_TOKEN}"
        timeout: 30
        use_chat_completion: true
        fallback_models:
          - "google/gemma-2-2b-it"
          - "google/flan-t5-small"
        max_tokens: 512
        temperature: 0.1
        top_p: 0.9
        stop_sequences: []

    # Prompt construction; {context} and {query} are placeholders in the
    # template text below.
    prompt_builder:
      type: "simple"
      config:
        max_context_length: 12000
        include_instructions: true
        citation_style: "inline"
        template: |
          You are an expert technical assistant specializing in RISC-V architecture and computer systems.
          Context Documents:
          {context}
          Question: {query}
          Instructions:
          - Provide a comprehensive, detailed technical answer based ONLY on the provided context
          - Include technical specifications, encoding details, and implementation information when available
          - Explain concepts step-by-step with technical depth appropriate for engineers
          - Cover related concepts and connections mentioned in the context
          - Include specific examples, instruction formats, or implementation details when present
          - ALWAYS include citations in your answer using the format [Document X] where X is the document number
          - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
          - Multiple citations can be combined like [Document 1, Document 2]
          - If the answer is not fully covered by the context, clearly state what information is missing
          Answer:

    # Response parsing
    response_parser:
      type: "markdown"
      config:
        extract_citations: true

    # Confidence scoring; the three *_weight values below sum to 1.0.
    confidence_scorer:
      type: "semantic"
      config:
        min_answer_length: 20
        max_answer_length: 1000
        relevance_weight: 0.4
        grounding_weight: 0.4
        quality_weight: 0.2
        low_retrieval_penalty: 0.3  # Penalty when few documents retrieved
        min_context_documents: 3  # Minimum documents for full confidence
# Global settings optimized for HuggingFace API usage.
# Flat mapping of process-wide options: environment label, logging,
# worker count, monitoring switches, API retry/timeout and memory options.
global_settings:
  environment: "hf_api"
  log_level: "debug"
  max_workers: 4
  enable_performance_monitoring: true
  enable_cost_monitoring: true

  # API-specific settings
  # NOTE(review): units are not stated in this file (presumably seconds for
  # api_retry_delay and api_timeout) - confirm.
  api_retry_attempts: 3
  api_retry_delay: 1.0
  api_timeout: 30

  # Memory optimization for API usage
  enable_memory_optimization: true
  unload_unused_models: true
  model_cache_size: 2