# Epic 2 Configuration with HuggingFace API Integration
# This configuration preserves all Epic 2 features (neural reranking, graph enhancement, analytics)
# while using HuggingFace API for both LLM generation and neural reranking

# Document processor for handling input files
document_processor:
  type: "hybrid_pdf"
  config:
    chunk_size: 1024
    chunk_overlap: 128

# Embedding generator for converting text to vectors
embedder:
  type: "modular"
  config:
    model:
      type: "sentence_transformer"
      config:
        model_name: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
        device: "auto"
        normalize_embeddings: true
    batch_processor:
      type: "dynamic"
      config:
        initial_batch_size: 64
        max_batch_size: 256
        optimize_for_memory: false
    cache:
      type: "memory"
      config:
        max_entries: 100000
        max_memory_mb: 1024

# EPIC 2 ADVANCED RETRIEVER WITH API RERANKING
retriever:
  type: "modular_unified"
  config:
    # Composite filtering configuration (NEW - replaces semantic gap detection)
    composite_filtering:
      enabled: true
      fusion_weight: 0.7        # α - weight for fusion score importance
      semantic_weight: 0.3      # β - weight for semantic similarity
      min_composite_score: 0.4  # threshold for document inclusion
      max_candidates: 15        # reduced from k*2 to k*1.5 for efficiency

    # Legacy semantic gap detection (DEPRECATED - use composite_filtering)
    min_semantic_alignment: 0.3  # Minimum query-document semantic similarity

    # Backend Configuration
    backends:
      primary_backend: "faiss"
      fallback_enabled: true
      fallback_backend: null

      # Hot-swapping configuration
      enable_hot_swap: false
      health_check_interval_seconds: 30
      switch_threshold_error_rate: 0.1

      # FAISS backend settings
      faiss:
        index_type: "IndexFlatIP"
        normalize_embeddings: true
        metric: "cosine"

      # Weaviate backend settings (disabled for testing)
      weaviate: null

    # BM25 Sparse Retrieval Configuration
    sparse:
      type: "bm25"
      config:
        k1: 1.2
        b: 0.75
        lowercase: true
        preserve_technical_terms: true
        filter_stop_words: true  # Enable stop word filtering
        custom_stop_words: []    # Additional stop words if needed
        min_score: 0.1           # Minimum normalized score threshold

    # Score-Aware Fusion (preserves semantic relevance)
    fusion:
      type: "score_aware"  # Use ScoreAwareFusion
      config:
        score_weight: 0.9        # α - semantic score importance (very high)
        rank_weight: 0.1         # β - rank stability factor (minimal)
        overlap_weight: 0.0      # γ - both-retriever bonus (disabled)
        normalize_scores: false  # Score normalization disabled
        k: 60                    # RRF constant for rank component

    # Hybrid Search Configuration
    hybrid_search:
      enabled: true

      # Strategy weights (must sum to 1.0) - Semantic-focused configuration
      dense_weight: 0.8
      sparse_weight: 0.2
      graph_weight: 0.0

      # Fusion method
      fusion_method: "score_aware"
      rrf_k: 60

      # Advanced fusion parameters
      adaptive_weights: false
      query_dependent_weighting: false
      normalization_method: "min_max"

      # Performance optimization
      max_candidates_per_strategy: 200
      early_termination_threshold: 0.95

    # Reranker Configuration - HuggingFace API Backend
    reranker:
      type: "neural"
      config:
        enabled: true

        # Model configuration with HuggingFace API
        model_name: "cross-encoder/ms-marco-MiniLM-L6-v2"
        model_type: "cross_encoder"

        # Reranking parameters
        max_candidates: 100
        batch_size: 32
        max_length: 512

        # Performance thresholds
        max_latency_ms: 5000
        fallback_to_fast_reranker: true
        fast_reranker_threshold: 100

        # HuggingFace API specific model configuration
        models:
          default_model:
            name: "cross-encoder/ms-marco-MiniLM-L6-v2"
            backend: "huggingface_api"
            api_token: "${HF_TOKEN}"
            batch_size: 32
            max_length: 512
            timeout: 10
            fallback_to_local: true
            max_candidates: 100
            score_threshold: 0.0
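        # Illustrative sketch (assumption, kept commented out): the `models` map
        # reads as a registry keyed by model name, so a second entry could
        # register a locally hosted cross-encoder as an explicit fallback target
        # for `fallback_to_local`. The `local_fallback` key and the
        # `backend: "local"` value below are hypothetical, not part of this
        # configuration as shipped.
        #
        #   local_fallback:
        #     name: "cross-encoder/ms-marco-MiniLM-L6-v2"
        #     backend: "local"
        #     batch_size: 32
        #     max_length: 512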
        default_model: "default_model"

    # Graph Retrieval Configuration (Epic 2)
    graph_retrieval:
      enabled: false
      enable_entity_linking: true
      enable_cross_references: true
      similarity_threshold: 0.65
      max_connections_per_document: 15
      use_pagerank: true
      pagerank_damping: 0.85
      use_community_detection: false
      community_algorithm: "louvain"
      max_graph_hops: 3
      graph_weight_decay: 0.5
      combine_with_vector_search: true

    # Analytics Configuration
    analytics:
      enabled: true
      collect_query_metrics: true
      collect_performance_metrics: true
      collect_quality_metrics: true
      dashboard_enabled: false
      dashboard_port: 8050
      dashboard_host: "localhost"
      auto_refresh_seconds: 5
      metrics_retention_days: 30
      detailed_logs_retention_days: 7

# Answer generator - HuggingFace API integration
answer_generator:
  type: "adaptive_modular"
  config:
    llm_client:
      type: "huggingface"
      config:
        model_name: "microsoft/DialoGPT-medium"
        api_token: "${HF_TOKEN}"
        timeout: 30
        use_chat_completion: true
        fallback_models:
          - "google/gemma-2-2b-it"
          - "google/flan-t5-small"
        max_tokens: 512
        temperature: 0.1
        top_p: 0.9
        stop_sequences: []

    prompt_builder:
      type: "simple"
      config:
        max_context_length: 12000
        include_instructions: true
        citation_style: "inline"
        template: |
          You are an expert technical assistant specializing in RISC-V architecture and computer systems.

          Context Documents:
          {context}

          Question: {query}

          Instructions:
          - Provide a comprehensive, detailed technical answer based ONLY on the provided context
          - Include technical specifications, encoding details, and implementation information when available
          - Explain concepts step-by-step with technical depth appropriate for engineers
          - Cover related concepts and connections mentioned in the context
          - Include specific examples, instruction formats, or implementation details when present
          - ALWAYS include citations in your answer using the format [Document X] where X is the document number
          - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
          - Multiple citations can be combined like [Document 1, Document 2]
          - If the answer is not fully covered by the context, clearly state what information is missing

          Answer:

    response_parser:
      type: "markdown"
      config:
        extract_citations: true

    confidence_scorer:
      type: "semantic"
      config:
        min_answer_length: 20
        max_answer_length: 1000
        relevance_weight: 0.4
        grounding_weight: 0.4
        quality_weight: 0.2
        low_retrieval_penalty: 0.3  # Penalty when few documents retrieved
        min_context_documents: 3    # Minimum documents for full confidence

# Global settings optimized for HuggingFace API usage
global_settings:
  environment: "hf_api"
  log_level: "debug"
  max_workers: 4
  enable_performance_monitoring: true
  enable_cost_monitoring: true

  # API-specific settings
  api_retry_attempts: 3
  api_retry_delay: 1.0
  api_timeout: 30

  # Memory optimization for API usage
  enable_memory_optimization: true
  unload_unused_models: true
  model_cache_size: 2
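
# Usage sketch (an assumption about the config loader, which is not shown here):
# the "${HF_TOKEN}" placeholders above are expected to be substituted from the
# environment, so the HuggingFace token should be exported before this file is
# loaded, for example:
#
#   export HF_TOKEN="hf_..."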