# File size: 8,045 Bytes
# 5e1a30c (source revision)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
# Epic 2 Configuration with HuggingFace API Integration
# This configuration preserves all Epic 2 features (neural reranking, graph enhancement, analytics)
# while using HuggingFace API for both LLM generation and neural reranking

# Document processor for handling input files.
# Parses source PDFs and splits them into overlapping chunks for indexing.
document_processor:
  type: "hybrid_pdf"
  config:
    chunk_size: 1024     # size per chunk — unit (chars vs tokens) defined by the processor impl; TODO confirm
    chunk_overlap: 128   # overlap between consecutive chunks, preserves context across chunk boundaries

# Embedding generator for converting text to vectors
embedder:
  type: "modular"
  config:
    model:
      type: "sentence_transformer"
      config:
        model_name: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
        device: "auto"                # device auto-selection — semantics defined by the embedder impl
        normalize_embeddings: true    # unit-length vectors; pairs with the IndexFlatIP/cosine FAISS backend below
    batch_processor:
      type: "dynamic"
      config:
        initial_batch_size: 64
        max_batch_size: 256           # upper bound for dynamic batch growth
        optimize_for_memory: false    # favor throughput over memory footprint
    cache:
      type: "memory"
      config:
        max_entries: 100000           # embedding cache capacity (entries)
        max_memory_mb: 1024           # memory cap for the cache

# EPIC 2 ADVANCED RETRIEVER WITH API RERANKING
retriever:
  type: "modular_unified"
  config:
    # Composite filtering configuration (NEW - replaces semantic gap detection)
    composite_filtering:
      enabled: true
      fusion_weight: 0.7          # α - weight for fusion score importance
      semantic_weight: 0.3        # β - weight for semantic similarity (α + β = 1.0)
      min_composite_score: 0.4    # threshold for document inclusion
      max_candidates: 15          # reduce from k*2 to k*1.5 for efficiency

    # Legacy semantic gap detection (DEPRECATED - use composite_filtering).
    # Retained so older consumers that still read this key keep working.
    min_semantic_alignment: 0.3  # Minimum query-document semantic similarity

    # Backend Configuration
    backends:
      primary_backend: "faiss"
      # FIX: this was `true` while fallback_backend is null — no fallback can
      # ever engage without a configured backend, so state the effective
      # behavior explicitly instead of advertising a fallback that cannot run.
      fallback_enabled: false
      fallback_backend: null

      # Hot-swapping configuration (inert while enable_hot_swap is false)
      enable_hot_swap: false
      health_check_interval_seconds: 30
      switch_threshold_error_rate: 0.1   # error rate that would trigger a backend switch

      # FAISS backend settings
      faiss:
        index_type: "IndexFlatIP"        # exact inner-product search
        normalize_embeddings: true       # with unit vectors, inner product equals cosine similarity
        metric: "cosine"

      # Weaviate backend settings (disabled for testing)
      weaviate: null

    # BM25 Sparse Retrieval Configuration
    sparse:
      type: "bm25"
      config:
        k1: 1.2                  # BM25 term-frequency saturation parameter
        b: 0.75                  # BM25 document-length normalization parameter
        lowercase: true
        preserve_technical_terms: true
        filter_stop_words: true  # Enable stop word filtering
        custom_stop_words: []    # Additional stop words if needed
        min_score: 0.1           # Minimum normalized score threshold

    # Score-Aware Fusion (preserves semantic relevance)
    fusion:
      type: "score_aware"  # Use ScoreAwareFusion
      config:
        score_weight: 0.9       # α - semantic score importance (very high)
        rank_weight: 0.1        # β - rank stability factor (minimal)
        overlap_weight: 0.0     # γ - both-retriever bonus (disabled)
        normalize_scores: false # Score normalization disabled
        k: 60                   # RRF constant for rank component

    # Hybrid Search Configuration
    hybrid_search:
      enabled: true

      # Strategy weights (must sum to 1.0) - Semantic-focused configuration
      dense_weight: 0.8
      sparse_weight: 0.2
      graph_weight: 0.0   # graph strategy off — see graph_retrieval.enabled below

      # Fusion method
      fusion_method: "score_aware"
      rrf_k: 60

      # Advanced fusion parameters
      adaptive_weights: false
      query_dependent_weighting: false
      normalization_method: "min_max"

      # Performance optimization
      max_candidates_per_strategy: 200
      early_termination_threshold: 0.95

    # Reranker Configuration - HuggingFace API Backend
    reranker:
      type: "neural"
      config:
        enabled: true

        # Model configuration with HuggingFace API
        model_name: "cross-encoder/ms-marco-MiniLM-L6-v2"
        model_type: "cross_encoder"

        # Reranking parameters
        max_candidates: 100
        batch_size: 32
        max_length: 512    # max token length per query/document pair

        # Performance thresholds
        max_latency_ms: 5000
        fallback_to_fast_reranker: true
        fast_reranker_threshold: 100

        # HuggingFace API specific model configuration
        models:
          default_model:
            name: "cross-encoder/ms-marco-MiniLM-L6-v2"
            backend: "huggingface_api"
            api_token: "${HF_TOKEN}"   # injected from environment; never commit a literal token
            batch_size: 32
            max_length: 512
            timeout: 10                # seconds per API call
            fallback_to_local: true    # run the cross-encoder locally if the API is unavailable
            max_candidates: 100
            score_threshold: 0.0
        # Which entry in `models` above to use by default
        default_model: "default_model"

    # Graph Retrieval Configuration (Epic 2) — currently disabled; the
    # remaining keys in this section are inert until `enabled` is true.
    graph_retrieval:
      enabled: false
      enable_entity_linking: true
      enable_cross_references: true
      similarity_threshold: 0.65
      max_connections_per_document: 15
      use_pagerank: true
      pagerank_damping: 0.85
      use_community_detection: false
      community_algorithm: "louvain"
      max_graph_hops: 3
      graph_weight_decay: 0.5
      combine_with_vector_search: true

    # Analytics Configuration
    analytics:
      enabled: true
      collect_query_metrics: true
      collect_performance_metrics: true
      collect_quality_metrics: true
      dashboard_enabled: false          # dashboard_* keys below are inert while this is false
      dashboard_port: 8050
      dashboard_host: "localhost"
      auto_refresh_seconds: 5
      metrics_retention_days: 30
      detailed_logs_retention_days: 7

# Answer generator - HuggingFace API integration
answer_generator:
  type: "adaptive_modular"
  config:
    llm_client:
      type: "huggingface"
      config:
        # NOTE(review): DialoGPT-medium is a conversational (non-instruct)
        # model, while use_chat_completion: true and the instruct-tuned
        # fallbacks below suggest an instruct model was intended — confirm.
        model_name: "microsoft/DialoGPT-medium"
        api_token: "${HF_TOKEN}"    # injected from environment; never commit a literal token
        timeout: 30                 # seconds per API call
        use_chat_completion: true
        # Tried in order when the primary model fails or is unavailable.
        fallback_models:
          - "google/gemma-2-2b-it"
          - "google/flan-t5-small"
        max_tokens: 512
        temperature: 0.1            # low temperature for near-deterministic technical answers
        top_p: 0.9
        stop_sequences: []          # no custom stop sequences
    
    prompt_builder:
      type: "simple"
      config:
        max_context_length: 12000
        include_instructions: true
        citation_style: "inline"
        template: |
          You are an expert technical assistant specializing in RISC-V architecture and computer systems.
          
          Context Documents:
          {context}
          
          Question: {query}
          
          Instructions:
          - Provide a comprehensive, detailed technical answer based ONLY on the provided context
          - Include technical specifications, encoding details, and implementation information when available
          - Explain concepts step-by-step with technical depth appropriate for engineers
          - Cover related concepts and connections mentioned in the context
          - Include specific examples, instruction formats, or implementation details when present
          - ALWAYS include citations in your answer using the format [Document X] where X is the document number
          - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
          - Multiple citations can be combined like [Document 1, Document 2]
          - If the answer is not fully covered by the context, clearly state what information is missing
          
          Answer:
        
    response_parser:
      type: "markdown"
      config:
        extract_citations: true  # parse the [Document X] markers requested by the prompt template

    confidence_scorer:
      type: "semantic"
      config:
        min_answer_length: 20
        max_answer_length: 1000
        # Component weights (sum to 1.0)
        relevance_weight: 0.4
        grounding_weight: 0.4
        quality_weight: 0.2
        low_retrieval_penalty: 0.3  # Penalty when few documents retrieved
        min_context_documents: 3    # Minimum documents for full confidence

# Global settings optimized for HuggingFace API usage
global_settings:
  environment: "hf_api"
  log_level: "debug"                    # verbose; consider "info" for production deployments
  max_workers: 4
  enable_performance_monitoring: true
  enable_cost_monitoring: true          # cost tracking for metered API usage — semantics defined by consumer

  # API-specific settings
  api_retry_attempts: 3
  api_retry_delay: 1.0                  # seconds between retries
  api_timeout: 30                       # seconds

  # Memory optimization for API usage
  enable_memory_optimization: true
  unload_unused_models: true            # free local models when not in use
  model_cache_size: 2                   # max local models kept resident