# Epic 2 Configuration with Graph Enhancement - Calibration Ready
# This config enables graph enhancement with all parameters registered for calibration

# Document processor for handling input files
document_processor:
  type: "hybrid_pdf"
  config:
    chunk_size: 1024
    chunk_overlap: 128
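    # With 1024-unit chunks and a 128-unit overlap, consecutive chunks share ~12.5% of their
    # content; whether the unit is tokens or characters depends on the hybrid_pdf processor.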

# High-performance embedder for Epic 2
embedder:
  type: "modular"
  config:
    model:
      type: "sentence_transformer"
      config:
        model_name: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
        device: "mps"
        normalize_embeddings: true
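        # "mps" is PyTorch's Apple Metal backend; switch to "cuda" or "cpu" on other hardware.
        # multi-qa-MiniLM-L6-cos-v1 produces 384-dimensional sentence embeddings.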
    batch_processor:
      type: "dynamic"
      config:
        initial_batch_size: 64
        max_batch_size: 256
        optimize_for_memory: false
    cache:
      type: "memory"
      config:
        max_entries: 100000
        max_memory_mb: 1024
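        # Rough sizing: 100,000 cached 384-dim float32 vectors is ~150 MB, comfortably under the
        # 1024 MB cap (assuming the cache stores raw embedding vectors).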

# Epic 2 with Graph Enhancement - All Parameters Calibration-Ready
retriever:
  type: "modular_unified"
  config:
    min_semantic_alignment: 0.2
    vector_index:
      type: "faiss"
      config:
        index_type: "IndexFlatIP"
        normalize_embeddings: true
        metric: "cosine"
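        # IndexFlatIP performs exact (brute-force) inner-product search; with L2-normalized
        # embeddings, inner product is equivalent to cosine similarity.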
    
    sparse:
      type: "bm25"
      config:
        k1: 1.2                    # Calibration parameter
        b: 0.75                    # Calibration parameter (original "problematic" value)
        lowercase: true
        filter_stop_words: true
        stop_word_sets: ["english_common"]
        preserve_technical_terms: true
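        # BM25: k1 controls term-frequency saturation and b controls document-length
        # normalization; k1=1.2 / b=0.75 are the textbook defaults, so calibration starts
        # from a standard baseline.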
    
    # Basic RRF Fusion (default configuration)
    fusion:
      type: "rrf"
      config:
        k: 60                    # RRF rank constant (lower k gives more weight to top-ranked results)
        weights:
          dense: 0.7             # Dense weight
          sparse: 0.3            # Sparse weight
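        # Reciprocal Rank Fusion scores each document as sum_i w_i / (k + rank_i); k=60 is the
        # constant from the original RRF paper. The dense/sparse weights are assumed here to
        # scale each list's reciprocal-rank contribution.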
    
    # Basic Identity Reranker (no reranking)
    reranker:
      type: "identity"
      config:
        enabled: false
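    # With the identity reranker disabled, retrieval results are presumably passed through in
    # fused (RRF) order with no additional model inference.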

# Mock answer generation for testing
answer_generator:
  type: "adaptive_modular"
  config:
    llm_client:
      type: "mock"
      config:
        model_name: "mock-llm-for-testing"
        response_pattern: "technical"
        include_citations: true
    
    temperature: 0.3
    max_tokens: 1024
    confidence_threshold: 0.85     # Calibration parameter
    
    prompt_builder:
      type: "simple"
      config:
        max_context_length: 12000
        include_instructions: true
        citation_style: "inline"
    
    response_parser:
      type: "markdown"
      config:
        preserve_formatting: true
        extract_citations: true
    
    confidence_scorer:
      type: "semantic"
      config:
        relevance_weight: 0.4
        grounding_weight: 0.4
        quality_weight: 0.2
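        # The three weights sum to 1.0; assuming a weighted average, the combined confidence
        # stays on the same 0-1 scale as its relevance, grounding, and quality components.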

# Global settings
global_settings:
  log_level: "INFO"
  cache_enabled: true
  performance_monitoring: true
  max_concurrent_requests: 8