# Spaces: ArthyP/enhanced-rag-demo (status: Running)
# File: enhanced-rag-demo/config/epic2_graph_calibrated.yaml
# Author: Arthur Passuello
# Commit: 5e1a30c ("initial commit"), about 1 month ago
# Size: 2.42 kB

# Epic 2 Calibrated Configuration - Graph Retrieval Optimized
# Graph parameters optimized through calibration system (Score: 0.8000)
# Optimal parameters: graph_weight=0.1, similarity_threshold=0.5

	# Document processor for handling input files
	document_processor:
	type: "hybrid_pdf"
	config:
	chunk_size: 1024
	chunk_overlap: 128

	# Embedding generator for converting text to vectors
	embedder:
	type: "modular"
	config:
	model:
	type: "sentence_transformer"
	config:
	model_name: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
	device: "mps"
	normalize_embeddings: true
	batch_processor:
	type: "dynamic"
	config:
	initial_batch_size: 64
	max_batch_size: 256
	optimize_for_memory: false
	cache:
	type: "memory"
	config:
	max_entries: 100000
	max_memory_mb: 1024

	# Epic 2 ModularUnifiedRetriever with Graph Enhancement Enabled
	retriever:
	type: "modular_unified"
	config:
	vector_index:
	type: "faiss"
	config:
	index_type: "IndexFlatIP"
	normalize_embeddings: true
	metric: "cosine"

	sparse:
	type: "bm25"
	config:
	k1: 1.2
	b: 0.75
	lowercase: true
	preserve_technical_terms: true

	# Epic 2 Feature: Graph Enhanced Fusion - ENABLED
	fusion:
	type: "graph_enhanced_rrf"
	config:
	k: 60
	weights:
	dense: 0.4 # Reduced to accommodate graph
	sparse: 0.3 # Maintained
	graph: 0.1 # CALIBRATED: Optimal graph weight
	graph_enabled: true
	similarity_threshold: 0.5 # CALIBRATED: Optimal similarity threshold
	max_connections_per_document: 15
	use_pagerank: true
	pagerank_damping: 0.85

	# Identity reranker (neural disabled for graph-only testing)
	reranker:
	type: "identity"
	config:
	enabled: false

	# Answer generation strategy
	answer_generator:
	type: "adaptive_modular"
	config:
	prompt_builder:
	type: "simple"
	config: {}
	llm_client:
	type: "ollama"
	config:
	model_name: "llama3.2:3b"
	base_url: "http://localhost:11434"
	timeout: 30
	response_parser:
	type: "markdown"
	config: {}
	confidence_scorer:
	type: "semantic"
	config: {}

	# Global settings
	global_settings:
	environment: "testing"
	log_level: "info"