# Source: Hugging Face Space ArthyP/enhanced-rag-demo (status: Running)
# File: enhanced-rag-demo/config/epic2_hf_api.yaml
# Last commit: 5e1a30c, "initial commit", by Arthur Passuello, about 1 month ago
# Size: 8.05 kB

	# Epic 2 Configuration with HuggingFace API Integration
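	# This configuration keeps the Epic 2 feature set (neural reranking, graph enhancement, analytics)
	# while using the HuggingFace API for both LLM generation and neural reranking.
	# Note: graph retrieval is configured below but currently disabled (enabled: false, graph_weight: 0.0).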

	# Document processor for handling input files
	document_processor:
	  type: "hybrid_pdf"
	  config:
	    # Chunk length and the overlap shared by consecutive chunks.
	    # NOTE(review): units are not stated here (characters vs. tokens) — confirm with the processor.
	    chunk_size: 1024
	    chunk_overlap: 128

	# Embedding generator for converting text to vectors
	embedder:
	  type: "modular"
	  config:
	    # Embedding model sub-component
	    model:
	      type: "sentence_transformer"
	      config:
	        model_name: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
	        device: "auto"
	        normalize_embeddings: true
	    # Batching sub-component
	    batch_processor:
	      type: "dynamic"
	      config:
	        initial_batch_size: 64
	        max_batch_size: 256
	        optimize_for_memory: false
	    # Embedding cache sub-component
	    cache:
	      type: "memory"
	      config:
	        max_entries: 100000
	        max_memory_mb: 1024

	# EPIC 2 ADVANCED RETRIEVER WITH API RERANKING
	retriever:
	  type: "modular_unified"
	  config:
	    # NOTE(review): nesting in this section was reconstructed from a flattened
	    # listing — confirm against the schema the retriever actually loads.

	    # Composite filtering configuration (NEW - replaces semantic gap detection)
	    composite_filtering:
	      enabled: true
	      fusion_weight: 0.7  # α - weight for fusion score importance
	      semantic_weight: 0.3  # β - weight for semantic similarity
	      min_composite_score: 0.4  # threshold for document inclusion
	      max_candidates: 15  # reduce from k2 to k1.5 for efficiency

	    # Legacy semantic gap detection (DEPRECATED - use composite_filtering)
	    min_semantic_alignment: 0.3  # Minimum query-document semantic similarity

	    # Backend Configuration
	    backends:
	      primary_backend: "faiss"
	      # NOTE(review): fallback is enabled but no fallback backend is named —
	      # confirm this combination is intended.
	      fallback_enabled: true
	      fallback_backend: null

	      # Hot-swapping configuration
	      enable_hot_swap: false
	      health_check_interval_seconds: 30
	      switch_threshold_error_rate: 0.1

	      # FAISS backend settings
	      faiss:
	        index_type: "IndexFlatIP"
	        normalize_embeddings: true
	        metric: "cosine"

	      # Weaviate backend settings (disabled for testing)
	      weaviate: null

	    # BM25 Sparse Retrieval Configuration
	    sparse:
	      type: "bm25"
	      config:
	        k1: 1.2
	        b: 0.75
	        lowercase: true
	        preserve_technical_terms: true
	        filter_stop_words: true  # Enable stop word filtering
	        custom_stop_words: []  # Additional stop words if needed
	        min_score: 0.1  # Minimum normalized score threshold

	    # Score-Aware Fusion (preserves semantic relevance)
	    fusion:
	      type: "score_aware"  # Use ScoreAwareFusion
	      config:
	        score_weight: 0.9  # α - semantic score importance (very high)
	        rank_weight: 0.1  # β - rank stability factor (minimal)
	        overlap_weight: 0.0  # γ - both-retriever bonus (disabled)
	        normalize_scores: false  # Score normalization disabled
	        k: 60  # RRF constant for rank component

	    # Hybrid Search Configuration
	    hybrid_search:
	      enabled: true

	      # Strategy weights (must sum to 1.0) - Semantic-focused configuration
	      dense_weight: 0.8
	      sparse_weight: 0.2
	      graph_weight: 0.0

	      # Fusion method
	      fusion_method: "score_aware"
	      rrf_k: 60

	      # Advanced fusion parameters
	      adaptive_weights: false
	      query_dependent_weighting: false
	      # NOTE(review): fusion.config.normalize_scores is false while a
	      # normalization method is named here — confirm which one takes effect.
	      normalization_method: "min_max"

	      # Performance optimization
	      max_candidates_per_strategy: 200
	      early_termination_threshold: 0.95

	    # Reranker Configuration - HuggingFace API Backend
	    reranker:
	      type: "neural"
	      config:
	        enabled: true

	        # Model configuration with HuggingFace API
	        model_name: "cross-encoder/ms-marco-MiniLM-L6-v2"
	        model_type: "cross_encoder"

	        # Reranking parameters
	        max_candidates: 100
	        batch_size: 32
	        max_length: 512

	        # Performance thresholds
	        max_latency_ms: 5000
	        fallback_to_fast_reranker: true
	        fast_reranker_threshold: 100

	        # HuggingFace API specific model configuration
	        models:
	          default_model:
	            name: "cross-encoder/ms-marco-MiniLM-L6-v2"
	            backend: "huggingface_api"
	            # Placeholder; presumably expanded from the HF_TOKEN environment
	            # variable by the config loader — verify.
	            api_token: "${HF_TOKEN}"
	            batch_size: 32
	            max_length: 512
	            timeout: 10
	            fallback_to_local: true
	            max_candidates: 100
	            score_threshold: 0.0
	        # Presumably the key under `models` to use by default — verify.
	        default_model: "default_model"

	    # Graph Retrieval Configuration (Epic 2)
	    graph_retrieval:
	      enabled: false
	      enable_entity_linking: true
	      enable_cross_references: true
	      similarity_threshold: 0.65
	      max_connections_per_document: 15
	      use_pagerank: true
	      pagerank_damping: 0.85
	      use_community_detection: false
	      community_algorithm: "louvain"
	      max_graph_hops: 3
	      graph_weight_decay: 0.5
	      combine_with_vector_search: true

	    # Analytics Configuration
	    analytics:
	      enabled: true
	      collect_query_metrics: true
	      collect_performance_metrics: true
	      collect_quality_metrics: true
	      dashboard_enabled: false
	      dashboard_port: 8050
	      dashboard_host: "localhost"
	      auto_refresh_seconds: 5
	      metrics_retention_days: 30
	      detailed_logs_retention_days: 7

	# Answer generator - HuggingFace API integration
	answer_generator:
	  type: "adaptive_modular"
	  config:
	    # NOTE(review): nesting in this section was reconstructed from a flattened
	    # listing — confirm against the schema the generator actually loads.

	    # LLM client
	    llm_client:
	      type: "huggingface"
	      config:
	        model_name: "microsoft/DialoGPT-medium"
	        # Placeholder; presumably expanded from the HF_TOKEN environment
	        # variable by the config loader — verify.
	        api_token: "${HF_TOKEN}"
	        timeout: 30
	        use_chat_completion: true
	        fallback_models:
	          - "google/gemma-2-2b-it"
	          - "google/flan-t5-small"
	        max_tokens: 512
	        temperature: 0.1
	        top_p: 0.9
	        stop_sequences: []

	    # Prompt construction; {context} and {query} are the template placeholders.
	    prompt_builder:
	      type: "simple"
	      config:
	        max_context_length: 12000
	        include_instructions: true
	        citation_style: "inline"
	        # Literal block scalar: line breaks in the prompt are kept as written.
	        template: |
	          You are an expert technical assistant specializing in RISC-V architecture and computer systems.

	          Context Documents:
	          {context}

	          Question: {query}

	          Instructions:
	          - Provide a comprehensive, detailed technical answer based ONLY on the provided context
	          - Include technical specifications, encoding details, and implementation information when available
	          - Explain concepts step-by-step with technical depth appropriate for engineers
	          - Cover related concepts and connections mentioned in the context
	          - Include specific examples, instruction formats, or implementation details when present
	          - ALWAYS include citations in your answer using the format [Document X] where X is the document number
	          - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
	          - Multiple citations can be combined like [Document 1, Document 2]
	          - If the answer is not fully covered by the context, clearly state what information is missing

	          Answer:

	    # Response parsing
	    response_parser:
	      type: "markdown"
	      config:
	        extract_citations: true

	    # Confidence scoring; the three *_weight values below sum to 1.0.
	    confidence_scorer:
	      type: "semantic"
	      config:
	        min_answer_length: 20
	        max_answer_length: 1000
	        relevance_weight: 0.4
	        grounding_weight: 0.4
	        quality_weight: 0.2
	        low_retrieval_penalty: 0.3  # Penalty when few documents retrieved
	        min_context_documents: 3  # Minimum documents for full confidence

	# Global settings optimized for HuggingFace API usage
	global_settings:
	  environment: "hf_api"
	  log_level: "debug"
	  max_workers: 4
	  enable_performance_monitoring: true
	  enable_cost_monitoring: true

	  # API-specific settings
	  # NOTE(review): units for the delay and timeout are not stated here
	  # (presumably seconds) — confirm.
	  api_retry_attempts: 3
	  api_retry_delay: 1.0
	  api_timeout: 30

	  # Memory optimization for API usage
	  enable_memory_optimization: true
	  unload_unused_models: true
	  model_cache_size: 2