Spaces:

ArthyP
/

enhanced-rag-demo

Sleeping

enhanced-rag-demo / src /components /generators /prompt_builders /simple_prompt.py

Arthur Passuello

initial commit

5e1a30c about 1 month ago

8.46 kB

	"""
	Simple prompt builder implementation.

	This module provides a basic prompt builder that uses straightforward
	templates to construct prompts from queries and context documents.

	Architecture Notes:
	- Direct implementation (no adapter needed)
	- Pure prompt construction algorithm
	- Configurable templates for different use cases
	"""

	import logging
	from typing import List, Dict, Any, Optional
	from textwrap import dedent

	from ..base import PromptBuilder, Document, ConfigurableComponent

	logger = logging.getLogger(__name__)


	class SimplePromptBuilder(PromptBuilder, ConfigurableComponent):
	    """
	    Simple template-based prompt builder.

	    Renders a prompt by substituting the user query and a formatted,
	    length-limited listing of context documents into a ``str.format``
	    template.

	    Features:
	    - Configurable prompt templates
	    - Context length management
	    - Citation instruction injection
	    - Clear role definitions

	    Configuration:
	    - max_context_length: Maximum characters for context (default: 4000)
	    - include_instructions: Include detailed instructions (default: True)
	    - citation_style: How to format citations (default: "inline")
	    - template: Custom template overriding the built-in ones (default: None)
	    """

	    # Default prompt template
	    DEFAULT_TEMPLATE = dedent("""
	        You are a helpful assistant answering questions based on the provided context.

	        Context Documents:
	        {context}

	        Question: {query}

	        Instructions:
	        - Answer based ONLY on the provided context
	        - Be concise and direct
	        - If the answer is not in the context, say so
	        - ALWAYS include citations in your answer using the format [Document X] where X is the document number
	        - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
	        - Multiple citations can be combined like [Document 1, Document 2]

	        Answer:
	    """).strip()

	    # Minimal template without instructions
	    MINIMAL_TEMPLATE = dedent("""
	        Context: {context}

	        Question: {query}

	        Answer based on the context:
	    """).strip()

	    # Placeholders every template is expected to contain
	    _REQUIRED_PLACEHOLDERS = ("{context}", "{query}")

	    # Text appended to a document that was cut to fit the context budget
	    _TRUNCATION_MARKER = "\n[Truncated...]"

	    # A partial document is only emitted when more than this many
	    # characters of budget are left; smaller remainders are dropped
	    _MIN_TRUNCATED_LENGTH = 100

	    # Separator placed between formatted documents
	    _DOC_SEPARATOR = "\n"

	    # Marker that citation instructions are inserted in front of
	    _ANSWER_MARKER = "\nAnswer:"

	    def __init__(self,
	                 max_context_length: int = 4000,
	                 include_instructions: bool = True,
	                 citation_style: str = "inline",
	                 template: Optional[str] = None,
	                 config: Optional[Dict[str, Any]] = None):
	        """
	        Initialize simple prompt builder.

	        Args:
	            max_context_length: Maximum characters for context
	            include_instructions: Include detailed instructions
	            citation_style: Citation format ("inline", "footnote", "none")
	            template: Custom template (uses default if None)
	            config: Additional configuration; its entries override the
	                keyword arguments above when the same key is present
	        """
	        # Merge config; ``config`` is unpacked last so it takes precedence
	        builder_config = {
	            'max_context_length': max_context_length,
	            'include_instructions': include_instructions,
	            'citation_style': citation_style,
	            'template': template,
	            **(config or {}),
	        }

	        super().__init__(builder_config)

	        # Set configuration
	        self.max_context_length = builder_config['max_context_length']
	        self.include_instructions = builder_config['include_instructions']
	        self.citation_style = builder_config['citation_style']

	        # Select template
	        custom_template = builder_config['template']
	        if custom_template:
	            # set_template() rejects templates without both placeholders.
	            # Here we only warn, so configurations that worked before
	            # keep working.
	            if isinstance(custom_template, str):
	                missing = [
	                    placeholder
	                    for placeholder in self._REQUIRED_PLACEHOLDERS
	                    if placeholder not in custom_template
	                ]
	                if missing:
	                    logger.warning(
	                        "Custom prompt template is missing placeholder(s): %s",
	                        ", ".join(missing),
	                    )
	            self.template = custom_template
	        elif self.include_instructions:
	            self.template = self.DEFAULT_TEMPLATE
	        else:
	            self.template = self.MINIMAL_TEMPLATE

	    def build_prompt(self, query: str, context: List[Document]) -> str:
	        """
	        Build a prompt from query and context documents.

	        Args:
	            query: User query string
	            context: List of relevant context documents; an empty list (or
	                None) yields a fixed "no context" notice in the prompt

	        Returns:
	            Formatted prompt string

	        Raises:
	            ValueError: If query is empty, None, or only whitespace
	        """
	        # ``not query`` also covers None, which would otherwise surface as
	        # an AttributeError from ``.strip()``
	        if not query or not query.strip():
	            raise ValueError("Query cannot be empty")

	        if context:
	            # Format context documents
	            context_text = self._format_context(context)
	        else:
	            # Handle empty context gracefully
	            context_text = "No relevant context documents found."

	        # Build prompt from template
	        prompt = self.template.format(
	            context=context_text,
	            query=query.strip(),
	        )

	        # Add citation instructions if needed. The check looks at the
	        # template rather than the rendered prompt: otherwise a query or a
	        # retrieved document that merely mentions "citation" would
	        # suppress the instructions.
	        if self.citation_style != "none" and "citation" not in self.template.lower():
	            prompt = self._add_citation_instructions(prompt)

	        logger.debug(
	            "Built prompt with %d documents, length: %d",
	            len(context) if context else 0,
	            len(prompt),
	        )

	        return prompt

	    def get_template(self) -> str:
	        """Return the prompt template being used."""
	        return self.template

	    def get_builder_info(self) -> Dict[str, Any]:
	        """
	        Get information about the prompt builder.

	        Returns:
	            Dictionary with the builder type, class name, current settings,
	            template length, and a preview of the template limited to its
	            first 100 characters (followed by '...' when cut)
	        """
	        if len(self.template) > 100:
	            template_preview = self.template[:100] + '...'
	        else:
	            template_preview = self.template

	        return {
	            'type': 'simple',
	            'builder_class': self.__class__.__name__,
	            'max_context_length': self.max_context_length,
	            'include_instructions': self.include_instructions,
	            'citation_style': self.citation_style,
	            'template_length': len(self.template),
	            'template_preview': template_preview,
	        }

	    def _format_context(self, documents: List[Document]) -> str:
	        """
	        Format context documents into a readable string.

	        Each document is rendered as a "[Document N]" header (optionally
	        followed by its source and page) and its content. Documents are
	        added in order until ``max_context_length`` characters are used;
	        the first document that does not fit is truncated if more than
	        ``_MIN_TRUNCATED_LENGTH`` characters remain, and all later
	        documents are dropped.

	        Args:
	            documents: List of documents

	        Returns:
	            Formatted context string, at most ``max_context_length``
	            characters long
	        """
	        limit = self.max_context_length
	        separator = self._DOC_SEPARATOR
	        marker = self._TRUNCATION_MARKER

	        formatted_docs = []
	        used = 0  # characters consumed so far, separators included

	        for index, doc in enumerate(documents, 1):
	            # Format document with citation marker
	            metadata = doc.metadata or {}
	            doc_header = f"[Document {index}]"
	            source = metadata.get('source')
	            if source:
	                doc_header += f" Source: {source}"
	            page = metadata.get('page') or metadata.get('start_page')
	            if page:
	                doc_header += f" (Page {page})"

	            doc_text = f"{doc_header}\n{doc.content}\n"

	            # Every document after the first costs one separator as well
	            separator_cost = len(separator) if formatted_docs else 0

	            # Check if adding this document would exceed limit
	            if used + separator_cost + len(doc_text) > limit:
	                remaining = limit - used - separator_cost
	                if remaining > self._MIN_TRUNCATED_LENGTH:
	                    # Reserve room for the marker so the truncated piece
	                    # uses exactly the remaining budget
	                    keep = remaining - len(marker)
	                    formatted_docs.append(doc_text[:keep] + marker)
	                break

	            formatted_docs.append(doc_text)
	            used += separator_cost + len(doc_text)

	        return separator.join(formatted_docs).strip()

	    def _add_citation_instructions(self, prompt: str) -> str:
	        """
	        Add citation instructions to the prompt.

	        The instruction for the configured ``citation_style`` is inserted
	        in front of the last "Answer:" line of the prompt, or appended to
	        the end when there is no such line. Unknown styles and "none"
	        leave the prompt unchanged.

	        Args:
	            prompt: Original prompt

	        Returns:
	            Prompt with citation instructions
	        """
	        citation_instructions = {
	            "inline": "\nIMPORTANT: You MUST include citations in the format [Document X] after every factual claim. For example: 'RISC-V is an open-source ISA [Document 1].'",
	            "footnote": "\nInclude footnote-style citations at the end of your answer.",
	            "none": ""
	        }

	        instruction = citation_instructions.get(self.citation_style, "")
	        if not instruction:
	            return prompt

	        # Only the final "Answer:" line is the template's answer cue;
	        # earlier occurrences may be part of document text or the query
	        # and must be left alone.
	        head, found, tail = prompt.rpartition(self._ANSWER_MARKER)
	        if found:
	            return f"{head}{instruction}\n{self._ANSWER_MARKER}{tail}"

	        return prompt + instruction

	    def set_template(self, template: str) -> None:
	        """
	        Set a custom prompt template.

	        Args:
	            template: New template with {context} and {query} placeholders

	        Raises:
	            ValueError: If template is missing required placeholders
	        """
	        if any(placeholder not in template for placeholder in self._REQUIRED_PLACEHOLDERS):
	            raise ValueError("Template must contain {context} and {query} placeholders")

	        self.template = template
	        # NOTE(review): ``self.config`` is not assigned in this class;
	        # presumably the base-class initializer provides it - confirm.
	        self.config['template'] = template
	        logger.info("Updated prompt template")