# Arthur Passuello
# initial commit
# 5e1a30c
"""
Simple prompt builder implementation.
This module provides a basic prompt builder that uses straightforward
templates to construct prompts from queries and context documents.
Architecture Notes:
- Direct implementation (no adapter needed)
- Pure prompt construction algorithm
- Configurable templates for different use cases
"""
import logging
from typing import List, Dict, Any, Optional
from textwrap import dedent
from ..base import PromptBuilder, Document, ConfigurableComponent
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class SimplePromptBuilder(PromptBuilder, ConfigurableComponent):
    """
    Simple template-based prompt builder.

    Features:
    - Configurable prompt templates
    - Context length management
    - Citation instruction injection
    - Clear role definitions

    Configuration:
    - max_context_length: Maximum characters for context (default: 4000)
    - include_instructions: Include detailed instructions (default: True)
    - citation_style: How to format citations (default: "inline")
    """

    # Default prompt template (already carries its own citation instructions)
    DEFAULT_TEMPLATE = dedent("""
        You are a helpful assistant answering questions based on the provided context.
        Context Documents:
        {context}
        Question: {query}
        Instructions:
        - Answer based ONLY on the provided context
        - Be concise and direct
        - If the answer is not in the context, say so
        - ALWAYS include citations in your answer using the format [Document X] where X is the document number
        - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
        - Multiple citations can be combined like [Document 1, Document 2]
        Answer:
    """).strip()

    # Minimal template without instructions
    MINIMAL_TEMPLATE = dedent("""
        Context: {context}
        Question: {query}
        Answer based on the context:
    """).strip()

    # Marker appended to a document that had to be cut to fit the budget
    _TRUNCATION_MARKER = "\n[Truncated...]"

    # Anchor in front of which citation instructions are inserted
    _ANSWER_ANCHOR = "\nAnswer:"

    def __init__(self,
                 max_context_length: int = 4000,
                 include_instructions: bool = True,
                 citation_style: str = "inline",
                 template: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None):
        """
        Initialize simple prompt builder.

        Args:
            max_context_length: Maximum characters for context
            include_instructions: Include detailed instructions
            citation_style: Citation format ("inline", "footnote", "none")
            template: Custom template (uses default if None)
            config: Additional configuration; its keys override the
                keyword arguments above when both are given
        """
        # Merge config: entries in `config` win over the explicit arguments
        builder_config = {
            'max_context_length': max_context_length,
            'include_instructions': include_instructions,
            'citation_style': citation_style,
            'template': template,
            **(config or {}),
        }
        super().__init__(builder_config)

        # Set configuration
        self.max_context_length = builder_config['max_context_length']
        self.include_instructions = builder_config['include_instructions']
        self.citation_style = builder_config['citation_style']

        # Select template: custom > default (with instructions) > minimal
        if builder_config['template']:
            self.template = builder_config['template']
        elif self.include_instructions:
            self.template = self.DEFAULT_TEMPLATE
        else:
            self.template = self.MINIMAL_TEMPLATE

    def build_prompt(self, query: str, context: List[Document]) -> str:
        """
        Build a prompt from query and context documents.

        Args:
            query: User query string
            context: List of relevant context documents; an empty (or
                None) value yields a placeholder context message

        Returns:
            Formatted prompt string

        Raises:
            ValueError: If query is empty, whitespace-only or None
        """
        if not query or not query.strip():
            raise ValueError("Query cannot be empty")

        if not context:
            # Handle empty context gracefully
            context_text = "No relevant context documents found."
        else:
            # Format context documents
            context_text = self._format_context(context)

        # Build prompt from template
        prompt = self.template.format(
            context=context_text,
            query=query.strip()
        )

        # Add citation instructions if the template does not already carry
        # them. The check is done on the template, not the rendered prompt,
        # so that a document or query mentioning "citation" cannot suppress
        # the instructions.
        if self.citation_style != "none" and "citation" not in self.template.lower():
            prompt = self._add_citation_instructions(prompt)

        # `context` may be None on the empty-context path, so guard len()
        doc_count = len(context) if context else 0
        logger.debug("Built prompt with %d documents, length: %d", doc_count, len(prompt))
        return prompt

    def get_template(self) -> str:
        """Return the prompt template being used."""
        return self.template

    def get_builder_info(self) -> Dict[str, Any]:
        """Get information about the prompt builder."""
        # Preview is capped at 100 characters, with an ellipsis when cut
        if len(self.template) > 100:
            template_preview = self.template[:100] + '...'
        else:
            template_preview = self.template
        return {
            'type': 'simple',
            'builder_class': self.__class__.__name__,
            'max_context_length': self.max_context_length,
            'include_instructions': self.include_instructions,
            'citation_style': self.citation_style,
            'template_length': len(self.template),
            'template_preview': template_preview,
        }

    def _format_context(self, documents: List[Document]) -> str:
        """
        Format context documents into a readable string.

        Each document is rendered as a "[Document N]" header (optionally
        followed by its source and page) and its content. Documents are
        added in order until max_context_length would be exceeded; the
        first document that does not fit is truncated when more than 100
        characters of budget remain, otherwise dropped, and all later
        documents are skipped.

        Args:
            documents: List of documents

        Returns:
            Formatted context string, at most max_context_length characters
        """
        separator = "\n"
        marker = self._TRUNCATION_MARKER
        formatted_docs = []
        total_length = 0

        for i, doc in enumerate(documents, 1):
            # Tolerate documents whose metadata is None
            metadata = doc.metadata or {}

            # Format document with citation marker
            doc_header = f"[Document {i}]"
            source = metadata.get('source')
            if source:
                doc_header += f" Source: {source}"
            page = metadata.get('page') or metadata.get('start_page')
            if page:
                doc_header += f" (Page {page})"
            doc_text = f"{doc_header}\n{doc.content}\n"

            # The join below inserts a separator before every document but
            # the first; charge it against the budget as well.
            separator_cost = len(separator) if formatted_docs else 0
            remaining = self.max_context_length - total_length - separator_cost

            if len(doc_text) > remaining:
                # Truncate or skip
                if remaining > 100:  # Only add if we have reasonable space
                    # Reserve room for the marker so the limit is respected
                    keep = remaining - len(marker)
                    formatted_docs.append(doc_text[:keep] + marker)
                break

            formatted_docs.append(doc_text)
            total_length += separator_cost + len(doc_text)

        return separator.join(formatted_docs).strip()

    def _add_citation_instructions(self, prompt: str) -> str:
        """
        Add citation instructions to the prompt.

        The instruction for the configured citation_style is inserted
        before the last "Answer:" line when one exists, otherwise appended
        to the end. Styles without a known instruction leave the prompt
        unchanged.

        Args:
            prompt: Original prompt

        Returns:
            Prompt with citation instructions
        """
        citation_instructions = {
            "inline": "\nIMPORTANT: You MUST include citations in the format [Document X] after every factual claim. For example: 'RISC-V is an open-source ISA [Document 1].'",
            "footnote": "\nInclude footnote-style citations at the end of your answer.",
            "none": ""
        }
        instruction = citation_instructions.get(self.citation_style, "")
        if not instruction:
            return prompt

        # Only touch the final "Answer:" line; earlier occurrences may be
        # part of the context documents and must stay intact.
        head, anchor, tail = prompt.rpartition(self._ANSWER_ANCHOR)
        if anchor:
            return f"{head}{instruction}\n\nAnswer:{tail}"
        return prompt + instruction

    def set_template(self, template: str) -> None:
        """
        Set a custom prompt template.

        Args:
            template: New template with {context} and {query} placeholders

        Raises:
            ValueError: If template is missing required placeholders
        """
        if "{context}" not in template or "{query}" not in template:
            raise ValueError("Template must contain {context} and {query} placeholders")
        self.template = template
        # Keep the stored configuration in sync with the active template
        self.config['template'] = template
        logger.info("Updated prompt template")