""" Simple prompt builder implementation. This module provides a basic prompt builder that uses straightforward templates to construct prompts from queries and context documents. Architecture Notes: - Direct implementation (no adapter needed) - Pure prompt construction algorithm - Configurable templates for different use cases """ import logging from typing import List, Dict, Any, Optional from textwrap import dedent from ..base import PromptBuilder, Document, ConfigurableComponent logger = logging.getLogger(__name__) class SimplePromptBuilder(PromptBuilder, ConfigurableComponent): """ Simple template-based prompt builder. Features: - Configurable prompt templates - Context length management - Citation instruction injection - Clear role definitions Configuration: - max_context_length: Maximum characters for context (default: 4000) - include_instructions: Include detailed instructions (default: True) - citation_style: How to format citations (default: "inline") """ # Default prompt template DEFAULT_TEMPLATE = dedent(""" You are a helpful assistant answering questions based on the provided context. Context Documents: {context} Question: {query} Instructions: - Answer based ONLY on the provided context - Be concise and direct - If the answer is not in the context, say so - ALWAYS include citations in your answer using the format [Document X] where X is the document number - Every factual claim must be followed by a citation like [Document 1] or [Document 2] - Multiple citations can be combined like [Document 1, Document 2] Answer: """).strip() # Minimal template without instructions MINIMAL_TEMPLATE = dedent(""" Context: {context} Question: {query} Answer based on the context: """).strip() def __init__(self, max_context_length: int = 4000, include_instructions: bool = True, citation_style: str = "inline", template: Optional[str] = None, config: Optional[Dict[str, Any]] = None): """ Initialize simple prompt builder. Args: max_context_length: Maximum characters for context include_instructions: Include detailed instructions citation_style: Citation format ("inline", "footnote", "none") template: Custom template (uses default if None) config: Additional configuration """ # Merge config builder_config = { 'max_context_length': max_context_length, 'include_instructions': include_instructions, 'citation_style': citation_style, 'template': template, **(config or {}) } super().__init__(builder_config) # Set configuration self.max_context_length = builder_config['max_context_length'] self.include_instructions = builder_config['include_instructions'] self.citation_style = builder_config['citation_style'] # Select template if builder_config['template']: self.template = builder_config['template'] elif self.include_instructions: self.template = self.DEFAULT_TEMPLATE else: self.template = self.MINIMAL_TEMPLATE def build_prompt(self, query: str, context: List[Document]) -> str: """ Build a prompt from query and context documents. Args: query: User query string context: List of relevant context documents Returns: Formatted prompt string Raises: ValueError: If query is empty or context is invalid """ if not query.strip(): raise ValueError("Query cannot be empty") if not context: # Handle empty context gracefully context_text = "No relevant context documents found." else: # Format context documents context_text = self._format_context(context) # Build prompt from template prompt = self.template.format( context=context_text, query=query.strip() ) # Add citation instructions if needed if self.citation_style != "none" and "citation" not in prompt.lower(): prompt = self._add_citation_instructions(prompt) logger.debug(f"Built prompt with {len(context)} documents, length: {len(prompt)}") return prompt def get_template(self) -> str: """Return the prompt template being used.""" return self.template def get_builder_info(self) -> Dict[str, Any]: """Get information about the prompt builder.""" return { 'type': 'simple', 'builder_class': self.__class__.__name__, 'max_context_length': self.max_context_length, 'include_instructions': self.include_instructions, 'citation_style': self.citation_style, 'template_length': len(self.template), 'template_preview': self.template[:100] + '...' if len(self.template) > 100 else self.template } def _format_context(self, documents: List[Document]) -> str: """ Format context documents into a readable string. Args: documents: List of documents Returns: Formatted context string """ formatted_docs = [] total_length = 0 for i, doc in enumerate(documents, 1): # Format document with citation marker doc_header = f"[Document {i}]" if doc.metadata.get('source'): doc_header += f" Source: {doc.metadata['source']}" if doc.metadata.get('page') or doc.metadata.get('start_page'): page = doc.metadata.get('page') or doc.metadata.get('start_page') doc_header += f" (Page {page})" # Check if adding this document would exceed limit doc_text = f"{doc_header}\n{doc.content}\n" if total_length + len(doc_text) > self.max_context_length: # Truncate or skip remaining = self.max_context_length - total_length if remaining > 100: # Only add if we have reasonable space truncated = doc_text[:remaining] + "\n[Truncated...]" formatted_docs.append(truncated) break formatted_docs.append(doc_text) total_length += len(doc_text) return "\n".join(formatted_docs).strip() def _add_citation_instructions(self, prompt: str) -> str: """ Add citation instructions to the prompt. Args: prompt: Original prompt Returns: Prompt with citation instructions """ citation_instructions = { "inline": "\nIMPORTANT: You MUST include citations in the format [Document X] after every factual claim. For example: 'RISC-V is an open-source ISA [Document 1].'", "footnote": "\nInclude footnote-style citations at the end of your answer.", "none": "" } instruction = citation_instructions.get(self.citation_style, "") if instruction: # Add before the final "Answer:" line if present if "\nAnswer:" in prompt: prompt = prompt.replace("\nAnswer:", f"{instruction}\n\nAnswer:") else: prompt += instruction return prompt def set_template(self, template: str) -> None: """ Set a custom prompt template. Args: template: New template with {context} and {query} placeholders Raises: ValueError: If template is missing required placeholders """ if "{context}" not in template or "{query}" not in template: raise ValueError("Template must contain {context} and {query} placeholders") self.template = template self.config['template'] = template logger.info("Updated prompt template")