# Source: Hugging Face Space "ArthyP/enhanced-rag-demo" (Space status: Sleeping)
# File size: 8,455 bytes; revision 5e1a30c

"""
Simple prompt builder implementation.

This module provides a basic prompt builder that uses straightforward
templates to construct prompts from queries and context documents.

Architecture Notes:
- Direct implementation (no adapter needed)
- Pure prompt construction algorithm
- Configurable templates for different use cases
"""

import logging
from typing import List, Dict, Any, Optional
from textwrap import dedent

from ..base import PromptBuilder, Document, ConfigurableComponent

logger = logging.getLogger(__name__)


class SimplePromptBuilder(PromptBuilder, ConfigurableComponent):
    """
    Simple template-based prompt builder.
    
    Features:
    - Configurable prompt templates
    - Context length management
    - Citation instruction injection
    - Clear role definitions
    
    Configuration:
    - max_context_length: Maximum characters for context (default: 4000)
    - include_instructions: Include detailed instructions (default: True)
    - citation_style: How to format citations (default: "inline")
    """
    
    # Full prompt template: role statement, context, question, and explicit
    # answering/citation rules. Rendered with str.format, so it must expose the
    # {context} and {query} placeholders. dedent() removes the common leading
    # indentation and strip() drops the blank first/last lines of the literal.
    DEFAULT_TEMPLATE = dedent("""
    You are a helpful assistant answering questions based on the provided context.
    
    Context Documents:
    {context}
    
    Question: {query}
    
    Instructions:
    - Answer based ONLY on the provided context
    - Be concise and direct
    - If the answer is not in the context, say so
    - ALWAYS include citations in your answer using the format [Document X] where X is the document number
    - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
    - Multiple citations can be combined like [Document 1, Document 2]
    
    Answer:
    """).strip()
    
    # Bare-bones template: only context, question, and an answer cue, with no
    # instruction list. Uses the same {context} and {query} placeholders.
    MINIMAL_TEMPLATE = dedent("""
    Context: {context}
    
    Question: {query}
    
    Answer based on the context:
    """).strip()
    
    def __init__(self,
                 max_context_length: int = 4000,
                 include_instructions: bool = True,
                 citation_style: str = "inline",
                 template: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None):
        """
        Create a prompt builder from explicit arguments plus optional overrides.

        The keyword arguments form the baseline settings; any key present in
        ``config`` replaces the baseline value of the same name. The merged
        mapping is handed to the parent initializer and then used to populate
        the instance attributes.

        Args:
            max_context_length: Maximum characters for context
            include_instructions: Include detailed instructions
            citation_style: Citation format ("inline", "footnote", "none")
            template: Custom template (uses a built-in template if None/empty)
            config: Additional configuration; its keys take precedence over
                the explicit arguments above
        """
        baseline = dict(
            max_context_length=max_context_length,
            include_instructions=include_instructions,
            citation_style=citation_style,
            template=template,
        )
        # Later entries win, so values from `config` override the baseline.
        settings = {**baseline, **(config or {})}

        super().__init__(settings)

        self.max_context_length = settings['max_context_length']
        self.include_instructions = settings['include_instructions']
        self.citation_style = settings['citation_style']

        # A truthy custom template always wins; otherwise the built-in
        # template is chosen by the include_instructions flag.
        custom_template = settings['template']
        if custom_template:
            self.template = custom_template
        else:
            self.template = (
                self.DEFAULT_TEMPLATE
                if self.include_instructions
                else self.MINIMAL_TEMPLATE
            )
    
    def build_prompt(self, query: str, context: List[Document]) -> str:
        """
        Build a prompt from query and context documents.

        The context documents are rendered via ``_format_context`` and
        substituted, together with the stripped query, into ``self.template``.
        When ``citation_style`` is not "none" and the template itself does not
        already mention citations, citation instructions are added via
        ``_add_citation_instructions``.

        Args:
            query: User query string
            context: List of relevant context documents; an empty list or None
                yields a "no context found" placeholder in the prompt

        Returns:
            Formatted prompt string

        Raises:
            ValueError: If query is not a string or is empty/whitespace-only
        """
        # Reject None / non-string queries with the documented ValueError
        # instead of failing with AttributeError on .strip().
        if not isinstance(query, str) or not query.strip():
            raise ValueError("Query cannot be empty")

        if context:
            context_text = self._format_context(context)
        else:
            # Handle empty context gracefully
            context_text = "No relevant context documents found."

        prompt = self.template.format(
            context=context_text,
            query=query.strip()
        )

        # Inspect the template, not the rendered prompt: the query or a context
        # document may contain the word "citation" by coincidence, which must
        # not suppress the citation instructions.
        if self.citation_style != "none" and "citation" not in self.template.lower():
            prompt = self._add_citation_instructions(prompt)

        # `context` may be None here, so do not call len() on it directly.
        doc_count = len(context) if context else 0
        logger.debug("Built prompt with %d documents, length: %d", doc_count, len(prompt))

        return prompt
    
    def get_template(self) -> str:
        """Return the prompt template being used."""
        return self.template
    
    def get_builder_info(self) -> Dict[str, Any]:
        """Get information about the prompt builder."""
        return {
            'type': 'simple',
            'builder_class': self.__class__.__name__,
            'max_context_length': self.max_context_length,
            'include_instructions': self.include_instructions,
            'citation_style': self.citation_style,
            'template_length': len(self.template),
            'template_preview': self.template[:100] + '...' if len(self.template) > 100 else self.template
        }
    
    def _format_context(self, documents: List[Document]) -> str:
        """
        Format context documents into a readable string.
        
        Args:
            documents: List of documents
            
        Returns:
            Formatted context string
        """
        formatted_docs = []
        total_length = 0
        
        for i, doc in enumerate(documents, 1):
            # Format document with citation marker
            doc_header = f"[Document {i}]"
            if doc.metadata.get('source'):
                doc_header += f" Source: {doc.metadata['source']}"
            if doc.metadata.get('page') or doc.metadata.get('start_page'):
                page = doc.metadata.get('page') or doc.metadata.get('start_page')
                doc_header += f" (Page {page})"
            
            # Check if adding this document would exceed limit
            doc_text = f"{doc_header}\n{doc.content}\n"
            if total_length + len(doc_text) > self.max_context_length:
                # Truncate or skip
                remaining = self.max_context_length - total_length
                if remaining > 100:  # Only add if we have reasonable space
                    truncated = doc_text[:remaining] + "\n[Truncated...]"
                    formatted_docs.append(truncated)
                break
            
            formatted_docs.append(doc_text)
            total_length += len(doc_text)
        
        return "\n".join(formatted_docs).strip()
    
    def _add_citation_instructions(self, prompt: str) -> str:
        """
        Add citation instructions to the prompt.
        
        Args:
            prompt: Original prompt
            
        Returns:
            Prompt with citation instructions
        """
        citation_instructions = {
            "inline": "\nIMPORTANT: You MUST include citations in the format [Document X] after every factual claim. For example: 'RISC-V is an open-source ISA [Document 1].'",
            "footnote": "\nInclude footnote-style citations at the end of your answer.",
            "none": ""
        }
        
        instruction = citation_instructions.get(self.citation_style, "")
        if instruction:
            # Add before the final "Answer:" line if present
            if "\nAnswer:" in prompt:
                prompt = prompt.replace("\nAnswer:", f"{instruction}\n\nAnswer:")
            else:
                prompt += instruction
        
        return prompt
    
    def set_template(self, template: str) -> None:
        """
        Set a custom prompt template.

        The template is parsed as a ``str.format`` string so that only real
        replacement fields count as placeholders: escaped braces such as
        ``{{context}}`` are not accepted as a placeholder, while forms like
        ``{context!s}`` are. On success the template is stored on the instance
        and in ``self.config['template']``.

        Args:
            template: New template with {context} and {query} placeholders

        Raises:
            ValueError: If template is missing required placeholders or is
                not a well-formed format string
        """
        # Function-scope import keeps this block self-contained.
        from string import Formatter

        try:
            # Reduce "context.attr" / "context[0]" to the root field name.
            field_names = {
                field.split('.', 1)[0].split('[', 1)[0]
                for _, field, _, _ in Formatter().parse(template)
                if field is not None
            }
        except ValueError as exc:
            # Unbalanced braces would otherwise only surface later, when the
            # template is rendered in build_prompt.
            raise ValueError(f"Template is not a valid format string: {exc}") from exc

        if not {"context", "query"} <= field_names:
            raise ValueError("Template must contain {context} and {query} placeholders")

        self.template = template
        self.config['template'] = template
        logger.info("Updated prompt template")