File size: 8,455 Bytes
5e1a30c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
"""
Simple prompt builder implementation.

This module provides a basic prompt builder that uses straightforward
templates to construct prompts from queries and context documents.

Architecture Notes:
- Direct implementation (no adapter needed)
- Pure prompt construction algorithm
- Configurable templates for different use cases
"""

import logging
from typing import List, Dict, Any, Optional
from textwrap import dedent

from ..base import PromptBuilder, Document, ConfigurableComponent

logger = logging.getLogger(__name__)


class SimplePromptBuilder(PromptBuilder, ConfigurableComponent):
    """
    Simple template-based prompt builder.

    Fills a text template containing ``{context}`` and ``{query}``
    placeholders with a formatted list of context documents and the user
    query, optionally injecting citation instructions.

    Features:
    - Configurable prompt templates
    - Context length management
    - Citation instruction injection
    - Clear role definitions

    Configuration:
    - max_context_length: Maximum characters for context (default: 4000)
    - include_instructions: Include detailed instructions (default: True)
    - citation_style: How to format citations (default: "inline")
    - template: Custom template; takes precedence over the built-in ones

    NOTE: DEFAULT_TEMPLATE already contains inline citation instructions, so
    ``citation_style`` only changes the prompt when the selected template
    does not itself mention citations (e.g. MINIMAL_TEMPLATE).
    """

    # Default prompt template
    DEFAULT_TEMPLATE = dedent("""
    You are a helpful assistant answering questions based on the provided context.

    Context Documents:
    {context}

    Question: {query}

    Instructions:
    - Answer based ONLY on the provided context
    - Be concise and direct
    - If the answer is not in the context, say so
    - ALWAYS include citations in your answer using the format [Document X] where X is the document number
    - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
    - Multiple citations can be combined like [Document 1, Document 2]

    Answer:
    """).strip()

    # Minimal template without instructions
    MINIMAL_TEMPLATE = dedent("""
    Context: {context}

    Question: {query}

    Answer based on the context:
    """).strip()

    # Appended to a document that had to be cut to fit the context budget
    _TRUNCATION_MARKER = "\n[Truncated...]"
    # Separator placed between formatted documents
    _DOC_SEPARATOR = "\n"
    # A partial document is only emitted if more than this many characters fit
    _MIN_PARTIAL_DOC_CHARS = 100
    # Line in a prompt/template that citation instructions are placed before
    _ANSWER_ANCHOR = "\nAnswer:"
    # Instruction text per citation style. These strings are inserted into
    # the template before ``str.format`` runs, so they must not contain
    # unescaped ``{`` or ``}``.
    _CITATION_INSTRUCTIONS: Dict[str, str] = {
        "inline": "\nIMPORTANT: You MUST include citations in the format [Document X] after every factual claim. For example: 'RISC-V is an open-source ISA [Document 1].'",
        "footnote": "\nInclude footnote-style citations at the end of your answer.",
        "none": ""
    }

    def __init__(self,
                 max_context_length: int = 4000,
                 include_instructions: bool = True,
                 citation_style: str = "inline",
                 template: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None):
        """
        Initialize simple prompt builder.

        Values in ``config`` override the corresponding keyword arguments.

        Args:
            max_context_length: Maximum characters for context
            include_instructions: Include detailed instructions
            citation_style: Citation format ("inline", "footnote", "none")
            template: Custom template (uses default if None)
            config: Additional configuration
        """
        # Merge config; entries in ``config`` win over the explicit arguments
        builder_config = {
            'max_context_length': max_context_length,
            'include_instructions': include_instructions,
            'citation_style': citation_style,
            'template': template,
            **(config or {})
        }

        super().__init__(builder_config)

        # Set configuration
        self.max_context_length = builder_config['max_context_length']
        self.include_instructions = builder_config['include_instructions']
        self.citation_style = builder_config['citation_style']

        # An unknown style silently disables citation instructions; make that
        # visible without breaking existing callers.
        if self.citation_style not in self._CITATION_INSTRUCTIONS:
            logger.warning(
                "Unknown citation_style %r; no citation instructions will be added",
                self.citation_style,
            )

        # Select template: custom > default (with instructions) > minimal
        custom_template = builder_config['template']
        if custom_template:
            # set_template() rejects such templates; here we only warn so that
            # previously accepted configurations keep working.
            if isinstance(custom_template, str) and (
                "{context}" not in custom_template
                or "{query}" not in custom_template
            ):
                logger.warning(
                    "Custom template is missing the {context} and/or {query} placeholder"
                )
            self.template = custom_template
        elif self.include_instructions:
            self.template = self.DEFAULT_TEMPLATE
        else:
            self.template = self.MINIMAL_TEMPLATE

    def build_prompt(self, query: str, context: List[Document]) -> str:
        """
        Build a prompt from query and context documents.

        Citation instructions are added when ``citation_style`` is not
        "none" and the template itself does not already mention citations.
        The decision is based on the template only, so the wording of the
        query or of the context documents cannot suppress the instructions.

        Args:
            query: User query string
            context: List of relevant context documents (may be empty or None)

        Returns:
            Formatted prompt string

        Raises:
            ValueError: If query is empty, whitespace-only or None
        """
        if not query or not query.strip():
            raise ValueError("Query cannot be empty")

        if not context:
            # Handle empty context gracefully
            context_text = "No relevant context documents found."
            document_count = 0
        else:
            # Format context documents
            context_text = self._format_context(context)
            document_count = len(context)

        # Add citation instructions to the template (not to the formatted
        # prompt) so that document/query text is never inspected or rewritten.
        template = self.template
        if self.citation_style != "none" and "citation" not in template.lower():
            template = self._add_citation_instructions(template)

        # Build prompt from template
        prompt = template.format(
            context=context_text,
            query=query.strip()
        )

        logger.debug(
            "Built prompt with %d documents, length: %d",
            document_count,
            len(prompt),
        )

        return prompt

    def get_template(self) -> str:
        """Return the prompt template being used."""
        return self.template

    def get_builder_info(self) -> Dict[str, Any]:
        """
        Get information about the prompt builder.

        Returns:
            Dictionary with the builder type, class name, current settings,
            the template length and a preview of at most 100 template
            characters (followed by '...' when the template is longer).
        """
        if len(self.template) > 100:
            template_preview = self.template[:100] + '...'
        else:
            template_preview = self.template

        return {
            'type': 'simple',
            'builder_class': self.__class__.__name__,
            'max_context_length': self.max_context_length,
            'include_instructions': self.include_instructions,
            'citation_style': self.citation_style,
            'template_length': len(self.template),
            'template_preview': template_preview
        }

    def _format_context(self, documents: List[Document]) -> str:
        """
        Format context documents into a readable string.

        Each document is rendered as a ``[Document N]`` header (optionally
        followed by its source and page taken from the metadata) and its
        content. Documents are added in order until ``max_context_length``
        characters would be exceeded; the first document that does not fit
        is truncated (if more than 100 characters are still available) or
        dropped, and all following documents are skipped. Separators and the
        truncation marker count towards the limit.

        Args:
            documents: List of documents

        Returns:
            Formatted context string, at most ``max_context_length`` long
        """
        marker = self._TRUNCATION_MARKER
        separator = self._DOC_SEPARATOR
        formatted_docs: List[str] = []
        total_length = 0

        for i, doc in enumerate(documents, 1):
            # Tolerate documents whose metadata is missing or None
            metadata = getattr(doc, 'metadata', None) or {}

            # Format document with citation marker
            doc_header = f"[Document {i}]"
            if metadata.get('source'):
                doc_header += f" Source: {metadata['source']}"
            page = metadata.get('page') or metadata.get('start_page')
            if page:
                doc_header += f" (Page {page})"

            doc_text = f"{doc_header}\n{doc.content}\n"

            # The join below inserts a separator before every document but
            # the first; charge it to the budget as well.
            separator_cost = len(separator) if formatted_docs else 0

            # Check if adding this document would exceed limit
            if total_length + separator_cost + len(doc_text) > self.max_context_length:
                remaining = self.max_context_length - total_length - separator_cost
                if remaining > self._MIN_PARTIAL_DOC_CHARS:
                    # Reserve room for the marker so the limit still holds
                    keep = remaining - len(marker)
                    formatted_docs.append(doc_text[:keep] + marker)
                break

            formatted_docs.append(doc_text)
            total_length += separator_cost + len(doc_text)

        return separator.join(formatted_docs).strip()

    def _add_citation_instructions(self, prompt: str) -> str:
        """
        Add citation instructions to the prompt.

        The instruction for the configured ``citation_style`` is inserted
        before the last ``Answer:`` line if there is one, otherwise appended
        at the end. Unknown styles and "none" leave the text unchanged.

        Args:
            prompt: Original prompt (or prompt template)

        Returns:
            Prompt with citation instructions
        """
        instruction = self._CITATION_INSTRUCTIONS.get(self.citation_style, "")
        if not instruction:
            return prompt

        # Only the final "Answer:" line is the answer slot; earlier matches
        # may be ordinary text and must not be rewritten.
        head, anchor, tail = prompt.rpartition(self._ANSWER_ANCHOR)
        if anchor:
            return f"{head}{instruction}\n{anchor}{tail}"

        return prompt + instruction

    def set_template(self, template: str) -> None:
        """
        Set a custom prompt template.

        Args:
            template: New template with {context} and {query} placeholders

        Raises:
            ValueError: If template is missing required placeholders
        """
        if "{context}" not in template or "{query}" not in template:
            raise ValueError("Template must contain {context} and {query} placeholders")

        self.template = template
        # Keep the stored configuration in sync with the active template
        self.config['template'] = template
        logger.info("Updated prompt template")