Spaces:
Sleeping
Sleeping
File size: 8,455 Bytes
5e1a30c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 |
"""
Simple prompt builder implementation.
This module provides a basic prompt builder that uses straightforward
templates to construct prompts from queries and context documents.
Architecture Notes:
- Direct implementation (no adapter needed)
- Pure prompt construction algorithm
- Configurable templates for different use cases
"""
import logging
from typing import List, Dict, Any, Optional
from textwrap import dedent
from ..base import PromptBuilder, Document, ConfigurableComponent
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class SimplePromptBuilder(PromptBuilder, ConfigurableComponent):
    """
    Simple template-based prompt builder.

    Renders a prompt by substituting a formatted block of context documents
    and the user query into a ``str.format``-style template.

    Features:
    - Configurable prompt templates
    - Context length management
    - Citation instruction injection
    - Clear role definitions

    Configuration:
    - max_context_length: Maximum characters for context (default: 4000)
    - include_instructions: Include detailed instructions (default: True)
    - citation_style: How to format citations (default: "inline")
    """

    # Default prompt template
    DEFAULT_TEMPLATE = dedent("""
    You are a helpful assistant answering questions based on the provided context.
    Context Documents:
    {context}
    Question: {query}
    Instructions:
    - Answer based ONLY on the provided context
    - Be concise and direct
    - If the answer is not in the context, say so
    - ALWAYS include citations in your answer using the format [Document X] where X is the document number
    - Every factual claim must be followed by a citation like [Document 1] or [Document 2]
    - Multiple citations can be combined like [Document 1, Document 2]
    Answer:
    """).strip()

    # Minimal template without instructions
    MINIMAL_TEMPLATE = dedent("""
    Context: {context}
    Question: {query}
    Answer based on the context:
    """).strip()

    # Extra instruction appended per citation style when the template itself
    # does not already talk about citations.
    CITATION_INSTRUCTIONS = {
        "inline": "\nIMPORTANT: You MUST include citations in the format [Document X] after every factual claim. For example: 'RISC-V is an open-source ISA [Document 1].'",
        "footnote": "\nInclude footnote-style citations at the end of your answer.",
        "none": "",
    }

    # Marker appended to a document that had to be cut to fit the budget.
    TRUNCATION_MARKER = "\n[Truncated...]"

    # A partially-fitting document is only included when more than this many
    # characters of budget remain; otherwise it is dropped entirely.
    MIN_TRUNCATED_LENGTH = 100

    def __init__(self,
                 max_context_length: int = 4000,
                 include_instructions: bool = True,
                 citation_style: str = "inline",
                 template: Optional[str] = None,
                 config: Optional[Dict[str, Any]] = None):
        """
        Initialize simple prompt builder.

        Args:
            max_context_length: Maximum characters for context
            include_instructions: Include detailed instructions
            citation_style: Citation format ("inline", "footnote", "none")
            template: Custom template (uses default if None)
            config: Additional configuration; its keys override the
                explicit keyword arguments above when both are given
        """
        # Merge config: entries in `config` win over the keyword arguments.
        builder_config = {
            'max_context_length': max_context_length,
            'include_instructions': include_instructions,
            'citation_style': citation_style,
            'template': template,
            **(config or {})
        }
        super().__init__(builder_config)

        # Read the effective settings back from the merged config.
        self.max_context_length = builder_config['max_context_length']
        self.include_instructions = builder_config['include_instructions']
        self.citation_style = builder_config['citation_style']

        # Select template: custom > detailed default > minimal.
        if builder_config['template']:
            self.template = builder_config['template']
        elif self.include_instructions:
            self.template = self.DEFAULT_TEMPLATE
        else:
            self.template = self.MINIMAL_TEMPLATE

    def build_prompt(self, query: str, context: List[Document]) -> str:
        """
        Build a prompt from query and context documents.

        Args:
            query: User query string
            context: List of relevant context documents (may be empty or None)

        Returns:
            Formatted prompt string

        Raises:
            ValueError: If query is None, empty, or whitespace-only
        """
        if not query or not query.strip():
            raise ValueError("Query cannot be empty")

        if context:
            context_text = self._format_context(context)
        else:
            # Handle empty context gracefully
            context_text = "No relevant context documents found."

        prompt = self.template.format(
            context=context_text,
            query=query.strip()
        )

        # Decide from the template, not the rendered prompt: a document or
        # query that merely mentions "citation" must not suppress the
        # citation instructions.
        if self.citation_style != "none" and "citation" not in self.template.lower():
            prompt = self._add_citation_instructions(prompt)

        logger.debug(
            "Built prompt with %d documents, length: %d",
            len(context) if context else 0,
            len(prompt),
        )
        return prompt

    def get_template(self) -> str:
        """Return the prompt template being used."""
        return self.template

    def get_builder_info(self) -> Dict[str, Any]:
        """
        Get information about the prompt builder.

        Returns:
            Dict with the builder type, class name, current settings, the
            template length and a preview of at most 100 template characters.
        """
        if len(self.template) > 100:
            template_preview = self.template[:100] + '...'
        else:
            template_preview = self.template
        return {
            'type': 'simple',
            'builder_class': self.__class__.__name__,
            'max_context_length': self.max_context_length,
            'include_instructions': self.include_instructions,
            'citation_style': self.citation_style,
            'template_length': len(self.template),
            'template_preview': template_preview,
        }

    def _format_context(self, documents: List[Document]) -> str:
        """
        Format context documents into a readable string.

        Each document is rendered as a ``[Document N]`` header (with optional
        source and page taken from its metadata) followed by its content.
        Documents are added in order until ``max_context_length`` characters
        are used; the first document that does not fit is truncated if enough
        budget remains, and all later documents are dropped.

        Args:
            documents: List of documents

        Returns:
            Formatted context string, at most ``max_context_length`` characters
        """
        formatted_docs: List[str] = []
        total_length = 0

        for i, doc in enumerate(documents, 1):
            metadata = doc.metadata or {}

            # Format document with citation marker
            doc_header = f"[Document {i}]"
            if metadata.get('source'):
                doc_header += f" Source: {metadata['source']}"

            # Explicit missing-value checks so that page 0 is still shown.
            page = metadata.get('page')
            if page in (None, ""):
                page = metadata.get('start_page')
            if page not in (None, ""):
                doc_header += f" (Page {page})"

            doc_text = f"{doc_header}\n{doc.content}\n"

            # The final join inserts one "\n" between consecutive documents;
            # count it so the total really stays within the limit.
            separator_length = 1 if formatted_docs else 0
            remaining = self.max_context_length - total_length - separator_length

            if len(doc_text) > remaining:
                # Only add a truncated piece if we have reasonable space.
                if remaining > self.MIN_TRUNCATED_LENGTH:
                    # Reserve room for the marker inside the budget.
                    keep = remaining - len(self.TRUNCATION_MARKER)
                    formatted_docs.append(doc_text[:keep] + self.TRUNCATION_MARKER)
                break

            formatted_docs.append(doc_text)
            total_length += separator_length + len(doc_text)

        return "\n".join(formatted_docs).strip()

    def _add_citation_instructions(self, prompt: str) -> str:
        """
        Add citation instructions to the prompt.

        The instruction for the current ``citation_style`` is inserted before
        the last "Answer:" line if one exists, otherwise appended at the end.
        Unknown styles and "none" leave the prompt unchanged.

        Args:
            prompt: Original prompt

        Returns:
            Prompt with citation instructions
        """
        instruction = self.CITATION_INSTRUCTIONS.get(self.citation_style, "")
        if not instruction:
            return prompt

        # Only touch the final "Answer:" line; earlier matches may be part of
        # the context documents or the query and must stay intact.
        head, marker, tail = prompt.rpartition("\nAnswer:")
        if marker:
            return f"{head}{instruction}\n\nAnswer:{tail}"
        return prompt + instruction

    def set_template(self, template: str) -> None:
        """
        Set a custom prompt template.

        Args:
            template: New template with {context} and {query} placeholders

        Raises:
            ValueError: If template is missing required placeholders
        """
        if "{context}" not in template or "{query}" not in template:
            raise ValueError("Template must contain {context} and {query} placeholders")
        self.template = template
        # NOTE(review): assumes a base class exposes the config dict as
        # self.config; confirm against ConfigurableComponent.
        self.config['template'] = template
        logger.info("Updated prompt template")