"""
AnswerGenerator: orchestrates retrieval, re-ranking, and answer generation.
This module contains:
- Retriever: Hybrid BM25 + dense retrieval over parsed chunks
- Reranker: Cross-encoder based re-ranking of candidate chunks
- AnswerGenerator: ties together retrieval, re-ranking, and LLM generation
Each component is modular and can be swapped or extended (e.g., add HyDE retriever).
"""
import random
from typing import List, Dict, Any, Tuple

from src import logger, RetrieverConfig
from src.utils import LLMClient
from src.retriever import Retriever
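
# Retrieved chunks are dicts; this module assumes each carries at least a
# "narration" key holding the text that goes into the LLM context.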


class AnswerGenerator:
    """
    Generates answers by retrieving documents from a vector store
    and using them to build a context for an LLM.
    This version is optimized for low latency by skipping the reranking step.
    """

    def __init__(self, collection_name: str):
        self.retriever = Retriever(collection_name, RetrieverConfig)
        self.context_chunks_count = 5  # Use top 5 chunks for the final prompt
        self.greetings = [
            "Hello! I'm ready to answer your questions about the document. What would you like to know?",
            "Hi there! How can I help you with your document today?",
            "Hey! I've got the document open and I'm ready for your questions.",
            "Greetings! Ask me anything about the document, and I'll do my best to find the answer for you.",
        ]

    def _truncate_to_last_sentence(self, text: str) -> str:
        """Truncates text at the last period or newline so it ends on a complete sentence."""
        last_period = text.rfind('.')
        last_newline = text.rfind('\n')
        # Cut at whichever boundary appears last
        last_marker = max(last_period, last_newline)
        if last_marker != -1:
            return text[:last_marker + 1].strip()
        # No sentence-ending punctuation found; return the text unchanged
        return text
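
    # Illustrative example (not executed): _truncate_to_last_sentence(
    # "The fee is 2%. The remai") returns "The fee is 2%.", so a truncated
    # LLM response never ends mid-sentence.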

    def answer(self, question: str) -> Tuple[str, List[Dict[str, Any]]]:
        """
        Retrieves documents, builds a context, and generates an answer.
        Handles simple greetings separately to improve user experience.
        """
        # Handle simple greetings to avoid a failed retrieval
        normalized_question = question.lower().strip().rstrip('.,!')
        greeting_triggers = {"hi", "hello", "hey", "hallo", "hola"}
        if normalized_question in greeting_triggers:
            return random.choice(self.greetings), []

        # Retrieve candidate documents from the vector store
        candidates = self.retriever.retrieve(question)
        if not candidates:
            logger.warning("No candidates retrieved from vector store.")
            return "The document does not contain information on this topic.", []

        # Use the top N chunks for context, without reranking
        top_chunks = candidates[:self.context_chunks_count]
        context = "\n\n".join(f"- {c['narration']}" for c in top_chunks)
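        # Illustrative: with two chunks, context reads
        #   "- <narration of chunk 1>\n\n- <narration of chunk 2>"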

        # A more robust prompt that encourages a natural, conversational tone
        prompt = (
            "You are a helpful and friendly AI assistant for document analysis. "
            "Your user is asking a question about a document. "
            "Based *only* on the context provided below, formulate a clear and conversational answer. "
            "Adopt a helpful and slightly informal tone, as if you were a knowledgeable colleague.\n\n"
            "CONTEXT:\n"
            "---------------------\n"
            f"{context}\n"
            "---------------------\n\n"
            "USER'S QUESTION: "
            f'"{question}"\n\n'
            "YOUR TASK:\n"
            "1. Carefully read the provided context.\n"
            "2. If the context contains the answer, explain it to the user in a natural, conversational way. Do not just repeat the text verbatim.\n"
            "3. If the context does not contain the necessary information, respond with: "
            "'I've checked the document, but I couldn't find any information on that topic.'\n"
            "4. **Crucially, do not use any information outside of the provided context.**\n\n"
            "Answer:"
        )

        answer, finish_reason = LLMClient.generate(prompt, max_tokens=256)

        # Handle cases where the response might be cut off
        if finish_reason == 'length':
            logger.warning("LLM response was truncated due to token limit.")
            truncated_answer = self._truncate_to_last_sentence(answer)
            answer = truncated_answer + " ... (response shortened)"
        return answer, top_chunks
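

# --- Usage sketch (illustrative) ---
# A minimal example of driving AnswerGenerator end to end. It assumes a
# previously ingested collection; the name "demo_docs" and the sample
# question are hypothetical placeholders.
if __name__ == "__main__":
    generator = AnswerGenerator("demo_docs")
    reply, sources = generator.answer("What is the notice period for termination?")
    print(reply)
    # Each source chunk carries the narration text used to build the context
    for chunk in sources:
        print("-", chunk["narration"][:80])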