"""
AnswerGenerator: orchestrates retrieval, re-ranking, and answer generation.

This module contains:
 - Retriever: Hybrid BM25 + dense retrieval over parsed chunks
 - Reranker: Cross-encoder based re-ranking of candidate chunks
 - AnswerGenerator: ties together retrieval, re-ranking, and LLM generation

Each component is modular and can be swapped or extended (e.g., add HyDE retriever).
"""
import random
from typing import List, Dict, Any, Tuple

from src import logger, RetrieverConfig
from src.utils import LLMClient
from src.retriever import Retriever

class AnswerGenerator:
    """
    Generates answers by retrieving documents from a vector store
    and using them to build a context for an LLM.
    This version is optimized for low latency by skipping the reranking step.
    """
    def __init__(self, collection_name: str):
        self.retriever = Retriever(collection_name, RetrieverConfig)
        self.context_chunks_count = 5 # Use top 5 chunks for the final prompt
        self.greetings = [
            "Hello! I'm ready to answer your questions about the document. What would you like to know?",
            "Hi there! How can I help you with your document today?",
            "Hey! I've got the document open and I'm ready for your questions.",
            "Greetings! Ask me anything about the document, and I'll do my best to find the answer for you."
        ]

    def _truncate_to_last_sentence(self, text: str) -> str:
        """Finds the last period or newline and truncates the text to that point."""
        # Truncate at whichever comes later: the last period or the last newline
        last_period = text.rfind('.')
        last_newline = text.rfind('\n')
        last_marker = max(last_period, last_newline)

        if last_marker != -1:
            return text[:last_marker + 1].strip()
        
        # No sentence boundary found; return the text unchanged
        return text

    def answer(self, question: str) -> Tuple[str, List[Dict[str, Any]]]:
        """
        Retrieves documents, builds a context, and generates an answer.
        Handles simple greetings separately to improve user experience.
        """
        # Handle simple greetings to avoid a failed retrieval
        normalized_question = question.lower().strip().rstrip('.,!')
        greeting_triggers = ["hi", "hello", "hey", "hallo", "hola"]
        if normalized_question in greeting_triggers:
            return random.choice(self.greetings), []

        # Retrieve candidate documents from the vector store
        candidates = self.retriever.retrieve(question)
        
        if not candidates:
            logger.warning("No candidates retrieved from vector store.")
            return "The document does not contain information on this topic.", []
        
        # Use the top N chunks for context, without reranking
        top_chunks = candidates[:self.context_chunks_count]
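        # If latency allowed, a cross-encoder re-ranking pass could replace the
        # simple slice above. Illustrative sketch only (this module ships no
        # Reranker; `reranker.score` is a hypothetical API):
        #     scores = reranker.score(question, candidates)
        #     ranked = sorted(zip(scores, candidates), key=lambda p: p[0], reverse=True)
        #     top_chunks = [c for _, c in ranked][:self.context_chunks_count]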
        
        context = "\n\n".join(f"- {c['narration']}" for c in top_chunks)
        
        # A more robust prompt that encourages a natural, conversational tone
        prompt = (
            "You are a helpful and friendly AI assistant for document analysis. "
            "Your user is asking a question about a document. "
            "Based *only* on the context provided below, formulate a clear and conversational answer. "
            "Adopt a helpful and slightly informal tone, as if you were a knowledgeable colleague.\n\n"
            "CONTEXT:\n"
            "---------------------\n"
            f"{context}\n"
            "---------------------\n\n"
            "USER'S QUESTION: "
            f'"{question}"\n\n'
            "YOUR TASK:\n"
            "1. Carefully read the provided context.\n"
            "2. If the context contains the answer, explain it to the user in a natural, conversational way. Do not just repeat the text verbatim.\n"
            "3. If the context does not contain the necessary information, respond with: "
            "'I've checked the document, but I couldn't find any information on that topic.'\n"
            "4. **Crucially, do not use any information outside of the provided context.**\n\n"
            "Answer:"
        )
        
        answer, finish_reason = LLMClient.generate(prompt, max_tokens=256)

        # Handle cases where the response might be cut off
        if finish_reason == 'length':
            logger.warning("LLM response was truncated due to token limit.")
            truncated_answer = self._truncate_to_last_sentence(answer)
            answer = truncated_answer + " ... (response shortened)"

        return answer, top_chunks
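

if __name__ == "__main__":
    # Minimal usage sketch, not part of the pipeline. "example_collection" is
    # a placeholder; it assumes a vector store collection by that name has
    # already been populated by the ingestion step.
    generator = AnswerGenerator("example_collection")
    answer, sources = generator.answer("What is this document about?")
    print(answer)
    for chunk in sources:
        print("-", chunk["narration"])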