File size: 4,759 Bytes
b3b7a20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# rag_system.py
import logging
from typing import Dict, List, Optional, TypedDict

from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage
from langchain.prompts import ChatPromptTemplate
from langchain_core.tools import tool

from langgraph.graph import StateGraph, START, END

# Logging configuration: INFO level with timestamped, leveled messages.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger, keyed by module name per stdlib convention.
logger = logging.getLogger(__name__)

# RAG prompt for puppy-related questions.
# {question} and {context} are filled by ChatPromptTemplate.format_messages
# at generation time; the model is instructed to answer strictly from the
# retrieved context and to say "I don't know" otherwise (grounding guard).
# NOTE: the leading whitespace inside the triple-quoted string is part of
# the prompt sent to the model.
RAG_PROMPT =    """
                You are an assistant specialized in puppy education and care.
                Your role is to help new puppy owners by answering their questions with accuracy and kindness.
                Use only the information provided in the context to formulate your answers.
                If you cannot find the information in the context, just say "I don't know".

                ### Question
                {question}

                ### Context
                {context}
                """

class State(TypedDict):
    """Shared state flowing through the LangGraph RAG pipeline."""
    question: str            # user's question (pipeline input)
    context: List[Document]  # chunks produced by the retrieve node
    response: str            # final LLM answer produced by the generate node

class RAGSystem:
    """RAG system for puppy-related questions.

    Wraps a retriever and an OpenAI chat model in a two-node LangGraph
    pipeline (retrieve -> generate) and exposes the result both as a
    direct query method (`process_query`) and as a LangChain tool for
    agent use (`create_rag_tool`).
    """

    def __init__(self, retriever, model_name: str = "gpt-4o-mini"):
        """
        Args:
            retriever: Object exposing ``invoke(question) -> list[Document]``
                (e.g. a LangChain vector-store retriever).
            model_name: OpenAI chat model identifier passed to ChatOpenAI.
        """
        self.retriever = retriever
        self.llm = ChatOpenAI(model=model_name)
        self.rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
        self.graph_rag = self._build_graph()

    def _build_graph(self):
        """Builds and compiles the two-node RAG graph (retrieve -> generate)."""

        def retrieve(state):
            # Fetch the documents relevant to the user's question.
            retrieved_docs = self.retriever.invoke(state["question"])
            return {"context": retrieved_docs}

        def generate(state):
            # Concatenate chunk texts, render the prompt, and call the LLM.
            docs_content = "\n\n".join(doc.page_content for doc in state["context"])
            messages = self.rag_prompt.format_messages(
                question=state["question"],
                context=docs_content,
            )
            response = self.llm.invoke(messages)
            return {"response": response.content}

        # add_sequence wires retrieve -> generate; START must still be
        # connected explicitly to the first node before compiling.
        graph_builder = StateGraph(State).add_sequence([retrieve, generate])
        graph_builder.add_edge(START, "retrieve")
        return graph_builder.compile()

    @staticmethod
    def _describe_source(index: int, doc) -> Dict:
        """Builds the source-metadata summary dict for one retrieved chunk.

        Args:
            index: 1-based position of the chunk in the retrieved context.
            doc: A Document-like object with ``metadata`` and ``page_content``.

        Returns:
            Dict with chunk_number, description, source, page, chapter and
            a content preview capped at 100 characters.
        """
        metadata = doc.metadata
        # Extract useful metadata; fall back to placeholders when absent.
        source_name = metadata.get('source', 'Unknown')
        page = metadata.get('page', 'N/A')
        chapter = metadata.get('chapter', '')

        # Include the chapter in the description only when one is present.
        if chapter:
            source_desc = f"Chunk {index} - {source_name} (Chapter: {chapter}, Page: {page})"
        else:
            source_desc = f"Chunk {index} - {source_name} (Page: {page})"

        content = doc.page_content
        return {
            'chunk_number': index,
            'description': source_desc,
            'source': source_name,
            'page': page,
            'chapter': chapter,
            # Truncate long chunks so previews stay readable in logs/UIs.
            'content_preview': content[:100] + "..." if len(content) > 100 else content,
        }

    def process_query(self, question: str) -> Dict:
        """Processes a query through the RAG graph.

        Args:
            question: The user's question.

        Returns:
            Dict with keys ``response`` (LLM answer), ``context`` (retrieved
            Documents), ``sources_info`` (per-chunk metadata summaries) and
            ``total_chunks`` (number of retrieved Documents).
        """
        result = self.graph_rag.invoke({"question": question})

        # One summary entry per retrieved chunk, numbered from 1.
        sources_info = [
            self._describe_source(i, doc)
            for i, doc in enumerate(result["context"], 1)
        ]

        return {
            "response": result["response"],
            "context": result["context"],
            "sources_info": sources_info,
            "total_chunks": len(result["context"]),
        }

    def create_rag_tool(self):
        """Creates a LangChain tool wrapping this RAG system for agent use."""

        # Capture the instance so the decorated function can reach it; the
        # @tool-decorated function cannot take `self`.
        rag_system = self

        @tool
        def ai_rag_tool(question: str) -> Dict:
            """MANDATORY for all questions about puppies, their behavior, education or training.
            This tool accesses a specialized knowledge base on puppies with expert and reliable information.
            Any question regarding puppy care, education, behavior or health MUST be processed by this tool.
            The input must be a complete question."""

            # Invoke the RAG graph via the captured instance.
            result = rag_system.process_query(question)

            # The answer is wrapped as a message so agent frameworks that
            # expect a "messages" key can consume it directly.
            return {
                "messages": [HumanMessage(content=result["response"])],
                "context": result["context"],
                "sources_info": result["sources_info"],
                "total_chunks": result["total_chunks"],
            }

        return ai_rag_tool