HF_GAIA_AGENT

Runtime error

File size: 4,859 Bytes

from typing import Any, Optional, List, Dict
from smolagents.tools import Tool
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import json

class RAGSearchTool(Tool):
    """Agent tool that stores text in a FAISS index and searches it semantically.

    Text passed to the ``add`` action is split into overlapping chunks,
    embedded with the ``thenlper/gte-small`` model and written to a FAISS
    index that is persisted under ``persist_dir``. The ``search`` action
    returns the three closest chunks as a JSON string, and ``clear`` removes
    the persisted index and resets the in-memory store.

    Every action returns a human-readable string; failures are reported as
    strings starting with ``"Error"`` rather than raised, so the calling
    agent always receives a usable tool result.
    """

    name = "rag_search"
    description = "A RAG (Retrieval Augmented Generation) tool that can store documents and perform semantic search using FAISS and GTE-small embeddings."
    inputs = {
        'action': {'type': 'string', 'description': 'The action to perform: "add" to add documents, "search" to search existing documents, or "clear" to clear the database'},
        'content': {'type': 'string', 'description': 'For "add" action: the text content to add to the database. For "search" action: the query to search for. For "clear" action: can be empty.'},
        'metadata': {'type': 'object', 'description': 'Optional metadata for the documents when adding them', 'nullable': True}
    }
    output_type = "string"

    def __init__(self, persist_dir="rag_db", **kwargs):
        """Load the embedding model and any index already saved on disk.

        Args:
            persist_dir: Directory the FAISS index is loaded from and saved to.
            **kwargs: Accepted for call-site compatibility; not used here.

        Raises:
            ImportError: If the vector-store dependencies cannot be imported.
        """
        super().__init__()
        self.persist_dir = persist_dir
        try:
            from langchain_community.embeddings import HuggingFaceEmbeddings
            from langchain_community.vectorstores import FAISS
        except ImportError as e:
            raise ImportError(
                "You must install packages `faiss-cpu` and `sentence-transformers` to run this tool: "
                "run `pip install faiss-cpu sentence-transformers`."
            ) from e

        # Embedding model: runs on CPU and emits normalized vectors.
        self.embeddings = HuggingFaceEmbeddings(
            model_name="thenlper/gte-small",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True}
        )

        # Splitter used to break added text into 500-character chunks that
        # overlap by 50 characters.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
            length_function=len,
        )

        # Reuse a previously persisted index when one exists; otherwise the
        # store is created lazily by the first "add".
        if os.path.exists(os.path.join(persist_dir, "index.faiss")):
            # SECURITY NOTE(review): allow_dangerous_deserialization=True makes
            # the loader unpickle the stored docstore. Only point persist_dir
            # at a directory this application wrote itself.
            self.db = FAISS.load_local(persist_dir, self.embeddings, allow_dangerous_deserialization=True)
        else:
            self.db = None

        self.is_initialized = True

    def forward(self, action: str, content: str, metadata: Optional[Dict] = None) -> str:
        """Run one tool action and return its result as a string.

        Args:
            action: ``"add"``, ``"search"`` or ``"clear"``.
            content: Text to store for ``"add"``; the query for ``"search"``;
                ignored for ``"clear"``.
            metadata: Optional metadata attached to every chunk created by
                ``"add"``.

        Returns:
            A status message, a JSON string of search hits, or an error
            message. Exceptions raised by an action are converted into an
            error message instead of propagating.
        """
        try:
            if action == "add":
                return self._add(content, metadata)
            if action == "search":
                return self._search(content)
            if action == "clear":
                return self._clear()
            return f"Error: Invalid action '{action}'. Valid actions are 'add', 'search', or 'clear'."
        except Exception as e:
            # Tool boundary: report the failure to the agent as text.
            return f"Error performing {action} operation: {str(e)}"

    def _add(self, content, metadata):
        """Chunk ``content``, index the chunks and persist the store."""
        if not content:
            return "Error: No content provided to add to the database."

        chunks = self.text_splitter.split_text(content)
        # Splitting can yield nothing (e.g. whitespace-only content). There is
        # then nothing to embed, and building a new index from an empty list
        # would fail, so report it the same way as missing content.
        if not chunks:
            return "Error: No content provided to add to the database."

        # Give each chunk its own copy of the metadata so a later mutation of
        # one stored document's metadata cannot leak into the others.
        metadatas = [dict(metadata) for _ in chunks] if metadata else None

        if self.db is None:
            # First insertion creates the index.
            self.db = FAISS.from_texts(chunks, self.embeddings, metadatas=metadatas)
        else:
            self.db.add_texts(chunks, metadatas=metadatas)

        # Persist after every insertion so the index survives restarts.
        self.db.save_local(self.persist_dir)

        return f"Successfully added {len(chunks)} chunks to the database."

    def _search(self, content):
        """Return the three chunks closest to the query as a JSON string."""
        if not content:
            return "Error: No search query provided."
        if self.db is None:
            return "Error: No documents have been added to the database yet."

        hits = self.db.similarity_search_with_score(content, k=3)

        # NOTE(review): per the LangChain FAISS docs the returned score is a
        # distance by default (lower means more similar), despite the
        # 'similarity_score' key name, which is kept for compatibility.
        formatted_results = [
            {
                'content': doc.page_content,
                'metadata': doc.metadata,
                # Plain float so json.dumps can serialize a numpy scalar.
                'similarity_score': float(score),
            }
            for doc, score in hits
        ]
        return json.dumps(formatted_results, indent=2)

    def _clear(self):
        """Delete the persisted index directory and drop the in-memory store."""
        if os.path.exists(self.persist_dir):
            import shutil
            # Removes the whole persist_dir tree, not only the index files.
            shutil.rmtree(self.persist_dir)
        self.db = None
        return "Database cleared successfully."