from langchain.tools import Tool
from langchain.docstore.document import Document
from sentence_transformers import SentenceTransformer
import torch
import datasets


def load_guest_dataset(top_k: int = 3) -> Tool:
    """Build a LangChain tool that retrieves gala guest information.

    Loads the ``train`` split of the ``agents-course/unit3-invitees`` dataset,
    turns every entry into a ``Document``, embeds the documents with the
    ``all-MiniLM-L6-v2`` sentence-transformers model, and wraps a
    cosine-similarity search over those embeddings in a ``Tool``.

    Args:
        top_k: Maximum number of guest records returned per query. Fewer are
            returned when the dataset holds fewer guests. Defaults to 3.

    Returns:
        A ``Tool`` named ``guest_info_retriever`` whose function maps a query
        string to the text of the best-matching guest records.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")

    # NOTE(review): load_dataset presumably fetches from the Hugging Face Hub
    # on first use, so this call may need network access - confirm.
    guest_dataset = datasets.load_dataset(
        "agents-course/unit3-invitees", split="train"
    )

    # Convert dataset entries into Document objects.
    docs = [
        Document(
            page_content="\n".join([
                f"Name: {guest['name']}",
                f"Relation: {guest['relation']}",
                f"Description: {guest['description']}",
                f"Email: {guest['email']}",
            ]),
            metadata={"name": guest["name"]},
        )
        for guest in guest_dataset
    ]

    # Initialize the sentence-transformers model and embed every document
    # once up front, so each query only has to encode the query string.
    model = SentenceTransformer('all-MiniLM-L6-v2')
    if docs:
        embeddings = model.encode(
            [doc.page_content for doc in docs], convert_to_tensor=True
        )
    else:
        # Nothing to embed; the retriever short-circuits on this sentinel.
        embeddings = None

    def extract_text(query: str) -> str:
        """Retrieves detailed information about gala guests based on their name or relation.

        Args:
            query: Free-text search string.

        Returns:
            The page content of the most similar guest documents, separated
            by blank lines, or a fixed message when nothing can be returned.
        """
        if embeddings is None:
            return "No matching guest information found."

        query_embedding = model.encode(query, convert_to_tensor=True)
        # Make the query an explicit single-row batch so it is compared
        # against every document row along dim=1.
        similarities = torch.nn.functional.cosine_similarity(
            query_embedding.unsqueeze(0), embeddings, dim=1
        )

        # torch.topk raises when k exceeds the number of elements, so clamp
        # k to the number of documents actually available.
        k = min(top_k, len(docs))
        best = torch.topk(similarities, k=k)
        # .tolist() yields plain ints, which index the Python list directly.
        results = [docs[i] for i in best.indices.tolist()]

        if results:
            return "\n\n".join([doc.page_content for doc in results])
        return "No matching guest information found."

    # Create the tool.
    guest_info_tool = Tool(
        name="guest_info_retriever",
        func=extract_text,
        description="Retrieves detailed information about gala guests based on their name or relation.",
    )

    return guest_info_tool