|
from dotenv import load_dotenv |
|
from typing import TypedDict, List, Dict, Any, Optional, Annotated |
|
|
|
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings |
|
from langchain_google_genai import ChatGoogleGenerativeAI |
|
from langchain_groq import ChatGroq |
|
|
|
from langgraph.graph import StateGraph, MessagesState, START, END |
|
from langgraph.graph.message import add_messages |
|
from langchain_core.messages import SystemMessage, HumanMessage, AnyMessage, AIMessage |
|
from langchain_core.messages.ai import subtract_usage |
|
|
|
from langchain.tools import Tool |
|
from langchain_core.tools import tool |
|
from langchain_community.tools.tavily_search import TavilySearchResults |
|
from langchain_community.document_loaders import WikipediaLoader |
|
from langchain_community.document_loaders import ArxivLoader |
|
from langchain_community.retrievers import BM25Retriever |
|
|
|
from langgraph.prebuilt import ToolNode, tools_condition |
|
|
|
from prompts import system_prompt |
|
|
|
|
|
|
|
# Load variables from a local .env file into the process environment at import
# time. Presumably this supplies the API keys for the LLM and search clients
# used below -- confirm against the deployment setup.
load_dotenv()
|
|
|
|
|
|
|
def get_arxiv_url(content: str) -> str:
    """Extract an arXiv identifier from text and format it as an abs URL.

    Scans ``content`` line by line for the first line that starts with
    ``arXiv:`` (ignoring surrounding whitespace), takes the identifier that
    follows the prefix, drops a trailing version suffix such as ``v2`` and
    returns ``https://arxiv.org/abs/<id>``.

    Args:
        content: Text that may contain a line such as
            ``arXiv:2101.00001v2 [cs.AI] 1 Jan 2021``.

    Returns:
        The abstract-page URL for the first identifier found, or the string
        ``"unknown"`` when no line carries a usable identifier.
    """
    prefix = "arXiv:"
    for raw_line in content.split("\n"):
        line = raw_line.strip()
        if not line.startswith(prefix):
            continue

        # Accept both "arXiv:1234.5678" and "arXiv: 1234.5678": drop the
        # prefix first, then take the first whitespace-delimited token.
        tokens = line[len(prefix):].split()
        if not tokens:
            # A bare "arXiv:" carries no identifier; keep scanning instead of
            # returning a URL with an empty id.
            continue
        arxiv_id = tokens[0]

        # Strip only a trailing "v<digits>" version marker. Splitting on the
        # first "v" would truncate old-style ids whose category contains the
        # letter, e.g. "solv-int/9901001v1".
        base, sep, version = arxiv_id.rpartition("v")
        if sep and base and version.isascii() and version.isdigit():
            arxiv_id = base

        return f"https://arxiv.org/abs/{arxiv_id}"
    return "unknown"
|
|
|
|
|
|
|
@tool
def search_wiki(query: str) -> Dict[str, str]:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the model as the tool description.
    #
    # Returns a dict with the single key "wiki_results": either the formatted
    # documents joined by "---" separators, or a "Search error: ..." message
    # when loading or formatting raised.
    print(f" executing search_wiki with query: {query}")
    try:
        pages = WikipediaLoader(query=query, load_max_docs=2).load()
        print(f"Found {len(pages)} documents for query '{query}'")

        rendered = []
        for page in pages:
            source = page.metadata.get("source", "unknown")
            page_no = page.metadata.get("page", "")
            rendered.append(
                f'<Document source="{source}" page="{page_no}"/>\n{page.page_content}\n</Document>'
            )
        wiki_text = "\n\n---\n\n".join(rendered)

        if wiki_text == "":
            # An empty result is reported on stdout but still returned as-is.
            print("Empty search results")
        return {"wiki_results": wiki_text}
    except Exception as exc:
        # Tool boundary: report the failure to the model as text instead of
        # letting the exception propagate.
        print(f"Error in search_wiki: {exc}")
        return {"wiki_results": f"Search error: {str(exc)}"}
|
|
|
|
|
|
|
@tool
def search_web(query: str) -> Dict[str, str]:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query."""
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the model as the tool description.
    #
    # Returns a dict with the single key "web_results": either the formatted
    # hits joined by "---" separators, or a "Search error: ..." message when
    # the search failed.
    print(f" executing search_web with query: {query}")

    try:
        search_docs = TavilySearchResults(max_results=3).run(query)
        print(f"DEBUG: search_docs type: {type(search_docs)}")
        print(f"DEBUG: search_docs content: {search_docs}")

        # The Tavily tool can hand back a plain string (its own error text)
        # rather than a list of result dicts. Iterating that string would call
        # .get() on single characters and replace the real error with
        # "'str' object has no attribute 'get'", so surface it directly.
        if isinstance(search_docs, str):
            print(f"Error in search_web: {search_docs}")
            return {"web_results": f"Search error: {search_docs}"}

        print(f"Found {len(search_docs)} documents for query '{query}'")

        formatted_search_docs = "\n\n---\n\n".join(
            f'<Document source="{doc.get("url", "unknown")}" page="{doc.get("page", "N/A")}"/>\n{doc.get("content", "")}\n</Document>'
            for doc in search_docs
        )
        if not formatted_search_docs:
            # An empty result is reported on stdout but still returned as-is.
            print("Empty search results")
        return {"web_results": formatted_search_docs}
    except Exception as e:
        # Tool boundary: report the failure to the model as text instead of
        # letting the exception propagate.
        print(f"Error in search_web: {e}")
        return {"web_results": f"Search error: {str(e)}"}
|
|
|
|
|
|
|
@tool
def search_arxiv(query: str) -> Dict[str, str]:
    """Search Arxiv for a query and return maximum 3 result.

    Args:
        query: The search query."""
    # NOTE: the docstring above is kept verbatim because the @tool decorator
    # exposes it to the model as the tool description.
    #
    # Returns a dict with the single key "arxiv_results": either the formatted
    # papers joined by "---" separators, or a "Search error: ..." message when
    # loading or formatting raised.
    print(f" executing search_arxiv with query: {query}")
    try:
        papers = ArxivLoader(query=query, load_max_docs=3).load()
        print(f"DEBUG: search_docs type: {type(papers)}")
        print(f"DEBUG: search_docs content: {papers}")
        print(f"Found {len(papers)} documents for query '{query}'")

        rendered = []
        for paper in papers:
            # The source URL is recovered from the paper text itself.
            url = get_arxiv_url(paper.page_content)
            page_no = paper.metadata.get("page", "N/A")
            # Only the first 1000 characters of each paper are passed on.
            excerpt = paper.page_content[:1000]
            rendered.append(
                f'<Document source="{url}" page="{page_no}"/>\n{excerpt}\n</Document>'
            )
        arxiv_text = "\n\n---\n\n".join(rendered)

        if arxiv_text == "":
            # An empty result is reported on stdout but still returned as-is.
            print("Empty search results")
        return {"arxiv_results": arxiv_text}
    except Exception as exc:
        # Tool boundary: report the failure to the model as text instead of
        # letting the exception propagate.
        print(f"Error in search_arxiv: {exc}")
        return {"arxiv_results": f"Search error: {str(exc)}"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# System prompt (imported from prompts.py) wrapped as a chat message.
sys_msg = SystemMessage(content=system_prompt)

# Tools handed to the model via bind_tools() and to the graph's ToolNode.
tools = [search_web, search_wiki, search_arxiv]
|
|
|
|
|
|
|
def build_graph():
    """Build and compile the tool-calling agent graph.

    The graph has two nodes: ``assistant``, which calls the Gemini chat model
    with the search tools bound, and ``tools``, a ``ToolNode`` that executes
    the tool calls the model requests. Control starts at ``assistant``;
    ``tools_condition`` routes to ``tools`` or to ``END``, and ``tools``
    always hands control back to ``assistant``.

    Returns:
        The compiled graph. Invoke it with ``{"messages": [...]}``.
    """
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash-preview-04-17",
        temperature=0,
    )
    print(f"DEBUG: llm object = {llm}")

    llm_with_tools = llm.bind_tools(tools)
    print(f"DEBUG: llm_with_tools object = {llm_with_tools}")

    class AgentState(TypedDict):
        # Conversation history; add_messages is the reducer that merges the
        # messages returned by each node into this list.
        messages: Annotated[list[AnyMessage], add_messages]

    def assistant(state: AgentState):
        """Call the tool-enabled model on the conversation so far."""
        history = state["messages"]
        # The system prompt was previously built (sys_msg) but never sent to
        # the model. Prepend it on every call unless the caller already
        # supplied a system message. It is deliberately not written back into
        # the state, so it is never duplicated across turns.
        if not history or not isinstance(history[0], SystemMessage):
            history = [sys_msg, *history]

        result = llm_with_tools.invoke(history)
        print(f"DEBUG: LLM result = {result}")

        # Fill in zeroed usage metadata when the model response has none.
        # Presumably this keeps later code that reads token counts from
        # failing on None -- confirm whether it is still needed.
        if isinstance(result, AIMessage) and result.usage_metadata is None:
            result.usage_metadata = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

        return {"messages": [result]}

    builder = StateGraph(AgentState)

    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
        {
            # Run the requested tools when the last message has tool calls.
            "tools": "tools",
            # Otherwise the run is finished.
            END: END,
        },
    )
    # Tool output always goes back to the model for the next step.
    builder.add_edge("tools", "assistant")

    return builder.compile()
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: ask one fixed question and print every message of the
    # resulting conversation.
    question = "latest research on quantum computing"
    graph = build_graph()
    initial_messages = [HumanMessage(content=question)]
    print(f"Running graph with question: {question}")
    final_state = graph.invoke({"messages": initial_messages})
    print("Graph execution finished. Messages:")
    for message in final_state["messages"]:
        message.pretty_print()
|
|