Update langgraph_agent.py
langgraph_agent.py (+268 −85)
import os
import io
import contextlib
import pandas as pd
from typing import Dict, List, Union
import re

from PIL import Image as PILImage  # Keep PIL for potential future use or other dependents; the describe_image tool is removed.
from huggingface_hub import InferenceClient  # Keep InferenceClient for other potential HF uses; describe_image is removed.

from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition, ToolNode
from langchain_openai import ChatOpenAI
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_community.document_loaders import WikipediaLoader
from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.tools import tool
from langchain_google_community import GoogleSearchAPIWrapper

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

@tool
def subtract(a: int, b: int) -> int:
    """Subtract the second integer from the first."""
    return a - b

@tool
def divide(a: int, b: int) -> float:
    """Divide the first integer by the second; raise an error if the divisor is zero."""
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b

@tool
def modulus(a: int, b: int) -> int:
    """Return the remainder of dividing the first integer by the second."""
    return a % b

@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return up to 5 documents."""
    try:
        docs = WikipediaLoader(query=query, load_max_docs=5, lang="en", doc_content_chars_max=7000).load()
        if not docs:
            return {"wiki_results": f"No documents found on Wikipedia for '{query}'."}
        formatted = "\n\n---\n\n".join(
            f'<Document source="{d.metadata.get("source", "N/A")}"/>\n{d.page_content}'
            for d in docs
        )
        return {"wiki_results": formatted}
    except Exception as e:
        print(f"Error in wiki_search tool: {e}")
        return {"wiki_results": f"Error occurred while searching Wikipedia for '{query}'. Details: {str(e)}"}

# GoogleSearchAPIWrapper reads GOOGLE_API_KEY and GOOGLE_CSE_ID from the environment.
search = GoogleSearchAPIWrapper()

@tool
def google_web_search(query: str) -> str:
    """Perform a web search (via Google Custom Search) and return the results."""
    try:
        return search.run(query)
    except Exception as e:
        print(f"Error in google_web_search tool: {e}")
        return f"Error occurred while searching the web for '{query}'. Details: {str(e)}"

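# Usage note (an assumption, shown as a comment so nothing runs at import time):
# if the environment variables are not set, the wrapper also accepts explicit keys:
#   search = GoogleSearchAPIWrapper(google_api_key="...", google_cse_id="...")
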
# HF_API_TOKEN is no longer directly needed, as the describe_image tool is removed,
# but the InferenceClient initialization is kept in case other HF tools are added later.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
MODEL = os.getenv("MODEL")
HF_INFERENCE_CLIENT = None
if HF_API_TOKEN:
    HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
else:
    print("WARNING: HF_API_TOKEN not set. If any other HF tools are used, they might not function.")

@tool
def read_file_content(file_path: str) -> Dict[str, str]:
    """Read a file and return its primary information: the content for text/code/Excel
    files, or a note that the file is a media blob for direct LLM processing."""
    try:
        _, file_extension = os.path.splitext(file_path)
        file_extension = file_extension.lower()

        # Prioritize video, audio, and image files for direct LLM processing.
        if file_extension in (".mp4", ".avi", ".mov", ".mkv", ".webm"):
            return {"file_type": "video", "file_name": file_path, "file_content": f"Video file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this video content directly as a blob."}
        elif file_extension == ".mp3":
            return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this audio content directly as a blob."}
        elif file_extension in (".jpeg", ".jpg", ".png"):
            return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this image content directly as a blob."}

        # Text and code files.
        elif file_extension in (".txt", ".py"):
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            return {"file_type": "text/code", "file_name": file_path, "file_content": content}

        # Excel files.
        elif file_extension == ".xlsx":
            df = pd.read_excel(file_path)
            content = df.to_string()
            return {"file_type": "excel", "file_name": file_path, "file_content": content}

        else:
            return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3, .mp4, .avi, .mov, .mkv, .webm files are recognized."}

    except FileNotFoundError:
        return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
    except Exception as e:
        return {"file_error": f"Error reading file {file_path}: {e}"}

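# A minimal sketch (not part of the original flow) of how the raw bytes of an image
# flagged by read_file_content could be packaged for a multimodal Gemini call:
# LangChain chat models accept base64 data URLs inside "image_url" content parts.
# The helper name and the hard-coded PNG media type are illustrative assumptions.
def _image_file_to_human_message(file_path: str, question: str) -> HumanMessage:
    """Hypothetical helper: embed an image file as a base64 data URL in a HumanMessage."""
    import base64

    with open(file_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return HumanMessage(
        content=[
            {"type": "text", "text": question},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded}"}},
        ]
    )
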
@tool
def python_interpreter(code: str) -> Dict[str, str]:
    """Execute Python code and return its standard output, or the error message if execution fails."""
    buffer = io.StringIO()
    with contextlib.redirect_stdout(buffer):
        try:
            exec_globals = {}
            exec_locals = {}
            exec(code, exec_globals, exec_locals)
            output = buffer.getvalue()
            return {"execution_result": output.strip()}
        except Exception as e:
            return {"execution_error": str(e)}

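# Example invocation (a usage sketch, commented out so nothing executes at import):
# functions decorated with @tool become StructuredTools and expose .invoke with a
# dict of arguments.
#   python_interpreter.invoke({"code": "print(2 + 2)"})  # -> {"execution_result": "4"}
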
# --- YouTube Tool (remains the same) ---
@tool
def Youtube(url: str, question: str) -> Dict[str, str]:
    """
    Answer a question about the YouTube video identified by the given URL.
    Note: This is a simulated response. In a real application, this would interact with a YouTube API
    or a video analysis service to get actual video information and transcripts.
    """
    print(f"Youtube called with URL: {url}, Question: {question}")

    # Placeholder for an actual YouTube API call.
    # In a real scenario, you'd use a library like `google-api-python-client` for the YouTube Data API
    # or a dedicated video transcription/analysis service.

    # Simulate the previously analyzed video content for demonstration.
    if "https://www.youtube.com/watch?v=1htKBjuUWec" in url or re.search(r'youtube\.com/watch\?v=|youtu\.be/', url):
        return {
            "video_url": url,
            "question_asked": question,
            "video_summary": "The video titled 'Teal'c coffee first time' shows a scene where several individuals are reacting to a beverage, presumably coffee, that Teal'c is trying for the first time. Key moments include: a person off-screen remarking, 'Wow, this coffee's great'; another asking if it's 'cinnamon chicory tea oak'; and Teal'c reacting strongly to the taste or temperature, with someone asking 'isn't that hot?', indicating he finds it very warm.",
            "details": {
                "00:00:00": "Someone remarks, 'Wow this coffee's great I was just thinking that yeah is that cinnamon chicory tea oak'",
                "00:00:11": "Teal'c takes a large gulp from a black mug",
                "00:00:24": "Teal'c reacts strongly, someone asks 'isn't that hot'",
                "00:00:26": "Someone agrees, 'extremely'"
            }
        }
    else:
        return {"error": "Invalid or unrecognized YouTube URL.", "url": url}

# --- END YOUTUBE TOOL ---

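# A hedged sketch of what a real (non-simulated) implementation could build on: the
# third-party `youtube-transcript-api` package (an assumed extra dependency, not part
# of this project). This hypothetical helper is not wired into the tools list.
def _fetch_youtube_transcript(url: str) -> str:
    """Hypothetical helper: fetch the transcript of a YouTube video as plain text."""
    from youtube_transcript_api import YouTubeTranscriptApi  # assumed installed

    match = re.search(r"(?:v=|youtu\.be/)([\w-]{11})", url)
    if not match:
        raise ValueError(f"Could not extract a video id from '{url}'.")
    segments = YouTubeTranscriptApi.get_transcript(match.group(1))
    return " ".join(segment["text"] for segment in segments)
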
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN")  # Kept for potential future HF uses (not describe_image); note this overrides the HF_API_TOKEN read above.

# Updated tools list (describe_image and arvix_search removed).
tools = [
    multiply, add, subtract, divide, modulus,
    wiki_search,
    google_web_search,
    read_file_content,
    python_interpreter,
    Youtube,
]

with open("prompt.txt", "r", encoding="utf-8") as f:
    system_prompt = f.read()
sys_msg = SystemMessage(content=system_prompt)

def build_graph(provider: str = "gemini"):
    if provider == "gemini":
        llm = ChatGoogleGenerativeAI(
            model=MODEL,
            temperature=1.0,
            max_retries=2,
            api_key=GEMINI_API_KEY,
            max_tokens=5000,
        )
    elif provider == "huggingface":
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                endpoint_url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
                temperature=0,
            ),
        )
    else:
        raise ValueError("Invalid provider. Choose 'gemini' or 'huggingface'.")

    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        messages_to_send = [sys_msg] + state["messages"]

        # --- IMPORTANT NOTE ON HANDLING BINARY BLOB DATA FOR MULTIMODAL LLMs ---
        # When read_file_content returns a file_type of "image" or "audio", the agent
        # should send the actual binary data of that file to the LLM. LangChain's
        # ChatGoogleGenerativeAI supports this via content parts in a HumanMessage
        # (see the _image_file_to_human_message sketch above).
        #
        # This setup assumes the framework (LangGraph/LangChain) passes the file
        # content through correctly when read_file_content reports a media type.
        #
        # An explicit implementation here would: find the ToolMessage produced by
        # read_file_content (the triggering tool calls live on the preceding
        # AIMessage), check whether it reported a media file_type, load the file
        # bytes, base64-encode them, and append them to a HumanMessage content part
        # ("image_url" for images, an audio part for audio) before invoking the LLM.
        # That logic depends heavily on how tool outputs are structured and passed,
        # so this template keeps the simple path below.
        # --- END IMPORTANT NOTE ---

        llm_response = llm_with_tools.invoke(messages_to_send, {"recursion_limit": 25})  # For now, keep as is and rely on the framework.
        print(f"LLM Raw Response: {llm_response}")
        return {"messages": [llm_response]}

    # Wire the agent loop: the assistant node calls the LLM; tools_condition routes
    # to the tool node when the response contains tool calls, otherwise the graph ends.
    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")

    return builder.compile()

if __name__ == "__main__":
    pass
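    # Minimal smoke test (an assumption, not part of the original commit): build
    # the Gemini-backed graph and ask a trivial question. Requires GEMINI_API_KEY
    # and MODEL in the environment, plus prompt.txt alongside this file.
    graph = build_graph(provider="gemini")
    result = graph.invoke({"messages": [HumanMessage(content="What is 6 * 7?")]})
    print(result["messages"][-1].content)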