philincloud committed on
Commit
6bef82e
·
verified ·
1 Parent(s): 60c1cc4

Update langgraph_agent.py

Browse files
Files changed (1) hide show
  1. langgraph_agent.py +239 -240
langgraph_agent.py CHANGED
@@ -1,269 +1,268 @@
1
- import os
2
- import io
3
- import contextlib
4
- import pandas as pd
5
- from typing import Dict, List, Union
6
- import re
7
 
8
- from PIL import Image as PILImage # Keep PIL for potential future use or if other parts depend on it, but describe_image is removed.
9
- from huggingface_hub import InferenceClient # Keep InferenceClient for other potential HF uses, but describe_image is removed.
10
 
11
- from langgraph.graph import START, StateGraph, MessagesState
12
- from langgraph.prebuilt import tools_condition, ToolNode
13
- from langchain_openai import ChatOpenAI
14
- from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
15
- from langchain_community.document_loaders import WikipediaLoader
16
- from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage
17
- from langchain_google_genai import ChatGoogleGenerativeAI
18
- from langchain_core.tools import tool
19
- from langchain_google_community import GoogleSearchAPIWrapper
20
 
21
- @tool
22
- def multiply(a: int, b: int) -> int:
23
- """Multiply two integers."""
24
- return a * b
25
 
26
- @tool
27
- def add(a: int, b: int) -> int:
28
- """Add two integers."""
29
- return a + b
30
 
31
- @tool
32
- def subtract(a: int, b: int) -> int:
33
- """Subtract the second integer from the first."""
34
- return a - b
35
 
36
- @tool
37
- def divide(a: int, b: int) -> float:
38
- """Divide first integer by second; error if divisor is zero."""
39
- if b == 0:
40
- raise ValueError("Cannot divide by zero.")
41
- return a / b
42
 
43
- @tool
44
- def modulus(a: int, b: int) -> int:
45
- """Return the remainder of dividing first integer by second."""
46
- return a % b
47
 
48
- @tool
49
- def wiki_search(query: str) -> dict:
50
- """Search Wikipedia for a query and return up to 2 documents."""
51
- try:
52
- docs = WikipediaLoader(query=query, load_max_docs=5, lang="en", doc_content_chars_max=7000).load()
53
- if not docs:
54
- return {"wiki_results": f"No documents found on Wikipedia for '{query}'."}
55
- formatted = "\n\n---\n\n".join(
56
- f'<Document source="{d.metadata.get("source", "N/A")}"/>\n{d.page_content}'
57
- for d in docs
58
- )
59
- return {"wiki_results": formatted}
60
- except Exception as e:
61
- print(f"Error in wiki_search tool: {e}")
62
- return {"wiki_results": f"Error occurred while searching Wikipedia for '{query}'. Details: {str(e)}"}
63
 
64
- search = GoogleSearchAPIWrapper()
65
 
66
- @tool
67
- def google_web_search(query: str) -> str:
68
- """Perform a web search (via Google Custom Search) and return results."""
69
- try:
70
- return search.run(query)
71
- except Exception as e:
72
- print(f"Error in google_web_search tool: {e}")
73
- return f"Error occurred while searching the web for '{query}'. Details: {str(e)}"
74
 
75
 
76
- # HF_API_TOKEN is no longer directly needed for describe_image as that tool is removed.
77
- # But keeping InferenceClient initialization for completeness if other HF tools might be added later.
78
- HF_API_TOKEN = os.getenv("HF_API_TOKEN")
79
- MODEL = os.getenv("MODEL")
80
- HF_INFERENCE_CLIENT = None
81
- if HF_API_TOKEN:
82
- HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
83
- else:
84
- print("WARNING: HF_API_TOKEN not set. If any other HF tools are used, they might not function.")
85
 
86
- @tool
87
- def read_file_content(file_path: str) -> Dict[str, str]:
88
- """Reads the content of a file and returns its primary information. For text/code/excel, returns content. For media, indicates it's a blob for LLM processing."""
89
- try:
90
- _, file_extension = os.path.splitext(file_path)
91
- file_extension = file_extension.lower()
92
 
93
- # Prioritize handling of video, audio, and image files for direct LLM processing
94
- if file_extension in (".mp4", ".avi", ".mov", ".mkv", ".webm"):
95
- return {"file_type": "video", "file_name": file_path, "file_content": f"Video file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this video content directly as a blob."}
96
- elif file_extension == ".mp3":
97
- return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this audio content directly as a blob."}
98
- elif file_extension in (".jpeg", ".jpg", ".png"):
99
- return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this image content directly as a blob."}
100
-
101
- # Handle text and code files
102
- elif file_extension in (".txt", ".py"):
103
- with open(file_path, "r", encoding="utf-8") as f:
104
- content = f.read()
105
- return {"file_type": "text/code", "file_name": file_path, "file_content": content}
106
-
107
- # Handle Excel files
108
- elif file_extension == ".xlsx":
109
- df = pd.read_excel(file_path)
110
- content = df.to_string()
111
- return {"file_type": "excel", "file_name": file_path, "file_content": content}
112
-
113
- else:
114
- return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3, .mp4, .avi, .mov, .mkv, .webm files are recognized."}
115
-
116
- except FileNotFoundError:
117
- return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
118
- except Exception as e:
119
- return {"file_error": f"Error reading file {file_path}: {e}"}
120
 
121
 
122
- @tool
123
- def python_interpreter(code: str) -> Dict[str, str]:
124
- """Executes Python code and returns its standard output. If there's an error during execution, it returns the error message."""
125
- old_stdout = io.StringIO()
126
- with contextlib.redirect_stdout(old_stdout):
127
- try:
128
- exec_globals = {}
129
- exec_locals = {}
130
- exec(code, exec_globals, exec_locals)
131
- output = old_stdout.getvalue()
132
- return {"execution_result": output.strip()}
133
- except Exception as e:
134
- return {"execution_error": str(e)}
135
 
136
- # --- Youtube Tool (Remains the same) ---
137
- @tool
138
- def Youtube(url: str, question: str) -> Dict[str, str]:
139
- """
140
- Tells about the YouTube video identified by the given URL, answering a question about it.
141
- Note: This is a simulated response. In a real application, this would interact with a YouTube API
142
- or a video analysis service to get actual video information and transcripts.
143
- """
144
- print(f"Youtube called with URL: {url}, Question: {question}")
145
-
146
- # Placeholder for actual YouTube API call.
147
- # In a real scenario, you'd use a library like `google-api-python-client` for YouTube Data API
148
- # or a dedicated video transcription/analysis service.
149
 
150
- # Simulating the previous video content for demonstration
151
- if "https://www.youtube.com/watch?v=1htKBjuUWec" in url or re.search(r'youtube\.com/watch\?v=|youtu\.be/', url):
152
- return {
153
- "video_url": url,
154
- "question_asked": question,
155
- "video_summary": "The video titled 'Teal'c coffee first time' shows a scene where several individuals are reacting to a beverage, presumably coffee, that Teal'c is trying for the first time. Key moments include: A person off-screen remarking, 'Wow this coffee's great'; another asking if it's 'cinnamon chicory tea oak'; and Teal'c reacting strongly to the taste or temperature, stating 'isn't that hot' indicating he finds it very warm.",
156
- "details": {
157
- "00:00:00": "Someone remarks, 'Wow this coffee's great I was just thinking that yeah is that cinnamon chicory tea oak'",
158
- "00:00:11": "Teal'c takes a large gulp from a black mug",
159
- "00:00:24": "Teal'c reacts strongly, someone asks 'isn't that hot'",
160
- "00:00:26": "Someone agrees, 'extremely'"
161
- }
162
- }
163
- else:
164
- return {"error": "Invalid or unrecognized YouTube URL.", "url": url}
165
 
166
- # --- END YOUTUBE TOOL ---
167
 
168
- API_KEY = os.getenv("GEMINI_API_KEY")
169
- HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN") # Kept for potential future HF uses, but not for describe_image
170
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
171
 
172
- # Update the tools list (removed describe_image and arvix_search)
173
- tools = [
174
- multiply, add, subtract, divide, modulus,
175
- wiki_search,
176
- google_web_search,
177
- read_file_content,
178
- python_interpreter,
179
- Youtube,
180
- ]
181
 
182
- with open("prompt.txt", "r", encoding="utf-8") as f:
183
- system_prompt = f.read()
184
- sys_msg = SystemMessage(content=system_prompt)
185
 
186
- def build_graph(provider: str = "gemini"):
187
- if provider == "gemini":
188
- llm = ChatGoogleGenerativeAI(
189
- model=MODEL,
190
- temperature=1.0,
191
- max_retries=2,
192
- api_key=GEMINI_API_KEY,
193
- max_tokens=5000
194
- )
195
- elif provider == "huggingface":
196
- llm = ChatHuggingFace(
197
- llm=HuggingFaceEndpoint(
198
- url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
199
- ),
200
- temperature=0,
201
- )
202
- else:
203
- raise ValueError("Invalid provider. Choose 'gemini' or 'huggingface'.")
204
 
205
- llm_with_tools = llm.bind_tools(tools)
206
 
207
- def assistant(state: MessagesState):
208
- messages_to_send = [sys_msg] + state["messages"]
209
-
210
- # --- IMPORTANT NOTE ON HANDLING BINARY BLOB DATA FOR MULTIMODAL LLMs ---
211
- # When read_file_content returns a file_type of "image" or "audio",
212
- # the agent should be able to send the actual binary data of that file
213
- # as part of the message to the LLM. LangChain's ChatGoogleGenerativeAI
214
- # supports this via content parts in HumanMessage.
215
- #
216
- # For this setup, we're assuming the framework (LangGraph/LangChain)
217
- # will correctly handle passing the actual file content when read_file_content
218
- # is called and its output indicates a media type.
219
- #
220
- # A more explicit implementation in the assistant node might look like this
221
- # for real binary file handling if the framework doesn't do it implicitly:
222
- #
223
- # new_messages_to_send = []
224
- # for msg in state["messages"]:
225
- # if isinstance(msg, HumanMessage) and msg.tool_calls:
226
- # # If a tool call to read_file_content happened in the previous turn
227
- # # and it returned a media type, we might need to get the file data
228
- # # and append it to the message parts. This logic is complex and
229
- # # depends heavily on how tool outputs are structured and passed.
230
- # # For simplicity in this template, we assume direct handling by the LLM
231
- # # if the tool output indicates media, and the file itself is accessible
232
- # # via the environment.
233
- # pass # Keep original message, tool output will follow
234
- # elif isinstance(msg, HumanMessage) and any(part.get("file_type") in ["image", "audio"] for part in msg.content if isinstance(part, dict)):
235
- # # This is a conceptual example for if the HumanMessage itself contains file data
236
- # # or a reference that needs to be resolved into data.
237
- # # You'd need to load the actual file bytes here.
238
- # # e.g., if msg.content was like: [{"type": "file_reference", "file_path": "image.png"}]
239
- # # with open(msg.content[0]["file_path"], "rb") as f:
240
- # # file_bytes = f.read()
241
- # # new_messages_to_send.append(
242
- # # HumanMessage(
243
- # # content=[
244
- # # {"type": "text", "text": "Here is the media content:"},
245
- # # {"type": "image_data" if "image" in msg.content[0]["file_type"] else "audio_data", "data": base64.b64encode(file_bytes).decode('utf-8'), "media_type": "image/png" if "image" in msg.content[0]["file_type"] else "audio/mp3"}
246
- # # ]
247
- # # )
248
- # # )
249
- # else:
250
- # new_messages_to_send.append(msg)
251
- # llm_response = llm_with_tools.invoke([sys_msg] + new_messages_to_send)
252
- # --- END IMPORTANT NOTE ---
253
 
254
- llm_response = llm_with_tools.invoke(messages_to_send) # For now, keep as is, rely on framework
255
- print(f"LLM Raw Response: {llm_response}")
256
- return {"messages": [llm_response]}
257
 
258
- builder = StateGraph(MessagesState)
259
- builder.add_node("assistant", assistant)
260
- builder.add_node("tools", ToolNode(tools))
261
- builder.add_edge(START, "assistant")
262
- builder.add_conditional_edges("assistant", tools_condition)
263
- builder.add_edge("tools", "assistant")
264
 
265
- return builder.compile()
266
 
267
- if __name__ == "__main__":
268
- pass
269
-
 
1
+ import os
2
+ import io
3
+ import contextlib
4
+ import pandas as pd
5
+ from typing import Dict, List, Union
6
+ import re
7
 
8
+ from PIL import Image as PILImage # Keep PIL for potential future use or if other parts depend on it, but describe_image is removed.
9
+ from huggingface_hub import InferenceClient # Keep InferenceClient for other potential HF uses, but describe_image is removed.
10
 
11
+ from langgraph.graph import START, StateGraph, MessagesState
12
+ from langgraph.prebuilt import tools_condition, ToolNode
13
+ from langchain_openai import ChatOpenAI
14
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
15
+ from langchain_community.document_loaders import WikipediaLoader
16
+ from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage
17
+ from langchain_google_genai import ChatGoogleGenerativeAI
18
+ from langchain_core.tools import tool
19
+ from langchain_google_community import GoogleSearchAPIWrapper
20
 
21
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    # The docstring is kept verbatim: LangChain's @tool uses it as the
    # description the model sees for this tool.
    product = a * b
    return product
25
 
26
@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    # The docstring is kept verbatim: LangChain's @tool uses it as the
    # description the model sees for this tool.
    total = a + b
    return total
30
 
31
@tool
def subtract(a: int, b: int) -> int:
    """Subtract the second integer from the first."""
    # The docstring is kept verbatim: LangChain's @tool uses it as the
    # description the model sees for this tool.
    difference = a - b
    return difference
35
 
36
@tool
def divide(a: int, b: int) -> float:
    """Divide first integer by second; error if divisor is zero."""
    # Success path first; a zero divisor falls through to the explicit error
    # so the caller gets a readable message rather than ZeroDivisionError.
    if b != 0:
        return a / b
    raise ValueError("Cannot divide by zero.")
42
 
43
@tool
def modulus(a: int, b: int) -> int:
    """Return the remainder of dividing first integer by second; error if divisor is zero.

    Args:
        a: Dividend.
        b: Divisor; must not be zero.

    Returns:
        ``a % b`` (Python semantics: the result takes the sign of ``b``).

    Raises:
        ValueError: If ``b`` is zero.
    """
    # Mirror `divide`: report a zero divisor with the same kind of explicit,
    # readable error instead of letting a raw ZeroDivisionError escape.
    if b == 0:
        raise ValueError("Cannot take modulus by zero.")
    return a % b
47
 
48
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return up to 5 documents.

    Args:
        query: Search terms passed to Wikipedia.

    Returns:
        A dict with a single "wiki_results" key. Its value is the matching
        documents joined into one string (each prefixed by a
        ``<Document source=.../>`` line), or a plain-text message when nothing
        was found or the lookup failed.
    """
    # Limits handed to the loader. The docstring above states the same document
    # cap; it previously advertised 2 while the code requested 5.
    max_docs = 5
    max_chars_per_doc = 7000
    try:
        docs = WikipediaLoader(
            query=query,
            load_max_docs=max_docs,
            lang="en",
            doc_content_chars_max=max_chars_per_doc,
        ).load()
        if not docs:
            return {"wiki_results": f"No documents found on Wikipedia for '{query}'."}
        formatted = "\n\n---\n\n".join(
            f'<Document source="{d.metadata.get("source", "N/A")}"/>\n{d.page_content}'
            for d in docs
        )
        return {"wiki_results": formatted}
    except Exception as e:
        # Best effort: a failed lookup is reported back as text under the same
        # key rather than raised.
        print(f"Error in wiki_search tool: {e}")
        return {"wiki_results": f"Error occurred while searching Wikipedia for '{query}'. Details: {str(e)}"}
63
 
64
# Single search client shared by every call to google_web_search; it is
# constructed once, when this module is imported.
# NOTE(review): presumably the wrapper picks up its Google credentials from the
# environment at construction time - confirm against the library docs.
search = GoogleSearchAPIWrapper()


@tool
def google_web_search(query: str) -> str:
    """Perform a web search (via Google Custom Search) and return results."""
    # The docstring is kept verbatim: LangChain's @tool uses it as the
    # description the model sees for this tool.
    try:
        results = search.run(query)
    except Exception as err:
        # Best effort: log the failure and hand the model a readable message
        # instead of raising.
        print(f"Error in google_web_search tool: {err}")
        return f"Error occurred while searching the web for '{query}'. Details: {err!s}"
    return results
74
 
75
 
76
# Environment-driven configuration.
# The describe_image tool is gone, so none of the tools defined in this file
# use the Hugging Face inference client; it is still created when a token is
# available so that future HF-backed tools have it ready.
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
# Model name handed to the Gemini chat model in build_graph().
MODEL = os.getenv("MODEL")
HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN) if HF_API_TOKEN else None
if HF_INFERENCE_CLIENT is None:
    print("WARNING: HF_API_TOKEN not set. If any other HF tools are used, they might not function.")
85
 
86
@tool
def read_file_content(file_path: str) -> Dict[str, str]:
    """Reads the content of a file and returns its primary information. For text/code/excel, returns content. For media, only identifies the file type.

    Args:
        file_path: Path of the file to inspect. The type is decided from the
            file extension (case-insensitive).

    Returns:
        On success, a dict with "file_type", "file_name" and "file_content".
        For .txt/.py the content is the file text, for .xlsx it is the sheet
        rendered with ``DataFrame.to_string()``. For video/audio/image files
        and unsupported extensions the content is a descriptive note only; no
        media bytes are returned.
        On failure, a dict with a single "file_error" message.
    """
    media_extensions = {
        "video": (".mp4", ".avi", ".mov", ".mkv", ".webm"),
        "audio": (".mp3",),
        "image": (".jpeg", ".jpg", ".png"),
    }

    try:
        # The media and unsupported branches never open the file, so without
        # this check a missing path would be reported as "detected". Raising
        # here routes every missing file to the FileNotFoundError handler.
        if not os.path.exists(file_path):
            raise FileNotFoundError(file_path)

        file_extension = os.path.splitext(file_path)[1].lower()

        # Media files are only classified; their bytes are not read here.
        for kind, extensions in media_extensions.items():
            if file_extension in extensions:
                return {
                    "file_type": kind,
                    "file_name": file_path,
                    "file_content": f"{kind.capitalize()} file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this {kind} content directly as a blob.",
                }

        # Text and code files are returned verbatim.
        if file_extension in (".txt", ".py"):
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            return {"file_type": "text/code", "file_name": file_path, "file_content": content}

        # Excel workbooks are rendered as a plain-text table.
        if file_extension == ".xlsx":
            content = pd.read_excel(file_path).to_string()
            return {"file_type": "excel", "file_name": file_path, "file_content": content}

        return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3, .mp4, .avi, .mov, .mkv, .webm files are recognized."}

    except FileNotFoundError:
        return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
    except Exception as e:
        # Decoding errors, unreadable workbooks, permission problems, etc.
        return {"file_error": f"Error reading file {file_path}: {e}"}
120
 
121
 
122
@tool
def python_interpreter(code: str) -> Dict[str, str]:
    """Executes Python code and returns its standard output. If there's an error during execution, it returns the error message.

    Args:
        code: Python source to run. Only what it prints to stdout is returned,
            so the snippet must ``print`` the values it wants reported.

    Returns:
        ``{"execution_result": <stripped stdout>}`` on success, or
        ``{"execution_error": "<ExceptionType>: <message>"}`` if the snippet
        raised or tried to exit.
    """
    # SECURITY: exec() runs the snippet inside this process with its full
    # privileges (filesystem, network, environment variables). Nothing here
    # sandboxes it, so it must only ever be fed trusted input.
    captured = io.StringIO()

    # One dict serves as both globals and locals, which gives the snippet
    # module-like scoping. With two separate dicts, top-level imports, functions
    # and variables are stored in the locals dict, where functions and
    # comprehensions defined by the snippet cannot see them (NameError).
    # "__name__" is set so script-style `if __name__ == "__main__":` blocks run.
    namespace = {"__name__": "__main__"}

    try:
        with contextlib.redirect_stdout(captured):
            exec(code, namespace)
    except (Exception, SystemExit) as exc:
        # SystemExit is caught so a snippet calling exit()/sys.exit() cannot
        # terminate the agent. The exception type is included because some
        # exceptions (e.g. a bare AssertionError) have an empty message.
        return {"execution_error": f"{type(exc).__name__}: {exc}"}
    return {"execution_result": captured.getvalue().strip()}
135
 
136
# --- Youtube Tool (simulated) ---
@tool
def Youtube(url: str, question: str) -> Dict[str, Union[str, Dict[str, str]]]:
    """
    Tells about the YouTube video identified by the given URL, answering a question about it.
    Note: This is a simulated response. Canned content exists for a single demo video
    (ID 1htKBjuUWec); any other YouTube URL returns an error instead of a summary.
    In a real application, this would interact with a YouTube API or a video analysis
    service to get actual video information and transcripts.

    Args:
        url: URL of the YouTube video.
        question: The question being asked about the video; echoed back in the result.

    Returns:
        For the demo video: a dict with "video_url", "question_asked",
        "video_summary" and a "details" mapping of timestamp -> description.
        Otherwise: a dict with "error" and "url".
    """
    print(f"Youtube called with URL: {url}, Question: {question}")

    # The only video for which simulated content exists.
    known_video_id = "1htKBjuUWec"

    if not re.search(r'youtube\.com/watch\?v=|youtu\.be/', url):
        return {"error": "Invalid or unrecognized YouTube URL.", "url": url}

    # Previously every YouTube URL received the demo video's transcript, which
    # handed the model fabricated facts about unrelated videos. Fail explicitly
    # so the agent can fall back to another tool.
    if known_video_id not in url:
        return {
            "error": "No video analysis is available for this YouTube URL; this tool only has simulated data for a single demo video.",
            "url": url,
        }

    return {
        "video_url": url,
        "question_asked": question,
        "video_summary": "The video titled 'Teal'c coffee first time' shows a scene where several individuals are reacting to a beverage, presumably coffee, that Teal'c is trying for the first time. Key moments include: A person off-screen remarking, 'Wow this coffee's great'; another asking if it's 'cinnamon chicory tea oak'; and Teal'c reacting strongly to the taste or temperature, stating 'isn't that hot' indicating he finds it very warm.",
        "details": {
            "00:00:00": "Someone remarks, 'Wow this coffee's great I was just thinking that yeah is that cinnamon chicory tea oak'",
            "00:00:11": "Teal'c takes a large gulp from a black mug",
            "00:00:24": "Teal'c reacts strongly, someone asks 'isn't that hot'",
            "00:00:26": "Someone agrees, 'extremely'",
        },
    }

# --- END YOUTUBE TOOL ---
167
 
168
# Gemini credentials: read once from the environment and exposed under both
# names used by this module.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
API_KEY = GEMINI_API_KEY
# NOTE(review): this rebinds HF_API_TOKEN from a different variable
# (HF_SPACE_TOKEN) than the "HF_API_TOKEN" read earlier in the file, after the
# inference client has already been created - confirm which one is intended.
HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN")

# Tools exposed to the model (describe_image and arxiv_search were removed).
tools = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    wiki_search,
    google_web_search,
    read_file_content,
    python_interpreter,
    Youtube,
]

# The system prompt is loaded at import time; the relative path is resolved
# against the current working directory.
with open("prompt.txt", encoding="utf-8") as prompt_file:
    system_prompt = prompt_file.read()
sys_msg = SystemMessage(content=system_prompt)
185
 
186
def build_graph(provider: str = "gemini"):
    """Build and compile the tool-calling agent graph.

    The graph has two nodes: "assistant", which calls the chat model with the
    system prompt prepended to the conversation, and "tools", which runs the
    tools in the module-level ``tools`` list. Execution starts at "assistant";
    ``tools_condition`` decides after each assistant turn whether to visit
    "tools", and "tools" always hands control back to "assistant".

    Args:
        provider: Which chat model backend to use: "gemini" (default) or
            "huggingface".

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.

    Raises:
        ValueError: If ``provider`` is not recognized, or if ``provider`` is
            "gemini" and the MODEL environment variable was not set.
    """
    if provider == "gemini":
        # MODEL comes from the environment; fail with a clear message instead
        # of passing None to the model constructor.
        if not MODEL:
            raise ValueError(
                "MODEL environment variable must be set to a Gemini model name "
                "when provider is 'gemini'."
            )
        llm = ChatGoogleGenerativeAI(
            model=MODEL,
            temperature=1.0,
            max_retries=2,
            api_key=GEMINI_API_KEY,
            max_tokens=5000
        )
    elif provider == "huggingface":
        # NOTE(review): confirm that `url` is an accepted HuggingFaceEndpoint
        # argument for the installed version and that this model path exists.
        llm = ChatHuggingFace(
            llm=HuggingFaceEndpoint(
                url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
            ),
            temperature=0,
        )
    else:
        raise ValueError("Invalid provider. Choose 'gemini' or 'huggingface'.")

    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        """Run one model turn over the system prompt plus the conversation so far."""
        messages_to_send = [sys_msg] + state["messages"]

        # NOTE: read_file_content only returns a text note for image/audio/video
        # files; no media bytes are attached to the messages sent here. Passing
        # media to a multimodal model would require loading the file and
        # building the message content parts explicitly in this node.
        llm_response = llm_with_tools.invoke(messages_to_send)
        print(f"LLM Raw Response: {llm_response}")
        return {"messages": [llm_response]}

    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    # After tools run, the model gets another turn to read their output.
    builder.add_edge("tools", "assistant")

    return builder.compile()
266
 
267
# This module is meant to be imported (for build_graph); running it directly
# as a script intentionally does nothing.
if __name__ == "__main__":
    pass