omarequalmars
commited on
Commit
·
a1c1d9a
1
Parent(s):
0519c89
added excel/csv analysis
Browse files- app.py +38 -69
- graph/__pycache__/graph_builder.cpython-313.pyc +0 -0
- nodes/__pycache__/core.cpython-313.pyc +0 -0
- nodes/core.py +10 -55
- requirements.txt +2 -1
- states/__pycache__/state.cpython-313.pyc +0 -0
- tools/__pycache__/__init__.cpython-313.pyc +0 -0
- tools/__pycache__/langchain_tools.cpython-313.pyc +0 -0
- tools/__pycache__/math_tools.cpython-313.pyc +0 -0
- tools/__pycache__/multimodal_tools.cpython-313.pyc +0 -0
- tools/__pycache__/search_tools.cpython-313.pyc +0 -0
- tools/__pycache__/utils.cpython-313.pyc +0 -0
- tools/__pycache__/youtube_tools.cpython-313.pyc +0 -0
- tools/langchain_tools.py +181 -16
- tools/multimodal_tools.py +233 -3
- tools/search_tools.py +8 -8
app.py
CHANGED
@@ -14,35 +14,24 @@ load_dotenv()
|
|
14 |
from graph.graph_builder import graph
|
15 |
from langchain_core.messages import HumanMessage
|
16 |
|
17 |
-
# (Keep Constants as is)
|
18 |
# --- Constants ---
|
19 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
20 |
|
21 |
# --- File Download Helper Function ---
|
22 |
def download_file(task_id: str, api_url: str) -> Optional[str]:
|
23 |
-
"""
|
24 |
-
Download file associated with a task_id from the evaluation API
|
25 |
-
|
26 |
-
Args:
|
27 |
-
task_id: The task ID to download file for
|
28 |
-
api_url: Base API URL
|
29 |
-
|
30 |
-
Returns:
|
31 |
-
str: Local path to downloaded file, or None if failed
|
32 |
-
"""
|
33 |
try:
|
34 |
file_url = f"{api_url}/files/{task_id}"
|
35 |
-
print(f"📁 Downloading file for task {task_id}
|
36 |
|
37 |
response = requests.get(file_url, timeout=30)
|
38 |
response.raise_for_status()
|
39 |
|
40 |
-
#
|
41 |
content_disposition = response.headers.get('Content-Disposition', '')
|
42 |
if 'filename=' in content_disposition:
|
43 |
filename = content_disposition.split('filename=')[1].strip('"')
|
44 |
else:
|
45 |
-
# Fallback filename based on content type
|
46 |
content_type = response.headers.get('Content-Type', '')
|
47 |
if 'image' in content_type:
|
48 |
extension = '.jpg'
|
@@ -61,28 +50,22 @@ def download_file(task_id: str, api_url: str) -> Optional[str]:
|
|
61 |
with open(file_path, 'wb') as f:
|
62 |
f.write(response.content)
|
63 |
|
64 |
-
print(f"✅ File downloaded
|
65 |
return file_path
|
66 |
|
67 |
-
except requests.exceptions.RequestException as e:
|
68 |
-
print(f"❌ Error downloading file for task {task_id}: {e}")
|
69 |
-
return None
|
70 |
except Exception as e:
|
71 |
-
print(f"❌
|
72 |
return None
|
73 |
|
74 |
# --- Your LangGraph Agent Definition ---
|
75 |
-
# ----- THIS IS WHERE YOU BUILD YOUR AGENT ------
|
76 |
class BasicAgent:
|
77 |
def __init__(self):
|
78 |
"""Initialize the LangGraph agent"""
|
79 |
print("LangGraph Agent initialized with multimodal, search, math, and YouTube tools.")
|
80 |
|
81 |
-
# Verify environment variables
|
82 |
if not os.getenv("OPENROUTER_API_KEY"):
|
83 |
raise ValueError("OPENROUTER_API_KEY not found in environment variables")
|
84 |
|
85 |
-
# The graph is already compiled and ready to use
|
86 |
self.graph = graph
|
87 |
print("✅ Agent ready with tools: multimodal, search, math, YouTube")
|
88 |
|
@@ -98,17 +81,17 @@ class BasicAgent:
|
|
98 |
str: The final answer (formatted for evaluation)
|
99 |
"""
|
100 |
print(f"🤖 Processing question: {question[:50]}...")
|
101 |
-
if file_path:
|
102 |
-
print(f"📎 Associated file: {file_path}")
|
103 |
|
104 |
try:
|
105 |
-
#
|
106 |
-
|
107 |
-
if file_path:
|
108 |
enhanced_question = f"{question}\n\nFile provided: {file_path}"
|
109 |
-
print(f"
|
|
|
|
|
|
|
110 |
|
111 |
-
# Create initial state with the enhanced
|
112 |
initial_state = {"messages": [HumanMessage(content=enhanced_question)]}
|
113 |
|
114 |
# Run the LangGraph agent
|
@@ -118,20 +101,16 @@ class BasicAgent:
|
|
118 |
final_message = result["messages"][-1]
|
119 |
answer = final_message.content
|
120 |
|
121 |
-
# Clean up the answer for evaluation (
|
122 |
-
# The evaluation system expects just the answer, no explanations
|
123 |
if isinstance(answer, str):
|
124 |
answer = answer.strip()
|
125 |
|
126 |
-
# Remove common prefixes that might interfere with evaluation
|
127 |
prefixes_to_remove = [
|
128 |
"The answer is: ",
|
129 |
"Answer: ",
|
130 |
"The result is: ",
|
131 |
"Result: ",
|
132 |
"The final answer is: ",
|
133 |
-
"Based on the analysis: ",
|
134 |
-
"Based on the file: ",
|
135 |
]
|
136 |
|
137 |
for prefix in prefixes_to_remove:
|
@@ -146,23 +125,13 @@ class BasicAgent:
|
|
146 |
error_msg = f"Error processing question: {str(e)}"
|
147 |
print(f"❌ {error_msg}")
|
148 |
return error_msg
|
149 |
-
finally:
|
150 |
-
# Clean up temporary file if it exists
|
151 |
-
if file_path and os.path.exists(file_path) and tempfile.gettempdir() in file_path:
|
152 |
-
try:
|
153 |
-
os.remove(file_path)
|
154 |
-
print(f"🧹 Cleaned up temporary file: {file_path}")
|
155 |
-
except Exception as e:
|
156 |
-
print(f"⚠️ Could not clean up temporary file: {e}")
|
157 |
|
158 |
-
# Keep the rest of the file unchanged (run_and_submit_all function and Gradio interface)
|
159 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
160 |
"""
|
161 |
Fetches all questions, downloads associated files, runs the BasicAgent on them,
|
162 |
submits all answers, and displays the results.
|
163 |
"""
|
164 |
-
|
165 |
-
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
166 |
|
167 |
if profile:
|
168 |
username= f"{profile.username}"
|
@@ -175,14 +144,13 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
175 |
questions_url = f"{api_url}/questions"
|
176 |
submit_url = f"{api_url}/submit"
|
177 |
|
178 |
-
# 1. Instantiate Agent
|
179 |
try:
|
180 |
agent = BasicAgent()
|
181 |
except Exception as e:
|
182 |
print(f"Error instantiating agent: {e}")
|
183 |
return f"Error initializing agent: {e}", None
|
184 |
|
185 |
-
# In the case of an app running as a hugging Face space, this link points toward your codebase
|
186 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
187 |
print(agent_code)
|
188 |
|
@@ -203,64 +171,67 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
203 |
print(f"An unexpected error occurred fetching questions: {e}")
|
204 |
return f"An unexpected error occurred fetching questions: {e}", None
|
205 |
|
206 |
-
# 3.
|
207 |
results_log = []
|
208 |
answers_payload = []
|
|
|
|
|
209 |
print(f"Running agent on {len(questions_data)} questions...")
|
210 |
|
211 |
for item in questions_data:
|
212 |
task_id = item.get("task_id")
|
213 |
question_text = item.get("question")
|
214 |
-
file_name = item.get("file_name")
|
215 |
|
216 |
if not task_id or question_text is None:
|
217 |
print(f"Skipping item with missing task_id or question: {item}")
|
218 |
continue
|
219 |
|
220 |
-
|
221 |
-
print(f"Question: {question_text[:100]}...")
|
222 |
-
if file_name:
|
223 |
-
print(f"Associated file: {file_name}")
|
224 |
-
|
225 |
-
# ✅ Download file if it exists
|
226 |
downloaded_file_path = None
|
227 |
if file_name:
|
|
|
228 |
downloaded_file_path = download_file(task_id, api_url)
|
229 |
-
if
|
230 |
-
|
231 |
|
232 |
try:
|
233 |
-
#
|
234 |
submitted_answer = agent(question_text, downloaded_file_path)
|
235 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
236 |
results_log.append({
|
237 |
"Task ID": task_id,
|
238 |
"Question": question_text,
|
239 |
-
"File": file_name if file_name else "None",
|
240 |
"Submitted Answer": submitted_answer
|
241 |
})
|
242 |
-
print(f"✅ Task {task_id} completed")
|
243 |
|
244 |
except Exception as e:
|
245 |
print(f"❌ Error running agent on task {task_id}: {e}")
|
246 |
-
error_answer = f"AGENT ERROR: {e}"
|
247 |
results_log.append({
|
248 |
"Task ID": task_id,
|
249 |
"Question": question_text,
|
250 |
-
"
|
251 |
-
"Submitted Answer": error_answer
|
252 |
})
|
253 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
if not answers_payload:
|
255 |
print("Agent did not produce any answers to submit.")
|
256 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
257 |
|
258 |
-
# 4. Prepare Submission
|
259 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
260 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
261 |
print(status_update)
|
262 |
|
263 |
-
# 5. Submit
|
264 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
265 |
try:
|
266 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
@@ -303,7 +274,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
303 |
results_df = pd.DataFrame(results_log)
|
304 |
return status_message, results_df
|
305 |
|
306 |
-
# --- Build Gradio Interface
|
307 |
with gr.Blocks() as demo:
|
308 |
gr.Markdown("# LangGraph Agent Evaluation Runner")
|
309 |
gr.Markdown(
|
@@ -320,7 +291,6 @@ with gr.Blocks() as demo:
|
|
320 |
- 🔍 **Search**: Web search using multiple providers (DuckDuckGo, Tavily, SerpAPI)
|
321 |
- 🧮 **Math**: Basic arithmetic, complex calculations, percentages, factorials
|
322 |
- 📺 **YouTube**: Extract captions, get video information
|
323 |
-
- 📁 **File Processing**: Automatically downloads and processes evaluation files
|
324 |
|
325 |
---
|
326 |
**Note:** Processing all questions may take some time as the agent carefully analyzes each question and uses appropriate tools.
|
@@ -341,7 +311,6 @@ with gr.Blocks() as demo:
|
|
341 |
|
342 |
if __name__ == "__main__":
|
343 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
344 |
-
# Check for SPACE_HOST and SPACE_ID at startup for information
|
345 |
space_host_startup = os.getenv("SPACE_HOST")
|
346 |
space_id_startup = os.getenv("SPACE_ID")
|
347 |
|
@@ -361,4 +330,4 @@ if __name__ == "__main__":
|
|
361 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
362 |
|
363 |
print("Launching Gradio Interface for LangGraph Agent Evaluation...")
|
364 |
-
demo.launch(debug=True, share=
|
|
|
14 |
from graph.graph_builder import graph
|
15 |
from langchain_core.messages import HumanMessage
|
16 |
|
|
|
17 |
# --- Constants ---
|
18 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
19 |
|
20 |
# --- File Download Helper Function ---
|
21 |
def download_file(task_id: str, api_url: str) -> Optional[str]:
|
22 |
+
"""Download file associated with a task_id from the evaluation API"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
try:
|
24 |
file_url = f"{api_url}/files/{task_id}"
|
25 |
+
print(f"📁 Downloading file for task {task_id}")
|
26 |
|
27 |
response = requests.get(file_url, timeout=30)
|
28 |
response.raise_for_status()
|
29 |
|
30 |
+
# Get filename from headers or create one
|
31 |
content_disposition = response.headers.get('Content-Disposition', '')
|
32 |
if 'filename=' in content_disposition:
|
33 |
filename = content_disposition.split('filename=')[1].strip('"')
|
34 |
else:
|
|
|
35 |
content_type = response.headers.get('Content-Type', '')
|
36 |
if 'image' in content_type:
|
37 |
extension = '.jpg'
|
|
|
50 |
with open(file_path, 'wb') as f:
|
51 |
f.write(response.content)
|
52 |
|
53 |
+
print(f"✅ File downloaded: {file_path}")
|
54 |
return file_path
|
55 |
|
|
|
|
|
|
|
56 |
except Exception as e:
|
57 |
+
print(f"❌ Error downloading file for task {task_id}: {e}")
|
58 |
return None
|
59 |
|
60 |
# --- Your LangGraph Agent Definition ---
|
|
|
61 |
class BasicAgent:
|
62 |
def __init__(self):
|
63 |
"""Initialize the LangGraph agent"""
|
64 |
print("LangGraph Agent initialized with multimodal, search, math, and YouTube tools.")
|
65 |
|
|
|
66 |
if not os.getenv("OPENROUTER_API_KEY"):
|
67 |
raise ValueError("OPENROUTER_API_KEY not found in environment variables")
|
68 |
|
|
|
69 |
self.graph = graph
|
70 |
print("✅ Agent ready with tools: multimodal, search, math, YouTube")
|
71 |
|
|
|
81 |
str: The final answer (formatted for evaluation)
|
82 |
"""
|
83 |
print(f"🤖 Processing question: {question[:50]}...")
|
|
|
|
|
84 |
|
85 |
try:
|
86 |
+
# CRITICAL: Only modify the prompt if there's actually a valid file
|
87 |
+
if file_path and os.path.exists(file_path):
|
|
|
88 |
enhanced_question = f"{question}\n\nFile provided: {file_path}"
|
89 |
+
print(f"📎 Processing with file: {file_path}")
|
90 |
+
else:
|
91 |
+
# Keep EXACTLY the same behavior as before for non-file questions
|
92 |
+
enhanced_question = question
|
93 |
|
94 |
+
# Create initial state with the question (enhanced or original)
|
95 |
initial_state = {"messages": [HumanMessage(content=enhanced_question)]}
|
96 |
|
97 |
# Run the LangGraph agent
|
|
|
101 |
final_message = result["messages"][-1]
|
102 |
answer = final_message.content
|
103 |
|
104 |
+
# Clean up the answer for evaluation (UNCHANGED from original)
|
|
|
105 |
if isinstance(answer, str):
|
106 |
answer = answer.strip()
|
107 |
|
|
|
108 |
prefixes_to_remove = [
|
109 |
"The answer is: ",
|
110 |
"Answer: ",
|
111 |
"The result is: ",
|
112 |
"Result: ",
|
113 |
"The final answer is: ",
|
|
|
|
|
114 |
]
|
115 |
|
116 |
for prefix in prefixes_to_remove:
|
|
|
125 |
error_msg = f"Error processing question: {str(e)}"
|
126 |
print(f"❌ {error_msg}")
|
127 |
return error_msg
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
|
|
129 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
130 |
"""
|
131 |
Fetches all questions, downloads associated files, runs the BasicAgent on them,
|
132 |
submits all answers, and displays the results.
|
133 |
"""
|
134 |
+
space_id = os.getenv("SPACE_ID")
|
|
|
135 |
|
136 |
if profile:
|
137 |
username= f"{profile.username}"
|
|
|
144 |
questions_url = f"{api_url}/questions"
|
145 |
submit_url = f"{api_url}/submit"
|
146 |
|
147 |
+
# 1. Instantiate Agent
|
148 |
try:
|
149 |
agent = BasicAgent()
|
150 |
except Exception as e:
|
151 |
print(f"Error instantiating agent: {e}")
|
152 |
return f"Error initializing agent: {e}", None
|
153 |
|
|
|
154 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
155 |
print(agent_code)
|
156 |
|
|
|
171 |
print(f"An unexpected error occurred fetching questions: {e}")
|
172 |
return f"An unexpected error occurred fetching questions: {e}", None
|
173 |
|
174 |
+
# 3. Process Questions (with minimal changes)
|
175 |
results_log = []
|
176 |
answers_payload = []
|
177 |
+
downloaded_files = [] # Track files for cleanup
|
178 |
+
|
179 |
print(f"Running agent on {len(questions_data)} questions...")
|
180 |
|
181 |
for item in questions_data:
|
182 |
task_id = item.get("task_id")
|
183 |
question_text = item.get("question")
|
184 |
+
file_name = item.get("file_name")
|
185 |
|
186 |
if not task_id or question_text is None:
|
187 |
print(f"Skipping item with missing task_id or question: {item}")
|
188 |
continue
|
189 |
|
190 |
+
# Download file if it exists
|
|
|
|
|
|
|
|
|
|
|
191 |
downloaded_file_path = None
|
192 |
if file_name:
|
193 |
+
print(f"📋 Task {task_id} has file: {file_name}")
|
194 |
downloaded_file_path = download_file(task_id, api_url)
|
195 |
+
if downloaded_file_path:
|
196 |
+
downloaded_files.append(downloaded_file_path) # Track for cleanup
|
197 |
|
198 |
try:
|
199 |
+
# Call agent (behavior unchanged for non-file questions)
|
200 |
submitted_answer = agent(question_text, downloaded_file_path)
|
201 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
202 |
results_log.append({
|
203 |
"Task ID": task_id,
|
204 |
"Question": question_text,
|
|
|
205 |
"Submitted Answer": submitted_answer
|
206 |
})
|
|
|
207 |
|
208 |
except Exception as e:
|
209 |
print(f"❌ Error running agent on task {task_id}: {e}")
|
|
|
210 |
results_log.append({
|
211 |
"Task ID": task_id,
|
212 |
"Question": question_text,
|
213 |
+
"Submitted Answer": f"AGENT ERROR: {e}"
|
|
|
214 |
})
|
215 |
|
216 |
+
# Cleanup downloaded files AFTER all processing is complete
|
217 |
+
for file_path in downloaded_files:
|
218 |
+
try:
|
219 |
+
if os.path.exists(file_path):
|
220 |
+
os.remove(file_path)
|
221 |
+
print(f"🧹 Cleaned up: {file_path}")
|
222 |
+
except Exception as e:
|
223 |
+
print(f"⚠️ Could not clean up {file_path}: {e}")
|
224 |
+
|
225 |
if not answers_payload:
|
226 |
print("Agent did not produce any answers to submit.")
|
227 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
228 |
|
229 |
+
# 4. Prepare Submission (UNCHANGED)
|
230 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
231 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
232 |
print(status_update)
|
233 |
|
234 |
+
# 5. Submit (UNCHANGED)
|
235 |
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
236 |
try:
|
237 |
response = requests.post(submit_url, json=submission_data, timeout=60)
|
|
|
274 |
results_df = pd.DataFrame(results_log)
|
275 |
return status_message, results_df
|
276 |
|
277 |
+
# --- Build Gradio Interface (UNCHANGED) ---
|
278 |
with gr.Blocks() as demo:
|
279 |
gr.Markdown("# LangGraph Agent Evaluation Runner")
|
280 |
gr.Markdown(
|
|
|
291 |
- 🔍 **Search**: Web search using multiple providers (DuckDuckGo, Tavily, SerpAPI)
|
292 |
- 🧮 **Math**: Basic arithmetic, complex calculations, percentages, factorials
|
293 |
- 📺 **YouTube**: Extract captions, get video information
|
|
|
294 |
|
295 |
---
|
296 |
**Note:** Processing all questions may take some time as the agent carefully analyzes each question and uses appropriate tools.
|
|
|
311 |
|
312 |
if __name__ == "__main__":
|
313 |
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
|
|
314 |
space_host_startup = os.getenv("SPACE_HOST")
|
315 |
space_id_startup = os.getenv("SPACE_ID")
|
316 |
|
|
|
330 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
331 |
|
332 |
print("Launching Gradio Interface for LangGraph Agent Evaluation...")
|
333 |
+
demo.launch(debug=True, share=True)
|
graph/__pycache__/graph_builder.cpython-313.pyc
CHANGED
Binary files a/graph/__pycache__/graph_builder.cpython-313.pyc and b/graph/__pycache__/graph_builder.cpython-313.pyc differ
|
|
nodes/__pycache__/core.cpython-313.pyc
CHANGED
Binary files a/nodes/__pycache__/core.cpython-313.pyc and b/nodes/__pycache__/core.cpython-313.pyc differ
|
|
nodes/core.py
CHANGED
@@ -1,82 +1,37 @@
|
|
1 |
-
# nodes/core.py
|
2 |
from states.state import AgentState
|
3 |
import os
|
4 |
from dotenv import load_dotenv
|
5 |
-
from langchain_openai import ChatOpenAI
|
6 |
-
from tools.langchain_tools import
|
7 |
-
extract_text,
|
8 |
-
analyze_image_tool,
|
9 |
-
analyze_audio_tool,
|
10 |
-
add,
|
11 |
-
subtract,
|
12 |
-
multiply,
|
13 |
-
divide,
|
14 |
-
search_tool,
|
15 |
-
extract_youtube_transcript,
|
16 |
-
get_youtube_info,
|
17 |
-
calculate_expression,
|
18 |
-
factorial,
|
19 |
-
square_root,
|
20 |
-
percentage,
|
21 |
-
average
|
22 |
-
)
|
23 |
|
24 |
load_dotenv()
|
25 |
|
26 |
-
# Read your API key from the environment variable
|
27 |
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
|
28 |
|
29 |
if not openrouter_api_key:
|
30 |
raise ValueError("OPENROUTER_API_KEY not found in environment variables")
|
31 |
|
32 |
-
# Initialize OpenRouter ChatOpenAI
|
33 |
chat = ChatOpenAI(
|
34 |
-
model="google/gemini-2.5-pro-preview", #
|
35 |
-
# Alternative models you can use:
|
36 |
-
# model="mistralai/mistral-7b-instruct:free", # Fast, free text model
|
37 |
-
# model="google/gemma-2-9b-it:free", # Google's free model
|
38 |
-
# model="qwen/qwen-2.5-72b-instruct:free", # High-quality free model
|
39 |
-
|
40 |
temperature=0,
|
41 |
max_retries=2,
|
42 |
base_url="https://openrouter.ai/api/v1",
|
43 |
api_key=openrouter_api_key,
|
44 |
default_headers={
|
45 |
-
"HTTP-Referer": "https://your-app.com",
|
46 |
-
"X-Title": "LangGraph Agent",
|
47 |
}
|
48 |
)
|
49 |
|
50 |
-
#
|
51 |
-
tools =
|
52 |
-
extract_text,
|
53 |
-
analyze_image_tool,
|
54 |
-
analyze_audio_tool,
|
55 |
-
extract_youtube_transcript,
|
56 |
-
add,
|
57 |
-
subtract,
|
58 |
-
multiply,
|
59 |
-
divide,
|
60 |
-
search_tool
|
61 |
-
]
|
62 |
-
|
63 |
-
# Extended tools list (if you want more capabilities)
|
64 |
-
extended_tools = tools + [
|
65 |
-
get_youtube_info,
|
66 |
-
calculate_expression,
|
67 |
-
factorial,
|
68 |
-
square_root,
|
69 |
-
percentage,
|
70 |
-
average
|
71 |
-
]
|
72 |
|
73 |
-
# Use core tools by default (matching original), but you can switch to extended_tools
|
74 |
chat_with_tools = chat.bind_tools(tools)
|
75 |
|
76 |
def assistant(state: AgentState):
|
77 |
-
"""
|
78 |
-
Assistant node - maintains the exact same system prompt for evaluation compatibility
|
79 |
-
"""
|
80 |
sys_msg = (
|
81 |
"You are a helpful assistant with access to tools. Understand user requests accurately. "
|
82 |
"Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints. "
|
|
|
1 |
+
# nodes/core.py (Updated to include Excel tool)
|
2 |
from states.state import AgentState
|
3 |
import os
|
4 |
from dotenv import load_dotenv
|
5 |
+
from langchain_openai import ChatOpenAI
|
6 |
+
from tools.langchain_tools import EXTENDED_TOOLS # ✅ Changed from individual imports
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
load_dotenv()
|
9 |
|
|
|
10 |
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
|
11 |
|
12 |
if not openrouter_api_key:
|
13 |
raise ValueError("OPENROUTER_API_KEY not found in environment variables")
|
14 |
|
15 |
+
# Initialize OpenRouter ChatOpenAI
|
16 |
chat = ChatOpenAI(
|
17 |
+
model="google/gemini-2.5-pro-preview", # Tool-compatible model
|
|
|
|
|
|
|
|
|
|
|
18 |
temperature=0,
|
19 |
max_retries=2,
|
20 |
base_url="https://openrouter.ai/api/v1",
|
21 |
api_key=openrouter_api_key,
|
22 |
default_headers={
|
23 |
+
"HTTP-Referer": "https://your-app.com",
|
24 |
+
"X-Title": "LangGraph Agent",
|
25 |
}
|
26 |
)
|
27 |
|
28 |
+
# Use EXTENDED_TOOLS which includes Excel support
|
29 |
+
tools = EXTENDED_TOOLS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
|
|
31 |
chat_with_tools = chat.bind_tools(tools)
|
32 |
|
33 |
def assistant(state: AgentState):
|
34 |
+
"""Assistant node with Excel support"""
|
|
|
|
|
35 |
sys_msg = (
|
36 |
"You are a helpful assistant with access to tools. Understand user requests accurately. "
|
37 |
"Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints. "
|
requirements.txt
CHANGED
@@ -10,4 +10,5 @@ langchain-community
|
|
10 |
gradio
|
11 |
pandas
|
12 |
gradio[oauth]
|
13 |
-
|
|
|
|
10 |
gradio
|
11 |
pandas
|
12 |
gradio[oauth]
|
13 |
+
openpyxl
|
14 |
+
xlrd
|
states/__pycache__/state.cpython-313.pyc
CHANGED
Binary files a/states/__pycache__/state.cpython-313.pyc and b/states/__pycache__/state.cpython-313.pyc differ
|
|
tools/__pycache__/__init__.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/__init__.cpython-313.pyc and b/tools/__pycache__/__init__.cpython-313.pyc differ
|
|
tools/__pycache__/langchain_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/langchain_tools.cpython-313.pyc and b/tools/__pycache__/langchain_tools.cpython-313.pyc differ
|
|
tools/__pycache__/math_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/math_tools.cpython-313.pyc and b/tools/__pycache__/math_tools.cpython-313.pyc differ
|
|
tools/__pycache__/multimodal_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/multimodal_tools.cpython-313.pyc and b/tools/__pycache__/multimodal_tools.cpython-313.pyc differ
|
|
tools/__pycache__/search_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/search_tools.cpython-313.pyc and b/tools/__pycache__/search_tools.cpython-313.pyc differ
|
|
tools/__pycache__/utils.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/utils.cpython-313.pyc and b/tools/__pycache__/utils.cpython-313.pyc differ
|
|
tools/__pycache__/youtube_tools.cpython-313.pyc
CHANGED
Binary files a/tools/__pycache__/youtube_tools.cpython-313.pyc and b/tools/__pycache__/youtube_tools.cpython-313.pyc differ
|
|
tools/langchain_tools.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
-
# tools/langchain_tools.py
|
2 |
"""
|
3 |
LangChain-compatible tool wrappers for our existing tools
|
|
|
4 |
"""
|
5 |
|
6 |
from langchain_core.tools import tool
|
@@ -11,7 +12,7 @@ from dotenv import load_dotenv
|
|
11 |
# Load environment variables FIRST, before any tool imports
|
12 |
load_dotenv()
|
13 |
|
14 |
-
from .multimodal_tools import MultimodalTools, analyze_transcript as _analyze_transcript
|
15 |
from .search_tools import SearchTools
|
16 |
from .math_tools import MathTools
|
17 |
from .youtube_tools import YouTubeTools
|
@@ -21,7 +22,10 @@ multimodal_tools = MultimodalTools()
|
|
21 |
search_tools = SearchTools()
|
22 |
youtube_tools = YouTubeTools()
|
23 |
|
24 |
-
#
|
|
|
|
|
|
|
25 |
@tool
|
26 |
def extract_text(image_path: str) -> str:
|
27 |
"""Extract text from an image using OCR"""
|
@@ -38,7 +42,16 @@ def analyze_audio_tool(transcript: str, question: str = "Summarize this audio co
|
|
38 |
return multimodal_tools.analyze_audio_transcript(transcript, question)
|
39 |
|
40 |
@tool
|
41 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
"""Search the web for information"""
|
43 |
results = search_tools.search(query, max_results)
|
44 |
if not results:
|
@@ -54,6 +67,44 @@ def search_tool(query: str, max_results: int = 5) -> str:
|
|
54 |
|
55 |
return "\n".join(formatted_results)
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
@tool
|
58 |
def extract_youtube_transcript(url: str, language_code: str = 'en') -> str:
|
59 |
"""Extract transcript/captions from a YouTube video"""
|
@@ -63,6 +114,28 @@ def extract_youtube_transcript(url: str, language_code: str = 'en') -> str:
|
|
63 |
else:
|
64 |
return "No captions available for this video"
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
@tool
|
67 |
def add(a: float, b: float) -> float:
|
68 |
"""Add two numbers"""
|
@@ -85,19 +158,13 @@ def divide(a: float, b: float) -> str:
|
|
85 |
return str(result)
|
86 |
|
87 |
@tool
|
88 |
-
def
|
89 |
-
"""
|
90 |
-
|
91 |
-
if info:
|
92 |
-
return f"Title: {info.get('title', 'Unknown')}\nAuthor: {info.get('author', 'Unknown')}\nDuration: {info.get('length', 0)} seconds\nViews: {info.get('views', 0):,}"
|
93 |
-
else:
|
94 |
-
return "Could not retrieve video information"
|
95 |
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
from .math_tools import calculate_expression as calc_expr
|
100 |
-
return str(calc_expr(expression))
|
101 |
|
102 |
@tool
|
103 |
def factorial(n: int) -> str:
|
@@ -126,3 +193,101 @@ def average(numbers: str) -> str:
|
|
126 |
return str(result)
|
127 |
except Exception as e:
|
128 |
return f"Error parsing numbers: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools/langchain_tools.py
|
2 |
"""
|
3 |
LangChain-compatible tool wrappers for our existing tools
|
4 |
+
Complete integration of multimodal, search, math, and YouTube tools
|
5 |
"""
|
6 |
|
7 |
from langchain_core.tools import tool
|
|
|
12 |
# Load environment variables FIRST, before any tool imports
|
13 |
load_dotenv()
|
14 |
|
15 |
+
from .multimodal_tools import MultimodalTools, analyze_transcript as _analyze_transcript, analyze_excel, analyze_python
|
16 |
from .search_tools import SearchTools
|
17 |
from .math_tools import MathTools
|
18 |
from .youtube_tools import YouTubeTools
|
|
|
22 |
search_tools = SearchTools()
|
23 |
youtube_tools = YouTubeTools()
|
24 |
|
25 |
+
# =============================================================================
|
26 |
+
# MULTIMODAL TOOLS
|
27 |
+
# =============================================================================
|
28 |
+
|
29 |
@tool
|
30 |
def extract_text(image_path: str) -> str:
|
31 |
"""Extract text from an image using OCR"""
|
|
|
42 |
return multimodal_tools.analyze_audio_transcript(transcript, question)
|
43 |
|
44 |
@tool
|
45 |
+
def analyze_excel_tool(file_path: str, question: str) -> str:
|
46 |
+
"""Analyze Excel or CSV files to answer questions about the data"""
|
47 |
+
return analyze_excel(file_path, question)
|
48 |
+
|
49 |
+
# =============================================================================
|
50 |
+
# SEARCH TOOLS
|
51 |
+
# =============================================================================
|
52 |
+
|
53 |
+
@tool
|
54 |
+
def search_tool(query: str, max_results: int = 10) -> str:
|
55 |
"""Search the web for information"""
|
56 |
results = search_tools.search(query, max_results)
|
57 |
if not results:
|
|
|
67 |
|
68 |
return "\n".join(formatted_results)
|
69 |
|
70 |
+
@tool
|
71 |
+
def search_news_tool(query: str, max_results: int = 10) -> str:
|
72 |
+
"""Search for news articles about a topic"""
|
73 |
+
results = search_tools.search_news(query, max_results)
|
74 |
+
if not results:
|
75 |
+
return "No news results found"
|
76 |
+
|
77 |
+
# Format results for the LLM
|
78 |
+
formatted_results = []
|
79 |
+
for i, result in enumerate(results, 1):
|
80 |
+
title = result.get('title', 'No title')
|
81 |
+
content = result.get('content', 'No content')
|
82 |
+
url = result.get('url', 'No URL')
|
83 |
+
formatted_results.append(f"{i}. {title}\n{content[:200]}...\nSource: {url}\n")
|
84 |
+
|
85 |
+
return "\n".join(formatted_results)
|
86 |
+
|
87 |
+
@tool
|
88 |
+
def search_academic_tool(query: str, max_results: int = 10) -> str:
|
89 |
+
"""Search for academic research and papers"""
|
90 |
+
results = search_tools.search_academic(query, max_results)
|
91 |
+
if not results:
|
92 |
+
return "No academic results found"
|
93 |
+
|
94 |
+
# Format results for the LLM
|
95 |
+
formatted_results = []
|
96 |
+
for i, result in enumerate(results, 1):
|
97 |
+
title = result.get('title', 'No title')
|
98 |
+
content = result.get('content', 'No content')
|
99 |
+
url = result.get('url', 'No URL')
|
100 |
+
formatted_results.append(f"{i}. {title}\n{content[:200]}...\nSource: {url}\n")
|
101 |
+
|
102 |
+
return "\n".join(formatted_results)
|
103 |
+
|
104 |
+
# =============================================================================
|
105 |
+
# YOUTUBE TOOLS
|
106 |
+
# =============================================================================
|
107 |
+
|
108 |
@tool
|
109 |
def extract_youtube_transcript(url: str, language_code: str = 'en') -> str:
|
110 |
"""Extract transcript/captions from a YouTube video"""
|
|
|
114 |
else:
|
115 |
return "No captions available for this video"
|
116 |
|
117 |
+
@tool
|
118 |
+
def get_youtube_info(url: str) -> str:
|
119 |
+
"""Get information about a YouTube video"""
|
120 |
+
info = youtube_tools.get_video_info(url)
|
121 |
+
if info:
|
122 |
+
return f"Title: {info.get('title', 'Unknown')}\nAuthor: {info.get('author', 'Unknown')}\nDuration: {info.get('length', 0)} seconds\nViews: {info.get('views', 0):,}"
|
123 |
+
else:
|
124 |
+
return "Could not retrieve video information"
|
125 |
+
|
126 |
+
@tool
|
127 |
+
def get_youtube_playlist_info(playlist_url: str) -> str:
|
128 |
+
"""Get information about a YouTube playlist"""
|
129 |
+
info = youtube_tools.get_playlist_info(playlist_url)
|
130 |
+
if info:
|
131 |
+
return f"Playlist: {info.get('title', 'Unknown')}\nVideos: {info.get('video_count', 0)}\nOwner: {info.get('owner', 'Unknown')}"
|
132 |
+
else:
|
133 |
+
return "Could not retrieve playlist information"
|
134 |
+
|
135 |
+
# =============================================================================
|
136 |
+
# MATH TOOLS - Basic Operations
|
137 |
+
# =============================================================================
|
138 |
+
|
139 |
@tool
|
140 |
def add(a: float, b: float) -> float:
|
141 |
"""Add two numbers"""
|
|
|
158 |
return str(result)
|
159 |
|
160 |
@tool
def power(base: float, exponent: float) -> float:
    """Raise *base* to the power *exponent* (delegates to the shared MathTools helper)."""
    result = MathTools.power(base, exponent)
    return result
|
|
|
|
|
|
|
|
|
164 |
|
165 |
+
# =============================================================================
|
166 |
+
# MATH TOOLS - Advanced Operations
|
167 |
+
# =============================================================================
|
|
|
|
|
168 |
|
169 |
@tool
|
170 |
def factorial(n: int) -> str:
|
|
|
193 |
return str(result)
|
194 |
except Exception as e:
|
195 |
return f"Error parsing numbers: {str(e)}"
|
196 |
+
|
197 |
+
@tool
def calculate_expression(expression: str) -> str:
    """Safely evaluate a mathematical expression string and return the result as text."""
    # Imported lazily to avoid a circular import at module load time.
    from .math_tools import calculate_expression as _evaluate
    return str(_evaluate(expression))
|
202 |
+
|
203 |
+
@tool
def absolute_value(n: float) -> float:
    """Return the absolute value of *n*."""
    magnitude = MathTools.absolute(n)
    return magnitude
|
207 |
+
|
208 |
+
@tool
def round_number(n: float, decimals: int = 2) -> float:
    """Round *n* to *decimals* decimal places (default: 2)."""
    rounded = MathTools.round_number(n, decimals)
    return rounded
|
212 |
+
|
213 |
+
@tool
def min_value(numbers: str) -> str:
    """Find the minimum value in a comma-separated list of numbers."""
    try:
        # Parse "1, 2.5, -3" style input; any bad token aborts with an error string.
        parsed = [float(token.strip()) for token in numbers.split(',')]
        return str(MathTools.min_value(parsed))
    except Exception as e:
        return f"Error parsing numbers: {str(e)}"
|
222 |
+
|
223 |
+
@tool
def max_value(numbers: str) -> str:
    """Find the maximum value in a comma-separated list of numbers."""
    try:
        # Parse "1, 2.5, -3" style input; any bad token aborts with an error string.
        parsed = [float(token.strip()) for token in numbers.split(',')]
        return str(MathTools.max_value(parsed))
    except Exception as e:
        return f"Error parsing numbers: {str(e)}"
|
232 |
+
|
233 |
+
@tool
def compound_interest(principal: float, rate: float, time: float, compounds_per_year: int = 1) -> str:
    """Calculate compound interest on *principal* at *rate* over *time* years."""
    amount = MathTools.calculate_compound_interest(principal, rate, time, compounds_per_year)
    return str(amount)
|
238 |
+
|
239 |
+
@tool
def solve_quadratic(a: float, b: float, c: float) -> str:
    """Solve the quadratic equation a*x**2 + b*x + c = 0 and return the roots as text."""
    roots = MathTools.solve_quadratic(a, b, c)
    return str(roots)
|
244 |
+
|
245 |
+
@tool
def analyze_python_tool(file_path: str, question: str = "What is the final output of this code?") -> str:
    """Read and analyze a Python source file; may execute the code to answer the question."""
    answer = analyze_python(file_path, question)
    return answer
|
249 |
+
# =============================================================================
|
250 |
+
# TOOL COLLECTIONS FOR EASY IMPORT
|
251 |
+
# =============================================================================
|
252 |
+
|
253 |
+
# Core tools (matching original template)
# Minimal tool set: multimodal analysis (text/image/audio), YouTube
# transcripts, the four basic arithmetic operations, and web search.
CORE_TOOLS = [
    extract_text,
    analyze_image_tool,
    analyze_audio_tool,
    extract_youtube_transcript,
    add,
    subtract,
    multiply,
    divide,
    search_tool
]

# Extended tools with new Excel functionality
# Superset of CORE_TOOLS; adds spreadsheet/Python-file analysis, extra
# search providers, YouTube metadata, and the common math helpers.
EXTENDED_TOOLS = CORE_TOOLS + [
    analyze_excel_tool,  # NEW: Excel/CSV analysis
    analyze_python_tool,
    search_news_tool,
    search_academic_tool,
    get_youtube_info,
    get_youtube_playlist_info,
    calculate_expression,
    factorial,
    square_root,
    percentage,
    average
]

# All available tools
# EXTENDED_TOOLS plus the remaining advanced math operations.
ALL_TOOLS = EXTENDED_TOOLS + [
    power,
    absolute_value,
    round_number,
    min_value,
    max_value,
    compound_interest,
    solve_quadratic
]

# Default export (for backwards compatibility)
# Existing importers of `tools` get the conservative core set.
tools = CORE_TOOLS
|
tools/multimodal_tools.py
CHANGED
@@ -1,6 +1,12 @@
|
|
1 |
# tools/multimodal_tools.py
|
2 |
import requests
|
3 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from typing import Optional, Dict, Any
|
5 |
from .utils import encode_image_to_base64, validate_file_exists, get_env_var, logger
|
6 |
|
@@ -28,7 +34,7 @@ class MultimodalTools:
|
|
28 |
self.openrouter_url,
|
29 |
headers=self.headers,
|
30 |
json=payload,
|
31 |
-
timeout=
|
32 |
)
|
33 |
response.raise_for_status()
|
34 |
|
@@ -78,7 +84,7 @@ class MultimodalTools:
|
|
78 |
}
|
79 |
],
|
80 |
"temperature": 0,
|
81 |
-
"max_tokens":
|
82 |
}
|
83 |
|
84 |
return self._make_openrouter_request(payload)
|
@@ -128,7 +134,7 @@ class MultimodalTools:
|
|
128 |
}
|
129 |
],
|
130 |
"temperature": 0,
|
131 |
-
"max_tokens":
|
132 |
}
|
133 |
|
134 |
return self._make_openrouter_request(payload)
|
@@ -138,6 +144,219 @@ class MultimodalTools:
|
|
138 |
logger.error(error_msg)
|
139 |
return error_msg
|
140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
def describe_image(self, image_path: str) -> str:
|
142 |
"""Get a detailed description of an image"""
|
143 |
return self.analyze_image(
|
@@ -164,3 +383,14 @@ def analyze_transcript(transcript: str, question: str = "Summarize this content"
|
|
164 |
"""Standalone function to analyze audio transcript"""
|
165 |
tools = MultimodalTools()
|
166 |
return tools.analyze_audio_transcript(transcript, question)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# tools/multimodal_tools.py
|
2 |
import requests
|
3 |
import json
|
4 |
+
import pandas as pd
|
5 |
+
import os # ✅ Added for file operations
|
6 |
+
import io # ✅ Added for code execution
|
7 |
+
import contextlib # ✅ Added for code execution
|
8 |
+
import ast # ✅ Added for code validation
|
9 |
+
import traceback # ✅ Added for error handling
|
10 |
from typing import Optional, Dict, Any
|
11 |
from .utils import encode_image_to_base64, validate_file_exists, get_env_var, logger
|
12 |
|
|
|
34 |
self.openrouter_url,
|
35 |
headers=self.headers,
|
36 |
json=payload,
|
37 |
+
timeout=60
|
38 |
)
|
39 |
response.raise_for_status()
|
40 |
|
|
|
84 |
}
|
85 |
],
|
86 |
"temperature": 0,
|
87 |
+
"max_tokens": 2048
|
88 |
}
|
89 |
|
90 |
return self._make_openrouter_request(payload)
|
|
|
134 |
}
|
135 |
],
|
136 |
"temperature": 0,
|
137 |
+
"max_tokens": 2048
|
138 |
}
|
139 |
|
140 |
return self._make_openrouter_request(payload)
|
|
|
144 |
logger.error(error_msg)
|
145 |
return error_msg
|
146 |
|
147 |
+
def analyze_excel_file(self, file_path: str, question: str) -> str:
|
148 |
+
"""
|
149 |
+
Analyze Excel or CSV file content using AI
|
150 |
+
|
151 |
+
Args:
|
152 |
+
file_path: Path to Excel (.xlsx) or CSV file
|
153 |
+
question: Question about the data
|
154 |
+
|
155 |
+
Returns:
|
156 |
+
AI analysis of the spreadsheet data
|
157 |
+
"""
|
158 |
+
if not validate_file_exists(file_path):
|
159 |
+
return f"Error: File not found at {file_path}"
|
160 |
+
|
161 |
+
try:
|
162 |
+
# Try reading as Excel first, then CSV
|
163 |
+
try:
|
164 |
+
df = pd.read_excel(file_path)
|
165 |
+
except Exception:
|
166 |
+
try:
|
167 |
+
df = pd.read_csv(file_path)
|
168 |
+
except Exception as e:
|
169 |
+
return f"Error reading file: Unable to read as Excel or CSV - {str(e)}"
|
170 |
+
|
171 |
+
# Convert dataframe to text representation for AI analysis
|
172 |
+
data_summary = f"""
|
173 |
+
Data file analysis:
|
174 |
+
- Shape: {df.shape[0]} rows, {df.shape[1]} columns
|
175 |
+
- Columns: {list(df.columns)}
|
176 |
+
|
177 |
+
First few rows:
|
178 |
+
{df.head().to_string()}
|
179 |
+
|
180 |
+
Data types:
|
181 |
+
{df.dtypes.to_string()}
|
182 |
+
|
183 |
+
Summary statistics:
|
184 |
+
{df.describe().to_string()}
|
185 |
+
"""
|
186 |
+
|
187 |
+
payload = {
|
188 |
+
"model": self.text_model,
|
189 |
+
"messages": [
|
190 |
+
{
|
191 |
+
"role": "user",
|
192 |
+
"content": f"Analyze this spreadsheet data and answer the question.\n\n{data_summary}\n\nQuestion: {question}"
|
193 |
+
}
|
194 |
+
],
|
195 |
+
"temperature": 0,
|
196 |
+
"max_tokens": 2048
|
197 |
+
}
|
198 |
+
|
199 |
+
return self._make_openrouter_request(payload)
|
200 |
+
|
201 |
+
except Exception as e:
|
202 |
+
error_msg = f"Error analyzing Excel file: {str(e)}"
|
203 |
+
logger.error(error_msg)
|
204 |
+
return error_msg
|
205 |
+
|
206 |
+
# ✅ NEW METHOD - Added Python code processing
|
207 |
+
def _validate_python_code(self, code: str) -> bool:
|
208 |
+
"""Validate Python code syntax"""
|
209 |
+
try:
|
210 |
+
ast.parse(code)
|
211 |
+
return True
|
212 |
+
except SyntaxError:
|
213 |
+
return False
|
214 |
+
|
215 |
+
    def _execute_python_code(self, code: str) -> str:
        """
        Safely execute Python code and capture output.

        Based on the LlamaIndex SimpleCodeExecutor pattern.

        Args:
            code: Python source text to execute.

        Returns:
            Captured stdout (plus stderr, if any), followed by the value of a
            trailing expression when one exists; a success placeholder when
            nothing was printed; or an ``"Error: ..."`` string on failure.
        """
        # Capture stdout and stderr produced by the executed code.
        stdout = io.StringIO()
        stderr = io.StringIO()
        output = ""
        return_value = None

        # Restricted namespace: only this whitelist of harmless builtins is
        # exposed, so the executed code cannot import modules or open files.
        # NOTE(review): exec-based sandboxing is not a hard security boundary;
        # treat inputs as semi-trusted.
        safe_globals = {
            '__builtins__': {
                'print': print,
                'len': len,
                'str': str,
                'int': int,
                'float': float,
                'list': list,
                'dict': dict,
                'sum': sum,
                'max': max,
                'min': min,
                'abs': abs,
                'round': round,
                'range': range,
                'enumerate': enumerate,
                'zip': zip,
            }
        }
        safe_locals = {}

        try:
            # Execute with captured output
            with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
                # Try to detect if there's a return value (last expression)
                try:
                    tree = ast.parse(code)
                    last_node = tree.body[-1] if tree.body else None

                    # If the last statement is an expression, capture its value
                    if isinstance(last_node, ast.Expr):
                        # Split code to add a return value assignment.
                        # NOTE(review): this line-based rewrite assumes the final
                        # expression fits on one unindented last line — a
                        # multi-line or indented trailing expression would break
                        # here and fall through to the plain exec below.
                        lines = code.rstrip().split('\n')
                        last_line = lines[-1]
                        exec_code = '\n'.join(lines[:-1]) + f'\n__result__ = {last_line}'

                        # Execute modified code
                        exec(exec_code, safe_globals, safe_locals)
                        return_value = safe_locals.get('__result__')
                    else:
                        # Normal execution
                        exec(code, safe_globals, safe_locals)
                except:
                    # If parsing or the rewrite fails, just execute the code as is
                    exec(code, safe_globals, safe_locals)

            # Get output
            output = stdout.getvalue()
            if stderr.getvalue():
                output += "\n" + stderr.getvalue()

            # Add return value if it exists
            if return_value is not None:
                output += f"\n\nFinal result: {return_value}"

            return output.strip() if output.strip() else str(return_value) if return_value is not None else "Code executed successfully (no output)"

        except Exception as e:
            # Capture exception information
            error_output = f"Error: {type(e).__name__}: {str(e)}"
            logger.error(f"Code execution error: {error_output}")
            return error_output
|
289 |
+
|
290 |
+
def analyze_python_file(self, file_path: str, question: str = "What is the final output of this code?") -> str:
|
291 |
+
"""
|
292 |
+
Read and analyze Python code file
|
293 |
+
|
294 |
+
Args:
|
295 |
+
file_path: Path to Python (.py) file
|
296 |
+
question: Question about the code
|
297 |
+
|
298 |
+
Returns:
|
299 |
+
Analysis or execution result of the Python code
|
300 |
+
"""
|
301 |
+
if not validate_file_exists(file_path):
|
302 |
+
return f"Error: Python file not found at {file_path}"
|
303 |
+
|
304 |
+
try:
|
305 |
+
# Read the Python file
|
306 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
307 |
+
code_content = f.read()
|
308 |
+
|
309 |
+
if not code_content.strip():
|
310 |
+
return "Error: Python file is empty"
|
311 |
+
|
312 |
+
# Validate syntax
|
313 |
+
if not self._validate_python_code(code_content):
|
314 |
+
return "Error: Python file contains syntax errors"
|
315 |
+
|
316 |
+
# If question asks for output/result, execute the code
|
317 |
+
if any(keyword in question.lower() for keyword in ['output', 'result', 'execute', 'run', 'final']):
|
318 |
+
logger.info(f"Executing Python code from {file_path}")
|
319 |
+
execution_result = self._execute_python_code(code_content)
|
320 |
+
|
321 |
+
# Also provide AI analysis if needed
|
322 |
+
if len(execution_result) < 50: # Short result, add AI analysis
|
323 |
+
payload = {
|
324 |
+
"model": self.text_model,
|
325 |
+
"messages": [
|
326 |
+
{
|
327 |
+
"role": "user",
|
328 |
+
"content": f"Python code:\n``````\n\nExecution result: {execution_result}\n\nQuestion: {question}"
|
329 |
+
}
|
330 |
+
],
|
331 |
+
"temperature": 0,
|
332 |
+
"max_tokens": 1024
|
333 |
+
}
|
334 |
+
|
335 |
+
ai_analysis = self._make_openrouter_request(payload)
|
336 |
+
return f"Execution result: {execution_result}\n\nAnalysis: {ai_analysis}"
|
337 |
+
else:
|
338 |
+
return execution_result
|
339 |
+
else:
|
340 |
+
# Just analyze the code without execution
|
341 |
+
payload = {
|
342 |
+
"model": self.text_model,
|
343 |
+
"messages": [
|
344 |
+
{
|
345 |
+
"role": "user",
|
346 |
+
"content": f"Analyze this Python code and answer the question.\n\nPython code:\n``````\n\nQuestion: {question}"
|
347 |
+
}
|
348 |
+
],
|
349 |
+
"temperature": 0,
|
350 |
+
"max_tokens": 2048
|
351 |
+
}
|
352 |
+
|
353 |
+
return self._make_openrouter_request(payload)
|
354 |
+
|
355 |
+
except Exception as e:
|
356 |
+
error_msg = f"Error analyzing Python file: {str(e)}"
|
357 |
+
logger.error(error_msg)
|
358 |
+
return error_msg
|
359 |
+
|
360 |
def describe_image(self, image_path: str) -> str:
|
361 |
"""Get a detailed description of an image"""
|
362 |
return self.analyze_image(
|
|
|
383 |
"""Standalone function to analyze audio transcript"""
|
384 |
tools = MultimodalTools()
|
385 |
return tools.analyze_audio_transcript(transcript, question)
|
386 |
+
|
387 |
+
def analyze_excel(file_path: str, question: str) -> str:
    """Convenience wrapper: analyze an Excel/CSV file with a fresh MultimodalTools instance."""
    return MultimodalTools().analyze_excel_file(file_path, question)
|
391 |
+
|
392 |
+
# ✅ NEW FUNCTION - Added Python code convenience function
|
393 |
+
def analyze_python(file_path: str, question: str = "What is the final output of this code?") -> str:
    """Convenience wrapper: analyze a Python file with a fresh MultimodalTools instance."""
    return MultimodalTools().analyze_python_file(file_path, question)
|
tools/search_tools.py
CHANGED
@@ -17,7 +17,7 @@ class SearchTools:
|
|
17 |
# Tertiary: SerpAPI (expensive, fallback only)
|
18 |
self.serpapi_key = os.getenv("SERPAPI_KEY")
|
19 |
|
20 |
-
def search_duckduckgo(self, query: str, max_results: int =
|
21 |
"""
|
22 |
Free search using DuckDuckGo Instant Answer API
|
23 |
|
@@ -69,7 +69,7 @@ class SearchTools:
|
|
69 |
logger.error(f"DuckDuckGo search failed: {str(e)}")
|
70 |
return []
|
71 |
|
72 |
-
def search_tavily(self, query: str, max_results: int =
|
73 |
"""
|
74 |
Search using Tavily API (cost-effective)
|
75 |
|
@@ -116,7 +116,7 @@ class SearchTools:
|
|
116 |
logger.error(f"Tavily search failed: {str(e)}")
|
117 |
return []
|
118 |
|
119 |
-
def search_serpapi(self, query: str, max_results: int =
|
120 |
"""
|
121 |
Search using SerpAPI (expensive, fallback only)
|
122 |
|
@@ -162,7 +162,7 @@ class SearchTools:
|
|
162 |
logger.error(f"SerpAPI search failed: {str(e)}")
|
163 |
return []
|
164 |
|
165 |
-
def search(self, query: str, max_results: int =
|
166 |
"""
|
167 |
Comprehensive search using multiple providers with fallback strategy
|
168 |
|
@@ -201,23 +201,23 @@ class SearchTools:
|
|
201 |
logger.error("All search providers failed")
|
202 |
return []
|
203 |
|
204 |
-
def search_news(self, query: str, max_results: int =
|
205 |
"""Search for news articles"""
|
206 |
news_query = f"news {query}"
|
207 |
return self.search(news_query, max_results)
|
208 |
|
209 |
-
def search_academic(self, query: str, max_results: int =
|
210 |
"""Search for academic content"""
|
211 |
academic_query = f"academic research {query} site:scholar.google.com OR site:arxiv.org OR site:researchgate.net"
|
212 |
return self.search(academic_query, max_results)
|
213 |
|
214 |
# Convenience functions
|
215 |
-
def search_web(query: str, max_results: int =
|
216 |
"""Standalone function for web search"""
|
217 |
tools = SearchTools()
|
218 |
return tools.search(query, max_results)
|
219 |
|
220 |
-
def search_news(query: str, max_results: int =
|
221 |
"""Standalone function for news search"""
|
222 |
tools = SearchTools()
|
223 |
return tools.search_news(query, max_results)
|
|
|
17 |
# Tertiary: SerpAPI (expensive, fallback only)
|
18 |
self.serpapi_key = os.getenv("SERPAPI_KEY")
|
19 |
|
20 |
+
def search_duckduckgo(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
21 |
"""
|
22 |
Free search using DuckDuckGo Instant Answer API
|
23 |
|
|
|
69 |
logger.error(f"DuckDuckGo search failed: {str(e)}")
|
70 |
return []
|
71 |
|
72 |
+
def search_tavily(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
73 |
"""
|
74 |
Search using Tavily API (cost-effective)
|
75 |
|
|
|
116 |
logger.error(f"Tavily search failed: {str(e)}")
|
117 |
return []
|
118 |
|
119 |
+
def search_serpapi(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
120 |
"""
|
121 |
Search using SerpAPI (expensive, fallback only)
|
122 |
|
|
|
162 |
logger.error(f"SerpAPI search failed: {str(e)}")
|
163 |
return []
|
164 |
|
165 |
+
def search(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
166 |
"""
|
167 |
Comprehensive search using multiple providers with fallback strategy
|
168 |
|
|
|
201 |
logger.error("All search providers failed")
|
202 |
return []
|
203 |
|
204 |
+
def search_news(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
205 |
"""Search for news articles"""
|
206 |
news_query = f"news {query}"
|
207 |
return self.search(news_query, max_results)
|
208 |
|
209 |
+
def search_academic(self, query: str, max_results: int = 10) -> List[Dict[str, Any]]:
|
210 |
"""Search for academic content"""
|
211 |
academic_query = f"academic research {query} site:scholar.google.com OR site:arxiv.org OR site:researchgate.net"
|
212 |
return self.search(academic_query, max_results)
|
213 |
|
214 |
# Convenience functions
|
215 |
+
def search_web(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Module-level helper: run a web search on a fresh SearchTools instance."""
    return SearchTools().search(query, max_results)
|
219 |
|
220 |
+
def search_news(query: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Module-level helper: run a news search on a fresh SearchTools instance."""
    return SearchTools().search_news(query, max_results)