Spaces:
Running
Running
Update tests.py
Browse files
tests.py
CHANGED
@@ -1,391 +1,1046 @@
|
|
1 |
-
|
2 |
-
import
|
3 |
-
import
|
4 |
-
from
|
5 |
-
|
6 |
-
from subprocess import Popen, PIPE
|
7 |
-
from threading import Timer
|
8 |
-
import os
|
9 |
-
import glob
|
10 |
-
import http.client
|
11 |
-
import json
|
12 |
import openpyxl
|
13 |
-
import
|
14 |
-
from google import genai
|
15 |
-
|
16 |
-
# SECURITY: provider API keys used to be hard-coded in this block. Any key
# that has ever been committed must be treated as compromised and rotated.
# Credentials are now taken from the process environment only.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Directory that uploaded files are moved out of, and the working directory
# they are moved into.
source_dir = "/app/uploads/temp"
destination_dir = "/app/code_interpreter"
files_list=[]
# Filenames already fetched, used to skip repeat downloads.
downloaded_files=[]
# GROQ_API_KEY, GEMINI_API_KEY and OPENROUTER_API_KEY must already be present
# in the environment before start-up; this module no longer assigns them.
mcp = FastMCP("code_sandbox")
data={}
result=""
# Module-level buffers that the tool functions declare ``global`` and read
# back when building their responses.
stdout=""
stderr=""
|
31 |
import requests
|
32 |
-
import
|
33 |
-
from
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
try:
|
45 |
-
|
46 |
-
|
47 |
-
response = requests.get(files_url)
|
48 |
-
response.raise_for_status() # Check for HTTP errors
|
49 |
-
|
50 |
-
# 2. Parse the HTML using BeautifulSoup
|
51 |
soup = BeautifulSoup(response.content, "html.parser")
|
52 |
-
|
53 |
-
# 3. Find all the <a> (anchor) tags, which represent the links to the files
|
54 |
-
# This assumes the file links are inside <a> tags as shown in the server code
|
55 |
file_links = soup.find_all("a")
|
56 |
|
57 |
-
# 4. Iterate through the links and download the files
|
58 |
for link in file_links:
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
# Construct the full file URL if the href is relative
|
63 |
-
if not file_url.startswith("http"):
|
64 |
-
file_url = f"{base_url}{file_url}" # Relative URLs
|
65 |
-
|
66 |
-
filename = os.path.basename(file_url) # Extract the filename from the URL
|
67 |
-
file_path = os.path.join(download_directory, filename)
|
68 |
-
if filename in downloaded_files:
|
69 |
-
pass
|
70 |
-
else:
|
71 |
-
downloaded_files.append(filename)
|
72 |
-
print(f"Downloading: {filename} from {file_url}")
|
73 |
-
|
74 |
-
# Download the file
|
75 |
-
file_response = requests.get(file_url, stream=True) # Use stream=True for large files
|
76 |
-
file_response.raise_for_status() # Check for HTTP errors
|
77 |
-
|
78 |
-
with open(file_path, "wb") as file: # Open in binary write mode
|
79 |
-
for chunk in file_response.iter_content(chunk_size=8192): # Iterate and write in chunks (good for large files)
|
80 |
-
if chunk: # filter out keep-alive new chunks
|
81 |
-
file.write(chunk)
|
82 |
-
|
83 |
-
print(f"Downloaded: {filename} to {file_path}")
|
84 |
-
|
85 |
-
except requests.exceptions.RequestException as e:
|
86 |
-
print(f"Error downloading {link.get('href')}: {e}")
|
87 |
-
except OSError as e: #Handles potential issues with file permissions or disk space.
|
88 |
-
print(f"Error saving {filename}: {e}")
|
89 |
-
|
90 |
-
except requests.exceptions.RequestException as e:
|
91 |
-
print(f"Error getting file list from server: {e}")
|
92 |
-
except Exception as e: # Catch all other potential errors
|
93 |
-
print(f"An unexpected error occurred: {e}")
|
94 |
-
|
95 |
-
def transfer_files():
    """Move the contents of every sub-directory of ``source_dir`` into ``destination_dir``.

    Entries sitting directly in ``source_dir`` that are not directories are
    left untouched. Each item found inside a sub-directory is moved to
    ``destination_dir`` under its own name.
    """
    for entry in os.listdir(source_dir):
        subdir = os.path.join(source_dir, entry)
        if not os.path.isdir(subdir):
            continue
        for name in os.listdir(subdir):
            shutil.move(
                os.path.join(subdir, name),
                os.path.join(destination_dir, name),
            )
|
103 |
-
|
104 |
-
def upload_file(file_path, upload_url):
|
105 |
-
"""Uploads a file to the specified server endpoint."""
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
111 |
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
115 |
|
116 |
-
#
|
117 |
-
|
|
|
118 |
|
119 |
-
|
120 |
-
|
121 |
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
return None
|
136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
-
def run(cmd, timeout_sec):
|
144 |
-
global stdout
|
145 |
-
global stderr
|
146 |
-
proc = Popen(shlex.split(cmd), stdout=PIPE, stderr=PIPE,cwd="/app/code_interpreter/")
|
147 |
-
timer = Timer(timeout_sec, proc.kill)
|
148 |
try:
|
149 |
-
|
150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
finally:
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
|
155 |
-
|
156 |
-
|
157 |
-
"""Ask another AI model about audios.The AI model can listen to the audio and give answers.Eg-query:Generate detailed minutes of meeting from the audio clip,audiopath='/app/code_interpreter/<audioname>'.Note:The audios are automatically present in the /app/code_interpreter directory."""
|
158 |
-
download_all_files("https://opengpt-4ik5.onrender.com", "/upload", "/app/code_interpreter")
|
159 |
-
myfile = client.files.upload(file=audiopath)
|
160 |
-
|
161 |
-
response = client.models.generate_content(
|
162 |
-
model='gemini-2.0-flash',
|
163 |
-
contents=[query, myfile]
|
164 |
-
)
|
165 |
-
return {"Output":str(response.text)}
|
166 |
|
167 |
-
@mcp.tool()
def analyse_video(videopath,query) -> dict:
    """Ask another AI model about videos.The AI model can see the videos and give answers.Eg-query:Create a very detailed transcript and summary of the video,videopath='/app/code_interpreter/<videoname>'Note:The videos are automatically present in the /app/code_interpreter directory."""
    # NOTE(review): presumably refreshes /app/code_interpreter from the server
    # before the upload; download_all_files is defined elsewhere - confirm.
    download_all_files("https://opengpt-4ik5.onrender.com", "/upload", "/app/code_interpreter")
    uploaded = client.files.upload(file=videopath)

    # Poll once per second for as long as the upload reports PROCESSING.
    while True:
        if uploaded.state.name != "PROCESSING":
            break
        print('.', end='')
        time.sleep(1)
        uploaded = client.files.get(name=uploaded.name)

    # A FAILED upload is surfaced to the caller as ValueError("FAILED").
    if uploaded.state.name == "FAILED":
        raise ValueError(uploaded.state.name)

    answer = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=[query, uploaded]
    )
    return {"Output":str(answer.text)}
|
186 |
|
|
|
187 |
|
188 |
@mcp.tool()
|
189 |
-
def
|
190 |
-
"""
|
191 |
-
|
192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
|
|
|
|
|
|
|
194 |
|
195 |
-
|
196 |
-
|
197 |
-
contents=[query, video_file]
|
198 |
-
)
|
199 |
-
return {"Output":str(response.text)}
|
200 |
|
201 |
@mcp.tool()
|
202 |
-
def
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
|
|
|
|
|
|
|
212 |
|
213 |
|
214 |
@mcp.tool()
|
215 |
-
def
|
216 |
-
"""
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
|
|
|
|
|
|
236 |
|
237 |
-
@mcp.tool()
|
238 |
-
def run_shell_command(cmd:str) -> dict:
|
239 |
-
"""(cmd:Example- mkdir test.By default , the command is run inside the /app/code_interpreter/ directory.).Remember, the code_interpreter is running on **alpine linux** , so write commands accordingly.Eg-sudo does not work and is not required.."""
|
240 |
-
global stdout
|
241 |
-
global stderr
|
242 |
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
|
251 |
|
252 |
@mcp.tool()
|
253 |
-
def install_python_packages(python_packages:str) ->
|
254 |
-
"""
|
255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
package_names = python_packages.strip()
|
257 |
-
command="pip install"
|
258 |
if not package_names:
|
259 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
|
261 |
-
run(
|
262 |
-
f"{command} --break-system-packages {package_names}", timeout_sec=300
|
263 |
-
)
|
264 |
-
while stderr=="" and stdout=="":
|
265 |
-
pass
|
266 |
-
time.sleep(2)
|
267 |
-
return {"stdout":stdout,"stderr":stderr,"info":"Ran package installation command"}
|
268 |
|
269 |
@mcp.tool()
|
270 |
-
def
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
}
|
277 |
-
conn.request("GET",f"/api/transcript?videoId={videoid}", headers=headers)
|
278 |
|
279 |
-
res = conn.getresponse()
|
280 |
-
data = res.read()
|
281 |
-
return json.loads(data)
|
282 |
|
283 |
@mcp.tool()
|
284 |
-
def
|
285 |
-
|
286 |
-
|
287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
-
workbook = openpyxl.load_workbook(os.path.join(destination_dir, filename))
|
290 |
|
291 |
-
|
292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
|
294 |
-
# Iterate over all sheets
|
295 |
-
for sheet_name in workbook.sheetnames:
|
296 |
-
sheet = workbook[sheet_name]
|
297 |
-
# Iterate over all rows and columns
|
298 |
-
for row in sheet.iter_rows():
|
299 |
-
for cell in row:
|
300 |
-
# Get cell coordinate (e.g., 'A1') and value
|
301 |
-
cell_coordinate = cell.coordinate
|
302 |
-
cell_value = cell.value
|
303 |
-
if cell_value is not None:
|
304 |
-
excel_data_dict[cell_coordinate] = str(cell_value)
|
305 |
-
return excel_data_dict
|
306 |
@mcp.tool()
|
307 |
-
def
|
308 |
-
"""
|
|
|
309 |
|
310 |
-
|
|
|
311 |
|
|
|
|
|
|
|
|
|
|
|
312 |
|
|
|
|
|
313 |
headers = {
|
314 |
-
|
315 |
-
|
316 |
-
'Content-Type': "application/json"
|
317 |
}
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
|
|
|
|
|
|
335 |
|
336 |
|
337 |
@mcp.tool()
|
338 |
-
def
|
339 |
-
"""
|
340 |
-
|
341 |
-
model="groq/deepseek-r1-distill-llama-70b",
|
342 |
-
messages=[
|
343 |
-
{"role": "user", "content": f"{query}.Here is what i Know about the query:{info}"}
|
344 |
-
],
|
345 |
-
stream=False
|
346 |
-
)
|
347 |
|
|
|
|
|
348 |
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
350 |
|
351 |
-
|
352 |
-
def deepthinking2(query:str,info:str) -> dict:
|
353 |
-
"""Ask another intelligent AI about the query.Ask the question defined by the query string and what you know about the question as well as provide your own knowledge and ideas about the question through the info string."""
|
354 |
-
response = completion(
|
355 |
-
model="openrouter/deepseek/deepseek-chat",
|
356 |
-
messages=[
|
357 |
-
{"role": "user", "content": f"Hi!"}],
|
358 |
-
provider={"order": ["Together"],"allow_fallbacks":False},
|
359 |
-
|
360 |
-
)
|
361 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
|
363 |
-
return {"response":str(response.choices[0].message.content)}
|
364 |
|
365 |
@mcp.tool()
|
366 |
-
def
|
367 |
-
"""
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
|
376 |
-
|
377 |
|
378 |
-
if
|
379 |
-
|
380 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
|
383 |
-
# @mcp.tool()
|
384 |
-
# def run_website(start_cmd:str,port=8501) -> dict:
|
385 |
-
# """(start_cmd:streamlit run app.py).Always specify sandbox id.Specify port (int) if different from 8501."""
|
386 |
-
# output=sbx.commands.run(start_cmd,sandbox_id)
|
387 |
-
# url = sbx.get_host(port)
|
388 |
-
# info={"info":f"Your Application is live [here](https://{url})"}
|
389 |
|
390 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
391 |
|
|
|
1 |
+
# NOTE: line 1 of the committed file began with stray pasted text
# ("...nce the readability of the provided Python code. Key Areas for
# Improvement:") fused with the imports below; it is kept here as a comment
# so the module parses.
import glob
import json
import logging
import os
import shlex
import shutil
import time
from pathlib import Path
from subprocess import TimeoutExpired
from typing import List, Dict, Optional, Tuple, Any
from urllib.parse import urlparse

import openpyxl
import pexpect
import requests
from bs4 import BeautifulSoup
from google import genai # Assuming genai handles API key internally via env or client init
from litellm import completion
from mcp.server.fastmcp import FastMCP
from requests.exceptions import RequestException
|
14 |
+
|
15 |
+
# --- Logging Setup ---
# Must run before the first logging call below: a module-level logging call on
# an unconfigured root logger installs the default handler, after which
# basicConfig() is a no-op and the INFO level / format would be lost.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- Configuration ---

# Load API Keys from Environment Variables (Recommended)
# Ensure these are set in your deployment environment
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY") # Added for RapidAPI calls

# Check for missing essential keys
if not GEMINI_API_KEY:
    logging.warning("GEMINI_API_KEY environment variable not set.")
# Add checks for other keys if they are strictly required
# if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY not set")
# if not OPENROUTER_API_KEY: raise ValueError("OPENROUTER_API_KEY not set")
# if not RAPIDAPI_KEY: raise ValueError("RAPIDAPI_KEY not set")

# Set keys for services that require explicit environment variable setting
# (litellm might read these automatically, but explicit setting is safer)
if GROQ_API_KEY:
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY
if GEMINI_API_KEY:
    # Note: genai client might use its own way, but litellm might need this
    os.environ["GEMINI_API_KEY"] = GEMINI_API_KEY
if OPENROUTER_API_KEY:
    os.environ["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY


# --- Constants ---
CODE_DIR = Path("/app/code_interpreter")
TEMP_UPLOAD_DIR = Path("/app/uploads/temp") # Source for transfer_files
SERVER_BASE_URL = "https://opengpt-4ik5.onrender.com"
FILES_ENDPOINT = "/upload" # Endpoint to list files
UPLOAD_ENDPOINT = "/upload" # Endpoint to upload files
SERVER_FILES_URL = f"{SERVER_BASE_URL}{FILES_ENDPOINT}"
SERVER_UPLOAD_URL = f"{SERVER_BASE_URL}{UPLOAD_ENDPOINT}"
SERVER_STATIC_URL_PREFIX = f"{SERVER_BASE_URL}/static/"

# RapidAPI Endpoints
YOUTUBE_TRANSCRIPT_API = "youtube-transcript3.p.rapidapi.com"
SCRAPE_NINJA_API = "scrapeninja.p.rapidapi.com"

# --- Global State (Use Sparingly) ---
# Keep track of files present in the CODE_DIR to identify newly created ones
# This state persists across tool calls within a single mcp run
tracked_files_in_codedir: set[Path] = set(CODE_DIR.glob("*"))
# Keep track of files downloaded from the server to avoid re-downloading
server_downloaded_files: set[str] = set()

# --- Clients ---
try:
    # Pass the key explicitly when we have one; otherwise let the client try
    # whatever default credential discovery it supports.
    if GEMINI_API_KEY:
        client = genai.Client(api_key=GEMINI_API_KEY)
        logging.info("Gemini Client initialized using API Key.")
    else:
        # Attempt to initialize without explicit key (might use ADC or other methods)
        client = genai.Client()
        logging.info("Gemini Client initialized (attempting default credentials).")

except Exception as e:
    logging.error(f"Failed to initialize Gemini client: {e}")
    client = None # Indicate client is unavailable

mcp = FastMCP("code_sandbox")
requests_session = requests.Session() # Use a session for potential connection pooling
|
88 |
+
|
89 |
+
# --- Helper Functions ---
|
90 |
+
|
91 |
+
def download_server_files(
    base_url: str,
    files_endpoint: str,
    download_directory: Path,
    already_downloaded: set[str]
) -> set[str]:
    """
    Download every file linked from the server's listing page that has not
    already been fetched in this session.

    The listing page is parsed for ``<a href>`` links. Each link is turned
    into a URL (relative hrefs are appended to ``base_url``), the filename is
    taken from the URL path, and the body is streamed to
    ``download_directory``. Failures on one file are logged and do not stop
    the remaining downloads.

    Args:
        base_url: The base URL of the server (e.g., "https://example.com").
        files_endpoint: The path to the page listing files (e.g., "/uploads").
        download_directory: The local directory (Path object) to save files.
            Created if it does not exist.
        already_downloaded: A set of filenames already downloaded. It is
            updated in place with every file that downloads successfully.

    Returns:
        The same ``already_downloaded`` set, including any new filenames.
    """
    download_directory.mkdir(parents=True, exist_ok=True)
    files_url = f"{base_url}{files_endpoint}"
    newly_downloaded_count = 0

    try:
        response = requests_session.get(files_url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        for link in soup.find_all("a"):
            file_href = link.get("href")
            if not file_href:
                continue

            # Construct full URL if relative
            if file_href.startswith(("http://", "https://")):
                file_url = file_href
            else:
                file_url = f"{base_url}{file_href}"

            # Use only the URL *path* for the local name so a query string or
            # fragment never ends up in the filename, and links without a
            # path component (bare host, "#", "?x=1") are skipped instead of
            # being saved under the host name.
            filename = Path(urlparse(file_url).path).name
            if not filename:
                logging.warning(f"Could not extract filename from URL: {file_url}")
                continue

            # Skip if already downloaded in this session
            if filename in already_downloaded:
                continue

            file_path = download_directory / filename
            logging.info(f"Downloading: {filename} from {file_url}")

            try:
                # With stream=True the connection stays checked out until the
                # body is consumed; the context manager releases it even when
                # the status check or a write fails part-way through.
                with requests_session.get(file_url, stream=True, timeout=60) as file_response:
                    file_response.raise_for_status()

                    with open(file_path, "wb") as f:
                        for chunk in file_response.iter_content(chunk_size=8192):
                            if chunk:  # skip keep-alive chunks
                                f.write(chunk)

                logging.info(f"Downloaded: {filename} to {file_path}")
                already_downloaded.add(filename)
                newly_downloaded_count += 1

            except RequestException as e:
                logging.error(f"Error downloading {filename}: {e}")
            except OSError as e:
                logging.error(f"Error saving {filename}: {e}")
            except Exception as e:
                logging.error(f"Unexpected error downloading/saving {filename}: {e}")

    except RequestException as e:
        logging.error(f"Error getting file list from {files_url}: {e}")
    except Exception as e:
        logging.error(f"An unexpected error occurred during file download process: {e}")

    logging.info(f"Downloaded {newly_downloaded_count} new files from server.")
    return already_downloaded
|
170 |
+
|
171 |
+
def transfer_temp_files(source_dir: Path, destination_dir: Path):
    """
    Move uploaded files out of the temp area into the code directory.

    Files are taken from two places: directly inside ``source_dir``, and
    inside its immediate sub-directories. Anything nested deeper, and any
    non-file entry inside a sub-directory, is left where it is. A failed move
    is logged and skipped.

    Args:
        source_dir: Root of the temp upload area. If it does not exist a
            warning is logged and nothing is moved.
        destination_dir: Directory that receives the files. Created if needed.
    """
    destination_dir.mkdir(parents=True, exist_ok=True)
    if not source_dir.exists():
        logging.warning(f"Source directory for transfer does not exist: {source_dir}")
        return

    def _relocate(path: Path, direct: bool) -> int:
        """Move one file into destination_dir; return 1 on success, 0 on OSError."""
        target = destination_dir / path.name
        try:
            shutil.move(str(path), str(target))
            if direct:
                logging.info(f"Moved {path.name} directly to {destination_dir}")
            else:
                logging.info(f"Moved {path.name} to {destination_dir}")
            return 1
        except OSError as exc:
            logging.error(f"Error moving {path.name}: {exc}")
            return 0

    total_moved = 0
    for entry in source_dir.iterdir():
        if entry.is_dir():
            # e.g. a session-specific temp folder: take the files it holds.
            for child in entry.iterdir():
                if child.is_file():
                    total_moved += _relocate(child, direct=False)
        elif entry.is_file():
            # A file sitting directly in the temp root.
            total_moved += _relocate(entry, direct=True)

    if total_moved > 0:
        logging.info(f"Transferred {total_moved} files from {source_dir} area.")
|
200 |
+
|
201 |
+
def upload_file_to_server(file_path: Path, upload_url: str) -> Optional[str]:
    """
    Send one file to the server as a multipart form upload.

    The file is posted under the form field ``file``. On success the stripped
    response body is returned. Every failure is logged and reported as
    ``None`` rather than raised.

    Args:
        file_path: Path object of the file to upload.
        upload_url: The URL to upload the file to.

    Returns:
        The stripped text of the server's response on success (assumed to be
        the stored filename or identifier), or None on any failure.
    """
    if not file_path.is_file():
        logging.error(f"File not found or is not a file: {file_path}")
        return None

    try:
        with file_path.open("rb") as handle:
            reply = requests_session.post(
                upload_url,
                files={"file": (file_path.name, handle)},
                timeout=60,
            )
            reply.raise_for_status()  # 4xx / 5xx become RequestException subclasses

            identifier = reply.text.strip()
            logging.info(f"File '{file_path.name}' uploaded successfully. Server identifier: {identifier}")
            return identifier
    except FileNotFoundError:
        # The file vanished between the is_file() check and the open().
        logging.error(f"File not found during upload attempt: {file_path}")
    except RequestException as exc:
        logging.error(f"Upload failed for {file_path.name}. Network/Server error: {exc}")
        failed_reply = getattr(exc, "response", None)
        if failed_reply is not None:
            logging.error(f"Server response: {failed_reply.status_code} - {failed_reply.text}")
    except Exception as exc:
        logging.error(f"An unexpected error occurred during upload of {file_path.name}: {exc}")

    return None
|
238 |
|
239 |
+
def run_command_in_sandbox(
    command: str,
    timeout_sec: int,
    run_forever: bool = False,
    cwd: Path = CODE_DIR
) -> str:
    """
    Run a shell command in a pexpect-driven bash session.

    A bash child is spawned, its prompt is replaced by a unique marker, and
    ``cd <cwd> && <command>`` is sent. The function then waits for the marker
    (command finished), EOF, or the overall timeout, accumulating whatever the
    session printed. Errors are not raised; they are appended to the returned
    text as ``--- ... ---`` banners.

    Args:
        command: The command string to execute.
        timeout_sec: Timeout in seconds. Ignored if run_forever is True.
        run_forever: If True, return right after sending the command without
            waiting for it or capturing output (use with caution).
        cwd: The working directory (Path object) for the command.

    Returns:
        The captured session output, stripped. In run_forever mode, a fixed
        status message instead.
    """
    output = ""
    # shlex.quote keeps a cwd containing spaces or shell metacharacters intact.
    full_command = f"cd {shlex.quote(str(cwd))} && {command}"
    logging.info(f"Running command: {full_command}")

    try:
        child = pexpect.spawn("bash", timeout=30) # Base timeout for pexpect interactions
        # Set a unique prompt marker to detect command completion reliably
        prompt_marker = f"COMMAND_DONE_{time.time()}"
        child.sendline(f'export PS1="{prompt_marker}"')
        # NOTE(review): the terminal echo of the export line also contains the
        # marker, so this wait may be satisfied by the echo rather than by the
        # new prompt - verify against a real session.
        child.expect_exact(prompt_marker, timeout=10) # Wait for prompt change

        child.sendline(full_command)

        if run_forever:
            # No output is collected in this mode; control returns immediately.
            # NOTE(review): the finally block below force-closes the shell on
            # the way out, which likely ends the command that was just
            # started - confirm this is the intended lifecycle.
            logging.info(f"Command '{command}' started in 'run_forever' mode.")
            return f"Command '{command}' started in background (output streaming not captured)."

        # For commands with timeout:
        start_time = time.time()
        while True:
            if time.time() - start_time > timeout_sec:
                raise TimeoutExpired(command, timeout_sec)
            try:
                # Match the marker literally (it contains a "." from
                # time.time()), consistent with the expect_exact call above.
                # TIMEOUT is listed so it comes back as index 2 instead of
                # raising; the loop-top check then enforces the deadline.
                index = child.expect_exact(
                    [prompt_marker, pexpect.EOF, pexpect.TIMEOUT],
                    timeout=max(1, timeout_sec - (time.time() - start_time)),
                )
                line = child.before.decode(errors='ignore')
                output += line

                if index == 0: # Prompt marker found, command finished
                    logging.info(f"Command '{command}' finished.")
                    break
                elif index == 1: # EOF
                    logging.warning(f"Command '{command}' resulted in EOF.")
                    break
                # index == 2 (TIMEOUT) is handled by the outer loop's timeout check

            except pexpect.TIMEOUT:
                logging.warning(f"Pexpect read timed out waiting for output or prompt for command: {command}")
                # Check outer loop timeout condition
                if time.time() - start_time > timeout_sec:
                    raise TimeoutExpired(command, timeout_sec)
                # Otherwise, continue waiting if overall time not exceeded
                continue
            except Exception as e:
                logging.error(f"Pexpect error during command '{command}': {e}")
                output += f"\nPexpect Error: {e}"
                break

    except TimeoutExpired:
        output += f"\n--- TimeoutError: Command '{command}' exceeded {timeout_sec} seconds ---"
        logging.error(f"Command '{command}' timed out after {timeout_sec} seconds.")
    except pexpect.ExceptionPexpect as e:
        output += f"\n--- Pexpect Error: {e} ---"
        logging.error(f"Pexpect execution failed for command '{command}': {e}")
    except Exception as e:
        output += f"\n--- Unexpected Error: {e} ---"
        logging.error(f"Unexpected error running command '{command}': {e}")
    finally:
        # 'child' is unbound if spawn() itself failed.
        if 'child' in locals() and child.isalive():
            child.close(force=True)

    logging.info(f"Command '{command}' completed. Output length: {len(output)}")
    return output.strip() # Remove trailing newline/marker if any
|
337 |
+
|
338 |
+
|
339 |
+
def _ensure_files_synced(code_dir: Path, temp_dir: Path):
    """
    Bring ``code_dir`` up to date before a tool runs.

    Moves pending temp uploads into ``code_dir``, downloads server files not
    yet fetched this session, then re-snapshots the directory contents.

    Rebinds the module globals ``server_downloaded_files`` and
    ``tracked_files_in_codedir``.
    """
    global server_downloaded_files, tracked_files_in_codedir

    logging.info("Ensuring local file system is synchronized...")

    # Step 1: files waiting in the temp upload area.
    transfer_temp_files(temp_dir, code_dir)

    # Step 2: files listed on the server that are still missing locally.
    server_downloaded_files = download_server_files(
        base_url=SERVER_BASE_URL,
        files_endpoint=FILES_ENDPOINT,
        download_directory=code_dir,
        already_downloaded=server_downloaded_files,
    )

    # Step 3: snapshot taken only after both sync steps have run.
    tracked_files_in_codedir = set(code_dir.glob("*"))
|
352 |
+
|
353 |
+
|
354 |
+
def _upload_new_files(code_dir: Path, known_files_before: set[Path]) -> Tuple[List[str], set[Path]]:
|
355 |
+
"""Finds new files in code_dir, uploads them, returns URLs and updated file set."""
|
356 |
+
current_files = set(code_dir.glob("*"))
|
357 |
+
new_files = current_files - known_files_before
|
358 |
+
uploaded_file_urls = []
|
359 |
+
|
360 |
+
if not new_files:
|
361 |
+
logging.info("No new files detected for upload.")
|
362 |
+
return [], current_files # Return empty list and the latest set
|
363 |
+
|
364 |
+
logging.info(f"Detected {len(new_files)} new files for upload: {[f.name for f in new_files]}")
|
365 |
+
|
366 |
+
for file_path in new_files:
|
367 |
+
if file_path.is_file(): # Ensure it's a file
|
368 |
+
server_filename = upload_file_to_server(file_path, SERVER_UPLOAD_URL)
|
369 |
+
if server_filename:
|
370 |
+
# Construct the download URL based on the server's static path convention
|
371 |
+
download_url = f"{SERVER_STATIC_URL_PREFIX}{server_filename}"
|
372 |
+
uploaded_file_urls.append(download_url)
|
373 |
+
else:
|
374 |
+
logging.error(f"Failed to upload {file_path.name}, skipping URL generation.")
|
375 |
+
else:
|
376 |
+
logging.warning(f"Skipping upload for non-file item: {file_path}")
|
377 |
|
378 |
|
379 |
+
logging.info(f"Uploaded {len(uploaded_file_urls)} new files.")
|
380 |
+
return uploaded_file_urls, current_files
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
|
383 |
+
# --- MCP Tools ---
|
384 |
|
385 |
def _analyse_media_with_gemini(
    media_path: str,
    query: str,
    media_label: str,
    processing_timeout: float,
    poll_interval: float,
) -> Dict[str, str]:
    """Upload a media file to Gemini, wait for processing, and ask a question.

    Shared implementation behind ``analyse_audio``, ``analyse_video`` and
    ``analyse_images``; those tools differ only in label, timeout and
    polling interval.

    Args:
        media_path: Absolute path, or a path relative to ``CODE_DIR``.
        query: The question to ask about the media content.
        media_label: Lower-case word used in messages ("audio", "video", "image").
        processing_timeout: Maximum seconds to wait while the uploaded file
            is in the "PROCESSING" state.
        poll_interval: Seconds to sleep between state checks.

    Returns:
        ``{"Output": <text>}`` where the text is either the model's answer
        or an error description. Errors are reported in the dictionary
        rather than raised.
    """
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)
    if not client:
        return {"Output": "Error: Gemini client not initialized."}

    file_path = Path(media_path)
    if not file_path.is_absolute():  # Relative paths are resolved against CODE_DIR
        file_path = CODE_DIR / media_path

    # is_file() rather than exists(): a directory cannot be uploaded.
    if not file_path.is_file():
        return {"Output": f"Error: {media_label.capitalize()} file not found at {file_path}"}

    logging.info(f"Analysing {media_label}: {file_path.name} with query: '{query}'")
    try:
        file_ref = client.files.upload(file=str(file_path))
        logging.info(
            f"Uploaded {file_path.name} to Gemini API. "
            f"File ref: {file_ref.name}, State: {file_ref.state.name}"
        )

        # Poll until processing finishes. A monotonic clock keeps the
        # deadline immune to wall-clock adjustments.
        deadline = time.monotonic() + processing_timeout
        while file_ref.state.name == "PROCESSING":
            if time.monotonic() > deadline:
                logging.error(f"Gemini file processing timed out for {file_ref.name}")
                return {"Output": f"Error: Gemini file processing timed out for {file_path.name}."}
            # Progress goes through logging instead of print(): if this server
            # runs over the MCP stdio transport, stray stdout output would
            # corrupt the protocol stream (NOTE(review): confirm transport).
            logging.debug(f"Waiting for Gemini to process {file_ref.name}...")
            time.sleep(poll_interval)
            file_ref = client.files.get(name=file_ref.name)

        if file_ref.state.name == "FAILED":
            logging.error(
                f"Gemini file processing failed for {file_ref.name}. State: {file_ref.state.name}"
            )
            return {"Output": f"Error: Gemini failed to process the {media_label} file {file_path.name}."}

        if file_ref.state.name != "ACTIVE":
            # Unexpected state: proceed anyway, but leave a trace in the log.
            logging.warning(
                f"Gemini file {file_ref.name} ended in unexpected state: {file_ref.state.name}"
            )

        response = client.models.generate_content(
            model='gemini-1.5-flash',
            contents=[query, file_ref],
        )
        logging.info(f"Gemini analysis complete for {file_path.name}.")
        return {"Output": response.text}

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        logging.error(
            f"Error during Gemini {media_label} analysis for {file_path.name}: {e}",
            exc_info=True,
        )
        return {"Output": f"An error occurred during {media_label} analysis: {e}"}


@mcp.tool()
def analyse_audio(audiopath: str, query: str) -> Dict[str, str]:
    """
    Ask a Gemini AI model about an audio file.
    The AI model can listen to the audio and answer questions based on it.

    Args:
        audiopath: The path to the audio file within the '/app/code_interpreter' directory
                   (e.g., '/app/code_interpreter/meeting.mp3').
        query: The question to ask about the audio content.

    Returns:
        A dictionary containing the AI's response under the key "Output".
        Returns an error message if the client or file processing fails.
    """
    return _analyse_media_with_gemini(
        audiopath, query, "audio", processing_timeout=120, poll_interval=2
    )


@mcp.tool()
def analyse_video(videopath: str, query: str) -> Dict[str, str]:
    """
    Ask a Gemini AI model about a video file.
    The AI model can watch the video and answer questions based on it.

    Args:
        videopath: Path to the video file within '/app/code_interpreter'
                   (e.g., '/app/code_interpreter/presentation.mp4').
        query: The question to ask about the video content.

    Returns:
        A dictionary containing the AI's response under the key "Output".
        Returns an error message if the client or file processing fails.
    """
    # Videos get a longer processing budget and a slower polling cadence.
    return _analyse_media_with_gemini(
        videopath, query, "video", processing_timeout=300, poll_interval=5
    )


@mcp.tool()
def analyse_images(imagepath: str, query: str) -> Dict[str, str]:
    """
    Ask a Gemini AI model about an image file.
    The AI model can see the image and answer questions based on it.

    Args:
        imagepath: Path to the image file within '/app/code_interpreter'
                   (e.g., '/app/code_interpreter/diagram.png').
        query: The question to ask about the image content.

    Returns:
        A dictionary containing the AI's response under the key "Output".
        Returns an error message if the client or file processing fails.
    """
    # The File API is used here for consistency with the audio/video tools.
    return _analyse_media_with_gemini(
        imagepath, query, "image", processing_timeout=60, poll_interval=1
    )
|
576 |
|
|
|
|
|
|
|
|
|
|
|
577 |
|
578 |
+
@mcp.tool()
def create_code_file(filename: str, code: str) -> Dict[str, str]:
    """
    Writes the given content to a file in the code execution directory,
    replacing the file if one with that name already exists.

    Args:
        filename: Name of the file to create (e.g., 'script.py', 'data.txt').
                  Only the final path component is used; the file is created
                  in '/app/code_interpreter/'.
        code: The string content to write into the file.

    Returns:
        A dictionary with an "info" message describing the outcome.
    """
    global tracked_files_in_codedir

    # Sync first so the directory and the tracked-file snapshot are current.
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)

    if not filename:
        return {"info": "Error: Filename cannot be empty."}

    # Keep only the last path component so the write cannot leave CODE_DIR.
    safe_name = Path(filename).name
    target = CODE_DIR / safe_name

    logging.info(f"Creating/overwriting file: {target}")
    try:
        with open(target, "w", encoding='utf-8') as handle:
            handle.write(code)

        # Record the file right away so it is not later reported as new.
        tracked_files_in_codedir.add(target)

        logging.info(f"Successfully wrote {len(code)} characters to {target}")
        return {"info": f"File '{safe_name}' created/updated successfully in {CODE_DIR}."}
    except OSError as err:
        logging.error(f"Failed to write file {target}: {err}")
        return {"info": f"Error: Could not write file '{safe_name}'. Reason: {err}"}
    except Exception as err:
        logging.error(f"Unexpected error writing file {target}: {err}", exc_info=True)
        return {"info": f"Error: An unexpected error occurred while writing '{safe_name}'. Reason: {err}"}
|
618 |
|
619 |
|
620 |
@mcp.tool()
def install_python_packages(python_packages: str) -> Dict[str, str]:
    """
    Installs specified Python packages using pip in the sandbox environment.

    Args:
        python_packages: A space-separated string of package names or requirement
                         specifiers (e.g., "numpy pandas matplotlib", "requests>=2.31").

    Returns:
        A dictionary containing the stdout/stderr of the pip command ("output")
        and an info message ("info").
    """
    import shlex  # local import: the file-level import block is not visible from here

    package_names = (python_packages or "").strip()
    if not package_names:
        return {"output": "", "info": "No packages specified for installation."}

    # Tokenise like a shell would, so a caller may quote a specifier if they wish.
    try:
        requirements = shlex.split(package_names)
    except ValueError as e:
        # Raised for unbalanced quotes; refuse rather than guess.
        logging.warning(f"Could not parse package string {package_names!r}: {e}")
        return {
            "output": f"Error: Could not parse package list: {e}",
            "info": "Installation aborted.",
        }

    # Quote each requirement before building the command string. Without this,
    # a specifier such as "numpy>=1.0" would be treated as output redirection
    # if the command is interpreted by a shell, and characters like ';' or '|'
    # could start unintended commands.
    quoted_requirements = " ".join(shlex.quote(req) for req in requirements)

    # --break-system-packages: required by modern pip in externally managed environments.
    command = f"pip install --break-system-packages {quoted_requirements}"
    logging.info(f"Attempting to install packages: {package_names}")

    # Package installation can be slow, so allow a longer timeout.
    output = run_command_in_sandbox(command, timeout_sec=600, run_forever=False, cwd=CODE_DIR)

    if "Successfully installed" in output or "Requirement already satisfied" in output:
        logging.info(f"Pip install command finished for: {package_names}")
        info_msg = f"Package installation command executed for: {package_names}."
    else:
        logging.warning(f"Pip install command for '{package_names}' may have encountered issues.")
        info_msg = f"Package installation command executed for: {package_names}. Check output for details."

    return {"output": output, "info": info_msg}
|
656 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
657 |
|
658 |
@mcp.tool()
def run_code(
    filename: str,
    code: str,
    start_cmd: str,
    python_packages: str = "",
    timeout_seconds: int = 300,
    run_forever: bool = False
) -> Dict[str, Any]:
    """
    Writes a code file, installs Python packages when requested, runs the
    given start command, and uploads any files the run created.

    Args:
        filename: Name of the file to create (e.g., "app.py"). Stored in /app/code_interpreter/.
        code: Full source code to write to the file.
        start_cmd: Command to execute the file (e.g., "python /app/code_interpreter/app.py").
                   Ensure paths within the command are absolute or relative to /app/code_interpreter.
        python_packages: Space-separated list of Python packages to install via pip.
                         Leave empty or "" if none needed. Pre-installed: gradio, XlsxWriter, openpyxl.
        timeout_seconds: Maximum execution time in seconds (default 300). Ignored if run_forever is True.
        run_forever: If True, the command attempts to run indefinitely (e.g., for servers).
                     Output capture might be limited, and timeout is ignored.

    Returns:
        A dictionary containing:
        - "output": The stdout/stderr from the execution.
        - "info": Status message about file creation/package installation.
        - "files_download_links": A list of URLs for any new files created during execution.
    """
    global tracked_files_in_codedir
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)  # Sync before doing anything else

    status_notes = []

    # Step 1: optional package installation. The pip output is only logged.
    if python_packages:
        pip_result = install_python_packages(python_packages)
        status_notes.append(pip_result.get("info", "Package install attempted."))
        logging.debug(f"Package install output:\n{pip_result.get('output', '')}")

    # Step 2: write the code file, and stop here if that failed.
    write_result = create_code_file(filename, code)
    status_notes.append(write_result.get("info", "File creation attempted."))
    if "Error:" in write_result.get("info", ""):
        return {
            "output": "Aborted due to file creation error.",
            "info": "\n".join(status_notes),
            "files_download_links": []
        }

    # Snapshot the directory after the code file exists, so that file itself
    # is not treated as a new artefact of the run.
    snapshot_before_run = set(CODE_DIR.glob("*"))
    tracked_files_in_codedir = snapshot_before_run

    # Step 3: run the command.
    logging.info(f"Executing start command: {start_cmd}")
    exec_output = run_command_in_sandbox(
        start_cmd,
        timeout_sec=timeout_seconds,
        run_forever=run_forever,
        cwd=CODE_DIR,
    )

    # Step 4: upload whatever the run produced and refresh the tracked set.
    download_links, tracked_files_in_codedir = _upload_new_files(CODE_DIR, snapshot_before_run)

    return {
        "output": exec_output,
        "info": "\n".join(status_notes),
        "files_download_links": download_links
    }
|
|
|
727 |
|
|
|
|
|
|
|
728 |
|
729 |
@mcp.tool()
def run_existing_code(
    start_cmd: str,
    timeout_seconds: int = 300,
    run_forever: bool = False
) -> Dict[str, Any]:
    """
    Runs a command against code files that are already present in the
    '/app/code_interpreter/' directory, then uploads any newly created files.

    Args:
        start_cmd: Command to execute (e.g., "python /app/code_interpreter/main.py").
                   Ensure paths within the command are absolute or relative to /app/code_interpreter.
        timeout_seconds: Maximum execution time in seconds (default 300). Ignored if run_forever is True.
        run_forever: If True, the command attempts to run indefinitely. Output capture might be limited.

    Returns:
        A dictionary containing:
        - "output": The stdout/stderr from the execution.
        - "files_download_links": A list of URLs for any new files created during execution.
    """
    global tracked_files_in_codedir

    # Sync first; this also refreshes the tracked snapshot used as the baseline.
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)
    snapshot_before_run = tracked_files_in_codedir

    logging.info(f"Executing command on existing files: {start_cmd}")
    exec_output = run_command_in_sandbox(
        start_cmd,
        timeout_sec=timeout_seconds,
        run_forever=run_forever,
        cwd=CODE_DIR,
    )

    # Anything not in the baseline snapshot is uploaded as a new file.
    download_links, tracked_files_in_codedir = _upload_new_files(CODE_DIR, snapshot_before_run)

    return {
        "output": exec_output,
        "files_download_links": download_links
    }
|
766 |
|
|
|
767 |
|
768 |
+
@mcp.tool()
def run_shell_command(
    cmd: str,
    timeout_seconds: int = 300,
    run_forever: bool = False
) -> Dict[str, Any]:
    """
    Runs an arbitrary shell command in the '/app/code_interpreter/' directory.
    Useful for file manipulation, setup, or simple tasks. Executes on Alpine Linux.
    Avoid commands requiring sudo. Uploads any newly created files.

    Args:
        cmd: The shell command to execute (e.g., "mkdir output_data", "ls -l").
        timeout_seconds: Maximum execution time in seconds (default 300). Ignored if run_forever is True.
        run_forever: If True, the command attempts to run indefinitely. Output capture might be limited.

    Returns:
        A dictionary containing:
        - "output": The stdout/stderr from the command execution.
        - "files_download_links": A list of URLs for any new files created by the command.
    """
    global tracked_files_in_codedir

    # The command may touch downloaded/transferred files, so sync beforehand.
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)
    snapshot_before_run = tracked_files_in_codedir

    logging.info(f"Executing shell command: {cmd}")
    exec_output = run_command_in_sandbox(
        cmd,
        timeout_sec=timeout_seconds,
        run_forever=run_forever,
        cwd=CODE_DIR,
    )

    # Upload files the command produced (e.g. an archive it created).
    download_links, tracked_files_in_codedir = _upload_new_files(CODE_DIR, snapshot_before_run)

    return {
        "output": exec_output,
        "files_download_links": download_links
    }
|
806 |
+
|
807 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
808 |
@mcp.tool()
def get_youtube_transcript(video_id: str) -> Dict[str, Any]:
    """
    Fetches the transcript of a YouTube video using its video ID via RapidAPI.

    Args:
        video_id: The unique ID of the YouTube video (e.g., "ZacjOVVgoLY").

    Returns:
        The parsed JSON response from the transcript API on success, or a
        dictionary with a single "error" key describing the failure.
    """
    if not RAPIDAPI_KEY:
        return {"error": "RapidAPI key is not configured."}
    if not video_id:
        return {"error": "Video ID cannot be empty."}

    url = f"https://{YOUTUBE_TRANSCRIPT_API}/api/transcript"
    params = {"videoId": video_id}
    headers = {
        'x-rapidapi-key': RAPIDAPI_KEY,
        'x-rapidapi-host': YOUTUBE_TRANSCRIPT_API
    }
    logging.info(f"Fetching YouTube transcript for video ID: {video_id}")

    try:
        response = requests_session.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()

        # Parse in its own narrow try. Recent versions of requests raise a
        # JSONDecodeError that is also a RequestException, so a later
        # `except json.JSONDecodeError` clause would never be reached and a
        # malformed body would be misreported as a fetch failure. Every JSON
        # decode error variant is a ValueError.
        try:
            data = response.json()
        except ValueError as e:
            logging.error(f"Error decoding JSON response for youtube transcript {video_id}: {e}")
            return {"error": f"Failed to parse transcript response: {e}"}

        logging.info(f"Successfully fetched transcript for {video_id}.")
        return data
    except RequestException as e:
        logging.error(f"Error fetching YouTube transcript for {video_id}: {e}")
        error_msg = f"Failed to fetch transcript: {e}"
        if getattr(e, 'response', None) is not None:
            # Include a short snippet of the body to aid debugging.
            error_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
        return {"error": error_msg}
    except Exception as e:
        # Tool boundary: report instead of raising.
        logging.error(f"Unexpected error fetching YouTube transcript {video_id}: {e}", exc_info=True)
        return {"error": f"An unexpected error occurred: {e}"}
|
848 |
|
849 |
|
850 |
@mcp.tool()
def read_excel_file(filename: str) -> Dict[str, Any]:
    """
    Reads data from an Excel file (.xlsx) located in '/app/code_interpreter/'.

    Args:
        filename: The name of the Excel file (e.g., 'report.xlsx').
                  Only the final path component is used.

    Returns:
        A dictionary where keys are sheet-qualified cell coordinates
        (e.g., 'Sheet1!A1') and values are the cell contents. Strings,
        ints, floats and bools are kept as-is; any other value is
        converted to a string. Empty cells are omitted.
        Returns a dictionary with an "error" key if the file cannot be read.
    """
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)  # The file may have just been uploaded

    file_path = CODE_DIR / Path(filename).name  # Drop any directory components

    if not file_path.exists():
        logging.error(f"Excel file not found: {file_path}")
        return {"error": f"File not found: {filename}"}

    logging.info(f"Reading Excel file: {file_path}")
    cells_by_coordinate = {}
    passthrough_types = (str, int, float, bool)
    try:
        # data_only=True yields stored values rather than formula text.
        workbook = openpyxl.load_workbook(file_path, data_only=True)
        for sheet_name in workbook.sheetnames:
            for row in workbook[sheet_name].iter_rows():
                for cell in row:
                    value = cell.value
                    if value is None:
                        continue
                    # Prefix with the sheet name so keys stay unique across sheets.
                    key = f"{sheet_name}!{cell.coordinate}"
                    if isinstance(value, passthrough_types):
                        cells_by_coordinate[key] = value
                    else:
                        cells_by_coordinate[key] = str(value)
        logging.info(f"Successfully read {len(cells_by_coordinate)} cells from {filename}.")
        return cells_by_coordinate
    except Exception as err:
        logging.error(f"Failed to read Excel file {file_path}: {err}", exc_info=True)
        return {"error": f"Could not read Excel file '{filename}'. Reason: {err}"}
|
892 |
|
|
|
893 |
|
894 |
@mcp.tool()
def scrape_website_content(url: str, query: Optional[str] = None) -> Dict[str, str]:
    """
    Scrapes the textual content of a single website URL using ScrapeNinja via RapidAPI
    and optionally asks a question about the content using an AI model.

    Args:
        url: The URL of the website to scrape.
        query: An optional question to ask the AI about the scraped content.

    Returns:
        A dictionary containing the scraped content ("content") and,
        if a query was provided, the AI's answer ("ai_answer").
        On failure, a dictionary with a single "error" key.
    """
    if not RAPIDAPI_KEY:
        return {"error": "RapidAPI key is not configured."}

    scrape_url = f"https://{SCRAPE_NINJA_API}/scrape"
    headers = {
        'x-rapidapi-key': RAPIDAPI_KEY,
        'x-rapidapi-host': SCRAPE_NINJA_API,
        'Content-Type': "application/json"
    }
    payload = json.dumps({"url": url})
    logging.info(f"Scraping website: {url}")
    result = {}

    try:
        response = requests_session.post(scrape_url, headers=headers, data=payload, timeout=60)
        response.raise_for_status()

        # Parse in its own narrow try. Recent versions of requests raise a
        # JSONDecodeError that is also a RequestException, so a later
        # `except json.JSONDecodeError` clause would never be reached and a
        # malformed body would be misreported as a scrape failure.
        try:
            scraped_data = response.json()
        except ValueError as e:
            logging.error(f"Error decoding JSON response for scrape {url}: {e}")
            return {"error": f"Failed to parse scrape response: {e}"}

        # NOTE(review): assumes the API returns a JSON object with the page
        # markup under "body" - confirm against the ScrapeNinja response format.
        content = scraped_data.get("body", "")
        if not content:
            content = str(scraped_data)  # Fall back to the raw payload text

        # Reduce the markup to plain text.
        soup = BeautifulSoup(content, "html.parser")
        cleaned_content = soup.get_text(separator=' ', strip=True)
        result["content"] = cleaned_content
        logging.info(f"Successfully scraped content from {url} (length: {len(cleaned_content)}).")

        if query:
            logging.info(f"Asking AI query about scraped content: '{query}'")
            # An AI failure must not discard the scraped content, so it is
            # handled separately and reported under "ai_answer".
            try:
                ai_response = completion(
                    model="gemini/gemini-1.5-flash",
                    messages=[
                        {"role": "system", "content": "You are an AI assistant analyzing website content."},
                        # The content is truncated to keep the prompt within a bounded size.
                        {"role": "user", "content": f"Based on the following website content, please answer this question: {query}\n\nWebsite Content:\n{cleaned_content[:15000]}"}
                    ],
                    max_tokens=500,
                    temperature=0.5,
                )
                result["ai_answer"] = ai_response.choices[0].message.content
                logging.info(f"Received AI answer for query on {url}.")
            except Exception as e:
                logging.error(f"AI query failed for {url}: {e}", exc_info=True)
                result["ai_answer"] = f"Error during AI analysis: {e}"

        return result

    except RequestException as e:
        logging.error(f"Error scraping {url}: {e}")
        error_msg = f"Failed to scrape {url}: {e}"
        if getattr(e, 'response', None) is not None:
            error_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
        return {"error": error_msg}
    except Exception as e:
        # Tool boundary: report instead of raising.
        logging.error(f"Unexpected error scraping {url}: {e}", exc_info=True)
        return {"error": f"An unexpected error occurred during scraping: {e}"}
|
971 |
+
|
972 |
+
# Consolidated Deep Thinking Tool
|
973 |
+
@mcp.tool()
def ask_advanced_ai(model_provider: str, query: str, context_info: str) -> Dict[str, str]:
    """
    Leverages a powerful external AI model for complex reasoning or generation tasks.

    Args:
        model_provider: The provider/model to use. Supported: 'groq', 'openrouter', 'gemini'.
                        Matched case-insensitively; surrounding whitespace is ignored.
        query: The main question or task for the AI.
        context_info: Additional context, background information, or previous findings
                      relevant to the query.

    Returns:
        A dictionary containing the AI's response under the key "response".
        On failure (unsupported provider, missing API key, or an error raised by the
        model call) the same key holds an error message instead; no exception is
        propagated to the caller.
    """
    # Normalize once so the model lookup and the API-key check below always agree.
    # Previously only the lookup was lower-cased, so a value such as 'Groq' resolved
    # to a model but silently bypassed the missing-key check.
    provider = (model_provider or "").strip().lower()

    logging.info(f"Sending query to advanced AI ({model_provider}): '{query[:100]}...'")

    # Provider name -> fully qualified model identifier passed to `completion`.
    model_map = {
        'groq': "groq/llama3-70b-8192",
        'openrouter': "openrouter/meta-llama/llama-3-70b-instruct",
        'gemini': "gemini/gemini-1.5-pro-latest",
    }

    model_name = model_map.get(provider)
    if not model_name:
        logging.error(f"Unsupported model provider specified: {model_provider}")
        return {"response": f"Error: Unsupported model provider '{model_provider}'. Use 'groq', 'openrouter', or 'gemini'."}

    # Check for the API key of the selected provider only; the other keys are
    # deliberately not evaluated so an unrelated missing key cannot block this call.
    if provider == 'groq':
        key_missing = not GROQ_API_KEY
    elif provider == 'openrouter':
        key_missing = not OPENROUTER_API_KEY
    else:  # 'gemini' -- the only remaining key of model_map
        key_missing = not GEMINI_API_KEY

    if key_missing:
        logging.error(f"API Key for {model_provider} is not configured.")
        return {"response": f"Error: API key for provider '{model_provider}' is missing."}

    messages = [
        {"role": "system", "content": "You are a highly capable AI assistant performing advanced reasoning or generation."},
        {"role": "user", "content": f"Based on the following information, please address the query.\n\nContext/Information Provided:\n{context_info}\n\nQuery:\n{query}"},
    ]

    try:
        # Non-streaming call; temperature/max_tokens are left at the library defaults.
        response = completion(
            model=model_name,
            messages=messages,
        )
        ai_response = response.choices[0].message.content
        logging.info(f"Received response from {model_provider} AI.")
        return {"response": ai_response}
    except Exception as e:
        # Tool boundary: report the failure to the caller as data rather than raising.
        logging.error(f"Error calling {model_provider} AI ({model_name}): {e}", exc_info=True)
        return {"response": f"Error interacting with {model_provider} AI: {e}"}
|
1033 |
|
|
|
|
|
|
|
|
|
|
|
|
|
1034 |
|
1035 |
+
# --- Main Execution ---
|
1036 |
+
if __name__ == "__main__":
    # Script entry point: prepare the working directory, snapshot its contents,
    # then hand control to the MCP server.
    logging.info("Starting FastMCP server...")

    # The code directory must exist before anything is listed from or written to it.
    CODE_DIR.mkdir(exist_ok=True, parents=True)

    # Record whatever is already present in the code directory at startup.
    tracked_files_in_codedir = {*CODE_DIR.glob("*")}
    logging.info(f"Initial tracked files in {CODE_DIR}: {[entry.name for entry in tracked_files_in_codedir]}")

    # Serve over standard input/output.
    mcp.run(transport="stdio")
|
1046 |
|