Usha-532 commited on
Commit
47f02ea
·
verified ·
1 Parent(s): 91a7855

Update tests.py

Browse files
Files changed (1) hide show
  1. tests.py +970 -315
tests.py CHANGED
@@ -1,391 +1,1046 @@
1
- from mcp.server.fastmcp import FastMCP
2
- import random
3
- import time
4
- from litellm import completion
5
- import shlex
6
- from subprocess import Popen, PIPE
7
- from threading import Timer
8
- import os
9
- import glob
10
- import http.client
11
- import json
12
  import openpyxl
13
- import shutil
14
- from google import genai
15
-
16
# SECURITY: credentials are read from the environment and must never be
# committed. Any key that was previously hard-coded here has been published
# with the repository history and should be revoked and rotated.
# litellm looks up GROQ_API_KEY, GEMINI_API_KEY and OPENROUTER_API_KEY in
# os.environ by itself, so they only need to exist in the deployment
# environment; nothing is assigned to os.environ here.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Directory scanned by transfer_files() for uploaded files.
source_dir = "/app/uploads/temp"
# Working directory in which generated code is written and executed.
destination_dir = "/app/code_interpreter"
# Paths already reported by run_code_files(), used to detect new files.
files_list = []
# Names of files already fetched by download_all_files().
downloaded_files = []

mcp = FastMCP("code_sandbox")

# Module-level scratch state. run() stores the captured output of the last
# command in `stdout` / `stderr`; the tool functions read it from there.
data = {}
result = ""
stdout = ""
stderr = ""
31
  import requests
32
- import os
33
- from bs4 import BeautifulSoup # For parsing HTML
34
-
35
-
36
def download_all_files(base_url, files_endpoint, download_directory):
    """Download every file linked from the server's listing page.

    The page at ``base_url + files_endpoint`` is fetched and every ``<a href>``
    on it is treated as a file link. Files whose name is already in the
    module-level ``downloaded_files`` list are skipped.

    Args:
        base_url: Server root, e.g. ``"https://example.com"``.
        files_endpoint: Path of the listing page, e.g. ``"/upload"``.
        download_directory: Local directory to save into (created if missing).

    Returns:
        None. Errors are printed and swallowed so that a failed sync never
        aborts the calling tool.
    """
    global downloaded_files

    os.makedirs(download_directory, exist_ok=True)

    try:
        # 1. Fetch the listing page. A timeout keeps a dead server from
        #    blocking the tool call forever.
        files_url = f"{base_url}{files_endpoint}"
        response = requests.get(files_url, timeout=30)
        response.raise_for_status()

        # 2. Every anchor on the page is assumed to point at a file.
        soup = BeautifulSoup(response.content, "html.parser")
        file_links = soup.find_all("a")

        # 3. Download each link that has not been fetched yet.
        for link in file_links:
            file_url = link.get("href")
            if not file_url:
                continue
            if not file_url.startswith("http"):
                file_url = f"{base_url}{file_url}"  # relative link

            filename = os.path.basename(file_url)
            if not filename or filename in downloaded_files:
                continue
            file_path = os.path.join(download_directory, filename)

            try:
                print(f"Downloading: {filename} from {file_url}")
                # stream=True + chunked writes keep large files out of memory;
                # the context manager releases the connection afterwards.
                with requests.get(file_url, stream=True, timeout=60) as file_response:
                    file_response.raise_for_status()
                    with open(file_path, "wb") as file:
                        for chunk in file_response.iter_content(chunk_size=8192):
                            if chunk:  # skip keep-alive chunks
                                file.write(chunk)

                # Record the file only after it was written successfully, so
                # a failed download is retried on the next call.
                downloaded_files.append(filename)
                print(f"Downloaded: {filename} to {file_path}")

            except requests.exceptions.RequestException as e:
                print(f"Error downloading {link.get('href')}: {e}")
            except OSError as e:  # permissions, disk space, ...
                print(f"Error saving {filename}: {e}")

    except requests.exceptions.RequestException as e:
        print(f"Error getting file list from server: {e}")
    except Exception as e:  # last-resort guard: syncing is best-effort
        print(f"An unexpected error occurred: {e}")
94
-
95
def transfer_files():
    """Move the contents of every sub-directory of ``source_dir`` into ``destination_dir``.

    Entries that sit directly in ``source_dir`` (not inside a sub-directory)
    are left alone. If ``source_dir`` does not exist there is nothing to
    transfer and the function returns quietly instead of raising
    ``FileNotFoundError`` into the calling tool.
    """
    if not os.path.isdir(source_dir):
        return

    for item in os.listdir(source_dir):
        item_path = os.path.join(source_dir, item)
        if not os.path.isdir(item_path):
            continue
        for filename in os.listdir(item_path):
            source_file_path = os.path.join(item_path, filename)
            destination_file_path = os.path.join(destination_dir, filename)
            shutil.move(source_file_path, destination_file_path)
103
-
104
def upload_file(file_path, upload_url):
    """Send one local file to ``upload_url`` as a multipart form upload.

    Returns the response body (the name the server stored the file under) on
    HTTP 200, otherwise ``None``. A missing file or a network/HTTP failure is
    printed and reported as ``None`` rather than raised.
    """
    try:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        with open(file_path, "rb") as handle:
            # The explicit (name, fileobj) tuple tells the server the filename.
            payload = {"file": (os.path.basename(file_path), handle)}
            reply = requests.post(upload_url, files=payload)

            # Turns any 4xx/5xx answer into a RequestException.
            reply.raise_for_status()

            if reply.status_code != 200:
                print(f"Upload failed. Status code: {reply.status_code}, Response: {reply.text}")
                return None

            print(f"File uploaded successfully. Filename returned by server: {reply.text}")
            return reply.text

    except FileNotFoundError as missing:
        print(missing)
        return None
    except requests.exceptions.RequestException as failure:
        print(f"Upload failed. Network error: {failure}")
        return None
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
# SECURITY: the bot token and chat id used to be hard-coded here. They are
# now taken from the environment; the previously committed token must be
# considered leaked and revoked.
# NOTE(review): the variable names below are new -- set them in the deployment
# (the value format suggests a Telegram bot token; confirm with the owner).
TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN", "")
chat_id = os.environ.get("TELEGRAM_CHAT_ID", "")
from requests_futures.sessions import FuturesSession
session = FuturesSession()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
def run(cmd, timeout_sec):
    """Execute ``cmd`` inside /app/code_interpreter/ and capture its output.

    The command line is tokenised with ``shlex.split`` (no shell is involved).
    A watchdog timer kills the process once ``timeout_sec`` seconds have
    passed. The captured output is stored as ``bytes`` in the module-level
    ``stdout`` and ``stderr`` variables; nothing is returned.
    """
    global stdout, stderr

    process = Popen(
        shlex.split(cmd),
        stdout=PIPE,
        stderr=PIPE,
        cwd="/app/code_interpreter/",
    )
    watchdog = Timer(timeout_sec, process.kill)
    try:
        watchdog.start()
        captured_out, captured_err = process.communicate()
        stdout = captured_out
        stderr = captured_err
    finally:
        # Stop the watchdog whether the command finished or was killed.
        watchdog.cancel()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
 
155
# MCP tool: forwards an audio file plus a question to Gemini and returns the
# answer text. The docstring below is the tool description shown to the model.
@mcp.tool()
def analyse_audio(audiopath,query) -> dict:
    """Ask another AI model about audios.The AI model can listen to the audio and give answers.Eg-query:Generate detailed minutes of meeting from the audio clip,audiopath='/app/code_interpreter/<audioname>'.Note:The audios are automatically present in the /app/code_interpreter directory."""
    # Pull user uploads from the server first so `audiopath` exists locally.
    download_all_files("https://opengpt-4ik5.onrender.com", "/upload", "/app/code_interpreter")

    uploaded_audio = client.files.upload(file=audiopath)
    answer = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=[query, uploaded_audio],
    )
    return {"Output": str(answer.text)}
166
 
167
# MCP tool: uploads a video to Gemini, waits for server-side processing to
# finish, then asks the question. The docstring is the tool description.
@mcp.tool()
def analyse_video(videopath,query) -> dict:
    """Ask another AI model about videos.The AI model can see the videos and give answers.Eg-query:Create a very detailed transcript and summary of the video,videopath='/app/code_interpreter/<videoname>'Note:The videos are automatically present in the /app/code_interpreter directory."""
    # Pull user uploads from the server first so `videopath` exists locally.
    download_all_files("https://opengpt-4ik5.onrender.com", "/upload", "/app/code_interpreter")

    uploaded_video = client.files.upload(file=videopath)

    # Poll once per second until the upload leaves the PROCESSING state.
    while uploaded_video.state.name == "PROCESSING":
        print('.', end='')
        time.sleep(1)
        uploaded_video = client.files.get(name=uploaded_video.name)

    if uploaded_video.state.name == "FAILED":
        raise ValueError(uploaded_video.state.name)

    answer = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=[query, uploaded_video],
    )
    return {"Output": str(answer.text)}
186
 
 
187
 
188
# MCP tool: forwards an image plus a question to Gemini and returns the
# answer text. The docstring below is the tool description shown to the model.
@mcp.tool()
def analyse_images(imagepath,query) -> dict:
    """Ask another AI model about images.The AI model can see the images and give answers.Eg-query:Who is the person in this image?,imagepath='/app/code_interpreter/<imagename>'.Note:The images are automatically present in the /app/code_interpreter directory."""
    # Pull user uploads from the server first so `imagepath` exists locally.
    download_all_files("https://opengpt-4ik5.onrender.com", "/upload", "/app/code_interpreter")

    uploaded_image = client.files.upload(file=imagepath)
    answer = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=[query, uploaded_image],
    )
    return {"Output": str(answer.text)}
200
 
201
# MCP tool: writes `code` to /app/code_interpreter/<filename>.
# The description string must be the FIRST statement of the function to count
# as its docstring (and therefore as the tool description); it used to follow
# two other statements and was silently discarded.
@mcp.tool()
def create_code_files(filename: str, code: str) -> dict:
    """Create code files by passing the the filename as well the entire code to write.The file is created by default in the /app/code_interpreter directory.Note:All user uploaded files that you might need to work upon are stored in the /app/code_interpreter directory."""
    # Make sure user uploads are present next to the file being created.
    download_all_files("https://opengpt-4ik5.onrender.com", "/upload", "/app/code_interpreter")
    transfer_files()

    # `with` closes the handle even if the write fails.
    with open(os.path.join(destination_dir, filename), "w") as f:
        f.write(code)

    return {"info":"task completed. The referenced code files were created successfully. "}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
 
 
 
212
 
213
 
214
# MCP tool: runs a start command, then uploads every file that appeared in
# /app/code_interpreter since the previous call and returns download links.
@mcp.tool()
def run_code_files(start_cmd:str) -> dict:
    """(start_cmd:Example- sudo python /app/code_interpreter/app.py or bash /app/code_interpreter/app.py).The files must be inside the /app/code_interpreter directory."""
    global files_list
    global stdout
    global stderr

    # run() blocks until the command exits or is killed after 300 s and
    # stores the captured output in the module-level stdout / stderr.
    run(start_cmd, 300)
    time.sleep(1.5)

    current_files = glob.glob("/app/code_interpreter/*")
    new_files = sorted(set(current_files) - set(files_list))

    uploaded_filenames = []
    for path in new_files:
        if not os.path.isfile(path):
            continue  # directories cannot be uploaded
        try:
            uploaded_filename = upload_file(path, "https://opengpt-4ik5.onrender.com/upload")
        except OSError:
            # Unreadable file: skip it, the command output is still returned.
            continue
        if uploaded_filename is None:
            continue  # upload failed; do not hand out a ".../static/None" link
        uploaded_filenames.append(f"https://opengpt-4ik5.onrender.com/static/{uploaded_filename}")

    # Remember everything that exists now, not only the new files; otherwise
    # files from earlier runs would look new again on the next call.
    files_list = current_files
    return {"stdout":stdout,"stderr":stderr,"Files_download_link":uploaded_filenames}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
 
 
 
236
 
237
# MCP tool: runs one shell-style command line in /app/code_interpreter/ and
# returns its captured stdout / stderr.
@mcp.tool()
def run_shell_command(cmd:str) -> dict:
    """(cmd:Example- mkdir test.By default , the command is run inside the /app/code_interpreter/ directory.).Remember, the code_interpreter is running on **alpine linux** , so write commands accordingly.Eg-sudo does not work and is not required.."""
    global stdout
    global stderr

    # run() blocks until the command has exited (or was killed after 300 s),
    # so no polling for output is needed afterwards.
    run(cmd, 300)
    time.sleep(1.5)
    transfer_files()
    return {"stdout":stdout,"stderr":stderr}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
 
252
# MCP tool: installs the given packages with pip inside the sandbox.
@mcp.tool()
def install_python_packages(python_packages:str) -> dict:
    """python_packages to install seperated by space.eg-(python packages:numpy matplotlib).The following python packages are preinstalled:gradio XlsxWriter openpyxl"""
    package_names = python_packages.strip()
    command="pip install"
    if not package_names:
        # Always honour the declared dict return type, even when there is
        # nothing to install.
        return {"stdout":"","stderr":"","info":"No packages given; nothing was installed"}

    # run() blocks until pip exits (or is killed after 300 s) and leaves the
    # captured output in the module-level stdout / stderr.
    run(
        f"{command} --break-system-packages {package_names}", timeout_sec=300
    )
    time.sleep(2)
    return {"stdout":stdout,"stderr":stderr,"info":"Ran package installation command"}
268
 
269
# MCP tool: fetches a video transcript from the youtube-transcript3 RapidAPI
# service and returns the decoded JSON response.
@mcp.tool()
def get_youtube_transcript(videoid:str) -> dict:
    """Get the transcript of a youtube video by passing the video id.First search the web using google / exa for the relevant videos.Eg videoid=ZacjOVVgoLY"""
    conn = http.client.HTTPSConnection("youtube-transcript3.p.rapidapi.com")
    headers = {
        # SECURITY: the RapidAPI key comes from the environment; the key that
        # used to be hard-coded here must be rotated.
        'x-rapidapi-key': os.environ.get("RAPIDAPI_KEY", ""),
        'x-rapidapi-host': "youtube-transcript3.p.rapidapi.com"
    }
    try:
        conn.request("GET",f"/api/transcript?videoId={videoid}", headers=headers)
        res = conn.getresponse()
        data = res.read()
    finally:
        conn.close()  # release the socket even when the request fails
    return json.loads(data)
282
 
283
# MCP tool: dumps every non-empty cell of a workbook as {location: text}.
# For a single-sheet workbook the key is the plain coordinate ("A1"). When a
# workbook has several sheets the key is "<sheet name>!<coordinate>", because
# plain coordinates would collide and later sheets would silently overwrite
# earlier ones.
@mcp.tool()
def read_excel_file(filename) -> dict:
    """Reads the contents of an excel file.Returns a dict with key :value pair = cell location:cell content.Always run this command first , when working with excels.The excel file is automatically present in the /app/code_interpreter directory.Note:Always use openpyxl in python to work with excel files."""
    # Make sure user uploads are present locally before opening the file.
    download_all_files("https://opengpt-4ik5.onrender.com", "/upload", "/app/code_interpreter")

    workbook = openpyxl.load_workbook(os.path.join(destination_dir, filename))
    qualify_with_sheet = len(workbook.sheetnames) > 1

    excel_data_dict = {}
    for sheet_name in workbook.sheetnames:
        sheet = workbook[sheet_name]
        for row in sheet.iter_rows():
            for cell in row:
                cell_value = cell.value
                if cell_value is None:
                    continue
                if qualify_with_sheet:
                    key = f"{sheet_name}!{cell.coordinate}"
                else:
                    key = cell.coordinate
                excel_data_dict[key] = str(cell_value)
    return excel_data_dict
306
# MCP tool: fetches each URL through the ScrapeNinja RapidAPI service and asks
# a Gemini model (via litellm) to turn the raw response into readable text and
# answer `query` about it. Returns {"website_content": [one text per URL]}.
@mcp.tool()
def scrape_websites(url_list:list,query:str) -> dict:
    """Get the entire content of websites by passing in the url lists.query is the question you want to ask about the content of the website.e.g-query:Give .pptx links in the website.Note:Max urls in url_list is 3."""

    conn = http.client.HTTPSConnection("scrapeninja.p.rapidapi.com")

    headers = {
        # SECURITY: the RapidAPI key comes from the environment; the key that
        # used to be hard-coded here must be rotated.
        'x-rapidapi-key': os.environ.get("RAPIDAPI_KEY", ""),
        'x-rapidapi-host': "scrapeninja.p.rapidapi.com",
        'Content-Type': "application/json"
    }
    Output=[]
    try:
        for url in url_list:
            payload = json.dumps({"url": url})
            conn.request("POST", "/scrape", payload, headers)
            res = conn.getresponse()
            # The body must be read completely before the connection can be
            # reused for the next URL.
            content = str(res.read().decode("utf-8"))
            response = completion(
                model="gemini/gemini-2.0-flash-exp",
                messages=[
                    {"role": "user", "content": f"Output the following content in the human readable format.Try to conserve all the links and the text.Try to ouput the entire content.Remove the html codes so its human readable.Also answer this question about the content in a seperate paragraph:{query}.Here is the content:{content}"}
                ],
            )
            Output.append(response.choices[0].message.content)
    finally:
        conn.close()  # release the socket even when a request fails

    return {"website_content":Output}
 
 
 
335
 
336
 
337
# MCP tool: second-opinion call to a Groq-hosted DeepSeek-R1 distill model.
@mcp.tool()
def deepthinking1(query:str,info:str) -> dict:
    """Ask another intelligent AI about the query.Ask the question defined by the query string and what you know about the question as well as provide your own knowledge and ideas about the question through the info string."""
    prompt = f"{query}.Here is what i Know about the query:{info}"
    reply = completion(
        model="groq/deepseek-r1-distill-llama-70b",
        messages=[{"role": "user", "content": prompt}],
        stream=False,
    )
    answer = reply.choices[0].message.content
    return {"response": str(answer)}
 
 
 
 
 
350
 
351
# MCP tool: second-opinion call to DeepSeek chat through OpenRouter, pinned to
# the "Together" provider with fallbacks disabled.
@mcp.tool()
def deepthinking2(query:str,info:str) -> dict:
    """Ask another intelligent AI about the query.Ask the question defined by the query string and what you know about the question as well as provide your own knowledge and ideas about the question through the info string."""
    response = completion(
        model="openrouter/deepseek/deepseek-chat",
        messages=[
            # Forward the actual question and context, in the same prompt
            # format as deepthinking1 / deepthinking3. The previous version
            # ignored both arguments and always sent the literal "Hi!".
            {"role": "user", "content": f"{query}.Here is what i Know about the query:{info}"}
        ],
        provider={"order": ["Together"],"allow_fallbacks":False},
    )

    return {"response":str(response.choices[0].message.content)}
364
 
365
# MCP tool: second-opinion call to the Gemini "flash thinking" model.
@mcp.tool()
def deepthinking3(query:str,info:str) -> dict:
    """Ask another intelligent AI about the query.Ask the question defined by the query string and what you know about the question as well as provide your own knowledge and ideas about the question through the info string."""
    prompt = f"{query}.Here is what i Know about the query:{info}"
    reply = completion(
        model="gemini/gemini-2.0-flash-thinking-exp-01-21",
        messages=[{"role": "user", "content": prompt}],
    )
    answer = reply.choices[0].message.content
    return {"response": str(answer)}
377
 
378
if __name__ == "__main__":
    # Script entry point: serve the registered tools over the stdio transport.
    mcp.run(transport="stdio")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
- # @mcp.tool()
384
- # def run_website(start_cmd:str,port=8501) -> dict:
385
- # """(start_cmd:streamlit run app.py).Always specify sandbox id.Specify port (int) if different from 8501."""
386
- # output=sbx.commands.run(start_cmd,sandbox_id)
387
- # url = sbx.get_host(port)
388
- # info={"info":f"Your Application is live [here](https://{url})"}
389
 
390
- # return info
 
 
 
 
 
 
 
 
 
 
391
 
 
1
# [pasted note, truncated at the start] ...nce the readability of the provided Python code. Key Areas for Improvement:
import glob
import json
import logging
import os
import shlex
import shutil
import subprocess
import time
from pathlib import Path
from subprocess import TimeoutExpired
from typing import List, Dict, Optional, Tuple, Any
from urllib.parse import unquote, urljoin, urlparse

import openpyxl
import pexpect
import requests
from bs4 import BeautifulSoup
from google import genai # Assuming genai handles API key internally via env or client init
from litellm import completion
from mcp.server.fastmcp import FastMCP
from requests.exceptions import RequestException
14
+
15
# --- Logging Setup ---
# Must run before the first logging.* call below: logging.warning() on an
# unconfigured root logger installs a default handler, after which
# basicConfig() would silently do nothing and the level/format would be lost.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- Configuration ---

# API keys are taken from environment variables; set them in the deployment
# environment. litellm reads GROQ_API_KEY, GEMINI_API_KEY and
# OPENROUTER_API_KEY from os.environ itself, so they are not written back.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY")  # used for the RapidAPI calls

# Check for missing essential keys
if not GEMINI_API_KEY:
    logging.warning("GEMINI_API_KEY environment variable not set.")
# Add checks for other keys if they are strictly required
# if not GROQ_API_KEY: raise ValueError("GROQ_API_KEY not set")
# if not OPENROUTER_API_KEY: raise ValueError("OPENROUTER_API_KEY not set")
# if not RAPIDAPI_KEY: raise ValueError("RAPIDAPI_KEY not set")


# --- Constants ---
CODE_DIR = Path("/app/code_interpreter")
TEMP_UPLOAD_DIR = Path("/app/uploads/temp")  # Source for transfer_files
SERVER_BASE_URL = "https://opengpt-4ik5.onrender.com"
FILES_ENDPOINT = "/upload"  # Endpoint to list files
UPLOAD_ENDPOINT = "/upload"  # Endpoint to upload files
SERVER_FILES_URL = f"{SERVER_BASE_URL}{FILES_ENDPOINT}"
SERVER_UPLOAD_URL = f"{SERVER_BASE_URL}{UPLOAD_ENDPOINT}"
SERVER_STATIC_URL_PREFIX = f"{SERVER_BASE_URL}/static/"

# RapidAPI Endpoints
YOUTUBE_TRANSCRIPT_API = "youtube-transcript3.p.rapidapi.com"
SCRAPE_NINJA_API = "scrapeninja.p.rapidapi.com"

# --- Global State (Use Sparingly) ---
# Files present in CODE_DIR, used to identify newly created ones.
# This state persists across tool calls within a single mcp run.
tracked_files_in_codedir: set[Path] = set(CODE_DIR.glob("*"))
# Names of files already downloaded from the server, to avoid re-downloading.
server_downloaded_files: set[str] = set()

# --- Clients ---
try:
    if GEMINI_API_KEY:
        client = genai.Client(api_key=GEMINI_API_KEY)
        logging.info("Gemini Client initialized using API Key.")
    else:
        # Attempt to initialize without explicit key (might use ADC or other methods)
        client = genai.Client()
        logging.info("Gemini Client initialized (attempting default credentials).")
except Exception as e:
    logging.error(f"Failed to initialize Gemini client: {e}")
    client = None  # Indicate client is unavailable

mcp = FastMCP("code_sandbox")
requests_session = requests.Session()  # Use a session for potential connection pooling
88
+
89
+ # --- Helper Functions ---
90
+
91
def download_server_files(
    base_url: str,
    files_endpoint: str,
    download_directory: Path,
    already_downloaded: set[str]
) -> set[str]:
    """
    Downloads all files listed on the server's file listing page
    that haven't been downloaded yet in this session.

    Every ``<a href>`` on the listing page is treated as a file link. Links
    are resolved against the listing page URL with ``urljoin`` (so relative
    links without a leading slash work), links that are not http(s) are
    skipped, and the local filename is the percent-decoded last path segment
    of the URL, without query string or fragment.

    Args:
        base_url: The base URL of the server (e.g., "https://example.com").
        files_endpoint: The path to the page listing files (e.g., "/uploads").
        download_directory: The local directory (Path object) to save files.
        already_downloaded: A set of filenames already downloaded. It is
            updated in place.

    Returns:
        The updated set of downloaded filenames (the same object that was
        passed in). Errors are logged, never raised.
    """
    download_directory.mkdir(parents=True, exist_ok=True)
    files_url = f"{base_url}{files_endpoint}"
    newly_downloaded_count = 0

    try:
        response = requests_session.get(files_url, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")

        for link in soup.find_all("a"):
            file_href = link.get("href")
            if not file_href:
                continue

            # Resolve relative links the way a browser would.
            file_url = urljoin(files_url, file_href)
            parsed_url = urlparse(file_url)
            if parsed_url.scheme not in ("http", "https"):
                continue  # mailto:, javascript:, ... are not downloadable

            # Path(...).name keeps only the last segment, so a decoded "%2F"
            # cannot smuggle a directory component into the local path.
            filename = Path(unquote(parsed_url.path)).name
            if not filename or filename == "..":
                logging.warning(f"Could not extract filename from URL: {file_url}")
                continue

            # Skip if already downloaded in this session
            if filename in already_downloaded:
                continue

            file_path = download_directory / filename
            logging.info(f"Downloading: {filename} from {file_url}")

            try:
                # The context manager returns the streamed connection to the
                # pool even when writing the file fails.
                with requests_session.get(file_url, stream=True, timeout=60) as file_response:
                    file_response.raise_for_status()
                    with open(file_path, "wb") as f:
                        for chunk in file_response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)

                logging.info(f"Downloaded: {filename} to {file_path}")
                already_downloaded.add(filename)
                newly_downloaded_count += 1

            except RequestException as e:
                logging.error(f"Error downloading {filename}: {e}")
            except OSError as e:
                logging.error(f"Error saving {filename}: {e}")
            except Exception as e:
                logging.error(f"Unexpected error downloading/saving {filename}: {e}")

    except RequestException as e:
        logging.error(f"Error getting file list from {files_url}: {e}")
    except Exception as e:
        logging.error(f"An unexpected error occurred during file download process: {e}")

    logging.info(f"Downloaded {newly_downloaded_count} new files from server.")
    return already_downloaded
170
+
171
def transfer_temp_files(source_dir: Path, destination_dir: Path):
    """Relocate uploaded files from the temp area into the code directory.

    Regular files found directly in ``source_dir`` or one level down, inside
    its immediate sub-directories, are moved into ``destination_dir`` (which
    is created if needed). Deeper nesting is not followed. A failed move is
    logged and does not stop the remaining files.
    """
    destination_dir.mkdir(parents=True, exist_ok=True)

    if not source_dir.exists():
        logging.warning(f"Source directory for transfer does not exist: {source_dir}")
        return

    def _relocate(path: Path, qualifier: str) -> bool:
        """Move one file into destination_dir; True when the move succeeded."""
        try:
            shutil.move(str(path), str(destination_dir / path.name))
            logging.info(f"Moved {path.name}{qualifier} to {destination_dir}")
        except OSError as e:
            logging.error(f"Error moving {path.name}: {e}")
            return False
        return True

    moved_count = 0
    for entry in source_dir.iterdir():
        if entry.is_dir():
            # e.g. a session-specific temp folder: move its files only
            for candidate in entry.iterdir():
                if candidate.is_file() and _relocate(candidate, ""):
                    moved_count += 1
        elif entry.is_file() and _relocate(entry, " directly"):
            moved_count += 1

    if moved_count > 0:
        logging.info(f"Transferred {moved_count} files from {source_dir} area.")
200
+
201
def upload_file_to_server(file_path: Path, upload_url: str) -> Optional[str]:
    """
    Post one file to the server as a multipart form upload.

    Args:
        file_path: Path object of the file to upload.
        upload_url: The URL to upload the file to.

    Returns:
        The stripped response body (taken to be the filename/identifier the
        server assigned) on success, or None on any failure. Failures are
        logged, never raised.
    """
    if not file_path.is_file():
        logging.error(f"File not found or is not a file: {file_path}")
        return None

    try:
        with open(file_path, "rb") as stream:
            form = {"file": (file_path.name, stream)}
            reply = requests_session.post(upload_url, files=form, timeout=60)
            reply.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

            # The server is assumed to answer with the stored filename.
            server_filename = reply.text.strip()
            logging.info(f"File '{file_path.name}' uploaded successfully. Server identifier: {server_filename}")
            return server_filename

    except FileNotFoundError:
        # The file vanished between the is_file() check and open().
        logging.error(f"File not found during upload attempt: {file_path}")
        return None
    except RequestException as e:
        logging.error(f"Upload failed for {file_path.name}. Network/Server error: {e}")
        server_reply = getattr(e, 'response', None)
        if server_reply is not None:
            logging.error(f"Server response: {server_reply.status_code} - {server_reply.text}")
        return None
    except Exception as e:
        logging.error(f"An unexpected error occurred during upload of {file_path.name}: {e}")
        return None
238
 
239
def run_command_in_sandbox(
    command: str,
    timeout_sec: int,
    run_forever: bool = False,
    cwd: Path = CODE_DIR
) -> str:
    """
    Runs a shell command with bash in a specific directory.

    The command is executed as ``bash -c "cd <cwd> && <command>"``.

    * Normal mode: the command runs under pexpect (so it sees a terminal) and
      its combined stdout/stderr is collected until it exits. When
      ``timeout_sec`` elapses first, the process is killed and the output
      gathered so far is returned with a timeout notice appended.
    * ``run_forever`` mode: the command is started as a detached background
      process whose output is discarded, and the function returns at once.

    The previous implementation drove an interactive bash and waited for a
    custom prompt marker. The marker also appears in the terminal echo of the
    ``export PS1=...`` line, so the wait returned before the command had
    produced any output and the shell was then force-closed. Running the
    command non-interactively and waiting for EOF avoids prompt detection
    altogether.

    Args:
        command: The command string to execute.
        timeout_sec: Timeout in seconds. Ignored if run_forever is True.
        run_forever: If True, does not enforce timeout (use with caution).
        cwd: The working directory (Path object) for the command.

    Returns:
        The captured output of the command (stripped); in run_forever mode a
        short status message. Errors are reported inside the returned text,
        never raised.
    """
    full_command = f"cd {shlex.quote(str(cwd))} && {command}"
    logging.info(f"Running command: {full_command}")

    if run_forever:
        try:
            # start_new_session detaches the process from this server, so it
            # keeps running after the tool call returns.
            subprocess.Popen(
                ["bash", "-c", full_command],
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                start_new_session=True,
            )
        except OSError as e:
            logging.error(f"Unexpected error running command '{command}': {e}")
            return f"--- Unexpected Error: {e} ---"
        logging.info(f"Command '{command}' started in 'run_forever' mode.")
        return f"Command '{command}' started in background (output streaming not captured)."

    output = ""
    child = None
    try:
        child = pexpect.spawn(
            "bash",
            ["-c", full_command],
            timeout=timeout_sec,
            encoding="utf-8",
            codec_errors="ignore",
        )
        # Waiting for EOF collects everything the command prints until exit.
        child.expect(pexpect.EOF)
        output = child.before or ""
        logging.info(f"Command '{command}' finished.")

    except pexpect.TIMEOUT:
        # On timeout pexpect leaves the data read so far in `before`.
        if child is not None and child.before:
            output = child.before
        output += f"\n--- TimeoutError: Command '{command}' exceeded {timeout_sec} seconds ---"
        logging.error(f"Command '{command}' timed out after {timeout_sec} seconds.")
    except pexpect.ExceptionPexpect as e:
        output += f"\n--- Pexpect Error: {e} ---"
        logging.error(f"Pexpect execution failed for command '{command}': {e}")
    except Exception as e:
        output += f"\n--- Unexpected Error: {e} ---"
        logging.error(f"Unexpected error running command '{command}': {e}")
    finally:
        # Kill the process if it is still running (timeout or error path).
        if child is not None and child.isalive():
            child.close(force=True)

    logging.info(f"Command '{command}' completed. Output length: {len(output)}")
    return output.strip()
337
+
338
+
339
def _ensure_files_synced(code_dir: Path, temp_dir: Path):
    """Refresh the local code directory and the module-level file bookkeeping.

    Runs three steps in order: hands ``temp_dir`` and ``code_dir`` to
    ``transfer_temp_files``, asks ``download_server_files`` for an updated
    download record, and finally re-snapshots the top-level entries of
    ``code_dir``.

    Args:
        code_dir: Directory the tools execute in.
        temp_dir: Temporary upload area passed to ``transfer_temp_files``.

    Side effects:
        Rebinds the globals ``server_downloaded_files`` and
        ``tracked_files_in_codedir``.
    """
    global server_downloaded_files, tracked_files_in_codedir

    logging.info("Ensuring local file system is synchronized...")

    # Step 1: move over anything waiting in the temp upload area.
    transfer_temp_files(temp_dir, code_dir)

    # Step 2: fetch server files; the helper returns the updated record of
    # what has been downloaded so far.
    previously_downloaded = server_downloaded_files
    server_downloaded_files = download_server_files(
        SERVER_BASE_URL, FILES_ENDPOINT, code_dir, previously_downloaded
    )

    # Step 3: snapshot *after* syncing so synced files are not later
    # mistaken for newly generated output.
    synced_entries = code_dir.glob("*")
    tracked_files_in_codedir = set(synced_entries)
352
+
353
+
354
def _upload_new_files(code_dir: Path, known_files_before: set[Path]) -> Tuple[List[str], set[Path]]:
    """Upload entries of ``code_dir`` that are absent from ``known_files_before``.

    Args:
        code_dir: Directory whose top-level entries are inspected.
        known_files_before: Paths that existed before the command ran.

    Returns:
        A tuple ``(download_urls, current_entries)``: the URLs of the files
        that were uploaded successfully, and the set of all top-level entries
        currently in ``code_dir``.
    """
    snapshot = set(code_dir.glob("*"))
    fresh_entries = snapshot - known_files_before

    # Guard clause: nothing appeared since the previous snapshot.
    if not fresh_entries:
        logging.info("No new files detected for upload.")
        return [], snapshot

    logging.info(f"Detected {len(fresh_entries)} new files for upload: {[f.name for f in fresh_entries]}")

    links = []
    for entry in fresh_entries:
        # Only regular files are uploaded; directories and the like are skipped.
        if not entry.is_file():
            logging.warning(f"Skipping upload for non-file item: {entry}")
            continue

        server_filename = upload_file_to_server(entry, SERVER_UPLOAD_URL)
        if not server_filename:
            logging.error(f"Failed to upload {entry.name}, skipping URL generation.")
            continue

        # The download URL is the static prefix followed by the server-side name.
        links.append(f"{SERVER_STATIC_URL_PREFIX}{server_filename}")

    logging.info(f"Uploaded {len(links)} new files.")
    return links, snapshot
 
 
 
 
 
 
 
 
 
381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
+ # --- MCP Tools ---
384
 
385
@mcp.tool()
def analyse_audio(audiopath: str, query: str) -> Dict[str, str]:
    """
    Ask a Gemini AI model about an audio file.
    The AI model can listen to the audio and answer questions based on it.

    Args:
        audiopath: The path to the audio file within the '/app/code_interpreter' directory
                   (e.g., '/app/code_interpreter/meeting.mp3').
        query: The question to ask about the audio content.

    Returns:
        A dictionary containing the AI's response under the key "Output".
        Returns an error message if the client or file processing fails.
    """
    # Function-scope import: only the progress indicator below needs it.
    import sys

    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)
    if not client:
        return {"Output": "Error: Gemini client not initialized."}

    audio_file_path = Path(audiopath)
    if not audio_file_path.is_absolute():  # Assume relative to CODE_DIR if not absolute
        audio_file_path = CODE_DIR / audiopath

    if not audio_file_path.exists():
        return {"Output": f"Error: Audio file not found at {audio_file_path}"}

    logging.info(f"Analysing audio: {audio_file_path.name} with query: '{query}'")
    try:
        # Upload file to Gemini API
        audio_file_ref = client.files.upload(file=str(audio_file_path))
        logging.info(f"Uploaded {audio_file_path.name} to Gemini API. File ref: {audio_file_ref.name}, State: {audio_file_ref.state.name}")

        # Poll until the file leaves the PROCESSING state, bounded by a timeout.
        start_time = time.time()
        timeout_seconds = 120  # Adjust as needed
        while audio_file_ref.state.name == "PROCESSING":
            if time.time() - start_time > timeout_seconds:
                logging.error(f"Gemini file processing timed out for {audio_file_ref.name}")
                return {"Output": f"Error: Gemini file processing timed out for {audio_file_path.name}."}
            # Progress dots go to stderr: this server is started with the
            # stdio transport, where stdout is reserved for protocol messages.
            print('.', end='', flush=True, file=sys.stderr)
            time.sleep(2)
            audio_file_ref = client.files.get(name=audio_file_ref.name)

        print(file=sys.stderr)  # Newline after progress dots

        if audio_file_ref.state.name == "FAILED":
            logging.error(f"Gemini file processing failed for {audio_file_ref.name}. State: {audio_file_ref.state.name}")
            return {"Output": f"Error: Gemini failed to process the audio file {audio_file_path.name}."}

        if audio_file_ref.state.name != "ACTIVE":
            # Proceed anyway, but leave a trace in the log.
            logging.warning(f"Gemini file {audio_file_ref.name} ended in unexpected state: {audio_file_ref.state.name}")

        # Generate content
        response = client.models.generate_content(
            model='gemini-1.5-flash',  # Use appropriate model
            contents=[query, audio_file_ref]
        )
        logging.info(f"Gemini analysis complete for {audio_file_path.name}.")
        return {"Output": response.text}

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        logging.error(f"Error during Gemini audio analysis for {audio_file_path.name}: {e}", exc_info=True)
        return {"Output": f"An error occurred during audio analysis: {e}"}
449
 
450
+ # Note: analyse_video and analyse_images follow the same pattern as analyse_audio
451
+ # Refactoring them similarly:
 
 
 
452
 
453
@mcp.tool()
def analyse_video(videopath: str, query: str) -> Dict[str, str]:
    """
    Ask a Gemini AI model about a video file.
    The AI model can watch the video and answer questions based on it.

    Args:
        videopath: Path to the video file within '/app/code_interpreter'
                   (e.g., '/app/code_interpreter/presentation.mp4').
        query: The question to ask about the video content.

    Returns:
        A dictionary containing the AI's response under the key "Output".
        Returns an error message if the client or file processing fails.
    """
    # Function-scope import: only the progress indicator below needs it.
    import sys

    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)
    if not client:
        return {"Output": "Error: Gemini client not initialized."}

    video_file_path = Path(videopath)
    if not video_file_path.is_absolute():  # Relative paths resolve against CODE_DIR
        video_file_path = CODE_DIR / videopath

    if not video_file_path.exists():
        return {"Output": f"Error: Video file not found at {video_file_path}"}

    logging.info(f"Analysing video: {video_file_path.name} with query: '{query}'")
    try:
        video_file_ref = client.files.upload(file=str(video_file_path))
        logging.info(f"Uploaded {video_file_path.name} to Gemini API. File ref: {video_file_ref.name}, State: {video_file_ref.state.name}")

        # Poll until the file leaves the PROCESSING state, bounded by a timeout.
        start_time = time.time()
        timeout_seconds = 300  # Videos might take longer
        while video_file_ref.state.name == "PROCESSING":
            if time.time() - start_time > timeout_seconds:
                logging.error(f"Gemini file processing timed out for {video_file_ref.name}")
                return {"Output": f"Error: Gemini file processing timed out for {video_file_path.name}."}
            # Progress dots go to stderr: this server is started with the
            # stdio transport, where stdout is reserved for protocol messages.
            print('.', end='', flush=True, file=sys.stderr)
            time.sleep(5)  # Longer sleep for video
            video_file_ref = client.files.get(name=video_file_ref.name)
        print(file=sys.stderr)

        if video_file_ref.state.name == "FAILED":
            logging.error(f"Gemini file processing failed for {video_file_ref.name}")
            return {"Output": f"Error: Gemini failed to process the video file {video_file_path.name}."}

        if video_file_ref.state.name != "ACTIVE":
            # Proceed anyway, but leave a trace in the log.
            logging.warning(f"Gemini file {video_file_ref.name} ended in unexpected state: {video_file_ref.state.name}")

        response = client.models.generate_content(
            model='gemini-1.5-flash',
            contents=[query, video_file_ref]
        )
        logging.info(f"Gemini analysis complete for {video_file_path.name}.")
        return {"Output": response.text}

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        logging.error(f"Error during Gemini video analysis for {video_file_path.name}: {e}", exc_info=True)
        return {"Output": f"An error occurred during video analysis: {e}"}
512
 
513
 
514
@mcp.tool()
def analyse_images(imagepath: str, query: str) -> Dict[str, str]:
    """
    Ask a Gemini AI model about an image file.
    The AI model can see the image and answer questions based on it.

    Args:
        imagepath: Path to the image file within '/app/code_interpreter'
                   (e.g., '/app/code_interpreter/diagram.png').
        query: The question to ask about the image content.

    Returns:
        A dictionary containing the AI's response under the key "Output".
        Returns an error message if the client or file processing fails.
    """
    # Function-scope import: only the progress indicator below needs it.
    import sys

    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)
    if not client:
        return {"Output": "Error: Gemini client not initialized."}

    image_file_path = Path(imagepath)
    if not image_file_path.is_absolute():  # Relative paths resolve against CODE_DIR
        image_file_path = CODE_DIR / imagepath

    if not image_file_path.exists():
        return {"Output": f"Error: Image file not found at {image_file_path}"}

    logging.info(f"Analysing image: {image_file_path.name} with query: '{query}'")
    try:
        # Note: For Gemini Flash/Pro Vision, direct image data might be preferred over file API
        # Check Gemini API docs for best practices. Using File API for consistency here.
        image_file_ref = client.files.upload(file=str(image_file_path))
        logging.info(f"Uploaded {image_file_path.name} to Gemini API. File ref: {image_file_ref.name}, State: {image_file_ref.state.name}")

        # Poll until the file leaves the PROCESSING state, bounded by a timeout.
        start_time = time.time()
        timeout_seconds = 60
        while image_file_ref.state.name == "PROCESSING":
            if time.time() - start_time > timeout_seconds:
                logging.error(f"Gemini file processing timed out for {image_file_ref.name}")
                return {"Output": f"Error: Gemini file processing timed out for {image_file_path.name}."}
            # Progress dots go to stderr: this server is started with the
            # stdio transport, where stdout is reserved for protocol messages.
            print('.', end='', flush=True, file=sys.stderr)
            time.sleep(1)
            image_file_ref = client.files.get(name=image_file_ref.name)
        print(file=sys.stderr)

        if image_file_ref.state.name == "FAILED":
            logging.error(f"Gemini file processing failed for {image_file_ref.name}")
            return {"Output": f"Error: Gemini failed to process the image file {image_file_path.name}."}

        if image_file_ref.state.name != "ACTIVE":
            # Proceed anyway, but leave a trace in the log.
            logging.warning(f"Gemini file {image_file_ref.name} ended in unexpected state: {image_file_ref.state.name}")

        response = client.models.generate_content(
            model='gemini-1.5-flash',  # Or a vision-specific model
            contents=[query, image_file_ref]
        )
        logging.info(f"Gemini analysis complete for {image_file_path.name}.")
        return {"Output": response.text}

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        logging.error(f"Error during Gemini image analysis for {image_file_path.name}: {e}", exc_info=True)
        return {"Output": f"An error occurred during image analysis: {e}"}
576
 
 
 
 
 
 
577
 
578
@mcp.tool()
def create_code_file(filename: str, code: str) -> Dict[str, str]:
    """
    Creates a file with the specified content in the code execution directory.
    Overwrites the file if it already exists.

    Args:
        filename: The name of the file to create (e.g., 'script.py', 'data.txt').
                  The file is created in '/app/code_interpreter/'.
        code: The string content to write into the file.

    Returns:
        A dictionary indicating the task outcome.
    """
    global tracked_files_in_codedir

    # Sync first so the directory state is current before writing.
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)

    if not filename:
        return {"info": "Error: Filename cannot be empty."}

    # Keep only the final path component so the write stays inside CODE_DIR.
    filename = Path(filename).name
    target = CODE_DIR / filename

    logging.info(f"Creating/overwriting file: {target}")
    try:
        with open(target, "w", encoding='utf-8') as handle:
            handle.write(code)

        # Record the file right away so it is not later reported as new output.
        tracked_files_in_codedir.add(target)

        logging.info(f"Successfully wrote {len(code)} characters to {target}")
        outcome = f"File '{filename}' created/updated successfully in {CODE_DIR}."
    except OSError as e:
        logging.error(f"Failed to write file {target}: {e}")
        outcome = f"Error: Could not write file '{filename}'. Reason: {e}"
    except Exception as e:
        logging.error(f"Unexpected error writing file {target}: {e}", exc_info=True)
        outcome = f"Error: An unexpected error occurred while writing '{filename}'. Reason: {e}"

    return {"info": outcome}
618
 
619
 
620
@mcp.tool()
def install_python_packages(python_packages: str) -> Dict[str, str]:
    """
    Installs specified Python packages using pip in the sandbox environment.

    Args:
        python_packages: A space-separated string of package names (e.g., "numpy pandas matplotlib").

    Returns:
        A dictionary containing the stdout/stderr of the pip command and an info message.
    """
    # Function-scope import keeps this block self-contained.
    import shlex

    package_names = python_packages.strip()
    if not package_names:
        return {"output": "", "info": "No packages specified for installation."}

    # Split like a shell would (so caller-supplied quotes are honoured), then
    # re-quote every requirement. Characters such as '>', '<', ';' or '&'
    # then reach pip as literal text instead of being interpreted as
    # redirection or command separators (e.g. "numpy>=1.0").
    try:
        requirements = shlex.split(package_names)
    except ValueError as e:
        logging.warning(f"Could not parse package string {package_names!r}: {e}")
        return {
            "output": f"Error: Could not parse package list. Reason: {e}",
            "info": "Installation aborted.",
        }

    quoted_requirements = " ".join(shlex.quote(req) for req in requirements)
    if quoted_requirements != package_names:
        logging.warning(f"Package string contained shell-sensitive characters; quoted as: {quoted_requirements}")

    # Use --break-system-packages for modern pip behavior in managed environments
    command = f"pip install --break-system-packages {quoted_requirements}"
    logging.info(f"Attempting to install packages: {package_names}")

    # Use a longer timeout for package installation
    output = run_command_in_sandbox(command, timeout_sec=600, run_forever=False, cwd=CODE_DIR)

    # Heuristic on pip's output text; anything else is flagged for inspection.
    if "Successfully installed" in output or "Requirement already satisfied" in output:
        logging.info(f"Pip install command finished for: {package_names}")
        info_msg = f"Package installation command executed for: {package_names}."
    else:
        logging.warning(f"Pip install command for '{package_names}' may have encountered issues.")
        info_msg = f"Package installation command executed for: {package_names}. Check output for details."

    return {"output": output, "info": info_msg}
656
 
 
 
 
 
 
 
 
657
 
658
@mcp.tool()
def run_code(
    filename: str,
    code: str,
    start_cmd: str,
    python_packages: str = "",
    timeout_seconds: int = 300,
    run_forever: bool = False
) -> Dict[str, Any]:
    """
    Creates a code file, optionally installs Python packages, executes the code
    using the provided start command, and uploads any newly created files.

    Args:
        filename: Name of the file to create (e.g., "app.py"). Stored in /app/code_interpreter/.
        code: Full source code to write to the file.
        start_cmd: Command to execute the file (e.g., "python /app/code_interpreter/app.py").
                   Ensure paths within the command are absolute or relative to /app/code_interpreter.
        python_packages: Space-separated list of Python packages to install via pip.
                         Leave empty or "" if none needed. Pre-installed: gradio, XlsxWriter, openpyxl.
        timeout_seconds: Maximum execution time in seconds (default 300). Ignored if run_forever is True.
        run_forever: If True, the command attempts to run indefinitely (e.g., for servers).
                     Output capture might be limited, and timeout is ignored.

    Returns:
        A dictionary containing:
        - "output": The stdout/stderr from the execution.
        - "info": Status message about file creation/package installation.
        - "files_download_links": A list of URLs for any new files created during execution.
    """
    global tracked_files_in_codedir
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)  # Sync before starting

    notes = []

    # Step 1: optional package installation; its output is only logged.
    if python_packages:
        pip_result = install_python_packages(python_packages)
        notes.append(pip_result.get("info", "Package install attempted."))
        logging.debug(f"Package install output:\n{pip_result.get('output', '')}")

    # Step 2: write the source file; stop early if that reported an error.
    creation_info = create_code_file(filename, code).get("info", "File creation attempted.")
    notes.append(creation_info)
    if "Error:" in creation_info:
        return {
            "output": "Aborted due to file creation error.",
            "info": "\n".join(notes),
            "files_download_links": []
        }

    # Snapshot taken *after* the file exists, so the script itself is not
    # treated as generated output.
    baseline = set(CODE_DIR.glob("*"))
    tracked_files_in_codedir = baseline

    # Step 3: run the start command.
    logging.info(f"Executing start command: {start_cmd}")
    exec_output = run_command_in_sandbox(
        start_cmd,
        timeout_sec=timeout_seconds,
        run_forever=run_forever,
        cwd=CODE_DIR,
    )

    # Step 4: upload whatever appeared during the run and refresh tracking.
    links, tracked_files_in_codedir = _upload_new_files(CODE_DIR, baseline)

    return {
        "output": exec_output,
        "info": "\n".join(notes),
        "files_download_links": links
    }
 
727
 
 
 
 
728
 
729
@mcp.tool()
def run_existing_code(
    start_cmd: str,
    timeout_seconds: int = 300,
    run_forever: bool = False
) -> Dict[str, Any]:
    """
    Executes a command assuming the necessary code files already exist
    in the '/app/code_interpreter/' directory. Uploads any newly created files.

    Args:
        start_cmd: Command to execute (e.g., "python /app/code_interpreter/main.py").
                   Ensure paths within the command are absolute or relative to /app/code_interpreter.
        timeout_seconds: Maximum execution time in seconds (default 300). Ignored if run_forever is True.
        run_forever: If True, the command attempts to run indefinitely. Output capture might be limited.

    Returns:
        A dictionary containing:
        - "output": The stdout/stderr from the execution.
        - "files_download_links": A list of URLs for any new files created during execution.
    """
    global tracked_files_in_codedir

    # The sync call also refreshes tracked_files_in_codedir, which serves as
    # the "before" snapshot for detecting generated files.
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)
    baseline = tracked_files_in_codedir

    logging.info(f"Executing command on existing files: {start_cmd}")
    captured = run_command_in_sandbox(
        start_cmd,
        timeout_sec=timeout_seconds,
        run_forever=run_forever,
        cwd=CODE_DIR,
    )

    # Upload anything created by the run and update the tracked set.
    links, tracked_files_in_codedir = _upload_new_files(CODE_DIR, baseline)

    result = {"output": captured}
    result["files_download_links"] = links
    return result
766
 
 
767
 
768
@mcp.tool()
def run_shell_command(
    cmd: str,
    timeout_seconds: int = 300,
    run_forever: bool = False
) -> Dict[str, Any]:
    """
    Runs an arbitrary shell command in the '/app/code_interpreter/' directory.
    Useful for file manipulation, setup, or simple tasks. Executes on Alpine Linux.
    Avoid commands requiring sudo. Uploads any newly created files.

    Args:
        cmd: The shell command to execute (e.g., "mkdir output_data", "ls -l").
        timeout_seconds: Maximum execution time in seconds (default 300). Ignored if run_forever is True.
        run_forever: If True, the command attempts to run indefinitely. Output capture might be limited.

    Returns:
        A dictionary containing:
        - "output": The stdout/stderr from the command execution.
        - "files_download_links": A list of URLs for any new files created by the command.
    """
    global tracked_files_in_codedir

    # Sync first: the command may touch downloaded or transferred files. The
    # call also refreshes tracked_files_in_codedir, used as the baseline.
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)
    baseline = tracked_files_in_codedir

    logging.info(f"Executing shell command: {cmd}")
    captured = run_command_in_sandbox(
        cmd,
        timeout_sec=timeout_seconds,
        run_forever=run_forever,
        cwd=CODE_DIR,
    )

    # Upload files the command produced (e.g. an archive it created).
    links, tracked_files_in_codedir = _upload_new_files(CODE_DIR, baseline)

    result = {"output": captured}
    result["files_download_links"] = links
    return result
806
+
807
 
 
 
 
 
 
 
 
 
 
 
 
 
808
@mcp.tool()
def get_youtube_transcript(video_id: str) -> Dict[str, Any]:
    """
    Fetches the transcript of a YouTube video using its video ID via RapidAPI.

    Args:
        video_id: The unique ID of the YouTube video (e.g., "ZacjOVVgoLY").

    Returns:
        A dictionary containing the transcript data or an error message.
    """
    if not RAPIDAPI_KEY:
        return {"error": "RapidAPI key is not configured."}

    url = f"https://{YOUTUBE_TRANSCRIPT_API}/api/transcript"
    params = {"videoId": video_id}
    headers = {
        'x-rapidapi-key': RAPIDAPI_KEY,
        'x-rapidapi-host': YOUTUBE_TRANSCRIPT_API
    }
    logging.info(f"Fetching YouTube transcript for video ID: {video_id}")

    try:
        response = requests_session.get(url, headers=headers, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        logging.info(f"Successfully fetched transcript for {video_id}.")
        return data
    except json.JSONDecodeError as e:
        # Must come before RequestException: in recent versions of requests
        # the error raised by response.json() derives from both classes, so
        # the other order would report a parse failure as a fetch failure.
        logging.error(f"Error decoding JSON response for youtube transcript {video_id}: {e}")
        return {"error": f"Failed to parse transcript response: {e}"}
    except RequestException as e:
        logging.error(f"Error fetching YouTube transcript for {video_id}: {e}")
        error_msg = f"Failed to fetch transcript: {e}"
        failed_response = getattr(e, 'response', None)
        if failed_response is not None:
            # Include a short snippet of the body to aid debugging.
            error_msg += f" (Status: {failed_response.status_code}, Body: {failed_response.text[:200]})"
        return {"error": error_msg}
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        logging.error(f"Unexpected error fetching YouTube transcript {video_id}: {e}", exc_info=True)
        return {"error": f"An unexpected error occurred: {e}"}
848
 
849
 
850
@mcp.tool()
def read_excel_file(filename: str) -> Dict[str, Any]:
    """
    Reads data from an Excel file (.xlsx) located in '/app/code_interpreter/'.

    Args:
        filename: The name of the Excel file (e.g., 'report.xlsx').

    Returns:
        A dictionary where keys are cell coordinates (e.g., 'Sheet1!A1')
        and values are the corresponding cell contents (converted to string).
        Returns an error message if the file cannot be read.
    """
    _ensure_files_synced(CODE_DIR, TEMP_UPLOAD_DIR)  # Make sure file is present

    # Only the final path component is used, so lookups stay inside CODE_DIR.
    workbook_path = CODE_DIR / Path(filename).name

    if not workbook_path.exists():
        logging.error(f"Excel file not found: {workbook_path}")
        return {"error": f"File not found: {filename}"}

    logging.info(f"Reading Excel file: {workbook_path}")
    cells = {}
    simple_types = (str, int, float, bool)
    try:
        # data_only=True yields stored values rather than formula text.
        book = openpyxl.load_workbook(workbook_path, data_only=True)
        for title in book.sheetnames:
            for row in book[title].iter_rows():
                for cell in row:
                    if cell.value is None:
                        continue  # Skip empty cells entirely.
                    # Keys carry the sheet name so several sheets can coexist.
                    key = f"{title}!{cell.coordinate}"
                    value = cell.value
                    # Simple values keep their type; anything else is stringified.
                    cells[key] = value if isinstance(value, simple_types) else str(value)
        logging.info(f"Successfully read {len(cells)} cells from {filename}.")
        return cells
    except Exception as e:
        logging.error(f"Failed to read Excel file {workbook_path}: {e}", exc_info=True)
        return {"error": f"Could not read Excel file '{filename}'. Reason: {e}"}
892
 
 
893
 
894
@mcp.tool()
def scrape_website_content(url: str, query: Optional[str] = None) -> Dict[str, str]:
    """
    Scrapes the textual content of a single website URL using ScrapeNinja via RapidAPI
    and optionally asks a question about the content using an AI model.

    Args:
        url: The URL of the website to scrape.
        query: An optional question to ask the AI about the scraped content.

    Returns:
        A dictionary containing the scraped content ("content") and,
        if a query was provided, the AI's answer ("ai_answer").
        Returns an error message on failure.
    """
    if not RAPIDAPI_KEY:
        return {"error": "RapidAPI key is not configured."}

    scrape_url = f"https://{SCRAPE_NINJA_API}/scrape"
    headers = {
        'x-rapidapi-key': RAPIDAPI_KEY,
        'x-rapidapi-host': SCRAPE_NINJA_API,
        'Content-Type': "application/json"
    }
    payload = json.dumps({"url": url})
    logging.info(f"Scraping website: {url}")
    result = {}

    try:
        response = requests_session.post(scrape_url, headers=headers, data=payload, timeout=60)
        response.raise_for_status()
        # Assuming ScrapeNinja returns JSON with content, adjust based on actual API response
        scraped_data = response.json()
        # Extract main textual content - this might need adjustment based on ScrapeNinja's output format
        content = scraped_data.get("body", "")  # Or another relevant key like 'text'
        if not content:
            content = str(scraped_data)  # Fallback to string representation if body is empty

        # Strip markup so only the visible text remains.
        soup = BeautifulSoup(content, "html.parser")
        cleaned_content = soup.get_text(separator=' ', strip=True)
        result["content"] = cleaned_content
        logging.info(f"Successfully scraped content from {url} (length: {len(cleaned_content)}).")

        if query:
            logging.info(f"Asking AI query about scraped content: '{query}'")
            try:
                ai_response = completion(
                    model="gemini/gemini-1.5-flash",  # Use a suitable model
                    messages=[
                        {"role": "system", "content": "You are an AI assistant analyzing website content."},
                        {"role": "user", "content": f"Based on the following website content, please answer this question: {query}\n\nWebsite Content:\n{cleaned_content[:15000]}"}  # Limit context size
                    ],
                    max_tokens=500,
                    temperature=0.5,
                )
                ai_answer = ai_response.choices[0].message.content
                result["ai_answer"] = ai_answer
                logging.info(f"Received AI answer for query on {url}.")
            except Exception as e:
                # A failed AI call must not discard the scraped content.
                logging.error(f"AI query failed for {url}: {e}", exc_info=True)
                result["ai_answer"] = f"Error during AI analysis: {e}"

        return result

    except json.JSONDecodeError as e:
        # Must come before RequestException: in recent versions of requests
        # the error raised by response.json() derives from both classes, so
        # the other order would report a parse failure as a scrape failure.
        logging.error(f"Error decoding JSON response for scrape {url}: {e}")
        return {"error": f"Failed to parse scrape response: {e}"}
    except RequestException as e:
        logging.error(f"Error scraping {url}: {e}")
        error_msg = f"Failed to scrape {url}: {e}"
        failed_response = getattr(e, 'response', None)
        if failed_response is not None:
            error_msg += f" (Status: {failed_response.status_code}, Body: {failed_response.text[:200]})"
        return {"error": error_msg}
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        logging.error(f"Unexpected error scraping {url}: {e}", exc_info=True)
        return {"error": f"An unexpected error occurred during scraping: {e}"}
971
+
972
+ # Consolidated Deep Thinking Tool
973
@mcp.tool()
def ask_advanced_ai(model_provider: str, query: str, context_info: str) -> Dict[str, str]:
    """
    Leverages a powerful external AI model for complex reasoning or generation tasks.

    Args:
        model_provider: The provider/model to use. Supported: 'groq', 'openrouter', 'gemini'.
        query: The main question or task for the AI.
        context_info: Additional context, background information, or previous findings
                      relevant to the query.

    Returns:
        A dictionary containing the AI's response under the key "response".
        Returns an error message on failure.
    """
    logging.info(f"Sending query to advanced AI ({model_provider}): '{query[:100]}...'")

    model_map = {
        # Using specific model names known to litellm
        'groq': "groq/llama3-70b-8192",  # Example: Use a powerful Groq model
        'openrouter': "openrouter/meta-llama/llama-3-70b-instruct",  # Example: Use a powerful OpenRouter model
        'gemini': "gemini/gemini-1.5-pro-latest"  # Example: Use a powerful Gemini model
    }

    # Normalise once so the model lookup and the key checks agree: comparing
    # the raw string below would let e.g. "Groq" bypass the API-key check.
    provider = model_provider.strip().lower()
    model_name = model_map.get(provider)

    if not model_name:
        logging.error(f"Unsupported model provider specified: {model_provider}")
        return {"response": f"Error: Unsupported model provider '{model_provider}'. Use 'groq', 'openrouter', or 'gemini'."}

    # Check for the API key of the selected provider only; each key constant
    # is referenced solely when its provider was chosen.
    key_missing = False
    if provider == 'groq' and not GROQ_API_KEY:
        key_missing = True
    if provider == 'openrouter' and not OPENROUTER_API_KEY:
        key_missing = True
    if provider == 'gemini' and not GEMINI_API_KEY:  # litellm might need env var
        key_missing = True

    if key_missing:
        logging.error(f"API Key for {model_provider} is not configured.")
        return {"response": f"Error: API key for provider '{model_provider}' is missing."}

    messages = [
        {"role": "system", "content": "You are a highly capable AI assistant performing advanced reasoning or generation."},
        {"role": "user", "content": f"Based on the following information, please address the query.\n\nContext/Information Provided:\n{context_info}\n\nQuery:\n{query}"}
    ]

    try:
        response = completion(
            model=model_name,
            messages=messages,
        )
        ai_response = response.choices[0].message.content
        logging.info(f"Received response from {model_provider} AI.")
        return {"response": ai_response}
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        logging.error(f"Error calling {model_provider} AI ({model_name}): {e}", exc_info=True)
        return {"response": f"Error interacting with {model_provider} AI: {e}"}
1033
 
 
 
 
 
 
 
1034
 
1035
+ # --- Main Execution ---
1036
if __name__ == "__main__":
    logging.info("Starting FastMCP server...")

    # The code directory must exist before it can be scanned or written to.
    CODE_DIR.mkdir(parents=True, exist_ok=True)

    # Seed the tracked set with whatever is already present at startup.
    tracked_files_in_codedir = set(CODE_DIR.glob("*"))
    logging.info(f"Initial tracked files in {CODE_DIR}: {[entry.name for entry in tracked_files_in_codedir]}")

    # Serve over standard I/O; stdout carries the protocol messages.
    mcp.run(transport='stdio')
1046