# Page residue from the hosting site's file viewer, kept as comments so the module parses:
# yinde's picture
# Update app.py
# 4d15549 verified
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
import cv2
import numpy as np
from Gradio_UI import GradioUI
# Below is an example of a tool that does nothing. Amaze us with your creativity !
@tool
def my_custom_tool(arg1: str, arg2: int) -> str:  # it's important to specify the return type
    # Keep this layout (description, then an Args section with one entry per
    # parameter); the tool's behaviour itself is free to change.
    """A placeholder tool that does not do anything yet.

    Args:
        arg1: the first argument (currently unused)
        arg2: the second argument (currently unused)
    """
    placeholder_reply = "What magic will you build ?"
    return placeholder_reply
@tool
def simple_object_detection(image_path: str, confidence_threshold: float) -> str:
    """Detect objects in an image with MobileNet SSD and save an annotated copy.

    The Caffe model files are read from the working directory, the image is run
    through the network, and every detection whose confidence exceeds the
    threshold is drawn as a green box with a class/confidence caption. The
    annotated image is written to output.jpg. Failures are reported in the
    returned string rather than raised.

    Args:
        image_path: Path to the input image.
        confidence_threshold: Minimum confidence (e.g., 0.2) to filter weak detections.

    Returns:
        A string indicating the location of the saved processed image or an error message.
    """
    try:
        # List of class labels MobileNet SSD was trained on; the network reports
        # a numeric class id that indexes into this list.
        classes = ["background", "aeroplane", "bicycle", "bird", "boat",
                   "bottle", "bus", "car", "cat", "chair", "cow",
                   "diningtable", "dog", "horse", "motorbike", "person",
                   "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

        # Paths to the pre-trained model files (ensure these files are downloaded)
        prototxt_path = "MobileNetSSD_deploy.prototxt.txt"
        model_path = "MobileNetSSD_deploy.caffemodel"

        # Load the pre-trained model from disk
        net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_path)
        if image is None:
            return f"Error: Image at {image_path} could not be loaded."
        (h, w) = image.shape[:2]

        # Prepare the image as a 300x300 blob for the network
        blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)),
                                     scalefactor=0.007843, size=(300, 300),
                                     mean=127.5)

        # Pass the blob through the network to obtain detections
        net.setInput(blob)
        detections = net.forward()

        # Draw a box and caption for every detection above the threshold
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            # Written as "not >" so a NaN confidence is skipped, as before.
            if not (confidence > confidence_threshold):
                continue

            idx = int(detections[0, 0, i, 1])
            # An unexpected class id should not abort the whole run.
            class_name = classes[idx] if 0 <= idx < len(classes) else f"class {idx}"

            # Box corners are scaled by the image width/height to get pixels.
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            # Plain Python ints are accepted by every OpenCV drawing binding.
            startX, startY, endX, endY = (int(v) for v in box)

            # Clamp to the image so the box edge and caption stay visible even
            # when the network predicts corners slightly outside the frame.
            startX = min(max(startX, 0), w - 1)
            endX = min(max(endX, 0), w - 1)
            startY = min(max(startY, 0), h - 1)
            endY = min(max(endY, 0), h - 1)

            label = f"{class_name}: {confidence * 100:.2f}%"
            cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2)
            # Put the caption above the box, or inside it when there is no room.
            y = startY - 10 if startY - 10 > 10 else startY + 20
            cv2.putText(image, label, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Save the output image; imwrite returns False when the write fails,
        # so success must not be reported unconditionally.
        output_path = "output.jpg"
        if not cv2.imwrite(output_path, image):
            return f"Error: Processed image could not be saved to {output_path}."
        return f"Processed image saved as {output_path}"
    except Exception as e:
        # Tool boundary: hand the failure back to the agent as text.
        return f"An error occurred: {str(e)}"
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that reports the current local time in the requested timezone.

    Args:
        timezone: Name of the timezone to look up (e.g., 'America/New_York').
    """
    try:
        # Resolve the name first; an unknown name raises and is reported below.
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        local_time = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        # Tool boundary: return the failure as text instead of raising.
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
    return f"The current local time in {timezone} is: {local_time}"
# Instance of the project's FinalAnswerTool; it is passed to the agent's tool list below.
final_answer = FinalAnswerTool()
# If the agent does not answer, the model may be overloaded; use another model or the
# following Hugging Face Endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',# it is possible that this model may be overloaded
    custom_role_conversions=None,
)
# Load the text-to-image tool from the Hub (trust_remote_code=True is passed to load_tool).
# NOTE(review): image_generation_tool is not included in the agent's tools list below;
# confirm whether that is intended.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
# Read the agent's prompt templates from prompts.yaml in the working directory.
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)
# Assemble the agent from the model, the tools and the loaded prompt templates.
agent = CodeAgent(
    model=model,
    tools=[DuckDuckGoSearchTool(), simple_object_detection, get_current_time_in_timezone, final_answer], ## add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)
# Launch the Gradio UI for the agent; this runs at module level, i.e. as soon as the file is executed.
GradioUI(agent).launch()