File size: 5,275 Bytes
9b5b26a
 
 
 
c19d193
6aae614
c6ce228
 
8fe992b
9b5b26a
 
5df72d6
9b5b26a
3d1237b
9b5b26a
 
 
 
 
 
 
 
9ed17af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b5b26a
 
 
 
 
 
 
 
 
 
 
 
 
 
8c01ffb
 
6aae614
ae7a494
 
 
 
e121372
bf6d34c
 
29ec968
fe328e0
13d500a
8c01ffb
 
9b5b26a
 
8c01ffb
8f1f706
861422e
 
9b5b26a
8c01ffb
8fe992b
4d15549
8c01ffb
 
 
 
 
 
861422e
8fe992b
 
9b5b26a
8c01ffb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
import cv2
import numpy as np

from Gradio_UI import GradioUI

# Below is an example of a tool that does nothing. Amaze us with your creativity !
@tool
def my_custom_tool(arg1: str, arg2: int) -> str:  # it's important to specify the return type
    # Keep this format for the description / args / args description but feel free to modify the tool
    """A tool that does nothing yet 
    Args:
        arg1: the first argument
        arg2: the second argument
    """
    # Placeholder reply; neither argument is used yet.
    placeholder_reply = "What magic will you build ?"
    return placeholder_reply

@tool
def simple_object_detection(image_path: str, confidence_threshold: float) -> str:
    """
    A tool that performs simple object detection on an image using MobileNet SSD with error handling.

    Args:
        image_path: Path to the input image.
        confidence_threshold: Minimum confidence (e.g., 0.2) to filter weak detections.

    Returns:
        A string indicating the location of the saved processed image or an error message.
    """
    # Class labels in the index order the network reports them.
    classes = ("background", "aeroplane", "bicycle", "bird", "boat",
               "bottle", "bus", "car", "cat", "chair", "cow",
               "diningtable", "dog", "horse", "motorbike", "person",
               "pottedplant", "sheep", "sofa", "train", "tvmonitor")

    # Paths to the pre-trained model files (ensure these files are downloaded)
    prototxt_path = "MobileNetSSD_deploy.prototxt.txt"
    model_path = "MobileNetSSD_deploy.caffemodel"
    output_path = "output.jpg"
    box_color = (0, 255, 0)

    # Any failure is reported back as text rather than raised, so the broad
    # handler at the bottom is deliberate.
    try:
        # Load the pre-trained model from disk
        net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

        # cv2.imread signals an unreadable/missing file by returning None
        image = cv2.imread(image_path)
        if image is None:
            return f"Error: Image at {image_path} could not be loaded."
        h, w = image.shape[:2]

        # Prepare the image as a blob for the network. The mean is given per
        # channel explicitly instead of as a bare float, so every channel is
        # centred the same way regardless of how a single number is converted
        # to an OpenCV Scalar.
        blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)),
                                     scalefactor=0.007843, size=(300, 300),
                                     mean=(127.5, 127.5, 127.5))

        # Pass the blob through the network to obtain detections
        net.setInput(blob)
        detections = net.forward()

        # Draw a box and label for every detection above the threshold
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if not confidence > confidence_threshold:
                continue

            # Fall back to a generic label instead of failing (or silently
            # wrapping around on a negative index) for an unexpected class id.
            idx = int(detections[0, 0, i, 1])
            if 0 <= idx < len(classes):
                class_name = classes[idx]
            else:
                class_name = f"class {idx}"

            # Scale the relative box to pixel coordinates, convert to plain
            # ints for the drawing calls, and clamp to the image bounds so the
            # label is never positioned outside the frame.
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            start_x, start_y, end_x, end_y = (int(v) for v in box.astype("int"))
            start_x = min(max(start_x, 0), w - 1)
            end_x = min(max(end_x, 0), w - 1)
            start_y = min(max(start_y, 0), h - 1)
            end_y = min(max(end_y, 0), h - 1)

            label = f"{class_name}: {confidence * 100:.2f}%"
            cv2.rectangle(image, (start_x, start_y), (end_x, end_y), box_color, 2)
            # Put the label above the box unless that would leave the image
            y = start_y - 10 if start_y - 10 > 10 else start_y + 20
            cv2.putText(image, label, (start_x, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2)

        # cv2.imwrite returns False when the file could not be written; do not
        # report success in that case.
        if not cv2.imwrite(output_path, image):
            return f"Error: Processed image could not be written to {output_path}."

        return f"Processed image saved as {output_path}"

    except Exception as e:
        return f"An error occurred: {str(e)}"


@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Resolve the zone name, read the clock in that zone, then format it
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        stamp = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
    except Exception as err:
        # Report the failure as text instead of raising
        return f"Error fetching time for timezone '{timezone}': {err!s}"
    else:
        return f"The current local time in {timezone} is: {stamp}"


# Instantiated once and registered in the agent's tool list below.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded; please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'

model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)


# Import tool from Hub
# NOTE(review): this tool is loaded but is not part of the `tools` list passed
# to the agent below -- confirm whether it should be registered.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)


# Read the prompt templates with an explicit encoding so the result does not
# depend on the platform's default text encoding.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[DuckDuckGoSearchTool(), simple_object_detection, get_current_time_in_timezone, final_answer],  # add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates
)


# Start the Gradio front end for the agent.
GradioUI(agent).launch()