|
from smolagents import ( |
|
ToolCallingAgent, |
|
CodeAgent, |
|
DuckDuckGoSearchTool, |
|
VisitWebpageTool, |
|
InferenceClientModel, |
|
OpenAIServerModel, |
|
WikipediaSearchTool, |
|
) |
|
from dotenv import load_dotenv |
|
from tracing import setup_tracing |
|
from tools import ( |
|
read_image, |
|
transcribe_audio, |
|
run_video, |
|
read_code, |
|
fetch_task_files, |
|
) |
|
|
|
|
|
|
|
# Runs at import time. python-dotenv's load_dotenv() reads key/value pairs
# from a .env file into the process environment; presumably this supplies the
# API credentials used by the model clients below -- confirm against deployment.
load_dotenv() |
|
|
|
|
|
# Module-level cache for the tracing provider. It stays None until
# initialize_tracing() stores the value returned by setup_tracing().
trace_provider = None |
|
|
|
# Answer-format instructions: the model is told to report its reasoning and to
# finish with a "FINAL ANSWER: ..." line (a number, a few words, or a
# comma-separated list). Nothing in the visible part of this file references
# this constant; it is presumably imported by another module -- verify.
# NOTE(review): the trailing " |" on each line and the lone "|" lines look like
# table-extraction residue rather than intended prompt text -- confirm against
# the upstream file before relying on the exact string contents.
MANAGER_PROMPT = """You are a helpful assistant tasked with answering questions using a set of tools. |
|
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template: |
|
FINAL ANSWER: [YOUR FINAL ANSWER]. |
|
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. |
|
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. |
|
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. |
|
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. |
|
Your answer should only start with "FINAL ANSWER: ", then follows with the answer. """ |
|
|
|
# Prompt text teaching an agent how to drive a browser through helium: go_to,
# click, Link, scroll_down/scroll_up, close_popups and .exists(), each with a
# short fenced code sample terminated by <end_code>. Nothing in the visible
# part of this file references this constant.
# NOTE(review): the trailing " |" on each line and the lone "|" lines look like
# table-extraction residue, and the code samples have lost their indentation
# (e.g. the body of the `if Text(...).exists():` example) -- confirm the exact
# string against the upstream file.
helium_instructions = """ |
|
You can use helium to access websites. Don't bother about the helium driver, it's already managed. |
|
We've already ran "from helium import *" |
|
Then you can go to pages! |
|
Code: |
|
```py |
|
go_to('github.com/trending') |
|
```<end_code> |
|
|
|
You can directly click clickable elements by inputting the text that appears on them. |
|
Code: |
|
```py |
|
click("Top products") |
|
```<end_code> |
|
|
|
If it's a link: |
|
Code: |
|
```py |
|
click(Link("Top products")) |
|
```<end_code> |
|
|
|
If you try to interact with an element and it's not found, you'll get a LookupError. |
|
In general stop your action after each button click to see what happens on your screenshot. |
|
Never try to login in a page. |
|
|
|
To scroll up or down, use scroll_down or scroll_up with as an argument the number of pixels to scroll from. |
|
Code: |
|
```py |
|
scroll_down(num_pixels=1200) # This will scroll one viewport down |
|
```<end_code> |
|
|
|
When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails). |
|
Just use your built-in tool `close_popups` to close them: |
|
Code: |
|
```py |
|
close_popups() |
|
```<end_code> |
|
|
|
You can use .exists() to check for the existence of an element. For example: |
|
Code: |
|
```py |
|
if Text('Accept cookies?').exists(): |
|
click('I accept') |
|
```<end_code> |
|
""" |
|
|
|
# Extra system-prompt text for file-handling tasks. get_agent() appends it to
# the multimedia agent's "system_prompt" template. It tells the agent to
# download files with fetch_task_files, pick read_image / transcribe_audio /
# read_code / run_video by file type, and keep the final answer terse.
# The string starts with two escaped newlines so it is separated from whatever
# prompt text it is appended to.
# NOTE(review): the trailing " |" on each line and the lone "|" lines look like
# table-extraction residue, and the sub-bullets appear to have lost their
# indentation -- confirm the exact string against the upstream file.
add_sys_prompt = """\n\nWhen processing tasks with files: |
|
|
|
1. Use the fetch_task_files tool with the URL provided to you to download and process files |
|
2. Depending on the file type returned, use the appropriate specialized tool: |
|
- For images: Use the data_url returned with read_image tool |
|
- For audio: Use the audio data with transcribe_audio tool |
|
- For code files: Use read_code tool |
|
- For videos: Use run_video tool |
|
|
|
3. When handling different file types: |
|
- Images: The fetch_task_files tool will return a data_url you can use directly with read_image |
|
- Code: Do not execute code files, analyze them as text |
|
- Tabular data (CSV, Excel): Use pandas to analyze the data |
|
- Videos: Extract relevant information from visual frames and audio |
|
|
|
4. Keep answers concise and to the point. The answer is likely as simple as one word. |
|
5. Make sure you provide the answer in accordance with the instruction provided in the question. |
|
6. Do not return the raw result of tool calls as your final answer. |
|
7. Do not add any additional information, explanation, unnecessary words or symbols. |
|
""" |
|
|
|
|
|
def initialize_tracing(enabled=True, provider="langfuse"):
    """Set up tracing for this module once and return the cached provider.

    The value returned by ``setup_tracing`` is stored in the module-level
    ``trace_provider``. While that cached value is not ``None``, later calls
    return it unchanged and their arguments are ignored.

    Args:
        enabled: Whether tracing should be active.
        provider: Which provider to use - "langfuse" or "phoenix".

    Returns:
        The cached value produced by ``setup_tracing``.
    """
    global trace_provider

    # Already configured: reuse the existing provider.
    if trace_provider is not None:
        return trace_provider

    trace_provider = setup_tracing(
        service_name="smolagent",
        enabled=enabled,
        provider=provider,
    )
    return trace_provider
|
|
|
|
|
def get_agent():
    """Build the manager agent together with its two managed sub-agents.

    Calls ``initialize_tracing()`` with its defaults first, then constructs:

    * ``Web_Agent`` - a ``ToolCallingAgent`` with DuckDuckGo search, web page
      visiting and Wikipedia search tools, backed by ``gpt-4.1``.
    * ``Multimedia_Agent`` - a ``CodeAgent`` with the file tools imported from
      ``tools``, backed by ``Qwen/Qwen2.5-VL-32B-Instruct``; ``add_sys_prompt``
      is appended to its system prompt template.
    * a manager ``CodeAgent`` with no tools of its own that manages the two
      agents above.

    Returns:
        The manager ``CodeAgent``.
    """
    initialize_tracing()

    # Extra modules that the code-executing agents are allowed to import.
    data_modules = ("pandas", "numpy", "openpyxl")

    # --- Web research sub-agent -------------------------------------------
    search_tools = [
        DuckDuckGoSearchTool(),
        VisitWebpageTool(),
        WikipediaSearchTool(),
    ]
    web_model = OpenAIServerModel(model_id="gpt-4.1", temperature=0.1)
    web_agent = ToolCallingAgent(
        tools=search_tools,
        model=web_model,
        max_steps=3,
        name="Web_Agent",
        description="A web agent that can search the web and visit webpages.",
        verbosity_level=1,
    )

    # --- Multimedia / file sub-agent --------------------------------------
    file_tools = [
        fetch_task_files,
        read_image,
        transcribe_audio,
        read_code,
        run_video,
    ]
    vision_model = InferenceClientModel(
        model_id="Qwen/Qwen2.5-VL-32B-Instruct",
    )
    mm_agent = CodeAgent(
        tools=file_tools,
        model=vision_model,
        max_steps=3,
        additional_authorized_imports=list(data_modules),
        name="Multimedia_Agent",
        description="An agent that can process and analyze images, audio, video, and other files. It needs to be provided with a valid URL to fetch the file.",
        verbosity_level=1,
    )
    # Extend the default system prompt with the file-handling guidance.
    templates = mm_agent.prompt_templates
    templates["system_prompt"] = templates["system_prompt"] + add_sys_prompt

    # --- Manager that delegates to the two sub-agents -----------------------
    manager_model = OpenAIServerModel(model_id="gpt-4.1", temperature=0.1)
    manager_agent = CodeAgent(
        tools=[],
        managed_agents=[mm_agent, web_agent],
        model=manager_model,
        max_steps=5,
        planning_interval=10,
        additional_authorized_imports=list(data_modules),
        verbosity_level=2,
    )
    return manager_agent
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: set up Phoenix tracing first so that the default
    # initialize_tracing() call inside get_agent() reuses that provider.
    initialize_tracing(enabled=True, provider="phoenix")

    agent = get_agent()
    agent.visualize()

    question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."

    print("Running agent with tracing enabled...")
    result = agent.run(question)
    print(f"Result: {result}")

    # Tell the user where the recorded traces can be viewed.
    trace_hints = (
        "If using Phoenix: run 'python -m phoenix.server.main serve' and view at http://localhost:6006",
        "If using Langfuse: view traces at https://cloud.langfuse.com",
    )
    for hint in trace_hints:
        print(hint)
|
|