from smolagents import (
ToolCallingAgent,
CodeAgent,
DuckDuckGoSearchTool,
VisitWebpageTool,
InferenceClientModel,
OpenAIServerModel,
WikipediaSearchTool,
)
from dotenv import load_dotenv
from tracing import setup_tracing
from tools import (
read_image,
transcribe_audio,
run_video,
read_code,
fetch_task_files,
)
# from tools import go_back, close_popups, search_item_ctrl_f, save_screenshot
load_dotenv()
# Initialize tracing when module is imported
trace_provider = None
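# GAIA-style answer-formatting instructions (currently not appended to any agent in this module).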
MANAGER_PROMPT = """You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use commas in your number and don't use units such as $ or percent signs unless specified otherwise.
If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending on whether each element of the list is a number or a string.
Your answer should start with "FINAL ANSWER: ", followed by the answer. """
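# Browser-automation guidance for the helium-based vision agent (currently commented out below).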
helium_instructions = """
You can use helium to access websites. Don't worry about the helium driver; it's already managed.
We've already run "from helium import *".
Then you can go to pages!
Code:
```py
go_to('github.com/trending')
```<end_code>
You can directly click clickable elements by inputting the text that appears on them.
Code:
```py
click("Top products")
```<end_code>
If it's a link:
Code:
```py
click(Link("Top products"))
```<end_code>
If you try to interact with an element and it's not found, you'll get a LookupError.
In general, stop your action after each button click to see what happens on your screenshot.
Never try to log in to a page.
To scroll up or down, use scroll_down or scroll_up with the number of pixels to scroll as an argument.
Code:
```py
scroll_down(num_pixels=1200) # This will scroll one viewport down
```<end_code>
When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails).
Just use your built-in tool `close_popups` to close them:
Code:
```py
close_popups()
```<end_code>
You can use .exists() to check for the existence of an element. For example:
Code:
```py
if Text('Accept cookies?').exists():
click('I accept')
```<end_code>
"""
add_sys_prompt = """\n\nWhen processing tasks with files:
1. Use the fetch_task_files tool with the URL provided to you to download and process files
2. Depending on the file type returned, use the appropriate specialized tool:
- For images: Use the data_url returned with read_image tool
- For audio: Use the audio data with transcribe_audio tool
- For code files: Use read_code tool
- For videos: Use run_video tool
3. When handling different file types:
- Images: The fetch_task_files tool will return a data_url you can use directly with read_image
- Code: Do not execute code files, analyze them as text
- Tabular data (CSV, Excel): Use pandas to analyze the data
- Videos: Extract relevant information from visual frames and audio
4. Keep answers concise and to the point. The answer is likely as simple as one word.
5. Make sure you provide the answer in accordance with the instruction provided in the question.
6. Do not return the raw result of tool calls as your final answer.
7. Do not add any additional information, explanation, unnecessary words or symbols.
"""


def initialize_tracing(enabled=True, provider="langfuse"):
"""
Initialize tracing for the agent module
Args:
enabled: Whether tracing should be active
provider: Which provider to use - "langfuse" or "phoenix"
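    Returns:
        The shared tracing provider instance (created once and reused on later calls).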
"""
global trace_provider
if trace_provider is None:
trace_provider = setup_tracing(
service_name="smolagent", enabled=enabled, provider=provider
)
return trace_provider


def get_agent():
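    """Build the multi-agent system: a manager CodeAgent that delegates to a
    web-search ToolCallingAgent and a multimedia-processing CodeAgent."""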
# Ensure tracing is initialized
initialize_tracing()
# SmolagentsInstrumentor will automatically trace agent operations
# llm_qwen = InferenceClientModel(
# model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together"
# )
# llm_deepseek = InferenceClientModel(
# "deepseek-ai/DeepSeek-R1",
# provider="together",
# max_tokens=8096,
# # "Qwen/Qwen3-235B-A22B-FP8",
# # provider="together",
# # max_tokens=8096,
# )
# Create web agent
web_agent = ToolCallingAgent(
tools=[
DuckDuckGoSearchTool(),
VisitWebpageTool(),
WikipediaSearchTool(),
],
model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
max_steps=3,
name="Web_Agent",
description="A web agent that can search the web and visit webpages.",
verbosity_level=1,
)
mm_agent = CodeAgent(
tools=[
fetch_task_files,
read_image,
transcribe_audio,
read_code,
run_video,
],
model=InferenceClientModel(
model_id="Qwen/Qwen2.5-VL-32B-Instruct", # provider="together"
),
max_steps=3,
additional_authorized_imports=["pandas", "numpy", "openpyxl"],
name="Multimedia_Agent",
description="An agent that can process and analyze images, audio, video, and other files. It needs to be provided with a valid URL to fetch the file.",
verbosity_level=1,
)
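    # Extend the multimedia agent's system prompt with the file-handling guidance above.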
mm_agent.prompt_templates["system_prompt"] += add_sys_prompt
# Initialize the model
# vlm = InferenceClientModel(model_id="Qwen/Qwen2.5-Vision-32B", provider="together")
# # Create the agent
# vision_agent = CodeAgent(
# tools=[go_back, close_popups, search_item_ctrl_f],
# model=vlm,
# additional_authorized_imports=["helium", "selenium"],
# step_callbacks=[save_screenshot],
# max_steps=10,
# planning_interval=10,
# verbosity_level=1,
# name="Vision_Agent",
# description="A vision agent that can interact with webpages and take screenshots.",
# )
# vision_agent.prompt_templates["system_prompt"] += helium_instructions
# Import helium for the agent
# Create manager agent
manager_agent = CodeAgent(
tools=[],
managed_agents=[mm_agent, web_agent],
model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
max_steps=5,
planning_interval=10,
additional_authorized_imports=["pandas", "numpy", "openpyxl"],
verbosity_level=2,
)
return manager_agent
if __name__ == "__main__":
# Initialize tracing when run directly
# Choose one provider: "langfuse" (default) or "phoenix"
initialize_tracing(enabled=True, provider="phoenix")
# Get agent with tracing already configured
agent = get_agent()
agent.visualize()
# Run agent - SmolagentsInstrumentor will automatically trace the execution
print("Running agent with tracing enabled...")
result = agent.run(
"How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
)
print(f"Result: {result}")
print(
"If using Phoenix: run 'python -m phoenix.server.main serve' and view at http://localhost:6006"
)
print("If using Langfuse: view traces at https://cloud.langfuse.com")