from smolagents import (
    ToolCallingAgent,
    CodeAgent,
    DuckDuckGoSearchTool,
    VisitWebpageTool,
    # InferenceClientModel,
    OpenAIServerModel,
    WikipediaSearchTool,
)
from dotenv import load_dotenv
from tracing import setup_tracing
from tools import (
    read_image,
    transcribe_audio,
    run_video,
    read_code,
    fetch_task_files,
)
# from tools import go_back, close_popups, search_item_ctrl_f, save_screenshot
load_dotenv()
# Initialize tracing when module is imported
trace_provider = None
MANAGER_PROMPT = """You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings.
If you are asked for a number, don't use commas in your number, nor units such as $ or percent signs, unless specified otherwise.
If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise.
If you are asked for a comma-separated list, apply the above rules to each element depending on whether it is a number or a string.
Your answer should start with "FINAL ANSWER: ", followed by the answer. """
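
# Illustrative sketch (not part of the original pipeline): how a caller might
# strip the "FINAL ANSWER:" prefix that MANAGER_PROMPT asks the model to emit.
# The helper name and post-processing are assumptions, not a smolagents API.
def _extract_final_answer(raw_output: str) -> str:
    marker = "FINAL ANSWER:"
    idx = raw_output.rfind(marker)
    return raw_output[idx + len(marker):].strip() if idx != -1 else raw_output.strip()
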
helium_instructions = """
You can use helium to access websites. Don't worry about the helium driver; it's already managed.
We've already run "from helium import *".
Then you can go to pages!
Code:
```py
go_to('github.com/trending')
```<end_code>
You can directly click clickable elements by inputting the text that appears on them.
Code:
```py
click("Top products")
```<end_code>
If it's a link:
Code:
```py
click(Link("Top products"))
```<end_code>
If you try to interact with an element and it's not found, you'll get a LookupError.
In general, stop your action after each button click to see what happens in your screenshot.
Never try to log in to a page.
To scroll up or down, use scroll_down or scroll_up with the number of pixels to scroll as an argument.
Code:
```py
scroll_down(num_pixels=1200) # This will scroll one viewport down
```<end_code>
When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails).
Just use your built-in tool `close_popups` to close them:
Code:
```py
close_popups()
```<end_code>
You can use .exists() to check for the existence of an element. For example:
Code:
```py
if Text('Accept cookies?').exists():
    click('I accept')
```<end_code>
"""
add_sys_prompt = """\n\nIf a file_url is available or a URL is given in the question statement, request and use its content to answer the question. \
If a code file, such as a .py file, is given, do not attempt to execute it; open it as a text file and analyze the content. \
When a tabular file, such as csv, tsv, or xlsx, is given, read it using pandas.
Make sure you provide the answer in accordance with the instructions in the question. Do not return the raw tool result as the final_answer.
Do not add any additional information, explanations, unnecessary words or symbols. The answer is likely as simple as one word."""
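
# Illustrative sketch (assumption, unused by the agents below): loading a
# tabular attachment with pandas, as add_sys_prompt instructs the agent to do.
def _load_tabular_example(path: str):
    import pandas as pd  # pandas is whitelisted via additional_authorized_imports below
    if path.endswith(".tsv"):
        return pd.read_csv(path, sep="\t")
    if path.endswith(".xlsx"):
        return pd.read_excel(path)
    return pd.read_csv(path)
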
def initialize_tracing(enabled=True, provider="langfuse"):
    """
    Initialize tracing for the agent module.

    Args:
        enabled: Whether tracing should be active
        provider: Which provider to use - "langfuse" or "phoenix"
    """
    global trace_provider
    if trace_provider is None:
        trace_provider = setup_tracing(
            service_name="smolagent", enabled=enabled, provider=provider
        )
    return trace_provider
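# Note: initialize_tracing() is idempotent; repeated calls reuse the cached
# trace_provider, so it is safe to call it both here and in __main__ below.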
def get_agent():
    # Ensure tracing is initialized
    initialize_tracing()
    # SmolagentsInstrumentor will automatically trace agent operations
    # llm_qwen = InferenceClientModel(
    #     model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together"
    # )
    # llm_deepseek = InferenceClientModel(
    #     "deepseek-ai/DeepSeek-R1",
    #     provider="together",
    #     max_tokens=8096,
    #     # "Qwen/Qwen3-235B-A22B-FP8",
    #     # provider="together",
    #     # max_tokens=8096,
    # )
    # Create web agent
    web_agent = ToolCallingAgent(
        tools=[
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
            WikipediaSearchTool(),
        ],
        model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
        max_steps=3,
        name="Web_Agent",
        description="A web agent that can search the web and visit webpages.",
        verbosity_level=1,
    )
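    # Create multimedia agent for image, audio, video and code-file questions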
    mm_agent = CodeAgent(
        tools=[
            read_image,
            transcribe_audio,
            read_code,
            run_video,
        ],
        model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
        max_steps=3,
        name="Multimedia_Agent",
        description="An agent that can answer questions about all types of images, videos and speech. Needs to be provided with a valid url or an image.",
        verbosity_level=1,
    )
    # Initialize the model
    # vlm = InferenceClientModel(model_id="Qwen/Qwen2.5-Vision-32B", provider="together")
    # # Create the agent
    # vision_agent = CodeAgent(
    #     tools=[go_back, close_popups, search_item_ctrl_f],
    #     model=vlm,
    #     additional_authorized_imports=["helium", "selenium"],
    #     step_callbacks=[save_screenshot],
    #     max_steps=10,
    #     planning_interval=10,
    #     verbosity_level=1,
    #     name="Vision_Agent",
    #     description="A vision agent that can interact with webpages and take screenshots.",
    # )
    # vision_agent.prompt_templates["system_prompt"] += helium_instructions
    # Import helium for the agent
    # Create manager agent
    manager_agent = CodeAgent(
        tools=[fetch_task_files],
        managed_agents=[web_agent, mm_agent],
        model=OpenAIServerModel(model_id="gpt-4.1", temperature=0.1),
        max_steps=5,
        planning_interval=10,
        additional_authorized_imports=["pandas", "numpy"],
        verbosity_level=1,
    )
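    # Extend the manager's system prompt with the file-handling and answer-format rules above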
    manager_agent.prompt_templates["system_prompt"] += add_sys_prompt
    return manager_agent
if __name__ == "__main__":
    # Initialize tracing when run directly
    # Choose one provider: "langfuse" (default) or "phoenix"
    initialize_tracing(enabled=True, provider="phoenix")
    # Get agent with tracing already configured
    agent = get_agent()
    agent.visualize()
    # Run agent - SmolagentsInstrumentor will automatically trace the execution
    print("Running agent with tracing enabled...")
    result = agent.run(
        "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
    )
    print(f"Result: {result}")
    print(
        "If using Phoenix: run 'python -m phoenix.server.main serve' and view at http://localhost:6006"
    )
    print("If using Langfuse: view traces at https://cloud.langfuse.com")