import json
import logging
import pdb
import traceback
from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, Type, TypeVar, Union
from PIL import Image, ImageDraw, ImageFont
import os
import base64
import io
import asyncio
import time
import platform

from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
from browser_use.agent.service import Agent
from browser_use.agent.message_manager.utils import convert_input_messages, extract_json_from_model_output, \
    save_conversation
from browser_use.agent.views import (
    ActionResult,
    AgentError,
    AgentHistory,
    AgentHistoryList,
    AgentOutput,
    AgentSettings,
    AgentStepInfo,
    StepMetadata,
    ToolCallingMethod,
)
from browser_use.agent.gif import create_history_gif
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext
from browser_use.browser.views import BrowserStateHistory
from browser_use.controller.service import Controller
from browser_use.telemetry.views import (
    AgentEndTelemetryEvent,
    AgentRunTelemetryEvent,
    AgentStepTelemetryEvent,
)
from browser_use.utils import time_execution_async
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
    BaseMessage,
    HumanMessage,
    AIMessage,
)
from browser_use.browser.views import BrowserState
from browser_use.agent.prompts import PlannerPrompt
from pydantic import BaseModel
from json_repair import repair_json

from src.utils.agent_state import AgentState
from src.utils.replayer import TraceReplayer, load_trace, Drift
from src.utils.recorder import Recorder
from .custom_message_manager import CustomMessageManager, CustomMessageManagerSettings
from .custom_views import CustomAgentOutput, CustomAgentStepInfo, CustomAgentState as CustomAgentStateType, CustomAgentBrain
logger = logging.getLogger(__name__)

Context = TypeVar('Context')


# Define a simple structure for replay task details for clarity
class ReplayTaskDetails:
    def __init__(self, mode: str, trace_path: str, speed: float = 1.0, trace_save_path: Optional[str] = None):
        self.mode = mode
        self.trace_path = trace_path
        self.speed = speed
        self.trace_save_path = trace_save_path  # For saving new traces if needed during an operation that might also record
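

# Illustrative sketch (not part of the original module): a ReplayTaskDetails instance is
# passed to CustomAgent.run() to trigger replay mode instead of an autonomous run.
# The `agent` variable and the trace path below are placeholders.
#
#   replay_task = ReplayTaskDetails(mode="replay", trace_path="traces/session.jsonl", speed=2.0)
#   await agent.run(replay_task)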
class CustomAgent(Agent):
    def __init__(
            self,
            task: str,
            llm: BaseChatModel,
            add_infos: str = "",
            # Optional parameters
            browser: Browser | None = None,
            browser_context: BrowserContext | None = None,
            controller: Controller[Context] = Controller(),
            # Initial agent run parameters
            sensitive_data: Optional[Dict[str, str]] = None,
            initial_actions: Optional[List[Dict[str, Dict[str, Any]]]] = None,
            # Cloud Callbacks
            register_new_step_callback: Callable[['BrowserState', 'AgentOutput', int], Awaitable[None]] | None = None,
            register_done_callback: Callable[['AgentHistoryList'], Awaitable[None]] | None = None,
            register_external_agent_status_raise_error_callback: Callable[[], Awaitable[bool]] | None = None,
            # Agent settings
            use_vision: bool = True,
            use_vision_for_planner: bool = False,
            save_conversation_path: Optional[str] = None,
            save_conversation_path_encoding: Optional[str] = 'utf-8',
            max_failures: int = 3,
            retry_delay: int = 10,
            system_prompt_class: Type[SystemPrompt] = SystemPrompt,
            agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt,
            max_input_tokens: int = 128000,
            validate_output: bool = False,
            message_context: Optional[str] = None,
            generate_gif: bool | str = False,
            available_file_paths: Optional[list[str]] = None,
            include_attributes: list[str] = [
                'title',
                'type',
                'name',
                'role',
                'aria-label',
                'placeholder',
                'value',
                'alt',
                'aria-expanded',
                'data-date-format',
            ],
            max_actions_per_step: int = 10,
            tool_calling_method: Optional[ToolCallingMethod] = 'auto',
            page_extraction_llm: Optional[BaseChatModel] = None,
            planner_llm: Optional[BaseChatModel] = None,
            planner_interval: int = 1,  # Run planner every N steps
            # Inject state
            injected_agent_state: Optional[CustomAgentStateType] = None,
            context: Context | None = None,
    ):
        super().__init__(
            task=task,
            llm=llm,
            browser=browser,
            browser_context=browser_context,
            controller=controller,
            sensitive_data=sensitive_data,
            initial_actions=initial_actions,
            register_new_step_callback=register_new_step_callback,
            register_done_callback=register_done_callback,
            register_external_agent_status_raise_error_callback=register_external_agent_status_raise_error_callback,
            use_vision=use_vision,
            use_vision_for_planner=use_vision_for_planner,
            save_conversation_path=save_conversation_path,
            save_conversation_path_encoding=save_conversation_path_encoding,
            max_failures=max_failures,
            retry_delay=retry_delay,
            system_prompt_class=system_prompt_class,
            max_input_tokens=max_input_tokens,
            validate_output=validate_output,
            message_context=message_context,
            generate_gif=generate_gif,
            available_file_paths=available_file_paths,
            include_attributes=include_attributes,
            max_actions_per_step=max_actions_per_step,
            tool_calling_method=tool_calling_method,
            page_extraction_llm=page_extraction_llm,
            planner_llm=planner_llm,
            planner_interval=planner_interval,
            injected_agent_state=None,
            context=context,
        )
        # Initialize or restore CustomAgentState
        if injected_agent_state is not None and isinstance(injected_agent_state, CustomAgentStateType):
            self.state: CustomAgentStateType = injected_agent_state
        else:
            self.state: CustomAgentStateType = CustomAgentStateType()
            if injected_agent_state is not None:  # Was provided but wrong type
                logger.warning("injected_agent_state was provided but is not of type CustomAgentState. Initializing default CustomAgentState.")

        self.add_infos = add_infos
        # self.replay_event_file is removed, handled by task_input in run()
        self.current_task_memory: str = ""  # Initialize custom memory
        self._message_manager: CustomMessageManager = CustomMessageManager(
            task=self.task,  # self.task is set by super().__init__
            system_message=self.settings.system_prompt_class(
                self.available_actions,
                max_actions_per_step=self.settings.max_actions_per_step,
            ).get_system_message(),
            settings=CustomMessageManagerSettings(
                max_input_tokens=self.settings.max_input_tokens,
                include_attributes=self.settings.include_attributes,
                message_context=self.settings.message_context,
                sensitive_data=sensitive_data,
                available_file_paths=self.settings.available_file_paths,
                agent_prompt_class=agent_prompt_class,
            ),
            state=self.state.message_manager_state,  # Use state from CustomAgentStateType
        )
    ## TODO: Eval the response from LLM
    def _log_response(self, response: CustomAgentOutput) -> None:
        """Log the model's response"""
        if "Success" in response.current_state.evaluation_previous_goal:
            emoji = "✅"
        elif "Failed" in response.current_state.evaluation_previous_goal:
            emoji = "❌"
        else:
            emoji = "🤷"

        logger.info(f"{emoji} Eval: {response.current_state.evaluation_previous_goal}")
        logger.info(f"🧠 New Memory: {response.current_state.important_contents}")
        logger.info(f"🤔 Thought: {response.current_state.thought}")
        logger.info(f"🎯 Next Goal: {response.current_state.next_goal}")
        for i, action in enumerate(response.action):
            logger.info(
                f"🛠️ Action {i + 1}/{len(response.action)}: {action.model_dump_json(exclude_unset=True)}"
            )
    def _setup_action_models(self) -> None:
        """Setup dynamic action models from controller's registry"""
        # Get the dynamic action model from controller's registry
        self.ActionModel = self.controller.registry.create_action_model()
        # Create output model with the dynamic actions
        self.AgentOutput = CustomAgentOutput.type_with_custom_actions(self.ActionModel)
    def update_step_info(
            self, model_output: CustomAgentOutput, step_info: Optional[CustomAgentStepInfo] = None
    ):
        """
        Update step info.

        @dev: New memory from the LLM is stored in important_contents.
        important_contents is used to:
            - Track progress in repetitive tasks (e.g., "for each", "for all", "x times")
            - Store important information found during the task
            - Keep track of status and subresults for long tasks
            - Store extracted content from pages
        """
        if step_info is None:
            return

        step_info.step_number += 1
        important_contents = model_output.current_state.important_contents
        if (
                important_contents
                and "None" not in important_contents
                and important_contents not in step_info.memory
        ):
            step_info.memory += important_contents + "\n"

        logger.info(f"🧠 All Memory: \n{step_info.memory}")
    # hint: get next action from LLM by calling llm.invoke in utils/llm.py
    async def get_next_action(self, input_messages: list[BaseMessage]) -> CustomAgentOutput:
        """Get next action from LLM based on current state"""
        # The _convert_input_messages and cleaned_messages logic seems to have been
        # for a specific format possibly expected by a previous _get_model_output method.
        # We will now directly use self.llm.ainvoke with input_messages (List[BaseMessage]).
        # The logic for removing image_urls, if still needed, would have to be
        # applied to input_messages before this call, or handled by the LLM itself.
        if not self.llm:
            logger.error("LLM not initialized in CustomAgent.")
            # Return an error structure that _parse_model_output can handle.
            # This assumes _parse_model_output can parse a JSON string error.
            # The actual error handling might need to be more robust based on _parse_model_output's capabilities.
            # Also, self.AgentOutput needs to be available here.
            if not hasattr(self, 'AgentOutput') or not self.AgentOutput:
                self._setup_action_models()  # Ensure AgentOutput is set up
            # Construct a raw string that _parse_model_output can work with to produce an AgentOutput.
            # This usually involves a JSON string that looks like what the LLM would output in an error case.
            # For now, an empty actions list and an error message in thought/state might be a way.
            # This is a placeholder for robust error generation.
            error_payload = {
                "current_state": {
                    "evaluation_previous_goal": "Error",
                    "important_contents": "LLM not initialized.",
                    "thought": "Critical error: LLM not initialized.",
                    "next_goal": "Cannot proceed."
                },
                "action": []
            }
            model_output_raw = json.dumps(error_payload)
            return self._parse_model_output(model_output_raw, self.ActionModel)

        try:
            llm_response = await self.llm.ainvoke(input_messages)
            # model_output_raw should be a string, typically the content from the LLM response.
            # The base class's _parse_model_output is expected to handle this string.
            if hasattr(llm_response, 'content') and llm_response.content is not None:
                model_output_raw = str(llm_response.content)
            elif isinstance(llm_response, AIMessage) and llm_response.tool_calls:
                # If content is None but there are tool_calls, the parser might expect
                # a specific string format (e.g., JSON of tool_calls) or to handle AIMessage directly.
                # Forcing it to string for now, assuming the parser can handle stringified tool_calls
                # or that the main information is in .content and tool_calls are metadata for the parser.
                # This part is sensitive to how the base Agent's parser works.
                # A common robust approach is for the LLM to put tool call JSON into the .content string.
                # If not, serializing tool_calls to JSON is a common fallback if the parser expects it.
                try:
                    # Attempt to create a JSON string that might represent the tool calls.
                    # ToolCall objects in LangChain are typically TypedDicts and directly serializable.
                    model_output_raw = json.dumps(llm_response.tool_calls)
                except Exception as serialization_error:
                    logger.warning(f"Could not serialize tool_calls for AIMessage: {serialization_error}. Falling back to str(AIMessage).")
                    model_output_raw = str(llm_response)  # Fallback to full string representation
            else:
                model_output_raw = str(llm_response)  # General fallback
        except Exception as e:
            logger.error(f"Error invoking LLM: {e}", exc_info=True)
            error_payload = {
                "current_state": {
                    "evaluation_previous_goal": "Error",
                    "important_contents": f"LLM invocation error: {str(e)}",
                    "thought": f"LLM invocation error: {str(e)}",
                    "next_goal": "Cannot proceed."
                },
                "action": []
            }
            model_output_raw = json.dumps(error_payload)

        # Parse the model output.
        # Ensure self.ActionModel is available for the parser.
        if not hasattr(self, 'ActionModel') or not self.ActionModel:
            self._setup_action_models()  # Ensure ActionModel is set up for parsing
        parsed_output = self._parse_model_output(model_output_raw, self.ActionModel)
        return parsed_output
    async def _run_planner(self) -> Optional[str]:
        """Run the planner to analyze state and suggest next steps"""
        # Skip planning if no planner_llm is set
        if not self.settings.planner_llm:
            return None

        # Create planner message history using full message history
        planner_messages = [
            PlannerPrompt(self.controller.registry.get_prompt_description()).get_system_message(),
            *self.message_manager.get_messages()[1:],  # Use full message history except the first
        ]

        if not self.settings.use_vision_for_planner and self.settings.use_vision:
            # Type hint for last_state_message was HumanMessage; ensure planner_messages[-1] is HumanMessage or check type
            last_planner_message = planner_messages[-1]
            new_msg_content: Union[str, List[Dict[str, Any]]] = ''  # type for new content
            new_msg = ''  # Initialized so the fallback below never references an undefined name
            if isinstance(last_planner_message, HumanMessage):
                if isinstance(last_planner_message.content, list):
                    processed_content_list = []
                    for item in last_planner_message.content:
                        if isinstance(item, dict):
                            if item.get('type') == 'text':
                                processed_content_list.append({'type': 'text', 'text': item.get('text', '')})
                            # Keep other dict types if necessary, or filter image_url
                            elif item.get('type') == 'image_url':
                                continue  # Skip image
                            else:
                                processed_content_list.append(item)  # Keep other dicts
                        elif isinstance(item, str):
                            processed_content_list.append({'type': 'text', 'text': item})  # Convert str to dict
                    new_msg_content = processed_content_list
                    # Reconstruct new_msg from processed_content_list if needed as a single string
                    temp_new_msg = ""
                    for item_content in new_msg_content:  # new_msg_content is List[Dict[str, Any]]
                        if isinstance(item_content, dict) and item_content.get('type') == 'text':
                            temp_new_msg += item_content.get('text', '')
                    new_msg = temp_new_msg
                elif isinstance(last_planner_message.content, str):
                    new_msg = last_planner_message.content
                planner_messages[-1] = HumanMessage(content=new_msg if new_msg else last_planner_message.content)

        # Get planner output
        response = await self.settings.planner_llm.ainvoke(planner_messages)
        plan = str(response.content)
        # console log plan
        print(f"plan: {plan}")

        last_message_from_manager = self.message_manager.get_messages()[-1]
        if isinstance(last_message_from_manager, HumanMessage):
            # Target last_message_from_manager (which is a HumanMessage) for modification
            if isinstance(last_message_from_manager.content, list):
                # Create a new list for content to avoid modifying immutable parts if any
                new_content_list = []
                modified = False
                for item in last_message_from_manager.content:
                    if isinstance(item, dict) and item.get('type') == 'text':
                        current_text = item.get('text', '')
                        # Create a new dict for the modified text item
                        new_content_list.append({'type': 'text', 'text': current_text + f"\nPlanning Agent outputs plans:\n {plan}\n"})
                        modified = True
                    else:
                        new_content_list.append(item)  # Keep other items as is
                if modified:
                    last_message_from_manager.content = new_content_list
                else:  # If no text item was found to append to, add a new one
                    new_content_list.append({'type': 'text', 'text': f"\nPlanning Agent outputs plans:\n {plan}\n"})
                    last_message_from_manager.content = new_content_list
            elif isinstance(last_message_from_manager.content, str):
                last_message_from_manager.content += f"\nPlanning Agent outputs plans:\n {plan}\n "
            # If no modification happened (e.g. content was not list or str, or list had no text part),
            # one might consider appending a new HumanMessage with the plan, but that changes history structure.

        try:
            plan_json = json.loads(plan.replace("```json", "").replace("```", ""))
            logger.info(f'📋 Plans:\n{json.dumps(plan_json, indent=4)}')

            reasoning_content = getattr(response, "reasoning_content", None)
            if reasoning_content:
                logger.info("🤯 Start Planning Deep Thinking: ")
                logger.info(reasoning_content)
                logger.info("🤯 End Planning Deep Thinking")
        except json.JSONDecodeError:
            logger.info(f'📋 Plans:\n{plan}')
        except Exception as e:
            logger.debug(f'Error parsing planning analysis: {e}')
            logger.info(f'📋 Plans: {plan}')

        return plan
    def _summarize_browsing_history(self, max_steps: int = 5, max_chars: int = 1500) -> str:
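        """
        Summarize the most recent browsing steps (step number, page title, URL, and
        actions taken) in chronological order, capped at max_steps entries and roughly
        max_chars characters, for inclusion in the next prompt.
        """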
        if not hasattr(self.state, 'history') or not self.state.history:
            return "No browsing history yet."

        summary_lines = []
        try:
            # Iterate backwards through history items
            for history_item in reversed(self.state.history.history):
                if len(summary_lines) >= max_steps:
                    break

                page_title = getattr(history_item.state, "page_title", "Unknown Page") if history_item.state else "Unknown Page"
                url = getattr(history_item.state, "url", "Unknown URL") if history_item.state else "Unknown URL"

                actions_summary = []
                current_actions = history_item.model_output.action if history_item.model_output and hasattr(history_item.model_output, 'action') else []
                if current_actions:
                    for act_model in current_actions:  # act_model is ActionModel
                        if hasattr(act_model, 'name'):
                            action_str = f"{act_model.name}"  # type: ignore[attr-defined]
                            args_str = json.dumps(act_model.arguments) if hasattr(act_model, 'arguments') and act_model.arguments else ""  # type: ignore[attr-defined]
                            if args_str and args_str != "{}":
                                action_str += f"({args_str})"
                            actions_summary.append(action_str)

                action_desc = "; ".join(actions_summary) if actions_summary else "No action taken"
                step_num_str = f"Step {history_item.metadata.step_number}" if history_item.metadata and hasattr(history_item.metadata, 'step_number') else "Step Unknown"
                summary_line = f"- {step_num_str}: [{page_title}]({url}) - Action: {action_desc}\n"

                if sum(len(s) for s in summary_lines) + len(summary_line) > max_chars and summary_lines:
                    summary_lines.append("... (history truncated due to length)")
                    break
                summary_lines.append(summary_line)
        except Exception as e:
            logger.error(f"Error summarizing browsing history: {e}")
            return "Error summarizing history."

        if not summary_lines:
            return "No actions recorded in recent history."

        return "Browsing History (Recent Steps):\n" + "".join(reversed(summary_lines))
    async def step(self, base_step_info: Optional[AgentStepInfo] = None) -> None:
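        """
        Execute one agent step: capture the browser state, query the LLM for the next
        actions, execute them, then update memory, telemetry, and history.
        """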
        # The base_step_info comes from the superclass Agent's run loop.
        # We need to create a CustomAgentStepInfo for our custom prompts.
        # if not base_step_info:  # This check might be too strict if super().run() doesn't always provide it.
        #     logger.error("base_step_info not provided to CustomAgent.step by superclass run loop.")
        #     # Decide how to handle this: error out, or create a default?
        #     # For now, let's assume it's provided or self.state is the source of truth for step numbers.
        #     # If super().run() manages step counts, base_step_info.step_number would be relevant.
        #     # If CustomAgent manages its own (self.state.n_steps), use that.
        #     # Let's use self.state for step counts as it seems to be incremented by CustomAgent.

        current_custom_step_info = CustomAgentStepInfo(
            step_number=self.state.n_steps,  # Use self.state.n_steps
            max_steps=self.state.max_steps if self.state.max_steps is not None else 100,  # Get from state or default
            task=self.task,
            add_infos=self.add_infos,
            memory=self.current_task_memory,
        )

        logger.info(f"CustomAgent - Step {current_custom_step_info.step_number}: Starting step.")  # AGENT_HEALTH_LOG
        model_output = None  # Initialize to ensure it's defined for finally
        state = None  # Initialize
        result = None  # Initialize
        tokens = 0  # Initialize
        step_start_time = time.time()

        try:
            logger.info(f"CustomAgent - Step {current_custom_step_info.step_number}: Attempting to get browser state.")  # NEW LOG
            state = await self.browser_context.get_state()
            logger.info(f"CustomAgent - Step {current_custom_step_info.step_number}: Browser state retrieval complete. State is None: {state is None}")  # NEW LOG
            if state:
                logger.debug(f"CustomAgent.step: self.browser_context.get_state() returned. URL: {state.url if state else 'N/A'}")

            # AGENT_HEALTH_LOG - Log raw observation
            if state:
                # Avoid logging full screenshots if they are part of the state.
                # loggable_state_dict = state.model_dump()  # This caused AttributeError
                # Manually construct a dictionary for logging from BrowserState attributes.
                loggable_state_dict = {
                    "url": getattr(state, 'url', 'N/A'),
                    "html_content": getattr(state, 'html_content', '')[:200] + "... (truncated)" if getattr(state, 'html_content', '') else 'N/A',
                    "interactive_elements": f"{len(getattr(state, 'interactive_elements', []))} elements",
                    "page_title": getattr(state, 'page_title', 'N/A'),
                    "focused_element_index": getattr(state, 'focused_element_index', None),
                    # Add other relevant attributes you want to log from BrowserState
                }
                screenshot_data = getattr(state, 'screenshot', None)
                if screenshot_data:
                    loggable_state_dict['screenshot'] = f"Screenshot data present (length: {len(screenshot_data)})"
                else:
                    loggable_state_dict['screenshot'] = "No screenshot data"
                logger.debug(f"CustomAgent - Step {current_custom_step_info.step_number}: Raw observation received: {json.dumps(loggable_state_dict, indent=2)}")
            else:
                logger.debug(f"CustomAgent - Step {current_custom_step_info.step_number}: No observation (state is None).")

            await self._raise_if_stopped_or_paused()

            history_summary_str = self._summarize_browsing_history(max_steps=5, max_chars=1500)
            self.message_manager.add_state_message(
                state=state,
                actions=self.state.last_action,  # type: ignore[call-arg]
                result=self.state.last_result,
                step_info=current_custom_step_info,  # Use the created CustomAgentStepInfo
                use_vision=self.settings.use_vision,
                history_summary=history_summary_str,  # type: ignore[call-arg]
            )

            if self.settings.planner_llm and self.state.n_steps % self.settings.planner_interval == 0:
                await self._run_planner()

            input_messages = self.message_manager.get_messages()
            tokens = self._message_manager.state.history.current_tokens

            # AGENT_HEALTH_LOG - Before LLM call
            logger.debug(f"CustomAgent - Step {current_custom_step_info.step_number}: Preparing to call LLM. Number of input messages: {len(input_messages)}. Tokens: {tokens}")
            # For very detailed debugging, you might log the messages themselves, but be mindful of size/sensitivity:
            # for msg_idx, msg_content in enumerate(input_messages):
            #     logger.trace(f"  Input Message {msg_idx}: Role: {msg_content.type}, Content: {msg_content.content[:200]}")

            try:
                model_output = await self.get_next_action(input_messages)

                # AGENT_HEALTH_LOG - After LLM call
                if model_output:
                    logger.debug(f"CustomAgent - Step {current_custom_step_info.step_number}: LLM response received: {model_output.model_dump_json(indent=2)}")
                else:
                    logger.warning(f"CustomAgent - Step {current_custom_step_info.step_number}: LLM call did not return a valid model_output.")

                self._log_response(model_output)

                # self.state.n_steps is incremented later, AFTER CustomAgentStepInfo was created with the *current* step number.
                # This is fine, as the prompt needs the current step, and n_steps tracks completed/next step.

                if self.register_new_step_callback:
                    await self.register_new_step_callback(state, model_output, self.state.n_steps + 1)  # n_steps will be for the *next* step
                if self.settings.save_conversation_path:
                    target = self.settings.save_conversation_path + f'_{self.state.n_steps + 1}.txt'
                    save_conversation(input_messages, model_output, target,
                                      self.settings.save_conversation_path_encoding)

                if self.model_name != "deepseek-reasoner":
                    self.message_manager._remove_state_message_by_index(-1)  # type: ignore[attr-defined]
                await self._raise_if_stopped_or_paused()
            except Exception as e:
                self.message_manager._remove_state_message_by_index(-1)  # type: ignore[attr-defined]
                raise e

            # AGENT_HEALTH_LOG - Before action execution
            if model_output and model_output.action:
                logger.info(f"CustomAgent - Step {current_custom_step_info.step_number}: Attempting actions: {model_output.action}")
            else:
                logger.info(f"CustomAgent - Step {current_custom_step_info.step_number}: No actions to perform based on LLM output.")

            # AGENT_HEALTH_LOG - Wrap action execution
            try:
                result = await self.multi_act(model_output.action)  # type: ignore
                logger.info(f"CustomAgent - Step {current_custom_step_info.step_number}: Actions executed. Result: {result}")
            except Exception as e_action:
                logger.error(f"CustomAgent - Step {current_custom_step_info.step_number}: Error during action execution (multi_act): {e_action}", exc_info=True)
                # Decide how to set result or if error handling in _handle_step_error is sufficient.
                # For now, this log will capture it, and the main exception handler will take over.
                raise  # Re-raise to be caught by the outer try-except

            # Update step_info's memory (which is current_custom_step_info) with model output
            self.update_step_info(model_output, current_custom_step_info)  # type: ignore
            # Persist the updated memory for the next step
            self.current_task_memory = current_custom_step_info.memory

            # Increment n_steps after all actions for the current step are done and memory is updated.
            self.state.n_steps += 1

            for ret_ in result:
                if ret_.extracted_content and "Extracted page" in ret_.extracted_content:
                    if ret_.extracted_content[:100] not in self.state.extracted_content:
                        self.state.extracted_content += ret_.extracted_content
            self.state.last_result = result
            self.state.last_action = model_output.action
            if len(result) > 0 and result[-1].is_done:
                if not self.state.extracted_content:
                    # If step_info's memory was used for CustomAgentStepInfo it might be outdated here.
                    # Use current_task_memory which should be the most up-to-date.
                    self.state.extracted_content = self.current_task_memory
                result[-1].extracted_content = self.state.extracted_content
                logger.info(f"📄 Result: {result[-1].extracted_content}")

            self.state.consecutive_failures = 0

        except InterruptedError:
            logger.debug('Agent paused')
            self.state.last_result = [
                ActionResult(
                    error='The agent was paused - now continuing actions might need to be repeated',
                    include_in_memory=True,
                )
            ]
            # AGENT_HEALTH_LOG - End of step (paused)
            logger.info(f"CustomAgent - Step {current_custom_step_info.step_number}: Paused.")
            return
        except Exception as e:
            result = await self._handle_step_error(e)
            self.state.last_result = result
            # AGENT_HEALTH_LOG - End of step (exception)
            logger.error(f"CustomAgent - Step {current_custom_step_info.step_number}: Ended with exception: {e}", exc_info=True)
        finally:
            logger.debug("Entering CustomAgent.step finally block.")  # DEBUG
            step_end_time = time.time()
            actions_telemetry = [a.model_dump(exclude_unset=True) for a in model_output.action] if model_output and hasattr(model_output, 'action') and model_output.action else []
            logger.debug("Attempting to capture telemetry.")  # DEBUG
            self.telemetry.capture(
                AgentStepTelemetryEvent(
                    agent_id=self.state.agent_id,
                    step=self.state.n_steps,  # Note: n_steps was already incremented
                    actions=actions_telemetry,
                    consecutive_failures=self.state.consecutive_failures,
                    step_error=[r.error for r in result if r.error] if result else ['No result after step execution'],  # Modified for clarity
                )
            )
            logger.debug("Telemetry captured.")  # DEBUG
            if not result:
                logger.debug("No result from multi_act, returning from step.")  # DEBUG
                return

            if state and model_output:
                logger.debug(f"Calling _make_history_item with model_output: {type(model_output)}, state: {type(state)}, result: {type(result)}")  # DEBUG
                metadata = StepMetadata(
                    step_number=self.state.n_steps,  # n_steps was already incremented
                    step_start_time=step_start_time,
                    step_end_time=step_end_time,
                    input_tokens=tokens,
                )
                self._make_history_item(model_output, state, result, metadata)
                logger.debug("_make_history_item finished.")  # DEBUG
            else:
                logger.debug("Skipping _make_history_item due to no state or model_output.")  # DEBUG

            # Log final state before returning from step
            logger.debug(f"CustomAgent.step state before return: n_steps={self.state.n_steps}, stopped={self.state.stopped}, paused={self.state.paused}, consecutive_failures={self.state.consecutive_failures}, last_result_count={len(self.state.last_result) if self.state.last_result else 0}")
            if self.state.last_result:
                for i, res_item in enumerate(self.state.last_result):
                    logger.debug(f"  last_result[{i}]: error='{res_item.error}', is_done={res_item.is_done}")
            logger.debug("Exiting CustomAgent.step finally block.")  # DEBUG
            # AGENT_HEALTH_LOG - End of step (finally block)
            logger.info(f"CustomAgent - Step {current_custom_step_info.step_number}: Finished step processing (finally block).")
    # New: modified to accept ReplayTaskDetails in replay mode
    async def run(self, task_input: Union[str, ReplayTaskDetails], max_steps: int = 100) -> Optional[AgentHistoryList]:
        """
        Run the agent to complete the task.
        If task_input is ReplayTaskDetails, it runs in replay mode.
        Otherwise, it runs in autonomous mode.
        """
        self.state.start_time = time.time()
        self.state.task_input = task_input
        self.state.max_steps = max_steps

        if isinstance(task_input, ReplayTaskDetails) and task_input.mode == "replay":
            logger.info(f"🚀 Starting agent in REPLAY mode for trace: {task_input.trace_path}")
            if not self.browser_context:
                logger.error("Replay mode: Browser context is not available.")
                return None

            # Ensure there is a page to replay on
            if not self.page or self.page.is_closed():
                logger.info("Replay mode: self.page is not valid. Attempting to get/create a page.")
                playwright_context = getattr(self.browser_context, "playwright_context", None)
                if playwright_context and playwright_context.pages:
                    self.page = playwright_context.pages[0]
                    await self.page.bring_to_front()
                    logger.info(f"Replay mode: Using existing page: {self.page.url}")
                elif playwright_context:
                    self.page = await playwright_context.new_page()
                    logger.info(f"Replay mode: Created new page: {self.page.url}")
                else:
                    logger.error("Replay mode: playwright_context is None, cannot create or get a page.")
                    return None

            try:
                trace_events = load_trace(task_input.trace_path)
                if not trace_events:
                    logger.warning(f"Replay mode: No events found in trace file: {task_input.trace_path}")
                    return None
                replayer = TraceReplayer(self.page, trace_events)
                logger.info(f"Replayer initialized. Starting playback at speed: {task_input.speed}x")
                await replayer.play(speed=task_input.speed)
                logger.info(f"🏁 Replay finished for trace: {task_input.trace_path}")
            except Drift as d:
                drift_message = getattr(d, "message", str(d))
                logger.error(f"💣 DRIFT DETECTED during replay of {task_input.trace_path}: {drift_message}")
                if d.event:
                    logger.error(f"  Drift occurred at event: {json.dumps(d.event)}")
            except FileNotFoundError:
                logger.error(f"Replay mode: Trace file not found at {task_input.trace_path}")
            except Exception as e:
                logger.exception(f"Replay mode: An unexpected error occurred during replay of {task_input.trace_path}")
            finally:
                # Decide if browser/context should be closed after replay based on agent settings (e.g., keep_browser_open).
                # For now, let's assume it follows the general agent cleanup logic if applicable, or stays open.
                pass
            return None  # Replay mode doesn't return standard agent history

        # Autonomous mode logic continues below
        elif isinstance(task_input, str):
            if task_input != self.task:
                logger.info(f"Autonomous run: Task updated from '{self.task}' to '{task_input}'")
                self.task = task_input
                # self._message_manager.task = self.task  # add_new_task will set this

                # Clear existing messages from the history
                if hasattr(self._message_manager.state, 'history') and hasattr(self._message_manager.state.history, 'messages') and isinstance(self._message_manager.state.history.messages, list):
                    logger.debug("Clearing message history list as task has changed.")
                    self._message_manager.state.history.messages.clear()
                    # Also reset token count if possible/necessary, assuming it's managed alongside messages
                    if hasattr(self._message_manager.state.history, 'current_tokens'):
                        self._message_manager.state.history.current_tokens = 0
                else:
                    logger.warning("Could not clear message history messages list for new task.")

                # Inform the message manager about the new task
                if hasattr(self._message_manager, "add_new_task") and callable(self._message_manager.add_new_task):
                    logger.debug(f"Calling message_manager.add_new_task() with new task: {self.task[:70]}...")
                    self._message_manager.add_new_task(self.task)
                    # add_infos is not directly used by add_new_task, but could be part of the task string construction if needed earlier.
                    # For now, we assume self.task (already updated from task_input) contains all necessary info.
                else:
                    logger.warning("CustomMessageManager does not have a callable 'add_new_task' method. New task may not be properly set in message manager.")

            logger.info(f"Starting autonomous agent run for task: '{self.task}', max_steps: {max_steps}")
            logger.debug(f"CustomAgent: About to call super().run('{self.task}', {max_steps}, {self.controller})")
            history: Optional[AgentHistoryList] = await super().run(self.task, max_steps=max_steps, controller=self.controller)
            logger.debug(f"CustomAgent: super().run() returned. History is None: {history is None}")
            if history and hasattr(history, 'history'):
                logger.debug(f"CustomAgent: History length: {len(history.history) if history.history else 0}")  # DEBUG

            # AGENT_HEALTH_LOG - After super().run()
            logger.info(f"CustomAgent - Autonomous run finished. Result from super().run(): {'History object received' if history else 'No history object (None)'}")

            # After an autonomous run, Recorder history persistence is handled by the UI's explicit stop recording.
            # The agent itself, when run with a string task, should not be responsible for this.
            # Removing the block that attempted to save Recorder traces here.
            return history
    def _convert_input_messages(self, messages: List[BaseMessage]) -> List[Dict[str, Any]]:
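        """Convert LangChain message objects into plain role/content dicts, preserving tool_calls on AIMessages."""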
        converted_messages = []
        for msg in messages:
            msg_item = {}
            if isinstance(msg, HumanMessage):
                msg_item["role"] = "user"
                msg_item["content"] = msg.content
            elif isinstance(msg, AIMessage):
                msg_item["role"] = "assistant"
                # Handle tool calls if present
                if msg.tool_calls:
                    msg_item["content"] = None  # Standard AIMessage content is None if tool_calls are present
                    msg_item["tool_calls"] = msg.tool_calls
                else:
                    msg_item["content"] = msg.content
            elif hasattr(msg, 'role') and hasattr(msg, 'content'):  # For generic BaseMessage with role and content
                msg_item["role"] = getattr(msg, "role", "unknown")
                msg_item["content"] = getattr(msg, "content", "")
            else:
                # Fallback or skip if message type is not directly convertible
                logger.warning(f"Skipping message of unhandled type: {type(msg)}")
                continue

            # Add reasoning_content for tool_code type messages if available
            if msg_item.get("type") == "tool_code" and isinstance(msg, AIMessage) and hasattr(msg, 'reasoning_content'):
                reasoning_content = getattr(msg, "reasoning_content", None)
                if reasoning_content:
                    msg_item["reasoning_content"] = reasoning_content

            converted_messages.append(msg_item)
        return converted_messages
    def _parse_model_output(self, output: str, ActionModel: Type[BaseModel]) -> CustomAgentOutput:
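        """
        Parse the raw LLM output string into a CustomAgentOutput, repairing malformed
        JSON where possible and falling back to an error "brain" with an empty action
        list when parsing fails.
        """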
        try:
            if not hasattr(self, 'AgentOutput') or not self.AgentOutput:
                self._setup_action_models()  # Sets self.AgentOutput

            extracted_output: Union[str, Dict[Any, Any]] = extract_json_from_model_output(output)

            parsed_data: CustomAgentOutput
            if isinstance(extracted_output, dict):
                # If it's already a dict, assume it's valid JSON and Pydantic can handle it
                parsed_data = self.AgentOutput.model_validate(extracted_output)
            elif isinstance(extracted_output, str):
                # If it's a string, try to repair it then parse
                repaired_json_string = repair_json(extracted_output, return_objects=False)
                if not isinstance(repaired_json_string, str):
                    logger.error(f"repair_json with return_objects=False did not return a string. Got: {type(repaired_json_string)}. Falling back to original extracted string.")
                    # Fallback or raise error. Forcing to string for now.
                    repaired_json_string = str(extracted_output)  # Fall back to the original extracted string if repair fails badly
                parsed_data = self.AgentOutput.model_validate_json(repaired_json_string)
            else:
                raise ValueError(f"Unexpected output type from extract_json_from_model_output: {type(extracted_output)}")

            # Ensure the final parsed_data is indeed CustomAgentOutput
            if not isinstance(parsed_data, CustomAgentOutput):
                logger.warning(f"Parsed data is type {type(parsed_data)}, not CustomAgentOutput. Attempting conversion or default.")
                # This might happen if self.AgentOutput.model_validate/model_validate_json doesn't return the precise
                # CustomAgentOutput type but a compatible one (e.g. base AgentOutput).
                # We need to ensure it has the CustomAgentBrain structure.
                action_list = getattr(parsed_data, 'action', [])
                current_state_data = getattr(parsed_data, 'current_state', None)

                if isinstance(current_state_data, CustomAgentBrain):
                    parsed_data = self.AgentOutput(action=action_list, current_state=current_state_data)
                elif isinstance(current_state_data, dict):
                    try:
                        brain = CustomAgentBrain(**current_state_data)
                        parsed_data = self.AgentOutput(action=action_list, current_state=brain)
                    except Exception as brain_ex:
                        logger.error(f"Could not construct CustomAgentBrain from dict: {brain_ex}. Falling back to error brain.")
                        error_brain = CustomAgentBrain(
                            evaluation_previous_goal="Error",
                            important_contents="Failed to reconstruct agent brain during parsing.",
                            thought="Critical error in parsing agent state.",
                            next_goal="Retry or report error."
                        )
                        parsed_data = self.AgentOutput(action=action_list, current_state=error_brain)
                else:
                    logger.error("current_state is missing or not CustomAgentBrain/dict. Falling back to error brain.")
                    error_brain = CustomAgentBrain(
                        evaluation_previous_goal="Error",
                        important_contents="Missing or invalid agent brain during parsing.",
                        thought="Critical error in parsing agent state.",
                        next_goal="Retry or report error."
                    )
                    # Ensure action_list is compatible if it came from a different model type.
                    # For simplicity, if we have to create an error brain, we might also want to clear actions
                    # or ensure they are valid ActionModel instances. For now, passing them as is.
                    parsed_data = self.AgentOutput(action=action_list, current_state=error_brain)

            return parsed_data
        except Exception as e:
            logger.error(f"Error parsing model output: {e}\nRaw output:\n{output}", exc_info=True)
            if not hasattr(self, 'AgentOutput') or not self.AgentOutput:
                self._setup_action_models()  # Ensure self.AgentOutput is set up for fallback
            error_brain = CustomAgentBrain(
                evaluation_previous_goal="Error",
                important_contents=f"Parsing error: {str(e)}",
                thought=f"Failed to parse LLM output. Error: {str(e)}",
                next_goal="Retry or report error."
            )
            return self.AgentOutput(action=[], current_state=error_brain)