File size: 5,103 Bytes
3e11f9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
"""
Browser MCP Server

This module provides MCP server functionality for browser automation and interaction.
It handles tasks such as web scraping, form submission, and automated browsing.

Main functions:
- browse_url: Opens a URL and performs specified actions
- submit_form: Fills and submits forms on web pages
"""

import json
import os
import sys
import traceback

from browser_use import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from mcp.server.fastmcp import FastMCP
from pydantic import Field

from aworld.logs.util import logger

mcp = FastMCP("browser-server")
browser_system_prompt = """
===== NAVIGATION STRATEGY =====
1. START: Navigate to the most authoritative source for this information
   - For general queries: Use Google with specific search terms
   - For known sources: Go directly to the relevant website

2. EVALUATE: Assess each page methodically
   - Scan headings and highlighted text first
   - Look for data tables, charts, or official statistics
   - Check publication dates for timeliness

3. EXTRACT: Capture exactly what's needed
   - Take screenshots of visual evidence (charts, tables, etc.)
   - Copy precise text that answers the query
   - Note source URLs for citation

4. DOWNLOAD: Save the most relevant file to local path for further processing
   - Save the text if possible for futher text reading and analysis
   - Save the image if possible for futher image reasoning analysis
   - Save the pdf if possible for futher pdf reading and analysis

5. ROBOT DETECTION:
   - If the page is a robot detection page, abort immediately
   - Navigate to the most authoritative source for similar information instead

===== EFFICIENCY GUIDELINES =====
- Use specific search queries with key terms from the task
- Avoid getting distracted by tangential information
- If blocked by paywalls, try archive.org or similar alternatives
- Document each significant finding clearly and concisely

Your goal is to extract precisely the information needed with minimal browsing steps.
"""


@mcp.tool(description="Perform browser actions using the browser-use package.")
async def browser_use(
    task: str = Field(description="The task to perform using the browser."),
) -> str:
    """
    Perform browser actions using the browser-use package.
    Args:
        task (str): The task to perform using the browser.
    Returns:
        str: The result of the browser actions.
    """
    browser = Browser(
        config=BrowserConfig(
            headless=False,
            new_context_config=BrowserContextConfig(
                disable_security=True,
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
                minimum_wait_page_load_time=10,
                maximum_wait_page_load_time=30,
            ),
        )
    )
    browser_context = BrowserContext(
        config=BrowserContextConfig(
            trace_path=os.getenv("LOG_FILE_PATH" + "/browser_trace.log")
        ),
        browser=browser,
    )
    agent = Agent(
        task=task,
        llm=ChatOpenAI(
            model=os.getenv("LLM_MODEL_NAME"),
            api_key=os.getenv("LLM_API_KEY"),
            base_url=os.getenv("LLM_BASE_URL"),
            model_name=os.getenv("LLM_MODEL_NAME"),
            openai_api_base=os.getenv("LLM_BASE_URL"),
            openai_api_key=os.getenv("LLM_API_KEY"),
            temperature=1.0,
        ),
        browser_context=browser_context,
        extend_system_message=browser_system_prompt,
    )
    try:
        browser_execution: AgentHistoryList = await agent.run(max_steps=50)
        if (
            browser_execution is not None
            and browser_execution.is_done()
            and browser_execution.is_successful()
        ):
            exec_trace = browser_execution.extracted_content()
            logger.info(
                ">>> 🌏 Browse Execution Succeed!\n"
                f">>> 💡 Result: {json.dumps(exec_trace, ensure_ascii=False, indent=4)}\n"
                ">>> 🌏 Browse Execution Succeed!\n"
            )
            return browser_execution.final_result()
        else:
            return f"Browser execution failed for task: {task}"
    except Exception as e:
        logger.error(f"Browser execution failed: {traceback.format_exc()}")
        return f"Browser execution failed for task: {task} due to {str(e)}"
    finally:
        await browser.close()
        logger.info("Browser Closed!")


def main():
    load_dotenv()
    print("Starting Browser MCP Server...", file=sys.stderr)
    mcp.run(transport="stdio")


# Make the module callable
def __call__():
    """
    Make the module callable for uvx.
    This function is called when the module is executed directly.
    """
    main()


sys.modules[__name__].__call__ = __call__

# Run the server when the script is executed directly
if __name__ == "__main__":
    main()