Spaces:
Sleeping
Sleeping
File size: 18,583 Bytes
94ff58a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 |
import asyncio
import pdb
import os
from pathlib import Path
from typing import Optional, Union
from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import (
BrowserContext as PlaywrightBrowserContext,
)
from playwright.async_api import (
Playwright,
async_playwright,
)
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContextConfig
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
import logging
from src.browser.custom_context import CustomBrowserContext
from src.browser.custom_context_config import CustomBrowserContextConfig as AppCustomBrowserContextConfig
logger = logging.getLogger(__name__)
class CustomBrowser(Browser):
    """Browser wrapper that holds either a Playwright Browser or a persistent BrowserContext.

    ``async_init`` tries, in order:

    1. attaching over CDP to ``config.cdp_url``;
    2. launching Google Chrome from ``config.chrome_instance_path`` (as a
       persistent context when ``--user-data-dir=...`` is present in
       ``config.extra_chromium_args``);
    3. launching Playwright's default Chromium.

    Whatever succeeds is stored in ``_actual_playwright_browser``.
    """

    # Flag whose value selects a persistent-profile launch.
    _USER_DATA_DIR_FLAG = "--user-data-dir="

    # Internal attribute to store the actual Playwright Browser or BrowserContext instance.
    # None until async_init succeeds, and again after close().
    _actual_playwright_browser: Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]] = None
    # BrowserContext returned by launch_persistent_context (persistent launch only).
    _playwright_browser_context_manager: Optional[PlaywrightBrowserContext] = None

    @property
    def resolved_playwright_browser(self) -> Optional[PlaywrightBrowser]:
        """Return the held object only if it is a Playwright ``Browser``.

        Returns None when nothing is initialized or when a persistent
        ``BrowserContext`` is held instead.
        """
        held = self._actual_playwright_browser
        return held if isinstance(held, PlaywrightBrowser) else None

    async def async_init(self):
        """Start Playwright if needed and acquire a browser or persistent context.

        Connection and launch errors (``Exception`` subclasses) are logged and
        swallowed so the next strategy can run; if every strategy fails,
        ``_actual_playwright_browser`` is left as None. Cancellation and
        interpreter-exit signals are not swallowed.
        """
        # Reuse an already started driver: the lazy-init retry paths call this
        # method again after a failed attempt, and starting a second driver
        # would orphan the first one.
        playwright = getattr(self, "playwright", None)
        if playwright is None:
            playwright = await async_playwright().start()
            self.playwright = playwright
        self._actual_playwright_browser = None

        if self.config.cdp_url:
            self._actual_playwright_browser = await self._connect_over_cdp(playwright)

        chrome_path = self.config.chrome_instance_path
        # NOTE: only paths containing the literal "Google Chrome" take this
        # branch; any other path falls through to the default Chromium launch.
        if self._actual_playwright_browser is None and chrome_path and "Google Chrome" in chrome_path:
            self._actual_playwright_browser = await self._launch_chrome(playwright)

        if self._actual_playwright_browser is None:
            self._actual_playwright_browser = await self._launch_default_chromium(playwright)

        self._actual_playwright_browser = self._validated(self._actual_playwright_browser)

    async def _connect_over_cdp(self, playwright: Playwright) -> Optional[PlaywrightBrowser]:
        """Attach to the browser at ``config.cdp_url``; return None on failure."""
        cdp_url = self.config.cdp_url
        logger.debug(f"Attempting to connect to existing browser via CDP: {cdp_url}")
        try:
            browser = await playwright.chromium.connect_over_cdp(cdp_url)
            if not browser:
                logger.warning(
                    f"Playwright's connect_over_cdp returned None or a falsy value ({browser}) without raising an exception. Treating as connection failure."
                )
                return None
            logger.info(f"Successfully connected to browser over CDP: {browser}")
            if not browser.contexts:
                logger.warning(
                    "Connected to browser over CDP, but no contexts found. A page/tab might need to be open."
                )
            return browser
        except Exception as exc:
            # Exception, not BaseException: asyncio.CancelledError,
            # KeyboardInterrupt and SystemExit must propagate instead of being
            # turned into a silent fallback launch.
            logger.warning(
                f"Failed to connect to browser over CDP ({cdp_url}). Will launch a new browser instance instead. Error type: {type(exc)}, Error: {exc}", exc_info=True
            )
            return None

    async def _launch_chrome(
        self, playwright: Playwright
    ) -> Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]]:
        """Launch Chrome from ``config.chrome_instance_path``; return None on failure.

        With a non-empty ``--user-data-dir=`` argument a persistent context is
        launched (Playwright returns a ``BrowserContext``); otherwise a
        regular ``Browser`` is launched via ``executable_path``.
        """
        flag = self._USER_DATA_DIR_FLAG
        extra_args = list(getattr(self.config, "extra_chromium_args", None) or [])

        user_data_dir = None
        for arg in extra_args:
            if arg.startswith(flag):
                # Slice off the flag rather than split on '=' so that a
                # profile path containing '=' is kept intact.
                user_data_dir = arg[len(flag):]
                break

        if user_data_dir:
            # Playwright takes the profile directory as a parameter, so the
            # flag is removed from the raw argument list.
            launch_args = [arg for arg in extra_args if not arg.startswith(flag)]
            logger.debug(f"Launching persistent Chrome context with UserDataDir: {user_data_dir} and args: {launch_args}")
            try:
                context = await playwright.chromium.launch_persistent_context(
                    user_data_dir=user_data_dir,
                    headless=self.config.headless,
                    args=launch_args,
                    channel="chrome",
                )
            except Exception as e_persistent_launch:
                logger.error(f"Failed to launch persistent Chrome context with UserDataDir '{user_data_dir}': {e_persistent_launch}", exc_info=True)
                self._playwright_browser_context_manager = None
                return None
            self._playwright_browser_context_manager = context
            logger.info(f"Launched persistent Chrome. Stored BrowserContext: {context}")
            return context

        chrome_path = self.config.chrome_instance_path
        logger.debug(f"Attempting to launch new Chrome browser instance via executable_path: {chrome_path} with args: {self.config.extra_chromium_args}")
        try:
            browser = await playwright.chromium.launch(
                executable_path=chrome_path,
                headless=self.config.headless,
                args=self.config.extra_chromium_args,
            )
            if browser:
                logger.info(f"Launched Chrome via executable_path. Browser: {browser}, Connected: {browser.is_connected()}")
                return browser
            logger.warning(f"Launching Chrome via executable_path '{chrome_path}' returned a None browser object.")
            return None
        except Exception as e_chrome_launch:
            logger.error(f"Failed to launch Chrome via executable_path '{chrome_path}': {e_chrome_launch}", exc_info=True)
            return None

    async def _launch_default_chromium(self, playwright: Playwright) -> Optional[PlaywrightBrowser]:
        """Launch Playwright's default Chromium as the last resort; return None on failure."""
        logger.debug(f"Launching new default (Chromium) browser instance as fallback, with args: {self.config.extra_chromium_args}")
        try:
            browser = await playwright.chromium.launch(
                headless=self.config.headless,
                args=self.config.extra_chromium_args,
            )
            if browser:
                logger.info(f"Launched default Chromium as fallback. Browser: {browser}, Connected: {browser.is_connected()}")
                return browser
            logger.warning("Launching default Chromium as fallback returned a None browser object.")
            return None
        except Exception as e_default_launch:
            logger.error(f"Failed to launch default Chromium as fallback: {e_default_launch}", exc_info=True)
            return None

    def _validated(
        self, candidate: Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]]
    ) -> Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]]:
        """Return ``candidate`` if it is a usable Browser/BrowserContext, else None (logged)."""
        if not candidate:
            logger.error("All browser initialization attempts failed. Final state of self._actual_playwright_browser is None.")
            return None

        if isinstance(candidate, PlaywrightBrowser):
            if candidate.is_connected():
                logger.info(f"Playwright Browser successfully initialized and connected: {candidate}")
                return candidate
            logger.error(f"Playwright Browser initialized but not connected. Browser: {candidate}")
            return None

        if isinstance(candidate, PlaywrightBrowserContext):
            try:
                # Reading .pages is used as a liveness probe for the context.
                if candidate.pages is not None:
                    logger.info(f"Playwright BrowserContext successfully initialized (from persistent launch): {candidate}")
                    return candidate
                logger.error(f"Playwright BrowserContext initialized, but .pages is None. Context: {candidate}")
            except Exception as e_context_check:
                logger.error(f"Playwright BrowserContext is invalid or closed: {e_context_check}. Context: {candidate}", exc_info=True)
            return None

        logger.error(f"self._actual_playwright_browser is of unexpected type: {type(candidate)}. Value: {candidate}")
        return None

    async def reuse_existing_context(
        self,
        config: Optional[AppCustomBrowserContextConfig] = None,
    ) -> Optional[CustomBrowserContext]:
        """Wrap an already existing Playwright context in a ``CustomBrowserContext``.

        Initializes the browser lazily if needed. For a ``Browser`` the first
        context that has at least one page is chosen, falling back to the
        first context. For a persistent launch the held context is used
        directly.

        Args:
            config: Configuration for the wrapper; a default
                ``AppCustomBrowserContextConfig`` is created when omitted.

        Returns:
            The wrapped context, or None when the browser cannot be
            initialized or the held object has an unexpected type.

        Raises:
            RuntimeError: The connected browser exposes no contexts at all.
        """
        if not self._actual_playwright_browser:
            logger.warning("reuse_existing_context called on uninitialized browser. Attempting init.")
            await self.async_init()
            if not self._actual_playwright_browser:
                logger.error("Browser not initialized after attempt in reuse_existing_context. Cannot reuse context.")
                return None

        held = self._actual_playwright_browser
        base_ctx_to_wrap = None
        if isinstance(held, PlaywrightBrowser):
            contexts = held.contexts
            logger.debug(f"Connected PlaywrightBrowser has {len(contexts)} contexts for potential reuse.")
            # Every context and page is logged, so the loop does not stop at
            # the first match.
            for i, ctx in enumerate(contexts):
                logger.debug(f" Context [{i}]: {ctx} has {len(ctx.pages)} pages.")
                for j, page in enumerate(ctx.pages):
                    logger.debug(f" Page [{j}] URL: {page.url}")
                if base_ctx_to_wrap is None and len(ctx.pages) > 0:
                    base_ctx_to_wrap = ctx
                    logger.debug(f"Selecting Context [{i}] as it has pages.")

            if base_ctx_to_wrap is None:
                if not contexts:
                    logger.error("No contexts found in the connected PlaywrightBrowser after attempting to connect.")
                    raise RuntimeError("No contexts found in existing browser to reuse after connection.")
                logger.warning("No context with pages found. Defaulting to the first context.")
                base_ctx_to_wrap = contexts[0]
        elif isinstance(held, PlaywrightBrowserContext):
            base_ctx_to_wrap = held
            logger.debug(f"Reusing existing PlaywrightBrowserContext directly with {len(base_ctx_to_wrap.pages)} pages. Context: {base_ctx_to_wrap}")
        else:
            logger.error(f"_actual_playwright_browser is of unexpected type: {type(held)}. Cannot determine context to reuse.")
            return None

        config_to_use = config if config is not None else AppCustomBrowserContextConfig()
        logger.debug(f"Wrapping Playwright context {base_ctx_to_wrap} with CustomBrowserContext using config: {config_to_use}")
        return CustomBrowserContext.from_existing(
            pw_context=base_ctx_to_wrap,
            browser=self,
            config=config_to_use,
        )

    async def new_context(
        self,
        config: Optional[AppCustomBrowserContextConfig] = None,
    ) -> "CustomBrowserContext":
        """Create a ``CustomBrowserContext`` on top of the held browser.

        For a ``Browser`` a fresh Playwright context is created from
        ``config`` (video recording, viewport). For a persistent launch the
        single persistent context is wrapped instead, because no isolated
        context can be created from it here.

        Args:
            config: Context configuration. A new default
                ``AppCustomBrowserContextConfig`` is built per call when
                omitted, so calls never share one default instance.

        Raises:
            RuntimeError: The browser could not be initialized.
            TypeError: The held object is neither a Browser nor a BrowserContext.
        """
        if config is None:
            config = AppCustomBrowserContextConfig()

        if not self._actual_playwright_browser:
            # Recoverable: initialization is attempted right away, hence a
            # warning, consistent with reuse_existing_context.
            logger.warning("Playwright browser/context holder not initialized. Call async_init() first.")
            await self.async_init()
            if not self._actual_playwright_browser:
                raise RuntimeError("Failed to initialize Playwright browser/context holder in new_context.")

        held = self._actual_playwright_browser
        if isinstance(held, PlaywrightBrowserContext):
            logger.warning("Creating new context from an existing persistent PlaywrightBrowserContext. This might indicate an architectural issue if multiple isolated contexts are expected from a persistent launch.")
            playwright_context_to_wrap = held
            logger.debug(f"Reusing persistent Playwright context: {playwright_context_to_wrap}")
        elif isinstance(held, PlaywrightBrowser):
            window = config.browser_window_size
            options = {}
            if config.save_recording_path:
                options["record_video_dir"] = config.save_recording_path
                # Only pin the video size when a window size is configured;
                # indexing a missing size would raise before launch.
                if window:
                    options["record_video_size"] = {"width": window["width"], "height": window["height"]}
            if not config.no_viewport and window:
                options["viewport"] = {"width": window["width"], "height": window["height"]}
            else:
                options["no_viewport"] = True
            logger.debug(f"Creating new Playwright context with options: {options} from PlaywrightBrowser: {held}")
            playwright_context_to_wrap = await held.new_context(**options)
        else:
            logger.error(f"_actual_playwright_browser is of unexpected type: {type(held)}. Cannot create new context.")
            raise TypeError("_actual_playwright_browser is neither PlaywrightBrowser nor PlaywrightBrowserContext.")

        # Class-identity diagnostics go through the logger instead of stdout.
        logger.debug(f"DEBUG_INIT: ID of CustomBrowserContext class in custom_browser.py: {id(CustomBrowserContext)}")
        custom_context = CustomBrowserContext(
            pw_context=playwright_context_to_wrap,
            browser=self,
            config=config,
        )
        logger.debug(f"DEBUG_INIT: Type of CREATED context in custom_browser.py: {type(custom_context)}, ID of its type: {id(type(custom_context))}")

        if config.trace_path and playwright_context_to_wrap:
            try:
                await playwright_context_to_wrap.tracing.start(screenshots=True, snapshots=True, sources=True)
                logger.debug(f"Context tracing started. Saving to host path: {config.trace_path}")
            except Exception as e:
                # Tracing is best-effort; the context stays usable without it.
                logger.error(f"Failed to start tracing: {e}")
        return custom_context

    async def close(self):
        """Close the held browser/context and stop Playwright.

        The persistent context, if any, is closed exactly once even though it
        is referenced by two attributes. Playwright is stopped even when
        closing the browser or context raises; the exception still
        propagates afterwards.
        """
        # Capture both references up front so the identity check below still
        # works after the attributes are cleared.
        persistent_context = self._playwright_browser_context_manager
        held = self._actual_playwright_browser
        self._playwright_browser_context_manager = None
        self._actual_playwright_browser = None

        try:
            if persistent_context is not None:
                logger.info("Closing persistent Playwright context manager (which is a BrowserContext).")
                await persistent_context.close()

            if held is None:
                pass
            elif held is persistent_context:
                logger.info("Actual browser/context object was the persistent context manager and is already closed.")
            elif isinstance(held, PlaywrightBrowserContext):
                logger.info("Closing PlaywrightBrowserContext stored in _actual_playwright_browser.")
                await held.close()
            elif isinstance(held, PlaywrightBrowser):
                if held.is_connected():
                    logger.info("Closing PlaywrightBrowser stored in _actual_playwright_browser.")
                    await held.close()
                else:
                    logger.info("PlaywrightBrowser in _actual_playwright_browser is not connected or already closed.")
            else:
                logger.info(f"_actual_playwright_browser ({type(held)}) is not a PlaywrightBrowser or PlaywrightBrowserContext that can be closed here, or is already closed.")
        finally:
            playwright = getattr(self, "playwright", None)
            if playwright is not None:
                logger.info("Stopping Playwright.")
                # Drop the reference first so a failed stop() cannot leave a
                # half-stopped driver behind for a later async_init to reuse.
                self.playwright = None
                await playwright.stop()
        logger.info("CustomBrowser closed.")