Spaces:
Sleeping
Sleeping
import asyncio | |
import pdb | |
import os | |
from pathlib import Path | |
from typing import Optional, Union | |
from playwright.async_api import Browser as PlaywrightBrowser | |
from playwright.async_api import ( | |
BrowserContext as PlaywrightBrowserContext, | |
) | |
from playwright.async_api import ( | |
Playwright, | |
async_playwright, | |
) | |
from browser_use.browser.browser import Browser, BrowserConfig | |
from browser_use.browser.context import BrowserContextConfig | |
from playwright.async_api import BrowserContext as PlaywrightBrowserContext | |
import logging | |
from src.browser.custom_context import CustomBrowserContext | |
from src.browser.custom_context_config import CustomBrowserContextConfig as AppCustomBrowserContextConfig | |
logger = logging.getLogger(__name__) | |
class CustomBrowser(Browser):
    """Browser wrapper that attaches to, or launches, a Chromium-family browser.

    ``async_init`` tries the following in order and keeps the first handle
    that works:

    1. connect over CDP to ``config.cdp_url`` (when set);
    2. launch Chrome from ``config.chrome_instance_path`` (only when that path
       contains ``"Google Chrome"``) -- as a persistent context when a
       ``--user-data-dir=`` argument is present, otherwise as a plain browser;
    3. launch Playwright's default Chromium.

    The handle is stored in ``_actual_playwright_browser`` and is either a
    Playwright ``Browser`` or, after a persistent launch, a Playwright
    ``BrowserContext``.
    """

    # Internal attribute to store the actual Playwright Browser or BrowserContext instance
    _actual_playwright_browser: Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]] = None
    # Set only by a persistent launch; refers to the same object as
    # _actual_playwright_browser in that case.
    _playwright_browser_context_manager: Optional[PlaywrightBrowserContext] = None

    def resolved_playwright_browser(self) -> Optional[PlaywrightBrowser]:
        """Return the underlying Playwright ``Browser``, or ``None``.

        ``None`` is returned both when nothing has been initialized and when
        the stored handle is a ``BrowserContext`` rather than a ``Browser``.
        """
        handle = self._actual_playwright_browser
        return handle if isinstance(handle, PlaywrightBrowser) else None

    async def async_init(self):
        """Start Playwright (if needed) and obtain a browser/context handle.

        On return ``_actual_playwright_browser`` holds a validated handle, or
        ``None`` when every strategy failed. Launch/connect failures are
        logged rather than raised.
        """
        # new_context()/reuse_existing_context() call this again when no
        # handle is available; reuse a driver that is already running instead
        # of starting (and leaking) another one.
        playwright = getattr(self, "playwright", None)
        if playwright is None:
            playwright = await async_playwright().start()
        self.playwright = playwright
        self._actual_playwright_browser = None  # Initialize our internal attribute

        if self.config.cdp_url:
            self._actual_playwright_browser = await self._connect_over_cdp(playwright)

        chrome_path = self.config.chrome_instance_path
        if self._actual_playwright_browser is None and chrome_path and "Google Chrome" in chrome_path:
            self._actual_playwright_browser = await self._launch_chrome(playwright)

        if self._actual_playwright_browser is None:
            self._actual_playwright_browser = await self._launch_default_chromium(playwright)

        self._actual_playwright_browser = self._validated_handle(self._actual_playwright_browser)

    @staticmethod
    def _split_user_data_dir(args: Optional[list]) -> tuple:
        """Split Chromium args into ``(user_data_dir, other_args)``.

        ``user_data_dir`` is the value of the first ``--user-data-dir=``
        argument, or ``None`` when there is none. ``other_args`` is a new list
        containing every argument that is not a ``--user-data-dir=`` flag.
        """
        flag = "--user-data-dir="
        user_data_dir = None
        other_args = []
        for arg in args or []:
            if not arg.startswith(flag):
                other_args.append(arg)
            elif user_data_dir is None:
                # Slice off the prefix instead of splitting on '=' so that a
                # path which itself contains '=' is kept intact.
                user_data_dir = arg[len(flag):]
        return user_data_dir, other_args

    async def _connect_over_cdp(self, playwright: Playwright) -> Optional[PlaywrightBrowser]:
        """Attach to the browser at ``config.cdp_url``; return ``None`` on failure."""
        cdp_url = self.config.cdp_url
        logger.debug(f"Attempting to connect to existing browser via CDP: {cdp_url}")
        try:
            browser = await playwright.chromium.connect_over_cdp(cdp_url)
            if not browser:
                logger.warning(
                    f"Playwright's connect_over_cdp returned None or a falsy value ({browser}) without raising an exception. Treating as connection failure."
                )
                return None
            logger.info(f"Successfully connected to browser over CDP: {browser}")
            if not browser.contexts:
                logger.warning(
                    "Connected to browser over CDP, but no contexts found. A page/tab might need to be open."
                )
            return browser
        except Exception as exc:
            # Deliberately not BaseException: task cancellation and
            # interpreter-exit signals must propagate instead of being turned
            # into a "launch a new browser" fallback.
            logger.warning(
                f"Failed to connect to browser over CDP ({cdp_url}). Will launch a new browser instance instead. Error type: {type(exc)}, Error: {exc}", exc_info=True
            )
            return None

    async def _launch_chrome(
        self, playwright: Playwright
    ) -> Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]]:
        """Launch Chrome from ``config.chrome_instance_path``; ``None`` on failure.

        Returns a ``BrowserContext`` when a user-data-dir was supplied
        (persistent launch), otherwise a ``Browser``.
        """
        user_data_dir, launch_args = self._split_user_data_dir(
            getattr(self.config, "extra_chromium_args", None)
        )
        if user_data_dir:
            return await self._launch_persistent_chrome(playwright, user_data_dir, launch_args)

        chrome_path = self.config.chrome_instance_path
        logger.debug(f"Attempting to launch new Chrome browser instance via executable_path: {chrome_path} with args: {launch_args}")
        try:
            browser = await playwright.chromium.launch(
                executable_path=chrome_path,
                headless=self.config.headless,
                args=launch_args,
            )
        except Exception as exc:
            logger.error(f"Failed to launch Chrome via executable_path '{chrome_path}': {exc}", exc_info=True)
            return None
        if not browser:
            logger.warning(f"Launching Chrome via executable_path '{chrome_path}' returned a None browser object.")
            return None
        logger.info(f"Launched Chrome via executable_path. Browser: {browser}, Connected: {browser.is_connected()}")
        return browser

    async def _launch_persistent_chrome(
        self, playwright: Playwright, user_data_dir: str, launch_args: list
    ) -> Optional[PlaywrightBrowserContext]:
        """Launch Chrome with a persistent profile; return its context or ``None``."""
        logger.debug(f"Launching persistent Chrome context with UserDataDir: {user_data_dir} and args: {launch_args}")
        try:
            # When launching persistent context, playwright returns a BrowserContext, not a Browser.
            context = await playwright.chromium.launch_persistent_context(
                user_data_dir=user_data_dir,
                headless=self.config.headless,
                args=launch_args,
                channel="chrome",
            )
        except Exception as exc:
            logger.error(f"Failed to launch persistent Chrome context with UserDataDir '{user_data_dir}': {exc}", exc_info=True)
            self._playwright_browser_context_manager = None
            return None
        self._playwright_browser_context_manager = context
        logger.info(f"Launched persistent Chrome. Stored BrowserContext: {context}")
        return context

    async def _launch_default_chromium(self, playwright: Playwright) -> Optional[PlaywrightBrowser]:
        """Launch Playwright's default Chromium as a last resort; ``None`` on failure."""
        # NOTE(review): Playwright is understood to reject a `--user-data-dir`
        # entry in `args` for a non-persistent launch() -- confirm against the
        # Playwright docs. The flag is dropped here so this fallback can still
        # work after a failed persistent launch.
        _, launch_args = self._split_user_data_dir(
            getattr(self.config, "extra_chromium_args", None)
        )
        logger.debug(f"Launching new default (Chromium) browser instance as fallback, with args: {launch_args}")
        try:
            browser = await playwright.chromium.launch(
                headless=self.config.headless,
                args=launch_args,
            )
        except Exception as exc:
            logger.error(f"Failed to launch default Chromium as fallback: {exc}", exc_info=True)
            return None
        if not browser:
            logger.warning("Launching default Chromium as fallback returned a None browser object.")
            return None
        logger.info(f"Launched default Chromium as fallback. Browser: {browser}, Connected: {browser.is_connected()}")
        return browser

    def _validated_handle(
        self, handle: Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]]
    ) -> Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]]:
        """Return ``handle`` if it looks usable, otherwise log why and return ``None``."""
        if not handle:
            logger.error("All browser initialization attempts failed. Final state of self._actual_playwright_browser is None.")
            return None

        if isinstance(handle, PlaywrightBrowser):
            if handle.is_connected():
                logger.info(f"Playwright Browser successfully initialized and connected: {handle}")
                return handle
            logger.error(f"Playwright Browser initialized but not connected. Browser: {handle}")
            return None

        if isinstance(handle, PlaywrightBrowserContext):
            try:
                pages = handle.pages
            except Exception as exc:
                logger.error(f"Playwright BrowserContext is invalid or closed: {exc}. Context: {handle}", exc_info=True)
                return None
            if pages is not None:
                logger.info(f"Playwright BrowserContext successfully initialized (from persistent launch): {handle}")
                return handle
            logger.error(f"Playwright BrowserContext initialized, but .pages is None. Context: {handle}")
            return None

        logger.error(f"self._actual_playwright_browser is of unexpected type: {type(handle)}. Value: {handle}")
        return None

    async def reuse_existing_context(
        self,
        config: Optional[AppCustomBrowserContextConfig] = None,
    ) -> Optional[CustomBrowserContext]:
        """Wrap an already-existing Playwright context in a ``CustomBrowserContext``.

        Args:
            config: Configuration for the wrapper; a default
                ``AppCustomBrowserContextConfig`` is created when omitted.

        Returns:
            The wrapped context, or ``None`` when the browser could not be
            initialized or the stored handle has an unexpected type.

        Raises:
            RuntimeError: If the stored handle is a ``Browser`` that exposes
                no contexts at all.
        """
        # NOTE(review): function-scope import kept from the original even
        # though the module also imports this name; presumably it guards
        # against an import cycle or a stale class object -- confirm.
        from src.browser.custom_context import CustomBrowserContext

        if not self._actual_playwright_browser:
            logger.warning("reuse_existing_context called on uninitialized browser. Attempting init.")
            await self.async_init()

        handle = self._actual_playwright_browser
        if not handle:
            logger.error("Browser not initialized after attempt in reuse_existing_context. Cannot reuse context.")
            return None

        if isinstance(handle, PlaywrightBrowser):
            base_ctx_to_wrap = self._pick_context_to_reuse(handle)
        elif isinstance(handle, PlaywrightBrowserContext):
            base_ctx_to_wrap = handle
            logger.debug(f"Reusing existing PlaywrightBrowserContext directly with {len(base_ctx_to_wrap.pages)} pages. Context: {base_ctx_to_wrap}")
        else:
            logger.error(f"_actual_playwright_browser is of unexpected type: {type(handle)}. Cannot determine context to reuse.")
            return None

        config_to_use = config if config is not None else AppCustomBrowserContextConfig()
        logger.debug(f"Wrapping Playwright context {base_ctx_to_wrap} with CustomBrowserContext using config: {config_to_use}")
        return CustomBrowserContext.from_existing(
            pw_context=base_ctx_to_wrap,
            browser=self,
            config=config_to_use,
        )

    def _pick_context_to_reuse(self, pw_browser_instance: PlaywrightBrowser) -> PlaywrightBrowserContext:
        """Choose which context of ``pw_browser_instance`` to reuse.

        Prefers the first context that has at least one page, then falls back
        to the first context. Raises ``RuntimeError`` when there are none.
        """
        contexts = pw_browser_instance.contexts
        logger.debug(f"Connected PlaywrightBrowser has {len(contexts)} contexts for potential reuse.")

        chosen = None
        for i, ctx in enumerate(contexts):
            logger.debug(f"  Context [{i}]: {ctx} has {len(ctx.pages)} pages.")
            for j, page in enumerate(ctx.pages):
                logger.debug(f"    Page [{j}] URL: {page.url}")
            # Keep iterating after a match so every context still gets logged.
            if chosen is None and len(ctx.pages) > 0:
                chosen = ctx
                logger.debug(f"Selecting Context [{i}] as it has pages.")

        if chosen is not None:
            return chosen
        if contexts:
            logger.warning("No context with pages found. Defaulting to the first context.")
            return contexts[0]
        logger.error("No contexts found in the connected PlaywrightBrowser after attempting to connect.")
        raise RuntimeError("No contexts found in existing browser to reuse after connection.")

    async def new_context(
        self,
        config: Optional[AppCustomBrowserContextConfig] = None,
    ) -> "CustomBrowserContext":
        """Create a ``CustomBrowserContext`` on top of the stored handle.

        When the handle is a ``Browser`` a fresh Playwright context is
        created; when it is a persistent ``BrowserContext`` that context is
        wrapped as-is.

        Args:
            config: Context configuration; a fresh
                ``AppCustomBrowserContextConfig`` is created per call when
                omitted, so no instance is shared between calls.

        Raises:
            RuntimeError: If the browser cannot be initialized.
            TypeError: If the stored handle has an unexpected type.
        """
        if config is None:
            config = AppCustomBrowserContextConfig()

        if not self._actual_playwright_browser:
            logger.error("Playwright browser/context holder not initialized. Call async_init() first.")
            await self.async_init()
            if not self._actual_playwright_browser:
                raise RuntimeError("Failed to initialize Playwright browser/context holder in new_context.")

        handle = self._actual_playwright_browser
        if isinstance(handle, PlaywrightBrowserContext):
            logger.warning("Creating new context from an existing persistent PlaywrightBrowserContext. This might indicate an architectural issue if multiple isolated contexts are expected from a persistent launch.")
            playwright_context_to_wrap = handle
            logger.debug(f"Reusing persistent Playwright context: {playwright_context_to_wrap}")
        elif isinstance(handle, PlaywrightBrowser):
            options = {}
            window_size = config.browser_window_size
            if config.save_recording_path:
                options["record_video_dir"] = config.save_recording_path
                # Only pin the video size when a window size is configured;
                # subscripting a missing size would raise.
                if window_size:
                    options["record_video_size"] = {"width": window_size["width"], "height": window_size["height"]}
            if not config.no_viewport and window_size:
                options["viewport"] = {"width": window_size["width"], "height": window_size["height"]}
            else:
                options["no_viewport"] = True
            logger.debug(f"Creating new Playwright context with options: {options} from PlaywrightBrowser: {handle}")
            playwright_context_to_wrap = await handle.new_context(**options)
        else:
            logger.error(f"_actual_playwright_browser is of unexpected type: {type(handle)}. Cannot create new context.")
            raise TypeError("_actual_playwright_browser is neither PlaywrightBrowser nor PlaywrightBrowserContext.")

        # NOTE(review): function-scope import kept from the original; the
        # surrounding diagnostics log the class identity, which suggests it
        # was added to chase a duplicate-class/reload problem -- confirm.
        from src.browser.custom_context import CustomBrowserContext as CBC_in_CustomBrowser
        logger.debug(f"DEBUG_INIT: ID of CustomBrowserContext class in custom_browser.py: {id(CBC_in_CustomBrowser)}")
        custom_context = CBC_in_CustomBrowser(
            pw_context=playwright_context_to_wrap,
            browser=self,
            config=config,
        )
        logger.debug(f"DEBUG_INIT: Type of CREATED context in custom_browser.py: {type(custom_context)}, ID of its type: {id(type(custom_context))}")

        if config.trace_path and playwright_context_to_wrap:
            # Tracing is best-effort: a failure is logged and the context is
            # still returned.
            try:
                await playwright_context_to_wrap.tracing.start(screenshots=True, snapshots=True, sources=True)
                logger.debug(f"Context tracing started. Saving to host path: {config.trace_path}")
            except Exception as e:
                logger.error(f"Failed to start tracing: {e}")
        return custom_context

    async def close(self):
        """Close the browser/context handle and stop Playwright.

        Exceptions from closing the handle propagate, but the Playwright
        driver is still stopped and the internal references are still cleared.
        """
        # Capture both references up front so the "same object" check below
        # is made before anything is reset.
        persistent_ctx = self._playwright_browser_context_manager
        handle = self._actual_playwright_browser
        try:
            if persistent_ctx is not None:
                logger.info("Closing persistent Playwright context manager (which is a BrowserContext).")
                await persistent_ctx.close()

            if handle is None:
                pass
            elif handle is persistent_ctx:
                # Persistent launch: the handle IS the context closed above.
                logger.info("Actual browser/context object was the persistent context manager and is already closed.")
            elif isinstance(handle, PlaywrightBrowserContext):
                logger.info("Closing PlaywrightBrowserContext stored in _actual_playwright_browser.")
                await handle.close()
            elif isinstance(handle, PlaywrightBrowser):
                if handle.is_connected():
                    logger.info("Closing PlaywrightBrowser stored in _actual_playwright_browser.")
                    await handle.close()
                else:
                    logger.info("PlaywrightBrowser in _actual_playwright_browser is not connected or already closed.")
            else:
                logger.info(f"_actual_playwright_browser ({type(handle)}) is not a PlaywrightBrowser or PlaywrightBrowserContext that can be closed here, or is already closed.")
        finally:
            self._playwright_browser_context_manager = None
            self._actual_playwright_browser = None  # Clear the internal attribute
            playwright = getattr(self, "playwright", None)
            if playwright is not None:
                logger.info("Stopping Playwright.")
                try:
                    await playwright.stop()
                finally:
                    self.playwright = None
        logger.info("CustomBrowser closed.")