rebrowse / src /browser /custom_browser.py
zk1tty
add src/ files
94ff58a
import asyncio
import pdb
import os
from pathlib import Path
from typing import Optional, Union
from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import (
BrowserContext as PlaywrightBrowserContext,
)
from playwright.async_api import (
Playwright,
async_playwright,
)
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContextConfig
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
import logging
from src.browser.custom_context import CustomBrowserContext
from src.browser.custom_context_config import CustomBrowserContextConfig as AppCustomBrowserContextConfig
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class CustomBrowser(Browser):
    """Browser that connects to, or launches, a Chromium-family browser through Playwright.

    ``async_init`` tries, in order: attaching over CDP (when ``config.cdp_url`` is
    set), launching Google Chrome (persistent profile or plain executable, when
    ``config.chrome_instance_path`` contains "Google Chrome"), and finally
    launching Playwright's default Chromium. Whatever succeeds is stored in
    ``_actual_playwright_browser``.
    """

    # The live Playwright object. It is a Browser after a CDP connect or a plain
    # launch, and a BrowserContext after a persistent-profile launch.
    _actual_playwright_browser: Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]] = None
    # The BrowserContext returned by launch_persistent_context, if that path was used.
    _playwright_browser_context_manager: Optional[PlaywrightBrowserContext] = None

    @property
    def resolved_playwright_browser(self) -> Optional[PlaywrightBrowser]:
        """Return the held object if it is a Playwright Browser (not a context), else None."""
        held = self._actual_playwright_browser
        return held if isinstance(held, PlaywrightBrowser) else None

    async def async_init(self):
        """Start Playwright (if needed) and acquire a browser or persistent context.

        On return ``_actual_playwright_browser`` holds a validated Browser or
        BrowserContext, or None when every attempt failed. Failures are logged,
        not raised.
        """
        # Reuse an already-started driver: new_context/reuse_existing_context call
        # this method again as a retry, and starting a second driver would orphan
        # the first one.
        playwright = getattr(self, "playwright", None)
        if playwright is None:
            playwright = await async_playwright().start()
            self.playwright = playwright
        self._actual_playwright_browser = None  # Initialize our internal attribute

        if self.config.cdp_url:
            await self._connect_over_cdp(playwright)

        chrome_path = self.config.chrome_instance_path
        if self._actual_playwright_browser is None and chrome_path and "Google Chrome" in chrome_path:
            user_data_dir = self._user_data_dir_from_args()
            if user_data_dir:
                await self._launch_persistent_chrome(playwright, user_data_dir)
            else:
                await self._launch_chrome_executable(playwright)

        if self._actual_playwright_browser is None:
            await self._launch_default_chromium(playwright)

        self._validate_initialized_browser()

    async def _connect_over_cdp(self, playwright: Playwright) -> None:
        """Try to attach to an existing browser at ``config.cdp_url``; leave None on failure."""
        logger.debug(f"Attempting to connect to existing browser via CDP: {self.config.cdp_url}")
        try:
            cdp_connection_result = await playwright.chromium.connect_over_cdp(
                self.config.cdp_url
            )
        except Exception as be:
            # Deliberately not BaseException: task cancellation and interpreter
            # shutdown must propagate instead of being treated as a failed connect.
            logger.warning(
                f"Failed to connect to browser over CDP ({self.config.cdp_url}). Will launch a new browser instance instead. Error type: {type(be)}, Error: {be}", exc_info=True
            )
            self._actual_playwright_browser = None
            return

        if not cdp_connection_result:
            logger.warning(
                f"Playwright's connect_over_cdp returned None or a falsy value ({cdp_connection_result}) without raising an exception. Treating as connection failure."
            )
            self._actual_playwright_browser = None
            return

        self._actual_playwright_browser = cdp_connection_result
        logger.info(
            f"Successfully connected to browser over CDP: {self._actual_playwright_browser}"
        )
        if not cdp_connection_result.contexts:
            logger.warning(
                "Connected to browser over CDP, but no contexts found. A page/tab might need to be open."
            )

    def _user_data_dir_from_args(self) -> Optional[str]:
        """Return the value of the first ``--user-data-dir=`` entry in ``extra_chromium_args``."""
        prefix = '--user-data-dir='
        for arg in getattr(self.config, 'extra_chromium_args', None) or []:
            if arg.startswith(prefix):
                # Slice off the prefix rather than split on '=' so that a path
                # which itself contains '=' is kept intact.
                return arg[len(prefix):]
        return None

    async def _launch_persistent_chrome(self, playwright: Playwright, user_data_dir: str) -> None:
        """Launch Chrome with a persistent profile; stores the resulting BrowserContext."""
        launch_args = [
            arg for arg in getattr(self.config, 'extra_chromium_args', [])
            if not arg.startswith('--user-data-dir=')
        ]
        logger.debug(f"Launching persistent Chrome context with UserDataDir: {user_data_dir} and args: {launch_args}")
        try:
            # When launching persistent context, playwright returns a BrowserContext, not a Browser.
            # The context itself becomes the primary object to interact with.
            self._playwright_browser_context_manager = await playwright.chromium.launch_persistent_context(
                user_data_dir=user_data_dir,
                headless=self.config.headless,
                args=launch_args,
                channel="chrome"
            )
            self._actual_playwright_browser = self._playwright_browser_context_manager  # Store the context here
            logger.info(f"Launched persistent Chrome. Stored BrowserContext: {self._actual_playwright_browser}")
        except Exception as e_persistent_launch:
            logger.error(f"Failed to launch persistent Chrome context with UserDataDir '{user_data_dir}': {e_persistent_launch}", exc_info=True)
            self._actual_playwright_browser = None
            self._playwright_browser_context_manager = None

    async def _launch_chrome_executable(self, playwright: Playwright) -> None:
        """Launch the Chrome binary at ``config.chrome_instance_path``; leave None on failure."""
        logger.debug(f"Attempting to launch new Chrome browser instance via executable_path: {self.config.chrome_instance_path} with args: {self.config.extra_chromium_args}")
        try:
            self._actual_playwright_browser = await playwright.chromium.launch(
                executable_path=self.config.chrome_instance_path,
                headless=self.config.headless,
                args=self.config.extra_chromium_args
            )
            if self._actual_playwright_browser:
                logger.info(f"Launched Chrome via executable_path. Browser: {self._actual_playwright_browser}, Connected: {self._actual_playwright_browser.is_connected()}")
            else:
                logger.warning(f"Launching Chrome via executable_path '{self.config.chrome_instance_path}' returned a None browser object.")
                self._actual_playwright_browser = None
        except Exception as e_chrome_launch:
            logger.error(f"Failed to launch Chrome via executable_path '{self.config.chrome_instance_path}': {e_chrome_launch}", exc_info=True)
            self._actual_playwright_browser = None

    async def _launch_default_chromium(self, playwright: Playwright) -> None:
        """Launch Playwright's default Chromium as the last-resort fallback."""
        logger.debug(f"Launching new default (Chromium) browser instance as fallback, with args: {self.config.extra_chromium_args}")
        try:
            self._actual_playwright_browser = await playwright.chromium.launch(
                headless=self.config.headless,
                args=self.config.extra_chromium_args
            )
            if self._actual_playwright_browser:
                logger.info(f"Launched default Chromium as fallback. Browser: {self._actual_playwright_browser}, Connected: {self._actual_playwright_browser.is_connected()}")
            else:
                logger.warning("Launching default Chromium as fallback returned a None browser object.")
                self._actual_playwright_browser = None
        except Exception as e_default_launch:
            logger.error(f"Failed to launch default Chromium as fallback: {e_default_launch}", exc_info=True)
            self._actual_playwright_browser = None

    def _validate_initialized_browser(self) -> None:
        """Sanity-check the acquired object; reset it to None if it is unusable."""
        held = self._actual_playwright_browser
        if not held:
            logger.error("All browser initialization attempts failed. Final state of self._actual_playwright_browser is None.")
            return

        if isinstance(held, PlaywrightBrowser):
            if held.is_connected():
                logger.info(f"Playwright Browser successfully initialized and connected: {held}")
            else:
                logger.error(f"Playwright Browser initialized but not connected. Browser: {held}")
                self._actual_playwright_browser = None
        elif isinstance(held, PlaywrightBrowserContext):
            try:
                if held.pages is not None:
                    logger.info(f"Playwright BrowserContext successfully initialized (from persistent launch): {held}")
                else:
                    logger.error(f"Playwright BrowserContext initialized, but .pages is None. Context: {held}")
                    self._actual_playwright_browser = None
            except Exception as e_context_check:
                logger.error(f"Playwright BrowserContext is invalid or closed: {e_context_check}. Context: {held}", exc_info=True)
                self._actual_playwright_browser = None
        else:
            logger.error(f"self._actual_playwright_browser is of unexpected type: {type(held)}. Value: {held}")
            self._actual_playwright_browser = None

    async def reuse_existing_context(
        self,
        config: Optional[AppCustomBrowserContextConfig] = None,
    ) -> Optional[CustomBrowserContext]:
        """Wrap an already-open Playwright context in a CustomBrowserContext.

        Args:
            config: Configuration for the wrapper; a default
                ``AppCustomBrowserContextConfig`` is created when omitted.

        Returns:
            The wrapping context, or None when the browser could not be
            initialized or the held object has an unexpected type.

        Raises:
            RuntimeError: The held Browser exposes no contexts at all.
        """
        if not self._actual_playwright_browser:
            logger.warning("reuse_existing_context called on uninitialized browser. Attempting init.")
            await self.async_init()
        if not self._actual_playwright_browser:
            logger.error("Browser not initialized after attempt in reuse_existing_context. Cannot reuse context.")
            return None  # Explicitly return None on failure

        held = self._actual_playwright_browser
        base_ctx_to_wrap = None
        if isinstance(held, PlaywrightBrowser):
            contexts = held.contexts
            logger.debug(f"Connected PlaywrightBrowser has {len(contexts)} contexts for potential reuse.")
            for i, ctx in enumerate(contexts):
                logger.debug(f"  Context [{i}]: {ctx} has {len(ctx.pages)} pages.")
                for j, page in enumerate(ctx.pages):
                    logger.debug(f"    Page [{j}] URL: {page.url}")
                # Prefer the first context that actually has pages open.
                if base_ctx_to_wrap is None and ctx.pages:
                    base_ctx_to_wrap = ctx
                    logger.debug(f"Selecting Context [{i}] as it has pages.")
            if base_ctx_to_wrap is None:
                if not contexts:
                    logger.error("No contexts found in the connected PlaywrightBrowser after attempting to connect.")
                    raise RuntimeError("No contexts found in existing browser to reuse after connection.")
                logger.warning("No context with pages found. Defaulting to the first context.")
                base_ctx_to_wrap = contexts[0]
        elif isinstance(held, PlaywrightBrowserContext):
            base_ctx_to_wrap = held
            logger.debug(f"Reusing existing PlaywrightBrowserContext directly with {len(base_ctx_to_wrap.pages)} pages. Context: {base_ctx_to_wrap}")
        else:
            logger.error(f"_actual_playwright_browser is of unexpected type: {type(held)}. Cannot determine context to reuse.")
            return None  # Return None on type error

        # Determine the config to use for the CustomBrowserContext wrapper
        config_to_use = config if config is not None else AppCustomBrowserContextConfig()
        logger.debug(f"Wrapping Playwright context {base_ctx_to_wrap} with CustomBrowserContext using config: {config_to_use}")
        return CustomBrowserContext.from_existing(
            pw_context=base_ctx_to_wrap,
            browser=self,
            config=config_to_use
        )

    async def new_context(
        self,
        config: Optional[AppCustomBrowserContextConfig] = None,
    ) -> "CustomBrowserContext":
        """Create a CustomBrowserContext, initializing the browser first if needed.

        With a held Browser a new Playwright context is created from ``config``
        (video recording directory/size and viewport). With a held persistent
        BrowserContext that same context is wrapped instead.

        Args:
            config: Context configuration; a fresh default
                ``AppCustomBrowserContextConfig`` is created per call when omitted.

        Raises:
            RuntimeError: The browser could not be initialized.
            TypeError: The held object is neither a Browser nor a BrowserContext.
        """
        # Build the default per call so one config instance is never shared
        # between unrelated contexts.
        if config is None:
            config = AppCustomBrowserContextConfig()

        if not self._actual_playwright_browser:
            logger.error("Playwright browser/context holder not initialized. Call async_init() first.")
            await self.async_init()
        if not self._actual_playwright_browser:
            raise RuntimeError("Failed to initialize Playwright browser/context holder in new_context.")

        held = self._actual_playwright_browser
        if isinstance(held, PlaywrightBrowserContext):
            logger.warning("Creating new context from an existing persistent PlaywrightBrowserContext. This might indicate an architectural issue if multiple isolated contexts are expected from a persistent launch.")
            playwright_context_to_wrap = held
            logger.debug(f"Reusing persistent Playwright context: {playwright_context_to_wrap}")
        elif isinstance(held, PlaywrightBrowser):
            options = {}
            window_size = config.browser_window_size
            if config.save_recording_path:
                options["record_video_dir"] = config.save_recording_path
                # Only pin the video size when a window size is configured.
                if window_size:
                    options["record_video_size"] = {"width": window_size["width"], "height": window_size["height"]}
            if not config.no_viewport and window_size:
                options["viewport"] = {"width": window_size["width"], "height": window_size["height"]}
            else:
                options["no_viewport"] = True
            logger.debug(f"Creating new Playwright context with options: {options} from PlaywrightBrowser: {held}")
            playwright_context_to_wrap = await held.new_context(**options)
        else:
            logger.error(f"_actual_playwright_browser is of unexpected type: {type(held)}. Cannot create new context.")
            raise TypeError("_actual_playwright_browser is neither PlaywrightBrowser nor PlaywrightBrowserContext.")

        logger.debug(f"DEBUG_INIT: ID of CustomBrowserContext class in custom_browser.py: {id(CustomBrowserContext)}")
        custom_context = CustomBrowserContext(
            pw_context=playwright_context_to_wrap,
            browser=self,
            config=config
        )
        logger.debug(f"DEBUG_INIT: Type of CREATED context in custom_browser.py: {type(custom_context)}, ID of its type: {id(type(custom_context))}")

        if config.trace_path and playwright_context_to_wrap:
            # Tracing is best-effort: a failure is logged and the context is still returned.
            try:
                await playwright_context_to_wrap.tracing.start(screenshots=True, snapshots=True, sources=True)
                logger.debug(f"Context tracing started. Saving to host path: {config.trace_path}")
            except Exception as e:
                logger.error(f"Failed to start tracing: {e}")
        return custom_context

    async def close(self):
        """Close the held browser/context and stop Playwright.

        Playwright is stopped and the internal references are cleared even when
        closing the browser or context raises; the exception then propagates.
        """
        persistent_ctx = self._playwright_browser_context_manager
        held = self._actual_playwright_browser
        try:
            if persistent_ctx is not None:
                logger.info("Closing persistent Playwright context manager (which is a BrowserContext).")
                await persistent_ctx.close()

            if held is None:
                pass
            elif held is persistent_ctx:
                # After a persistent launch both attributes reference the same
                # context, which was closed just above.
                logger.info("Actual browser/context object was the persistent context manager and is already closed.")
            elif isinstance(held, PlaywrightBrowserContext):
                logger.info("Closing PlaywrightBrowserContext stored in _actual_playwright_browser.")
                await held.close()
            elif isinstance(held, PlaywrightBrowser):
                if held.is_connected():
                    logger.info("Closing PlaywrightBrowser stored in _actual_playwright_browser.")
                    await held.close()
                else:
                    logger.info("PlaywrightBrowser in _actual_playwright_browser is not connected or already closed.")
            else:
                logger.info(f"_actual_playwright_browser ({type(held)}) is not a PlaywrightBrowser or PlaywrightBrowserContext that can be closed here, or is already closed.")
        finally:
            self._playwright_browser_context_manager = None
            self._actual_playwright_browser = None  # Clear the internal attribute
            playwright_driver = getattr(self, 'playwright', None)
            if playwright_driver is not None:
                logger.info("Stopping Playwright.")
                self.playwright = None
                await playwright_driver.stop()
        logger.info("CustomBrowser closed.")