File size: 18,583 Bytes
94ff58a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
import asyncio
import pdb
import os
from pathlib import Path
from typing import Optional, Union

from playwright.async_api import Browser as PlaywrightBrowser
from playwright.async_api import (
    BrowserContext as PlaywrightBrowserContext,
)
from playwright.async_api import (
    Playwright,
    async_playwright,
)
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContextConfig
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
import logging

from src.browser.custom_context import CustomBrowserContext
from src.browser.custom_context_config import CustomBrowserContextConfig as AppCustomBrowserContextConfig

logger = logging.getLogger(__name__)


class CustomBrowser(Browser):
    """Browser that obtains and owns its own Playwright handle.

    ``async_init`` tries, in order:

    1. attaching to a running browser over CDP (``config.cdp_url``);
    2. launching Chrome from ``config.chrome_instance_path`` -- as a persistent
       context when a ``--user-data-dir=`` flag is present in
       ``config.extra_chromium_args``, otherwise as a regular browser;
    3. launching Playwright's default Chromium.

    The resulting handle is a Playwright ``Browser`` or, after a persistent
    launch, a Playwright ``BrowserContext``.
    """

    # Command-line flag that selects a persistent profile directory.
    _USER_DATA_DIR_PREFIX = '--user-data-dir='

    # Internal attribute to store the actual Playwright Browser or BrowserContext instance
    _actual_playwright_browser: Optional[Union[PlaywrightBrowser, PlaywrightBrowserContext]] = None
    # Set only by a persistent launch; it is then the same object as _actual_playwright_browser.
    _playwright_browser_context_manager: Optional[PlaywrightBrowserContext] = None

    @property
    def resolved_playwright_browser(self) -> Optional[PlaywrightBrowser]:
        """Return the stored handle if it is a Playwright ``Browser``, else ``None``.

        ``None`` is also returned when the handle is a persistent ``BrowserContext``.
        """
        handle = self._actual_playwright_browser
        if isinstance(handle, PlaywrightBrowser):
            return handle
        return None

    @classmethod
    def _split_user_data_dir(cls, extra_args):
        """Separate the ``--user-data-dir=`` flag from the other launch arguments.

        Args:
            extra_args: Iterable of command-line flags, or ``None``.

        Returns:
            ``(user_data_dir, remaining_args)``. ``user_data_dir`` is the value
            of the first ``--user-data-dir=`` flag (``None`` when absent);
            ``remaining_args`` is a list with every such flag removed.
        """
        user_data_dir = None
        remaining_args = []
        for arg in extra_args or []:
            if arg.startswith(cls._USER_DATA_DIR_PREFIX):
                if user_data_dir is None:
                    # Slice rather than split('=') so paths containing '=' survive intact.
                    user_data_dir = arg[len(cls._USER_DATA_DIR_PREFIX):]
            else:
                remaining_args.append(arg)
        return user_data_dir, remaining_args

    async def _attach_over_cdp(self, playwright) -> Optional[PlaywrightBrowser]:
        """Attach to the browser at ``config.cdp_url``; return ``None`` on any failure.

        Args:
            playwright: The started Playwright driver object.
        """
        logger.debug(f"Attempting to connect to existing browser via CDP: {self.config.cdp_url}")
        try:
            cdp_connection_result = await playwright.chromium.connect_over_cdp(
                self.config.cdp_url
            )
            if not cdp_connection_result:
                logger.warning(
                    f"Playwright's connect_over_cdp returned None or a falsy value ({cdp_connection_result}) without raising an exception. Treating as connection failure."
                )
                return None

            logger.info(
                f"Successfully connected to browser over CDP: {cdp_connection_result}"
            )
            if not cdp_connection_result.contexts:
                logger.warning(
                    "Connected to browser over CDP, but no contexts found. A page/tab might need to be open."
                )
            return cdp_connection_result
        except Exception as exc:
            # Deliberately not BaseException: task cancellation, KeyboardInterrupt and
            # SystemExit must propagate instead of being turned into a fallback launch.
            logger.warning(
                f"Failed to connect to browser over CDP ({self.config.cdp_url}). Will launch a new browser instance instead. Error type: {type(exc)}, Error: {exc}", exc_info=True
            )
            return None

    async def _launch_configured_chrome(self, playwright, chrome_path):
        """Launch Chrome for ``chrome_path``; return the handle or ``None`` on failure.

        With a ``--user-data-dir=`` flag configured, a persistent context is
        launched (Playwright returns a ``BrowserContext``) and also recorded in
        ``_playwright_browser_context_manager``. Otherwise a regular browser is
        launched from ``chrome_path``.

        Args:
            playwright: The started Playwright driver object.
            chrome_path: Value of ``config.chrome_instance_path``.
        """
        user_data_dir, launch_args = self._split_user_data_dir(
            getattr(self.config, 'extra_chromium_args', None)
        )

        if user_data_dir:
            logger.debug(f"Launching persistent Chrome context with UserDataDir: {user_data_dir} and args: {launch_args}")
            try:
                # When launching persistent context, playwright returns a BrowserContext, not a Browser.
                persistent_context = await playwright.chromium.launch_persistent_context(
                    user_data_dir=user_data_dir,
                    headless=self.config.headless,
                    args=launch_args,
                    channel="chrome"
                )
            except Exception as e_persistent_launch:
                logger.error(f"Failed to launch persistent Chrome context with UserDataDir '{user_data_dir}': {e_persistent_launch}", exc_info=True)
                self._playwright_browser_context_manager = None
                return None
            self._playwright_browser_context_manager = persistent_context
            logger.info(f"Launched persistent Chrome. Stored BrowserContext: {persistent_context}")
            return persistent_context

        logger.debug(f"Attempting to launch new Chrome browser instance via executable_path: {chrome_path} with args: {self.config.extra_chromium_args}")
        try:
            browser = await playwright.chromium.launch(
                executable_path=chrome_path,
                headless=self.config.headless,
                args=self.config.extra_chromium_args
            )
            if not browser:
                logger.warning(f"Launching Chrome via executable_path '{chrome_path}' returned a None browser object.")
                return None
            logger.info(f"Launched Chrome via executable_path. Browser: {browser}, Connected: {browser.is_connected()}")
            return browser
        except Exception as e_chrome_launch:
            logger.error(f"Failed to launch Chrome via executable_path '{chrome_path}': {e_chrome_launch}", exc_info=True)
            return None

    async def _launch_fallback_chromium(self, playwright) -> Optional[PlaywrightBrowser]:
        """Launch Playwright's default Chromium; return ``None`` on failure.

        Args:
            playwright: The started Playwright driver object.
        """
        configured_args = self.config.extra_chromium_args
        user_data_dir, stripped_args = self._split_user_data_dir(configured_args)
        if user_data_dir is None:
            launch_args = configured_args
        else:
            # Playwright's launch() refuses a --user-data-dir argument (it must go through
            # launch_persistent_context), so forwarding it would make this fallback fail.
            logger.warning(
                "Ignoring --user-data-dir for the fallback Chromium launch; the fallback browser will not use the configured profile."
            )
            launch_args = stripped_args

        logger.debug(f"Launching new default (Chromium) browser instance as fallback, with args: {launch_args}")
        try:
            browser = await playwright.chromium.launch(
                headless=self.config.headless,
                args=launch_args
            )
            if not browser:
                logger.warning("Launching default Chromium as fallback returned a None browser object.")
                return None
            logger.info(f"Launched default Chromium as fallback. Browser: {browser}, Connected: {browser.is_connected()}")
            return browser
        except Exception as e_default_launch:
            logger.error(f"Failed to launch default Chromium as fallback: {e_default_launch}", exc_info=True)
            return None

    def _verify_playwright_handle(self, handle):
        """Return ``handle`` if it is a usable Browser/BrowserContext, else ``None``.

        A ``Browser`` must report ``is_connected()``; a ``BrowserContext`` must
        expose ``pages`` without raising. Any other type is rejected.
        """
        if isinstance(handle, PlaywrightBrowser):
            if handle.is_connected():
                logger.info(f"Playwright Browser successfully initialized and connected: {handle}")
                return handle
            logger.error(f"Playwright Browser initialized but not connected. Browser: {handle}")
            return None

        if isinstance(handle, PlaywrightBrowserContext):
            try:
                if handle.pages is not None:
                    logger.info(f"Playwright BrowserContext successfully initialized (from persistent launch): {handle}")
                    return handle
                logger.error(f"Playwright BrowserContext initialized, but .pages is None. Context: {handle}")
            except Exception as e_context_check:
                logger.error(f"Playwright BrowserContext is invalid or closed: {e_context_check}. Context: {handle}", exc_info=True)
            return None

        logger.error(f"self._actual_playwright_browser is of unexpected type: {type(handle)}. Value: {handle}")
        return None

    async def async_init(self):
        """Start Playwright (if needed) and obtain a browser handle.

        The outcome is stored in ``_actual_playwright_browser``; it is ``None``
        when every attempt failed. Launch/attach failures are logged, not raised.
        """
        # Reuse a driver that is already running: reuse_existing_context() and
        # new_context() call this method again after a failed attempt, and starting
        # a second driver would orphan the first one.
        playwright = getattr(self, 'playwright', None)
        if playwright is None:
            playwright = await async_playwright().start()
            self.playwright = playwright

        self._actual_playwright_browser = None  # Initialize our internal attribute

        if self.config.cdp_url:
            self._actual_playwright_browser = await self._attach_over_cdp(playwright)

        if self._actual_playwright_browser is None:
            chrome_path = self.config.chrome_instance_path
            # NOTE(review): the "Google Chrome" substring looks like it targets macOS-style
            # install paths; other Chrome paths fall through to the default Chromium
            # launch -- confirm this is intended.
            if chrome_path and "Google Chrome" in chrome_path:
                self._actual_playwright_browser = await self._launch_configured_chrome(playwright, chrome_path)

        if self._actual_playwright_browser is None:
            self._actual_playwright_browser = await self._launch_fallback_chromium(playwright)

        if self._actual_playwright_browser:
            self._actual_playwright_browser = self._verify_playwright_handle(self._actual_playwright_browser)
        else:
            logger.error("All browser initialization attempts failed. Final state of self._actual_playwright_browser is None.")

    async def reuse_existing_context(self,
                                   config: Optional[AppCustomBrowserContextConfig] = None # Add optional config param
                                   ) -> Optional[CustomBrowserContext]: # Return Optional CustomBrowserContext
        """Wrap an already-existing Playwright context in a ``CustomBrowserContext``.

        For a ``Browser`` handle the first context that has pages is chosen,
        falling back to the first context. For a persistent ``BrowserContext``
        handle that context itself is wrapped.

        Args:
            config: Config for the wrapper; a default
                ``AppCustomBrowserContextConfig()`` is used when ``None``.

        Returns:
            The wrapper, or ``None`` when the browser cannot be initialized or
            the stored handle has an unexpected type.

        Raises:
            RuntimeError: If the connected browser has no contexts at all.
        """
        # Imported at call time, as in the original code -- presumably so the currently
        # loaded class object is used; confirm before hoisting to module level.
        from src.browser.custom_context import CustomBrowserContext

        if not self._actual_playwright_browser:
            logger.warning("reuse_existing_context called on uninitialized browser. Attempting init.")
            await self.async_init()
            if not self._actual_playwright_browser:
                logger.error("Browser not initialized after attempt in reuse_existing_context. Cannot reuse context.")
                return None  # Explicitly return None on failure

        handle = self._actual_playwright_browser
        base_ctx_to_wrap = None
        if isinstance(handle, PlaywrightBrowser):
            logger.debug(f"Connected PlaywrightBrowser has {len(handle.contexts)} contexts for potential reuse.")
            for i, ctx in enumerate(handle.contexts):
                logger.debug(f"  Context [{i}]: {ctx} has {len(ctx.pages)} pages.")
                for j, page in enumerate(ctx.pages):
                    logger.debug(f"    Page [{j}] URL: {page.url}")
                # Keep iterating after a match so every context is still logged.
                if base_ctx_to_wrap is None and len(ctx.pages) > 0:
                    base_ctx_to_wrap = ctx
                    logger.debug(f"Selecting Context [{i}] as it has pages.")

            if not base_ctx_to_wrap:
                if handle.contexts:
                    logger.warning("No context with pages found. Defaulting to the first context.")
                    base_ctx_to_wrap = handle.contexts[0]
                else:
                    logger.error("No contexts found in the connected PlaywrightBrowser after attempting to connect.")
                    raise RuntimeError("No contexts found in existing browser to reuse after connection.")

        elif isinstance(handle, PlaywrightBrowserContext):
            base_ctx_to_wrap = handle
            logger.debug(f"Reusing existing PlaywrightBrowserContext directly with {len(base_ctx_to_wrap.pages)} pages. Context: {base_ctx_to_wrap}")
        else:
            logger.error(f"_actual_playwright_browser is of unexpected type: {type(handle)}. Cannot determine context to reuse.")
            return None  # Return None on type error

        # Determine the config to use for the CustomBrowserContext wrapper
        config_to_use = config if config is not None else AppCustomBrowserContextConfig()

        logger.debug(f"Wrapping Playwright context {base_ctx_to_wrap} with CustomBrowserContext using config: {config_to_use}")
        return CustomBrowserContext.from_existing(
            pw_context=base_ctx_to_wrap,
            browser=self,
            config=config_to_use
        )

    async def new_context(
            self,
            config: Optional[AppCustomBrowserContextConfig] = None
    ) -> "CustomBrowserContext":
        """Create a ``CustomBrowserContext`` on top of the stored Playwright handle.

        With a ``Browser`` handle a fresh Playwright context is created (video
        recording and viewport options are taken from ``config``). With a
        persistent ``BrowserContext`` handle that same context is wrapped again.
        Tracing is started on the context when ``config.trace_path`` is set.

        Args:
            config: Context configuration; a fresh
                ``AppCustomBrowserContextConfig()`` is created when ``None`` so
                that calls never share one default instance.

        Raises:
            RuntimeError: If the browser cannot be initialized.
            TypeError: If the stored handle is neither a Browser nor a BrowserContext.
        """
        if config is None:
            config = AppCustomBrowserContextConfig()

        if not self._actual_playwright_browser:
            logger.error("Playwright browser/context holder not initialized. Call async_init() first.")
            await self.async_init()
            if not self._actual_playwright_browser:
                raise RuntimeError("Failed to initialize Playwright browser/context holder in new_context.")

        handle = self._actual_playwright_browser
        if isinstance(handle, PlaywrightBrowserContext):
            logger.warning("Creating new context from an existing persistent PlaywrightBrowserContext. This might indicate an architectural issue if multiple isolated contexts are expected from a persistent launch.")
            playwright_context_to_wrap = handle
            logger.debug(f"Reusing persistent Playwright context: {playwright_context_to_wrap}")

        elif isinstance(handle, PlaywrightBrowser):
            window_size = config.browser_window_size
            options = {}

            if config.save_recording_path:
                options["record_video_dir"] = config.save_recording_path
                # Only pin the video size when a window size is configured; indexing a
                # missing size would raise before the context is even created.
                if window_size:
                    options["record_video_size"] = {"width": window_size["width"], "height": window_size["height"]}

            if not config.no_viewport and window_size:
                options["viewport"] = {"width": window_size["width"], "height": window_size["height"]}
            else:
                options["no_viewport"] = True

            logger.debug(f"Creating new Playwright context with options: {options} from PlaywrightBrowser: {handle}")
            playwright_context_to_wrap = await handle.new_context(**options)
        else:
            logger.error(f"_actual_playwright_browser is of unexpected type: {type(handle)}. Cannot create new context.")
            raise TypeError("_actual_playwright_browser is neither PlaywrightBrowser nor PlaywrightBrowserContext.")

        # Imported at call time, as in the original code -- presumably so the currently
        # loaded class object is used; confirm before hoisting to module level.
        from src.browser.custom_context import CustomBrowserContext as CBC_in_CustomBrowser
        logger.debug(f"DEBUG_INIT: ID of CustomBrowserContext class in custom_browser.py: {id(CBC_in_CustomBrowser)}")

        custom_context = CBC_in_CustomBrowser(
            pw_context=playwright_context_to_wrap,
            browser=self,
            config=config
        )
        logger.debug(f"DEBUG_INIT: Type of CREATED context in custom_browser.py: {type(custom_context)}, ID of its type: {id(type(custom_context))}")

        if config.trace_path and playwright_context_to_wrap:
            try:
                await playwright_context_to_wrap.tracing.start(screenshots=True, snapshots=True, sources=True)
                logger.debug(f"Context tracing started. Saving to host path: {config.trace_path}")
            except Exception as e:
                # Tracing is best-effort: the context is still returned without it.
                logger.error(f"Failed to start tracing: {e}")

        return custom_context

    async def close(self):
        """Close the stored browser/context and stop the Playwright driver.

        The persistent context (if any) is closed exactly once, even though it
        is referenced by both internal attributes. Playwright is stopped even
        when closing the browser raises; that exception then propagates.
        """
        # Capture both references before clearing anything so the identity check
        # below can tell whether they are the same object.
        persistent_context = self._playwright_browser_context_manager
        browser_or_context_to_close = self._actual_playwright_browser
        self._playwright_browser_context_manager = None
        self._actual_playwright_browser = None

        try:
            if persistent_context is not None:
                logger.info("Closing persistent Playwright context manager (which is a BrowserContext).")
                await persistent_context.close()

            if browser_or_context_to_close is None:
                pass
            elif browser_or_context_to_close is persistent_context:
                logger.info("Actual browser/context object was the persistent context manager and is already closed.")
            elif isinstance(browser_or_context_to_close, PlaywrightBrowserContext):
                logger.info("Closing PlaywrightBrowserContext stored in _actual_playwright_browser.")
                await browser_or_context_to_close.close()
            elif isinstance(browser_or_context_to_close, PlaywrightBrowser):
                if browser_or_context_to_close.is_connected():
                    logger.info("Closing PlaywrightBrowser stored in _actual_playwright_browser.")
                    await browser_or_context_to_close.close()
                else:
                    logger.info("PlaywrightBrowser in _actual_playwright_browser is not connected or already closed.")
            else:
                logger.info(f"_actual_playwright_browser ({type(browser_or_context_to_close)}) is not a PlaywrightBrowser or PlaywrightBrowserContext that can be closed here, or is already closed.")
        finally:
            playwright = getattr(self, 'playwright', None)
            if playwright is not None:
                logger.info("Stopping Playwright.")
                self.playwright = None
                await playwright.stop()
        logger.info("CustomBrowser closed.")