diff --git a/src/crawlee/browsers/_playwright_browser_controller.py b/src/crawlee/browsers/_playwright_browser_controller.py
index ba9aa60db8..10fd5ef46c 100644
--- a/src/crawlee/browsers/_playwright_browser_controller.py
+++ b/src/crawlee/browsers/_playwright_browser_controller.py
@@ -77,6 +77,7 @@ def __init__(
         self._last_page_opened_at = datetime.now(timezone.utc)
 
         self._total_opened_pages = 0
+        self._opening_pages_count = 0
 
         self._context_creation_lock: Lock | None = None
 
@@ -119,7 +120,7 @@ def idle_time(self) -> timedelta:
     @property
     @override
     def has_free_capacity(self) -> bool:
-        return self.pages_count < self._max_open_pages_per_browser
+        return (self.pages_count + self._opening_pages_count) < self._max_open_pages_per_browser
 
     @property
     @override
@@ -154,30 +155,35 @@ async def new_page(
         if not self.has_free_capacity:
             raise ValueError('Cannot open more pages in this browser.')
 
-        if self._use_incognito_pages:
-            # In incognito there is exactly one context per one page. Create new context for each new page.
-            new_context = await self._create_browser_context(
-                browser_new_context_options=browser_new_context_options,
-                proxy_info=proxy_info,
-            )
-            page = await new_context.new_page()
-        else:
-            async with await self._get_context_creation_lock():
-                if not self._browser_context:
-                    self._browser_context = await self._create_browser_context(
-                        browser_new_context_options=browser_new_context_options,
-                        proxy_info=proxy_info,
-                    )
-            page = await self._browser_context.new_page()
-
-        # Handle page close event
-        page.on(event='close', f=self._on_page_close)
-
-        # Update internal state
-        self._pages.append(page)
-        self._last_page_opened_at = datetime.now(timezone.utc)
+        self._opening_pages_count += 1
 
-        self._total_opened_pages += 1
+        try:
+            if self._use_incognito_pages:
+                # In incognito there is exactly one context per one page. Create new context for each new page.
+                new_context = await self._create_browser_context(
+                    browser_new_context_options=browser_new_context_options,
+                    proxy_info=proxy_info,
+                )
+                page = await new_context.new_page()
+            else:
+                async with await self._get_context_creation_lock():
+                    if not self._browser_context:
+                        self._browser_context = await self._create_browser_context(
+                            browser_new_context_options=browser_new_context_options,
+                            proxy_info=proxy_info,
+                        )
+                page = await self._browser_context.new_page()
+
+            # Handle page close event
+            page.on(event='close', f=self._on_page_close)
+
+            # Update internal state
+            self._pages.append(page)
+            self._last_page_opened_at = datetime.now(timezone.utc)
+
+            self._total_opened_pages += 1
+        finally:
+            self._opening_pages_count -= 1
         return page
 
     @override
diff --git a/tests/unit/browsers/test_playwright_browser_controller.py b/tests/unit/browsers/test_playwright_browser_controller.py
index 7f8e513a83..af94eadf98 100644
--- a/tests/unit/browsers/test_playwright_browser_controller.py
+++ b/tests/unit/browsers/test_playwright_browser_controller.py
@@ -136,3 +136,18 @@ async def delayed_launch_persistent_context(*args: Any, **kwargs: Any) -> Any:
     await asyncio.gather(controller.new_page(), controller.new_page())
 
     assert mocked_context_launcher.call_count == 1
+
+
+async def test_max_open_pages_limit_on_concurrent_creation(controller: PlaywrightBrowserController) -> None:
+    pages = await asyncio.gather(controller.new_page(), controller.new_page())
+
+    assert controller.pages_count == 2
+
+    for page in pages:
+        await page.close()
+
+
+async def test_max_open_pages_limit_error_on_concurrent_creation(controller: PlaywrightBrowserController) -> None:
+    """Test that max open pages limit is respected during concurrent page creation."""
+    with pytest.raises(ValueError, match=r'Cannot open more pages in this browser.'):
+        await asyncio.gather(controller.new_page(), controller.new_page(), controller.new_page())