Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 30 additions & 24 deletions src/crawlee/browsers/_playwright_browser_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __init__(
self._last_page_opened_at = datetime.now(timezone.utc)

self._total_opened_pages = 0
self._opening_pages_count = 0

self._context_creation_lock: Lock | None = None

Expand Down Expand Up @@ -119,7 +120,7 @@ def idle_time(self) -> timedelta:
@property
@override
def has_free_capacity(self) -> bool:
    """Return whether this browser may accept another page.

    Pages whose creation is still in flight (`_opening_pages_count`) are counted together with
    the pages that are already open, so concurrent callers cannot all pass the check before any
    of them has finished opening a page.
    """
    occupied_slots = self.pages_count + self._opening_pages_count
    return occupied_slots < self._max_open_pages_per_browser

@property
@override
Expand Down Expand Up @@ -154,30 +155,35 @@ async def new_page(
if not self.has_free_capacity:
raise ValueError('Cannot open more pages in this browser.')

if self._use_incognito_pages:
# In incognito there is exactly one context per one page. Create new context for each new page.
new_context = await self._create_browser_context(
browser_new_context_options=browser_new_context_options,
proxy_info=proxy_info,
)
page = await new_context.new_page()
else:
async with await self._get_context_creation_lock():
if not self._browser_context:
self._browser_context = await self._create_browser_context(
browser_new_context_options=browser_new_context_options,
proxy_info=proxy_info,
)
page = await self._browser_context.new_page()

# Handle page close event
page.on(event='close', f=self._on_page_close)

# Update internal state
self._pages.append(page)
self._last_page_opened_at = datetime.now(timezone.utc)
self._opening_pages_count += 1

self._total_opened_pages += 1
try:
if self._use_incognito_pages:
# In incognito there is exactly one context per one page. Create new context for each new page.
new_context = await self._create_browser_context(
browser_new_context_options=browser_new_context_options,
proxy_info=proxy_info,
)
page = await new_context.new_page()
else:
async with await self._get_context_creation_lock():
if not self._browser_context:
self._browser_context = await self._create_browser_context(
browser_new_context_options=browser_new_context_options,
proxy_info=proxy_info,
)
page = await self._browser_context.new_page()

# Handle page close event
page.on(event='close', f=self._on_page_close)

# Update internal state
self._pages.append(page)
self._last_page_opened_at = datetime.now(timezone.utc)

self._total_opened_pages += 1
finally:
self._opening_pages_count -= 1
return page

@override
Expand Down
15 changes: 15 additions & 0 deletions tests/unit/browsers/test_playwright_browser_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,18 @@ async def delayed_launch_persistent_context(*args: Any, **kwargs: Any) -> Any:
await asyncio.gather(controller.new_page(), controller.new_page())

assert mocked_context_launcher.call_count == 1


async def test_max_open_pages_limit_on_concurrent_creation(controller: PlaywrightBrowserController) -> None:
    """Check that two pages requested concurrently are both opened and counted by the controller."""
    opened_pages = await asyncio.gather(*(controller.new_page() for _ in range(2)))

    assert controller.pages_count == 2

    # Close the pages explicitly so they do not stay open after the test.
    for opened_page in opened_pages:
        await opened_page.close()


async def test_max_open_pages_limit_error_on_concurrent_creation(controller: PlaywrightBrowserController) -> None:
    """Test that max open pages limit is respected during concurrent page creation."""
    # Collect every outcome instead of letting the first error propagate: with the default
    # `return_exceptions=False`, `gather` re-raises immediately while the remaining `new_page`
    # calls keep running in the background, so their pages would never be awaited or closed.
    outcomes = await asyncio.gather(
        controller.new_page(),
        controller.new_page(),
        controller.new_page(),
        return_exceptions=True,
    )
    errors = [outcome for outcome in outcomes if isinstance(outcome, BaseException)]
    pages = [outcome for outcome in outcomes if not isinstance(outcome, BaseException)]

    try:
        assert errors, 'Expected at least one page creation to be rejected.'
        with pytest.raises(ValueError, match=r'Cannot open more pages in this browser.'):
            raise errors[0]
    finally:
        # Release the pages that were opened successfully, even if the assertions above fail.
        for page in pages:
            await page.close()
Loading