Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 49 additions & 14 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from time import time
from typing import Awaitable, Callable, Dict, Optional, Tuple, Type, TypeVar, Union

from packaging.version import Version
from playwright._impl._errors import TargetClosedError
from playwright.async_api import (
BrowserContext,
Expand All @@ -23,15 +24,15 @@
Response as PlaywrightResponse,
Route,
)
from scrapy import Spider, signals
from scrapy import Spider, signals, __version__ as scrapy_version
from scrapy.core.downloader.handlers.http11 import HTTP11DownloadHandler
from scrapy.crawler import Crawler
from scrapy.exceptions import NotSupported, ScrapyDeprecationWarning
from scrapy.http import Request, Response
from scrapy.http.headers import Headers
from scrapy.responsetypes import responsetypes
from scrapy.settings import Settings
from scrapy.utils.defer import deferred_from_coro
from scrapy.utils.defer import deferred_from_coro, maybe_deferred_to_future
from scrapy.utils.misc import load_object
from scrapy.utils.reactor import verify_installed_reactor
from twisted.internet.defer import Deferred, inlineCallbacks
Expand Down Expand Up @@ -62,6 +63,9 @@
DEFAULT_CONTEXT_NAME = "default"
PERSISTENT_CONTEXT_PATH_KEY = "user_data_dir"

_SCRAPY_VERSION = Version(scrapy_version)
_ASYNC_HANDLER_API = _SCRAPY_VERSION >= Version("2.14.0")


@dataclass
class BrowserContextWrapper:
Expand Down Expand Up @@ -138,7 +142,10 @@ class ScrapyPlaywrightDownloadHandler(HTTP11DownloadHandler):
playwright: Optional[AsyncPlaywright] = None

def __init__(self, crawler: Crawler) -> None:
super().__init__(settings=crawler.settings, crawler=crawler)
if _ASYNC_HANDLER_API:
super().__init__(crawler=crawler) # pylint: disable=no-value-for-parameter
else:
super().__init__(settings=crawler.settings, crawler=crawler)
verify_installed_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
crawler.signals.connect(self._engine_started, signals.engine_started)
self.stats = crawler.stats
Expand Down Expand Up @@ -348,13 +355,24 @@ def _set_max_concurrent_context_count(self):
"playwright/context_count/max_concurrent", len(self.context_wrappers)
)

# Legacy shutdown path (pre-change side of this diff): Scrapy awaits a
# twisted Deferred here, so close() is an inlineCallbacks generator.
# NOTE(review): leading indentation appears stripped by the diff scrape;
# in the real file this is a method of ScrapyPlaywrightDownloadHandler.
@inlineCallbacks
def close(self) -> Deferred:
logger.info("Closing download handler")
# Let the parent HTTP/1.1 handler clean up first, then tear down
# Playwright resources via the coroutine-backed _close().
yield super().close()
yield self._deferred_from_coro(self._close())
# Stop the threaded asyncio loop adapter keyed by this handler's id —
# presumably only started when use_threaded_loop is set; confirm in config.
if self.config.use_threaded_loop:
_ThreadedLoopAdapter.stop(id(self))
# Version-gated close(): Scrapy >= 2.14 calls the handler's close() as a
# native coroutine, while older Scrapy expects a twisted Deferred. The
# branch is chosen once at class-definition time from _ASYNC_HANDLER_API.
if _ASYNC_HANDLER_API:

# Async variant (Scrapy >= 2.14): awaitable close() overriding the
# Deferred-returning parent method, hence the pylint suppression.
async def close(self) -> None:  # pylint: disable=invalid-overridden-method
logger.info("Closing download handler")
await super().close()
# _deferred_from_coro returns a Deferred; bridge it back into the
# coroutine world with maybe_deferred_to_future before awaiting.
await maybe_deferred_to_future(self._deferred_from_coro(self._close()))
# Stop the threaded asyncio loop adapter keyed by this handler's id.
if self.config.use_threaded_loop:
_ThreadedLoopAdapter.stop(id(self))

else:

# Legacy variant (Scrapy < 2.14): Deferred-based generator close().
@inlineCallbacks
def close(self) -> Deferred:
logger.info("Closing download handler")
yield super().close()
yield self._deferred_from_coro(self._close())
if self.config.use_threaded_loop:
_ThreadedLoopAdapter.stop(id(self))

async def _close(self) -> None:
with suppress(TargetClosedError):
Expand All @@ -368,10 +386,27 @@ async def _close(self) -> None:
if self.playwright:
await self.playwright.stop()

# Legacy dispatch (pre-change side of this diff): requests flagged with
# meta["playwright"] go through the Playwright coroutine; everything else
# falls back to the stock HTTP/1.1 handler. Returns a twisted Deferred.
def download_request(self, request: Request, spider: Spider) -> Deferred:
if request.meta.get("playwright"):
return self._deferred_from_coro(self._download_request(request, spider))
return super().download_request(request, spider)
# Version-gated download_request(): Scrapy >= 2.14 invokes the handler with
# a (request)-only async signature, while older Scrapy uses the synchronous
# (request, spider) -> Deferred contract. Gated once at class-definition
# time on _ASYNC_HANDLER_API.
if _ASYNC_HANDLER_API:

# Async variant: the spider argument is gone from the signature, so it
# is recovered from the crawler — NOTE(review): assumes self._crawler is
# set by the parent handler before any download; confirm against
# HTTP11DownloadHandler in Scrapy 2.14.
async def download_request(  # pylint: disable=arguments-differ,invalid-overridden-method
self, request: Request
) -> Response:
if request.meta.get("playwright"):
# Bridge the Deferred produced by _deferred_from_coro back into
# an awaitable for the coroutine-based handler API.
return await maybe_deferred_to_future(
self._deferred_from_coro(self._download_request(request, self._crawler.spider))
)
# Non-Playwright requests fall through to the stock handler.
return await super().download_request(  # pylint: disable=no-value-for-parameter
request
)

else:

# Legacy variant: synchronous dispatch returning a twisted Deferred;
# type-ignored because the signature differs from the async branch.
def download_request(  # type: ignore[misc]
self, request: Request, spider: Spider
) -> Deferred:
if request.meta.get("playwright"):
return self._deferred_from_coro(self._download_request(request, spider))
return super().download_request(request, spider)

async def _download_request(self, request: Request, spider: Spider) -> Response:
counter = 0
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
],
python_requires=">=3.9",
install_requires=[
"packaging>=20.0",
"scrapy>=2.0,!=2.4.0",
"playwright>=1.15",
],
Expand Down
Loading