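Create a new `Request` instance from a URL. This is the recommended constructor for creating new `Request` instances. It generates a `Request` object from a given URL with additional options to customize HTTP method, payload, unique key, and other request properties. If no `unique_key` or `id` is provided, they are computed automatically based on the URL, method and payload. It depends on the `keep_url_fragment` and `use_extended_unique_key` flags.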
(
cls,
url: str,
*,
method: HttpMethod = 'GET',
headers: HttpHeaders | dict[str, str] | None = None,
payload: HttpPayload | str | None = None,
label: str | None = None,
session_id: str | None = None,
unique_key: str | None = None,
keep_url_fragment: bool = False,
use_extended_unique_key: bool = False,
always_enqueue: bool = False,
enqueue_strategy: EnqueueStrategy | None = None,
max_retries: int | None = None,
**kwargs: Any,
)
| 242 | |
| 243 | @classmethod |
| 244 | def from_url( |
| 245 | cls, |
| 246 | url: str, |
| 247 | *, |
| 248 | method: HttpMethod = 'GET', |
| 249 | headers: HttpHeaders | dict[str, str] | None = None, |
| 250 | payload: HttpPayload | str | None = None, |
| 251 | label: str | None = None, |
| 252 | session_id: str | None = None, |
| 253 | unique_key: str | None = None, |
| 254 | keep_url_fragment: bool = False, |
| 255 | use_extended_unique_key: bool = False, |
| 256 | always_enqueue: bool = False, |
| 257 | enqueue_strategy: EnqueueStrategy | None = None, |
| 258 | max_retries: int | None = None, |
| 259 | **kwargs: Any, |
| 260 | ) -> Self: |
| 261 | """Create a new `Request` instance from a URL. |
| 262 | |
| 263 | This is the recommended constructor for creating new `Request` instances. It generates a `Request` object from |
| 264 | a given URL with additional options to customize HTTP method, payload, unique key, and other request |
| 265 | properties. If no `unique_key` or `id` is provided, they are computed automatically based on the URL, |
| 266 | method and payload. It depends on the `keep_url_fragment` and `use_extended_unique_key` flags. |
| 267 | |
| 268 | Args: |
| 269 | url: The URL of the request. |
| 270 | method: The HTTP method of the request. |
| 271 | headers: The HTTP headers of the request. |
| 272 | payload: The data to be sent as the request body. Typically used with 'POST' or 'PUT' requests. |
| 273 | label: A custom label to differentiate between request types. This is stored in `user_data`, and it is |
| 274 | used for request routing (different requests go to different handlers). |
| 275 | session_id: ID of a specific `Session` to which the request will be strictly bound. |
| 276 | If the session becomes unavailable when the request is processed, a `RequestCollisionError` will be |
| 277 | raised. |
| 278 | unique_key: A unique key identifying the request. If not provided, it is automatically computed based on |
| 279 | the URL and other parameters. Requests with the same `unique_key` are treated as identical. |
| 280 | keep_url_fragment: Determines whether the URL fragment (e.g., `#section`) should be included in |
| 281 | the `unique_key` computation. This is only relevant when `unique_key` is not provided. |
| 282 | use_extended_unique_key: Determines whether to include the HTTP method, session ID and payload in the |
| 283 | `unique_key` computation. This is only relevant when `unique_key` is not provided. |
| 284 | always_enqueue: If set to `True`, the request will be enqueued even if it is already present in the queue. |
| 285 | Using this is not allowed when a custom `unique_key` is also provided and will result in a `ValueError`. |
| 286 | enqueue_strategy: The strategy that will be used for enqueuing the request. |
| 287 | max_retries: Maximum number of retries for this request. Allows overriding the global `max_request_retries` |
| 288 | option of `BasicCrawler`. |
| 289 | **kwargs: Additional request properties. |
| 290 | """ |
| 291 | if unique_key is not None and always_enqueue: |
| 292 | raise ValueError('`always_enqueue` cannot be used with a custom `unique_key`') |
| 293 | |
| 294 | if isinstance(headers, dict) or headers is None: |
| 295 | headers = HttpHeaders(headers or {}) |
| 296 | |
| 297 | if isinstance(payload, str): |
| 298 | payload = payload.encode() |
| 299 | |
| 300 | unique_key = unique_key or compute_unique_key( |
| 301 | url, |