| 223 | }; |
| 224 | |
| 225 | export default class ChromiumScrapePostRoute extends BrowserHTTPRoute { |
| 226 | name = BrowserlessRoutes.ChromiumScrapePostRoute; /* Identifier for this route, taken from BrowserlessRoutes. */ |
| 227 | accepts = [contentTypes.json]; /* Only JSON is listed; presumably the request content types this route accepts - TODO confirm against BrowserHTTPRoute. */ |
| 228 | auth = true; /* NOTE(review): presumably marks the route as requiring authentication - confirm against BrowserHTTPRoute. */ |
| 229 | browser = ChromiumCDP; /* Browser class tied to this route; presumably what backs the `browser` argument passed to handler - confirm. */ |
| 230 | concurrency = true; /* NOTE(review): presumably makes requests count toward concurrency limits - confirm against BrowserHTTPRoute. */ |
| 231 | contentTypes = [contentTypes.json]; /* Only JSON is listed; presumably the advertised response types. NOTE(review): handler sets the Content-Type header from the request's Accept header, falling back to contentTypes.html when Accept is missing or contains '*' - confirm this mismatch is intended. */ |
| 232 | description = dedent(` |
| 233 | A JSON-based API that returns text, html, and meta-data from a given list of selectors. |
| 234 | Debugging information is available by sending in the appropriate flags in the "debugOpts" |
| 235 | property. Responds with an array of JSON objects. |
| 236 | `); /* Human-readable summary of the route, passed through dedent. */ |
| 237 | method = Methods.post; /* This route is declared with the POST method. */ |
| 238 | path = [HTTPRoutes.chromiumScrape, HTTPRoutes.scrape]; /* Two path constants are listed, so the same handler is declared for both. */ |
| 239 | tags = [APITags.browserAPI]; /* Tagged as browserAPI; presumably used to group routes in generated API docs - TODO confirm. */ |
| 240 | async handler( |
| 241 | req: Request, |
| 242 | res: ServerResponse, |
| 243 | logger: Logger, |
| 244 | browser: BrowserInstance, |
| 245 | ) { |
| 246 | logger.debug( |
| 247 | 'Scrape API invoked with body:', |
| 248 | redactSensitiveBodyFields(req.body), |
| 249 | ); |
| 250 | const contentType = |
| 251 | !req.headers.accept || req.headers.accept?.includes('*') |
| 252 | ? contentTypes.html |
| 253 | : req.headers.accept; |
| 254 | |
| 255 | if (!req.body) { |
| 256 | throw new BadRequest(`Couldn't parse JSON body`); |
| 257 | } |
| 258 | |
| 259 | res.setHeader('Content-Type', contentType); |
| 260 | |
| 261 | const { |
| 262 | bestAttempt = false, |
| 263 | url, |
| 264 | gotoOptions, |
| 265 | authenticate, |
| 266 | addScriptTag = [], |
| 267 | addStyleTag = [], |
| 268 | cookies = [], |
| 269 | debugOpts, |
| 270 | elements, |
| 271 | emulateMediaType, |
| 272 | html, |
| 273 | rejectRequestPattern = [], |
| 274 | requestInterceptors = [], |
| 275 | rejectResourceTypes = [], |
| 276 | setExtraHTTPHeaders, |
| 277 | setJavaScriptEnabled, |
| 278 | userAgent, |
| 279 | viewport, |
| 280 | waitForTimeout, |
| 281 | waitForFunction, |
| 282 | waitForSelector, |