(url, { html, ...opts } = {})
| 11 | |
| 12 | const Parser = { |
| 13 | async parse(url, { html, ...opts } = {}) { |
| 14 | const { |
| 15 | fetchAllPages = true, |
| 16 | fallback = true, |
| 17 | contentType = 'html', |
| 18 | headers = {}, |
| 19 | extend, |
| 20 | customExtractor, |
| 21 | } = opts; |
| 22 | |
| 23 | // if no url was passed and this is the browser version, |
| 24 | // set url to window.location.href and load the html |
| 25 | // from the current page |
| 26 | if (!url && cheerio.browser) { |
| 27 | url = window.location.href; // eslint-disable-line no-undef |
| 28 | html = html || cheerio.html(); |
| 29 | } |
| 30 | |
| 31 | const parsedUrl = URL.parse(url); |
| 32 | |
| 33 | if (!validateUrl(parsedUrl)) { |
| 34 | return { |
| 35 | error: true, |
| 36 | message: |
| 37 | 'The url parameter passed does not look like a valid URL. Please check your URL and try again.', |
| 38 | }; |
| 39 | } |
| 40 | |
| 41 | const $ = await Resource.create(url, html, parsedUrl, headers); |
| 42 | |
| 43 | // If we found an error creating the resource, return that error |
| 44 | if ($.failed) { |
| 45 | return $; |
| 46 | } |
| 47 | |
| 48 | // Add custom extractor via cli. |
| 49 | if (customExtractor) { |
| 50 | addCustomExtractor(customExtractor); |
| 51 | } |
| 52 | |
| 53 | const Extractor = getExtractor(url, parsedUrl, $); |
| 54 | // console.log(`Using extractor for ${Extractor.domain}`); |
| 55 | |
| 56 | // if html still has not been set (i.e., url passed to Parser.parse), |
| 57 | // set html from the response of Resource.create |
| 58 | if (!html) { |
| 59 | html = $.html(); |
| 60 | } |
| 61 | |
| 62 | // Cached value of every meta name in our document. |
| 63 | // Used when extracting title/author/date_published/dek |
| 64 | const metaCache = $('meta') |
| 65 | .map((_, node) => $(node).attr('name')) |
| 66 | .toArray(); |
| 67 | |
| 68 | let extendedTypes = {}; |
| 69 | if (extend) { |
| 70 | extendedTypes = selectExtendedTypes(extend, { $, url, html }); |
nothing calls this directly
no test coverage detected
searching dependent graphs…