($: CheerioAPI, selector = 'a', baseUrl = '')
| 96 | * @return An array of absolute URLs |
| 97 | */ |
export function extractUrlsFromCheerio($: CheerioAPI, selector = 'a', baseUrl = ''): string[] {
    // A <base href="..."> element in the document, once passed through tryAbsoluteURL together with
    // the supplied baseUrl, overrides that baseUrl whenever the result is truthy.
    const base = $('base').attr('href');
    const effectiveBaseUrl = (base && tryAbsoluteURL(base, baseUrl)) || baseUrl;

    // Matches a leading URI scheme ("http:", "mailto:", ...). Schemes are case-insensitive
    // (RFC 3986, section 3.1), so "HTTP://example.com" must count as absolute too.
    // Pattern taken from the 'is-absolute-url' package; hoisted so it is built once per call.
    const absoluteUrlPattern = /^[a-z][a-z0-9+.-]*:/i;

    return $(selector)
        .map((_i, el) => $(el).attr('href'))
        .get()
        // Skip matched elements that have no (or an empty) href attribute.
        .filter(Boolean)
        .map((href) => {
            // Throw a meaningful error when only a relative URL would be extracted
            // instead of waiting for the Request to fail later.
            if (!effectiveBaseUrl && !absoluteUrlPattern.test(href)) {
                throw new Error(
                    `An extracted URL: ${href} is relative and baseUrl is not set. ` +
                        'Provide a baseUrl to automatically resolve relative URLs.',
                );
            }
            // Without any base URL the href is already absolute (checked above) and is returned as-is.
            return effectiveBaseUrl ? tryAbsoluteURL(href, effectiveBaseUrl) : href;
        })
        // Drop falsy results from tryAbsoluteURL (presumably hrefs it could not resolve; confirm against the helper).
        .filter(Boolean) as string[];
}
no test coverage detected
searching dependent graphs…