(html, base)
| 32 | * @param {string} base |
| 33 | */ |
| 34 | export function crawl(html, base) { |
| 35 | /** @type {string[]} */ |
| 36 | const ids = []; |
| 37 | |
| 38 | /** @type {string[]} */ |
| 39 | const hrefs = []; |
| 40 | |
| 41 | let i = 0; |
| 42 | main: while (i < html.length) { |
| 43 | const char = html[i]; |
| 44 | |
| 45 | if (char === '<') { |
| 46 | if (html[i + 1] === '!') { |
| 47 | i += 2; |
| 48 | |
| 49 | if (html.slice(i, i + DOCTYPE.length).toUpperCase() === DOCTYPE) { |
| 50 | i += DOCTYPE.length; |
| 51 | while (i < html.length) { |
| 52 | if (html[i++] === '>') { |
| 53 | continue main; |
| 54 | } |
| 55 | } |
| 56 | } |
| 57 | |
| 58 | // skip cdata |
| 59 | if (html.slice(i, i + CDATA_OPEN.length) === CDATA_OPEN) { |
| 60 | i += CDATA_OPEN.length; |
| 61 | while (i < html.length) { |
| 62 | if (html.slice(i, i + CDATA_CLOSE.length) === CDATA_CLOSE) { |
| 63 | i += CDATA_CLOSE.length; |
| 64 | continue main; |
| 65 | } |
| 66 | |
| 67 | i += 1; |
| 68 | } |
| 69 | } |
| 70 | |
| 71 | // skip comments |
| 72 | if (html.slice(i, i + COMMENT_OPEN.length) === COMMENT_OPEN) { |
| 73 | i += COMMENT_OPEN.length; |
| 74 | while (i < html.length) { |
| 75 | if (html.slice(i, i + COMMENT_CLOSE.length) === COMMENT_CLOSE) { |
| 76 | i += COMMENT_CLOSE.length; |
| 77 | continue main; |
| 78 | } |
| 79 | |
| 80 | i += 1; |
| 81 | } |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | // parse opening tags |
| 86 | const start = ++i; |
| 87 | if (TAG_OPEN.test(html[start])) { |
| 88 | while (i < html.length) { |
| 89 | if (!TAG_CHAR.test(html[i])) { |
| 90 | break; |
| 91 | } |
no test coverage detected