* From the MDN HTML elements reference, extract a list of elements.
($)
| 10 | * From the MDN HTML elements reference, extract a list of elements. |
| 11 | */ |
function extractElements($) {
  // Parameters:
  //   $ - Cheerio-style query function for the loaded reference page. It is
  //       called with a selector string to find cells and with an element to
  //       wrap that element.
  // Returns:
  //   A sorted array of unique element names (without angle brackets).

  // Cell text can carry line breaks and indentation (e.g. a badge rendered on
  // its own line). Collapsing whitespace makes the exclusion lookup below
  // independent of the exact markup formatting.
  const collapseWhitespace = (text) => text.replace(/\s+/g, ' ').trim();

  const excludeElements = new Set([
    'html', 'head', 'body', 'style', 'h1–h6', 'input', 'search',
    // out of scope, different namespaces - but Mozilla added these to the
    // above reference page Jan 2021 so we need to exclude them now.
    // see https://github.com/mdn/content/pull/410
    'svg', 'math',
    // obsolete, non-standard, or deprecated tags
    'image', 'dir', 'tt', 'applet', 'noembed', 'bgsound', 'menu', 'menuitem',
    'noframes',
    // experimental, don't add yet
    'portal',
    'fencedframe',
    'selectedcontent',
    // Geolocation has a weird formatting catch: its cell text also contains
    // the word "Experimental" on a separate line. Stored here in
    // whitespace-collapsed form to match the normalized cell text.
    'geolocation Experimental',
  ]);

  // Elements that are always appended to the scraped list.
  // `<section>` is for some reason missing from the reference tables.
  const addElements = [
    'base',
    'basefont',
    'blink',
    'keygen',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'hgroup',
    'iframe',
    'section',
    'spacer',
  ];

  const scraped = $('td:first-child')
    .toArray()
    // Use the document-bound `$` to wrap each cell rather than a module-level
    // `cheerio` binding, so the function depends only on its argument.
    // A single cell may list several elements separated by ", ".
    .flatMap((el) => collapseWhitespace($(el).text().replace(/[<>]/g, '')).split(', '))
    .filter((element) => !excludeElements.has(element));

  // De-duplicate first, then sort (default string ordering).
  return [...new Set([...scraped, ...addElements])].sort();
}
| 68 | |
| 69 | fetch(refUrl) |