MCPcopy
hub / github.com/marktext/marktext / normalizeHtml

Function normalizeHtml

packages/muya/test/spec/runner.ts:55–120  ·  view source on GitHub ↗
(html: string)

Source from the content-addressed store, hash-verified

53 * spec examples depend on.
54 */
55export function normalizeHtml(html: string): string {
56 let out = html;
57
58 // Normalise self-closing void elements to `<br />` style consistently.
59 const voidTags = ['br', 'hr', 'img', 'wbr', 'input'];
60 for (const tag of voidTags) {
61 const re = new RegExp(`<${tag}((?:\\s+[^>]*?)?)\\s*/?\\s*>`, 'gi');
62 out = out.replace(re, (_m, attrs: string) => {
63 const trimmed = attrs.trim();
64 return trimmed ? `<${tag} ${trimmed} />` : `<${tag} />`;
65 });
66 }
67
68 // Sort attributes inside tags alphabetically. Skip closing tags and
69 // self-closed void tags we already handled.
70 //
71 // The attrs portion is anchored to start with a valid attr-name char
72 // (`[a-z_:]` after the `i` flag). That excludes the `/` of `<br />`,
73 // which would otherwise be captured as attrs and dropped by the
74 // attr-name tokenizer, undoing the void-tag normalisation above. The
75 // `[ \t\n\r]+` separator (instead of `\s+`) plus this anchor also
76 // prevents polynomial backtracking against the trailing `[^>]*?`.
77 out = out.replace(
78 /<([a-z][\w-]*)[ \t\n\r]+([a-z_:][^>]*?)(\/?)>/gi,
79 (_m, name: string, attrs: string, selfClose: string) => {
80 // Parse attrs of the form: name | name="value" | name='value'
81 const parsed: Array<[string, string]> = [];
82 const re = /([a-z_:][\w:.-]*)(?:\s*=\s*("[^"]*"|'[^']*'|[^\s"'>`]+))?/gi;
83 let m: RegExpExecArray | null;
84 // eslint-disable-next-line no-cond-assign
85 while ((m = re.exec(attrs))) {
86 parsed.push([m[1], m[2] ?? '']);
87 }
88 parsed.sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
89 const rebuilt = parsed
90 .map(([k, v]) => (v === '' ? k : `${k}=${v}`))
91 .join(' ');
92 const sc = selfClose ? ' /' : '';
93 return rebuilt
94 ? `<${name} ${rebuilt}${sc}>`
95 : `<${name}${sc}>`;
96 },
97 );
98
99 // Collapse whitespace between adjacent tag boundaries. Crucially:
100 // `>foo\n<` does NOT match because `foo` isn't whitespace — content is
101 // preserved. Only whitespace-only `>WS<` collapses.
102 out = out.replace(/>[ \t\n\r]+</g, '><');
103
104 // Strip whitespace immediately following a void self-closing tag. This
105 // covers cmark's `<br>\nfoo` vs marked's `<br>foo`: after the void-tag
106 // normalisation above both inputs reach `<br />`, then we strip the
107 // following whitespace run.
108 out = out.replace(
109 /(<(?:br|hr|wbr|img|input)\s+\/>)[ \t\n\r]+/gi,
110 '$1',
111 );
112

Callers 2

runner.spec.ts — File · 0.90
compareHtml — Function · 0.85

Calls 4

push — Method · 0.80
join — Method · 0.80
replace — Method · 0.45
map — Method · 0.45

Tested by

no test coverage detected