MCPcopy
hub / github.com/extractus/article-extractor / normalize

Function normalize

src/utils/linker.js:148–174  ·  view source on GitHub ↗
(html, url)

Source from the content-addressed store, hash-verified

146 * @returns {string} Normalized HTML string
147 */
export const normalize = (html, url) => {
  // Parse the fragment into a document so attributes can be rewritten via the DOM.
  const doc = new DOMParser().parseFromString(html, 'text/html')

  // Anchors: rewrite every non-empty href through absolutify(url, href) and
  // force the link to open in a new browsing context.
  for (const anchor of Array.from(doc.getElementsByTagName('a'))) {
    const href = anchor.getAttribute('href')
    if (href) {
      anchor.setAttribute('href', absolutify(url, href))
      anchor.setAttribute('target', '_blank')
    }
  }

  // Images: prefer `data-src` (presumably set by lazy-loading markup) over `src`.
  // `||` is used instead of `??` on purpose: getAttribute returns '' for an
  // attribute that is present but empty, and an empty `data-src` must not
  // prevent the real `src` from being picked up and rewritten.
  for (const image of Array.from(doc.getElementsByTagName('img'))) {
    const src = image.getAttribute('data-src') || image.getAttribute('src')
    if (src) {
      image.setAttribute('src', absolutify(url, src))
    }
  }

  // <source> elements: rewrite a non-empty `src` the same way.
  for (const source of Array.from(doc.getElementsByTagName('source'))) {
    const src = source.getAttribute('src')
    if (src) {
      source.setAttribute('src', absolutify(url, src))
    }
  }

  // Serialize each top-level node. Nodes without an `outerHTML` property yield
  // undefined, which Array.prototype.join renders as an empty string.
  return Array.from(doc.childNodes).map((node) => node.outerHTML).join('')
}
175
176/**
177 * Extract the domain from a URL, stripping the www. prefix.

Callers

nothing calls this directly

Calls 1

absolutify — Function · 0.85

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…