MCPcopy
hub / github.com/extractus/article-extractor / extract

Function extract

src/main.js:20–37  ·  view source on GitHub ↗
(input, parserOptions = {}, fetchOptions = {})

Source from the content-addressed store, hash-verified

18 * @returns {Promise<ArticleData|null>} Extracted article data or null
19 */
export const extract = async (input, parserOptions = {}, fetchOptions = {}) => {
  if (!isString(input)) {
    throw new Error('Input must be a string')
  }

  // Anything that is not a valid URL is handed to the parser as raw HTML,
  // with no source URL attached.
  if (!isValidUrl(input)) {
    return parseFromHtml(input, null, parserOptions)
  }

  const buffer = await retrieve(input, fetchOptions)

  // Provisional decode (Buffer#toString defaults to UTF-8), used only to
  // detect an empty response and to give getCharset() something to inspect.
  const text = buffer ? Buffer.from(buffer).toString().trim() : ''
  if (!text) {
    return null
  }

  // Re-decode the raw bytes with the charset reported by getCharset().
  // The TextDecoder constructor throws a RangeError for an encoding label it
  // does not recognise; instead of failing the whole extraction in that case,
  // fall back to the default UTF-8 decoder.
  // NOTE(review): getCharset() is defined elsewhere - confirm whether it can
  // actually return an unvalidated label taken from the page.
  const charset = getCharset(text)
  let decoder
  try {
    decoder = new TextDecoder(charset)
  } catch (err) {
    if (!(err instanceof RangeError)) {
      throw err
    }
    decoder = new TextDecoder()
  }

  const html = decoder.decode(buffer)
  return parseFromHtml(html, input, parserOptions)
}
38
39/**
40 * Extract article data from an HTML string directly.

Callers 2

extractFromUrl — Function · 0.90
main.test.js — File · 0.90

Calls 1

getCharset — Function · 0.90

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…