MCPcopy
hub / github.com/midudev/libros-programacion-gratis / parseAdvicePage

Function parseAdvicePage

web/scripts/import-97-things.mjs:74–101  ·  view source on GitHub ↗
(html, pageUrl, localAdvicePaths)

Source from the content-addressed store, hash-verified

72};
73
/**
 * Extracts the structured fields of a single advice page from its raw HTML.
 *
 * The page is expected to contain an `<h2>` heading of the form
 * `<h2>TITLE<br><small>Autor: AUTHOR</small></h2>`, followed by the advice
 * body, followed by a paragraph that starts with `Traducción:`.
 *
 * @param {string} html - Raw HTML of the advice page.
 * @param {string} pageUrl - URL of the page; used in error messages and
 *   forwarded to `normalizeHtml`.
 * @param {*} localAdvicePaths - Forwarded unchanged to `normalizeHtml`.
 * @returns {{
 *   title: string,
 *   author: string,
 *   translator: (string|undefined),
 *   originalHref: (string|undefined),
 *   contentHtml: *,
 *   excerpt: *
 * }} The parsed advice. `translator` and `originalHref` are `undefined`
 *   when the corresponding markup is not found after the translator marker.
 * @throws {Error} If the heading or the translator marker cannot be found.
 */
const parseAdvicePage = (html, pageUrl, localAdvicePaths) => {
  const headingPattern = /<h2>([\s\S]*?)<br>\s*<small>Autor:\s*([\s\S]*?)<\/small><\/h2>/;
  const heading = html.match(headingPattern);

  // Check the match object itself rather than `heading.index`: a heading that
  // sits at offset 0 has a falsy index but is still a perfectly valid match.
  if (!heading) {
    throw new Error(`Could not find advice heading in ${pageUrl}`);
  }

  const contentStart = heading.index + heading[0].length;
  const translationStart = html.indexOf('<p>\nTraducción:', heading.index);

  if (translationStart === -1) {
    throw new Error(`Could not find translator marker in ${pageUrl}`);
  }

  // The advice body is everything between the heading and the translator marker.
  const rawContent = html.slice(contentStart, translationStart);
  // Translator credit and the link to the original are looked up from the marker onwards.
  const rest = html.slice(translationStart);
  const translator = rest.match(/<p>\s*Traducción:\s*([^<]+?)\s*<\/p>/)?.[1]?.trim();
  const originalHref = rest.match(/<a href="([^"]+)">Leer contribución original<\/a>/)?.[1];

  return {
    title: stripTags(heading[1]),
    author: stripTags(heading[2]),
    translator: translator ? stripTags(translator) : undefined,
    originalHref: originalHref ? decodeEntities(originalHref) : undefined,
    contentHtml: normalizeHtml(rawContent, pageUrl, localAdvicePaths),
    excerpt: truncateAtWord(rawContent),
  };
};
102
103const toTypeScript = (advices) => `export type ProgrammerAdvice = {
104 number: number;

Callers 1

Calls 4

stripTagsFunction · 0.85
decodeEntitiesFunction · 0.85
normalizeHtmlFunction · 0.85
truncateAtWordFunction · 0.70

Tested by

no test coverage detected