MCPcopy
hub / github.com/lissy93/web-check / linkedPagesHandler

Function linkedPagesHandler

api/linked-pages.js:7–50  ·  view source on GitHub ↗
(url)

Source from the content-addressed store, hash-verified

5import { upstreamError } from './_common/upstream.js';
6
/**
 * Fetches a page and lists the pages it links to, split into links that
 * stay on the same origin ("internal") and links that leave it ("external").
 *
 * Each list holds unique absolute URLs, ordered by how many times the link
 * appears in the page (most frequent first).
 *
 * @param {string} url - Absolute URL of the page to scan.
 * @returns {Promise<object>} One of:
 *   - `{ internal: string[], external: string[] }` when at least one link is found;
 *   - `{ skipped: string }` when the HTML contains no usable links;
 *   - whatever `upstreamError` returns when the page cannot be fetched.
 */
const linkedPagesHandler = async (url) => {
  let response;
  try {
    response = await httpGet(url);
  } catch (error) {
    return upstreamError(error, 'Linked pages fetch');
  }

  const html = response.data;
  const $ = cheerio.load(html);
  const internalLinksMap = new Map();
  const externalLinksMap = new Map();

  // Parse the page URL once. If it cannot be parsed, relative links cannot be
  // resolved and nothing can be classified as internal.
  let pageUrl;
  try {
    pageUrl = new URL(url);
  } catch (error) {
    pageUrl = undefined;
  }
  const pageOrigin = pageUrl ? pageUrl.origin : null;

  // Get all links on the page
  $('a[href]').each((i, link) => {
    const href = $(link).attr('href');

    let target;
    try {
      target = new URL(href, pageUrl);
    } catch (error) {
      // Unparsable href: skip it. A bare `return` (undefined) keeps cheerio
      // iterating; only an explicit `false` would stop the loop.
      return;
    }

    // Ignore mailto:, tel:, javascript: and other non-web schemes
    if (target.protocol !== 'http:' && target.protocol !== 'https:') {
      return;
    }

    // Compare origins rather than string prefixes, so that look-alike hosts
    // (example.com.evil.com) are not mistaken for internal links and
    // same-site links outside the scanned path are not dropped.
    const linksMap = target.origin === pageOrigin ? internalLinksMap : externalLinksMap;
    const absoluteUrl = target.href;
    const count = linksMap.get(absoluteUrl) || 0;
    linksMap.set(absoluteUrl, count + 1);
  });

  // Sort by most occurrences, remove duplicates, and convert to array
  const toSortedUrls = (linksMap) =>
    [...linksMap.entries()]
      .sort((a, b) => b[1] - a[1])
      .map((entry) => entry[0]);

  const internalLinks = toSortedUrls(internalLinksMap);
  const externalLinks = toSortedUrls(externalLinksMap);

  if (!internalLinks.length && !externalLinks.length) {
    return {
      skipped:
        'No internal or external links found in the page HTML. ' +
        'This often happens with single-page apps that render content client-side.',
    };
  }
  return { internal: internalLinks, external: externalLinks };
};
51
// Pass the handler through the shared `middleware` wrapper and expose the
// result as both a named and the default export.
// NOTE(review): what `middleware` adds is not visible from this file — confirm.
const handler = middleware(linkedPagesHandler);

export { handler };
export default handler;

Callers

nothing calls this directly

Calls 2

httpGetFunction · 0.90
upstreamErrorFunction · 0.90

Tested by

no test coverage detected