MCPcopy
hub / github.com/freshframework/fresh / crawlPage

Function crawlPage

tools/check_links.ts:114–203  ·  view source on GitHub ↗
(pageUrl: URL, referrer: string)

Source from the content-addressed store, hash-verified

112}
113
114async function crawlPage(pageUrl: URL, referrer: string) {
115 const pathname = pageUrl.pathname;
116 if (visitedPages.has(pathname)) return;
117 visitedPages.add(pathname);
118
119 let res: Response;
120 try {
121 res = await fetch(pageUrl, {
122 headers: {
123 accept:
124 "text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8",
125 },
126 });
127 } catch {
128 failedLinks.push({ url: pageUrl.href, status: 0, referrer });
129 return;
130 }
131
132 if (res.status >= 400) {
133 failedLinks.push({ url: pageUrl.href, status: res.status, referrer });
134 await res.body?.cancel();
135 return;
136 }
137
138 if (!res.headers.get("content-type")?.includes("text/html")) {
139 await res.body?.cancel();
140 return;
141 }
142
143 const text = await res.text();
144 Deno.stdout.writeSync(new TextEncoder().encode("."));
145 const doc = new DOMParser().parseFromString(text, "text/html");
146
147 const linkChecks: Array<Promise<void>> = [];
148 const internalPages: Array<{ url: URL; referrer: string }> = [];
149
150 for (const link of doc.querySelectorAll("a")) {
151 const href = link.getAttribute("href")?.trim();
152 if (!href) continue;
153 if (EXCLUDED_PREFIXES.some((p) => href.startsWith(p))) continue;
154 if (href.startsWith("#")) continue;
155
156 let nextUrl: URL;
157 try {
158 nextUrl = new URL(href, pageUrl);
159 } catch {
160 continue;
161 }
162
163 // Strip fragment
164 nextUrl.hash = "";
165 const urlStr = nextUrl.href;
166
167 if (nextUrl.origin === rootUrl.origin) {
168 // Internal link -- crawl the page if it's a docs page
169 if (
170 !visitedPages.has(nextUrl.pathname) &&
171 nextUrl.pathname.startsWith("/docs")

Callers 1

check_links.ts (File) · 0.85

Calls 5

checkUrl (Function) · 0.85
all (Method) · 0.80
add (Method) · 0.65
get (Method) · 0.45
text (Method) · 0.45

Tested by

no test coverage detected