(url)
| 73 | } |
| 74 | |
/**
 * Loads the HTML for a page URL, either remotely or from a local directory.
 *
 * When the file-level `sourceDir` is falsy, the URL is handed to the
 * file-level `fetchText` helper. Otherwise the URL's pathname is mapped to a
 * file below `sourceDir`:
 *   - "/"                      -> "index.html"
 *   - a pathname ending in "/" -> "<decoded path>/index.html"
 *   - anything else            -> the percent-decoded path without its leading "/"
 *
 * @param {string} url - Absolute URL of the page; parsed with `new URL(url)`.
 * @returns {Promise<string>} The result of `fetchText(url)`, or the file read
 *   with the "utf8" encoding (presumably `readFile` from `node:fs/promises`).
 * @throws {TypeError} If `url` cannot be parsed by `new URL`.
 * @throws {URIError} If the pathname contains a malformed percent-escape.
 * @throws {Error} If the decoded pathname would resolve outside `sourceDir`.
 */
async function readPageHtml(url) {
  if (!sourceDir) {
    return fetchText(url);
  }

  const { pathname } = new URL(url);
  const decodedPath = decodeURIComponent(pathname.slice(1));

  // Directory-style URLs (including the site root) are served from index.html.
  const relativePath = pathname.endsWith("/")
    ? path.join(decodedPath, "index.html")
    : decodedPath;

  const filePath = path.join(sourceDir, relativePath);

  // Percent-encoded separators (e.g. "%2F..%2F") only become "../" segments
  // after decoding, so containment must be verified on the decoded path.
  const offset = path.relative(path.resolve(sourceDir), path.resolve(filePath));
  const escapesSourceDir =
    offset === ".." ||
    offset.startsWith(`..${path.sep}`) ||
    path.isAbsolute(offset);
  if (escapesSourceDir) {
    throw new Error(`Refusing to read outside the source directory: ${url}`);
  }

  return readFile(filePath, "utf8");
}
| 90 | |
| 91 | function extractUrlsFromSitemap(xml) { |
| 92 | const urls = [...xml.matchAll(/<loc>(.*?)<\/loc>/g)] |