(xml)
| 89 | } |
| 90 | |
/**
 * Extracts page URLs from a sitemap XML document.
 *
 * Collects the text of every `<loc>` element, trims surrounding whitespace,
 * and keeps only URLs that start with `https://javaguide.cn/`, do not contain
 * `/assets/`, and do not end with `/404.html`.
 *
 * The result is truncated to the first `maxUrls` entries when `maxUrls` is
 * greater than zero. `maxUrls` is read from the enclosing scope and is not
 * defined in this function.
 *
 * @param {string} xml - Raw sitemap XML text.
 * @returns {string[]} Matching URLs in document order.
 */
function extractUrlsFromSitemap(xml) {
  // The `s` (dotAll) flag lets `.` match line terminators, so a <loc> element
  // whose content is wrapped or indented across lines is still captured; the
  // trim() below then strips the surrounding whitespace.
  const urls = [...xml.matchAll(/<loc>(.*?)<\/loc>/gs)]
    .map((match) => match[1].trim())
    .filter((url) => url.startsWith("https://javaguide.cn/"))
    .filter((url) => !url.includes("/assets/"))
    .filter((url) => !url.endsWith("/404.html"));

  // A non-positive maxUrls means "no limit".
  return maxUrls > 0 ? urls.slice(0, maxUrls) : urls;
}
| 100 | |
| 101 | function recordFor({ url, title, hierarchy, content, anchor, type, position }) { |
| 102 | const recordUrl = anchor ? `${url}#${anchor}` : url; |