()
| 219 | } |
| 220 | |
| 221 | async function main() { |
| 222 | console.log( |
| 223 | sourceDir |
| 224 | ? `Reading local sitemap: ${path.join(sourceDir, "sitemap.xml")}` |
| 225 | : `Reading sitemap: ${sitemapUrl}`, |
| 226 | ); |
| 227 | const sitemap = await readSitemap(); |
| 228 | const urls = extractUrlsFromSitemap(sitemap); |
| 229 | |
| 230 | console.log(`Indexing ${urls.length} URL(s) into ${indexName}`); |
| 231 | |
| 232 | const pageRecords = await mapConcurrent( |
| 233 | urls, |
| 234 | async (url, index) => { |
| 235 | try { |
| 236 | const html = await readPageHtml(url); |
| 237 | const records = extractRecords(url, html); |
| 238 | console.log(`${index + 1}/${urls.length} ${records.length} ${url}`); |
| 239 | return records; |
| 240 | } catch (error) { |
| 241 | console.warn( |
| 242 | `${index + 1}/${urls.length} skipped ${url}: ${error.message}`, |
| 243 | ); |
| 244 | return []; |
| 245 | } |
| 246 | }, |
| 247 | concurrency, |
| 248 | ); |
| 249 | |
| 250 | const records = pageRecords.flat(); |
| 251 | console.log(`Extracted ${records.length} record(s)`); |
| 252 | |
| 253 | if (records.length === 0) { |
| 254 | throw new Error("No records extracted; aborting Algolia update."); |
| 255 | } |
| 256 | |
| 257 | await algoliaRequest(`/1/indexes/${encodeURIComponent(indexName)}/clear`, {}); |
| 258 | |
| 259 | await algoliaRequest( |
| 260 | `/1/indexes/${encodeURIComponent(indexName)}/settings`, |
| 261 | { |
| 262 | attributesForFaceting: ["type", "lang", "language", "version", "tags"], |
| 263 | attributesToRetrieve: [ |
| 264 | "hierarchy", |
| 265 | "content", |
| 266 | "anchor", |
| 267 | "url", |
| 268 | "url_without_anchor", |
| 269 | "type", |
| 270 | ], |
| 271 | attributesToHighlight: ["hierarchy", "content"], |
| 272 | attributesToSnippet: ["content:10"], |
| 273 | searchableAttributes: [ |
| 274 | "unordered(hierarchy.lvl0)", |
| 275 | "unordered(hierarchy.lvl1)", |
| 276 | "unordered(hierarchy.lvl2)", |
| 277 | "unordered(hierarchy.lvl3)", |
| 278 | "unordered(hierarchy.lvl4)", |
no test coverage detected