()
| 113 | } |
| 114 | |
| 115 | async function main() { |
| 116 | // Build the production assets, to simulate a production deployment |
| 117 | console.log('Running `npm run build` for production assets') |
| 118 | execSync('npm run build', { stdio: 'inherit' }) |
| 119 | console.log('Finish building production assets') |
| 120 | if (dryRun) { |
| 121 | console.log( |
| 122 | '\nThis is a dry run! Creating HTML for redirects and scraping the first 10 pages only.' |
| 123 | ) |
| 124 | } |
| 125 | if (singlePage) { |
| 126 | console.log(`\nScraping HTML for a single page only ${singlePage}.`) |
| 127 | } |
| 128 | console.log(`Enterprise version to archive: ${version}`) |
| 129 | const pageName = |
| 130 | singlePage && singlePage.trim().startsWith('/') ? singlePage.slice(1) : singlePage |
| 131 | const pageMap = singlePage |
| 132 | ? languageKeys.map((key) => `/${key}/enterprise-server@${version}/${pageName}`) |
| 133 | : await loadPageMap() |
| 134 | const permalinksPerVersion = singlePage |
| 135 | ? pageMap |
| 136 | : Object.keys(pageMap).filter((key) => key.includes(`/enterprise-server@${version}`)) |
| 137 | |
| 138 | const urls = dryRun |
| 139 | ? permalinksPerVersion.slice(0, 10).map((href) => `${host}${href}`) |
| 140 | : permalinksPerVersion.map((href) => `${host}${href}`) |
| 141 | |
| 142 | console.log(`Found ${urls.length} pages for version ${version}`) |
| 143 | |
| 144 | if (dryRun || singlePage) { |
| 145 | console.log(`\nScraping html for these pages only:\n${urls.join('\n')}\n`) |
| 146 | } |
| 147 | |
| 148 | // Remove the temporary archival directory so the scrape below writes into a fresh one |
| 149 | rimraf.sync(tmpArchivalDirectory) |
| 150 | |
| 151 | const app = createApp() |
| 152 | const server = http.createServer(app) |
| 153 | server |
| 154 | .listen(port, async () => { |
| 155 | console.log(`started server on ${host}`) |
| 156 | |
| 157 | await scrape({ |
| 158 | urls, |
| 159 | urlFilter: (url) => { |
| 160 | // Do not download assets from other hosts like S3 or octodex.github.com |
| 161 | // (this will keep them as remote references in the downloaded pages) |
| 162 | return url.startsWith(`http://localhost:${port}/`) |
| 163 | }, |
| 164 | directory: tmpArchivalDirectory, |
| 165 | filenameGenerator: 'bySiteStructure', |
| 166 | requestConcurrency: 6, |
| 167 | plugins: [new RewriteAssetPathsPlugin(version, tmpArchivalDirectory)], |
| 168 | }).catch((err) => { |
| 169 | console.error('scraping error') |
| 170 | console.error(err) |
| 171 | }) |
| 172 |
no test coverage detected