(countryPath, offset)
| 360 | |
| 361 | |
/**
 * Crawls the page of every entry listed in the JSON file at `countryPath`
 * and appends the records produced by `parseTown` to the JSON array stored
 * in `TownPath`.
 *
 * Entries are processed one after another, with a `sleep(500)` pause after
 * each request to throttle the crawl. After every successfully stored entry
 * the absolute index (`offset + index`) is logged, so an interrupted crawl
 * can be resumed by passing the next index as `offset`.
 *
 * NOTE(review): this assumes `requestSync` invokes its callback before it
 * returns; otherwise the read-modify-write of `TownPath` below would be
 * interleaved with later iterations — confirm against its implementation.
 *
 * @param {string} countryPath - Path of a JSON file holding an array of
 *   objects that have at least `name` and `url` properties.
 * @param {number} [offset=0] - Index of the first entry to process; earlier
 *   entries are skipped.
 */
function pullTownDataSync(countryPath, offset) {
  const startIndex = offset || 0;
  const entries = JSON.parse(fs.readFileSync(countryPath));

  entries.slice(startIndex).forEach(function (element, index) {
    // Entries without a URL cannot be requested; skip them instead of
    // crashing on `.replace` of a missing value.
    if (!element || !element.url) {
      return;
    }

    const position = startIndex + index;
    // The request goes to the host in `IP` instead of the public host name,
    // while `element.url` (the original address) is still the one handed to
    // `absolutePath` below.
    const url = element.url.replace('www.stats.gov.cn', IP);
    log.debug(`正在请求 => ${element.name} => ${element.url}`);

    requestSync({
      url: url,
      // `encoding: null` is meant to yield the raw bytes so they can be
      // decoded from gb2312 below.
      encoding: null
    }, function (error, response, body) {
      if (error || body == null) {
        // Do not touch the output file for a failed request; report the
        // index so this entry can be crawled again later.
        log.debug('foreach ==> ', position, ' request failed => ', error);
        return;
      }

      // A missing or empty accumulator file is treated as an empty list.
      // (`fs.readFileSync(...) || []` never falls back, because a Buffer is
      // always truthy, so an empty file used to make JSON.parse throw.)
      const stored = fs.existsSync(TownPath)
        ? fs.readFileSync(TownPath, 'utf8').trim()
        : '';
      const previous = stored ? JSON.parse(stored) : [];

      const parsed = parseTown(iconv.decode(body, 'gb2312'), absolutePath(element.url));
      fs.writeFileSync(TownPath, JSON.stringify(previous.concat(parsed)));
      log.debug('foreach ==> ', position);
    });

    // Throttle the crawl; presumably milliseconds — confirm against `sleep`.
    sleep(500);
  });
}
| 385 | |
| 386 | //由于数据量很大,统计局的服务器在接受大量连接时会莫名 hang 住,所以通过输出 index,不断计算偏移量,方便下一次计算 |
| 387 | //第一次从 0 开始,假如输出 index 是 3000,则下一次爬取时偏移量应该是 3001,可以通过 sleep 适当控制爬取的速度,sleep 是直接让 nodejs event loop 停住 |
nothing calls this directly
no test coverage detected