MCPcopy
hub / github.com/wecatch/china_regions / pullTownDataSync

Function pullTownDataSync

main.js:362–384  ·  view source on GitHub ↗
(countryPath, offset)

Source from the content-addressed store, hash-verified

360
361
/**
 * Fetch the page for every entry of a JSON listing and append the parsed
 * results to the JSON array stored at TownPath.
 *
 * Entries are processed in order, starting at `offset`. After each entry is
 * stored, its absolute index (`offset + index`) is logged, so an interrupted
 * run can be resumed by passing the last logged index + 1 as `offset`.
 *
 * Failure handling:
 *  - an entry without a `url` is skipped;
 *  - a failed request (error or empty body) is logged and skipped, leaving
 *    the TownPath file untouched for that entry;
 *  - a missing or empty TownPath file is treated as an empty list.
 *
 * NOTE(review): requestSync is assumed to invoke its callback before it
 * returns — confirm against its definition.
 *
 * @param {string} countryPath - Path to a JSON file containing an array of
 *   objects that carry `name` and `url` properties.
 * @param {number} [offset=0] - Number of leading entries to skip.
 */
function pullTownDataSync(countryPath, offset) {
  offset = offset || 0;
  const entries = JSON.parse(fs.readFileSync(countryPath));
  entries.slice(offset).forEach(function (element, index) {
    log.debug("正在请求 => " + element.name + " => " + element.url);
    // Guard before touching element.url so a missing url skips the entry
    // instead of throwing on `.replace`.
    if (!element.url) {
      return;
    }
    const url = element.url.replace('www.stats.gov.cn', IP);
    requestSync({
      url: url,
      encoding: null // ask for the raw body so it can be decoded from gb2312
    }, function (error, response, body) {
      if (error || !body) {
        // Keep going: one failed page must not abort the whole crawl.
        log.debug('request failed ==> ', offset + index, error);
        return;
      }
      // A Buffer is always truthy, so the fallback to [] has to be decided
      // on the text content, before JSON.parse sees it.
      const stored = fs.existsSync(TownPath) ? String(fs.readFileSync(TownPath)).trim() : '';
      const previous = stored ? JSON.parse(stored) : [];
      const parsed = parseTown(iconv.decode(body, 'gb2312'), absolutePath(element.url));
      fs.writeFileSync(TownPath, JSON.stringify(previous.concat(parsed)));
      log.debug('foreach ==> ', offset + index);
    });
    // Throttle between requests.
    sleep(500);
  });
}
385
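386 // The data set is large, and the statistics bureau's server tends to hang for no apparent reason under many connections, so the index is logged to let the offset for the next run be worked out.
387 // The first run starts at 0; if the last logged index is 3000, the next crawl should use offset 3001. sleep can be used to throttle the crawl; it blocks the Node.js event loop outright.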

Callers

nothing calls this directly

Calls 2

parseTownFunction · 0.85
absolutePathFunction · 0.85

Tested by

no test coverage detected