MCPcopy
hub / github.com/SukkaW/Surge / getTopOneMillionDomains

Function getTopOneMillionDomains

Build/validate-gfwlist.ts:16–55  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

14import { GLOBAL } from '../Source/non_ip/global';
15
/**
 * Downloads the Tranco top-1m list (`top-1m.csv.zip`), locates the
 * `top-1m.csv` entry inside the archive, streams it through a CSV parser and
 * adds the second column of every row to a `HostnameSmolTrie`.
 *
 * @returns A promise that resolves with the populated trie once the CSV parser
 * has emitted `end`.
 * @throws Rejects when the download fails, when the archive has no
 * `top-1m.csv` entry (via `nullthrow`), or when either the zip entry stream or
 * the CSV parser emits an error.
 */
export async function getTopOneMillionDomains(): Promise<HostnameSmolTrie> {
  const { parse: csvParser } = await import('csv-parse');

  const topDomainTrie = new HostnameSmolTrie();
  const csvParse = csvParser({ columns: false, skip_empty_lines: true });

  const topDomainsZipBody = await (await $$fetch('https://tranco-list.eu/top-1m.csv.zip', {
    headers: {
      accept: '*/*',
      'user-agent': 'curl/8.12.1'
    }
  })).arrayBuffer();

  // Scan the archive for the single CSV entry we care about.
  let entry: yauzl.Entry | null = null;
  for await (const e of await yauzl.fromBuffer(Buffer.from(topDomainsZipBody))) {
    if (e.filename === 'top-1m.csv') {
      entry = e;
      break;
    }
  }

  const { promise, resolve, reject } = Promise.withResolvers<HostnameSmolTrie>();

  const readable = await nullthrow(entry, 'top-1m.csv entry not found').openReadStream();

  const parser = readable.pipe(csvParse);

  // `pipe()` does not propagate errors from the source stream to the
  // destination. Without this listener a failure while reading the zip entry
  // would leave `promise` pending forever.
  readable.on('error', (err) => {
    reject(err);
    parser.destroy();
  });

  parser.on('readable', () => {
    let record;
    while ((record = parser.read()) !== null) {
      // With `columns: false` each record is an array of cells; index 1 is
      // presumably the domain in a `rank,domain` row — confirm against the
      // Tranco CSV layout.
      topDomainTrie.add(record[1]);
    }
  });

  parser.on('end', () => {
    resolve(topDomainTrie);
  });
  parser.on('error', (err) => {
    reject(err);
  });

  return promise;
}
56
57export async function parseGfwList() {
58 const whiteSet = new Set<string>();

Callers 2

parseDomesticListFunction · 0.90
parseGfwListFunction · 0.85

Calls 1

$$fetchFunction · 0.90

Tested by

no test coverage detected