MCPcopy
hub / github.com/yhangf/PythonCrawler / getTopSecCom

Class getTopSecCom

spiderFile/get_top_sec_com.py:14–91  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

12# nest_asyncio.apply()
13
14class getTopSecCom:
def __init__(self, top=None):
    """Set up request settings and load (or build) the cached stock list.

    Args:
        top: stored unchanged as ``self.top``; this method does not use it
            in any other way.

    If the file ``./useful_sec_com_list`` exists it is read with joblib;
    otherwise ``self.get_sec_com_code()`` is called to produce the list.
    """
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
    cache_file = "./useful_sec_com_list"

    self.top = top
    self.shares_api = "https://xueqiu.com/S/"
    self.bk_url = "http://71.push2.eastmoney.com/api/qt/clist/get?cb=jQuery1124034348162124675374_1612595298605&pn=1&pz=85&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f62&fs=b:BK0655&fields=f12,f14&_=1612595298611"
    # Headers sent with every HTTP request issued by this object.
    self.headers = {
        "Referer": "http://quote.eastmoney.com/",
        "User-Agent": user_agent,
    }

    # Prefer the on-disk cache; only hit the network when it is missing.
    if os.path.exists(cache_file):
        with open(cache_file, "rb") as cached:
            self.useful_sec_com_list = joblib.load(cached)
    else:
        self.useful_sec_com_list = self.get_sec_com_code()
26
def get_sec_com_code(self):
    """Download the stock list, normalise the codes and cache the result.

    Requests ``self.bk_url``, takes the first bracketed array found in the
    response text, drops every entry whose name (``f14``) contains "ST",
    prefixes each code (``f12``) with an exchange tag and writes the final
    list to the file ``useful_sec_com_list`` with joblib.

    Returns:
        list[list[str]]: ``[prefixed_code, name]`` pairs.

    Raises:
        ValueError: if the response contains no bracketed array, or the
            array is not valid JSON (``json.JSONDecodeError``).
    """
    import json  # local import: only this method needs it

    html = rq.get(self.bk_url, headers=self.headers).content.decode("utf-8")

    # The URL asks for a ``cb=`` callback wrapper, so the body is presumably
    # JSONP rather than bare JSON; pull out just the first [...] span.
    matched = re.search(r"\[(.*?)\]", html)
    if matched is None:
        raise ValueError("no stock list found in the board response")

    # Parse as JSON instead of eval(): the text comes from a remote server
    # and must never be executed. Keeping the brackets also means a
    # single-entry payload still parses to a list of dicts.
    sec_com_list = json.loads(matched.group(0))

    useful_sec_com_list = []
    for item in sec_com_list:
        code, name = item["f12"], item["f14"]
        if "ST" in name:
            continue
        # Codes starting with 0 or 3 are Shenzhen-listed (prefix "sz");
        # codes starting with 6 are Shanghai-listed (prefix "sh").
        prefix = "sh" if code.startswith("6") else "sz"
        useful_sec_com_list.append([prefix + code, name])

    with open("useful_sec_com_list", "wb") as fp:
        joblib.dump(useful_sec_com_list, fp)
    return useful_sec_com_list
41
async def async_get_shares_details(self, sec_com, url):
    """Asynchronously fetch one page and extract its total market value.

    Args:
        sec_com: sequence whose items are copied to the front of the
            result (an entry of ``self.useful_sec_com_list`` at the call
            site visible in this class).
        url: address to request with ``self.headers``.

    Returns:
        list | None: the items of ``sec_com`` followed by the text captured
        between "总市值:<span>" (total market value) and "亿</span>", or
        ``None`` when the page does not contain that markup.
    """
    async with aiohttp.ClientSession() as client, client.get(url, headers=self.headers) as reply:
        page = await reply.text()
        found = re.search("<td>总市值:<span>(.*?)亿</span>", page)
        if found is None:
            return None
        return [*sec_com, found.group(1)]
49
async def async_get_all_shares(self):
    """Fetch the details of every stock in the cached list concurrently.

    Starts one ``self.async_get_shares_details`` call per entry of
    ``self.useful_sec_com_list`` (URL = ``self.shares_api`` + the entry's
    first item) and awaits them together.

    Returns:
        list: the truthy results, in the same order as
        ``self.useful_sec_com_list``. An empty list is returned when there
        are no entries to fetch.
    """
    # gather() is used instead of wait(): it accepts zero awaitables
    # (wait() raises ValueError on an empty collection) and it returns
    # results in submission order rather than as an unordered set.
    results = await asyncio.gather(
        *(
            self.async_get_shares_details(sec_com, self.shares_api + sec_com[0])
            for sec_com in self.useful_sec_com_list
        )
    )
    # Lookups that found no market value return None; drop those.
    return [share for share in results if share]
61
def get_shares_details(self):
    """Synchronously fetch every cached stock page and collect market values.

    For each entry of ``self.useful_sec_com_list`` the page at
    ``self.shares_api`` + the entry's first item is downloaded and searched
    for the "总市值" (total market value) cell.

    Returns:
        list[list]: one item per page where the cell was found, made of the
        entry's own items followed by the captured value text (the part
        before "亿"). Pages without the cell are skipped.
    """
    cap_pattern = re.compile("<td>总市值:<span>(.*?)亿</span>")
    collected = []
    for entry in self.useful_sec_com_list:
        target = self.shares_api + entry[0]
        page = rq.get(target, headers=self.headers).content.decode("utf-8")
        hit = cap_pattern.search(page)
        if hit is None:
            continue
        collected.append([*entry, hit.group(1)])
    return collected
71

Callers 1

get_top_sec_com.pyFile · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected