MCPcopy
hub / github.com/nghuyong/WeiboSpider / parse_tweet_info

Function parse_tweet_info

weibospider/spiders/common.py:86–127  ·  view source on GitHub ↗

解析推文数据

(data)

Source from the content-addressed store, hash-verified

84
85
def parse_tweet_info(data):
    """
    Parse a raw tweet payload into a flat tweet dict.

    Args:
        data: Mapping holding one tweet. The keys 'mid', 'mblogid',
            'created_at', 'reposts_count', 'comments_count',
            'attitudes_count', 'source', 'text_raw', 'pic_num' and 'user'
            are required. 'geo', 'region_name', 'pic_ids', 'page_info',
            'continue_tag', 'isLongText', 'retweeted_status' and
            'reads_count' are optional.

    Returns:
        A dict describing the tweet. 'video', 'video_online_numbers',
        'retweet_id' and 'reads_count' are only present when the payload
        carries the corresponding data.

    Raises:
        KeyError: If a required key is missing from ``data``, or if a
            non-empty video ``media_info`` has no 'stream_url'.
    """
    tweet = {
        "_id": str(data['mid']),
        "mblogid": data['mblogid'],
        "created_at": parse_time(data['created_at']),
        "geo": data.get('geo'),
        "ip_location": data.get('region_name'),
        "reposts_count": data['reposts_count'],
        "comments_count": data['comments_count'],
        "attitudes_count": data['attitudes_count'],
        "source": data['source'],
        "content": data['text_raw'].replace('\u200b', ''),
        # `or []` also covers a 'pic_ids' key that is present but null.
        "pic_urls": [
            "https://wx1.sinaimg.cn/orj960/" + pic_id
            for pic_id in (data.get('pic_ids') or [])
        ],
        "pic_num": data['pic_num'],
        'isLongText': False,
        'is_retweet': False,
        "user": parse_user_info(data['user']),
    }

    # The source may be wrapped in an HTML anchor; keep only the link text.
    # If the markup does not match the pattern, leave the raw value as is
    # instead of failing on a None match.
    if '</a>' in tweet['source']:
        source_match = re.search(r'>(.*?)</a>', tweet['source'])
        if source_match:
            tweet['source'] = source_match.group(1)

    # 'page_info' may be absent or null; treat both as "no page info".
    page_info = data.get('page_info') or {}
    if page_info.get('object_type', '') == 'video':
        media_info = None
        if 'media_info' in page_info:
            media_info = page_info['media_info']
        else:
            # Fall back to the first card, guarding against an empty list.
            cards = page_info.get('cards') or []
            if cards and 'media_info' in cards[0]:
                media_info = cards[0]['media_info']
        if media_info:
            tweet['video'] = media_info['stream_url']
            # Video view count (taken from 'online_users_number').
            tweet['video_online_numbers'] = media_info.get('online_users_number')

    # The user dict returned by parse_user_info must expose an '_id' entry.
    tweet['url'] = f"https://weibo.com/{tweet['user']['_id']}/{tweet['mblogid']}"

    # 'isLongText' is read with .get so a payload that has 'continue_tag'
    # but no 'isLongText' flag is treated as a normal-length tweet.
    if 'continue_tag' in data and data.get('isLongText'):
        tweet['isLongText'] = True
    if 'retweeted_status' in data:
        tweet['is_retweet'] = True
        tweet['retweet_id'] = data['retweeted_status']['mid']
    if 'reads_count' in data:
        tweet['reads_count'] = data['reads_count']
    return tweet
128
129
130def parse_long_tweet(response):

Callers 4

parse_tweetMethod · 0.90
parseMethod · 0.90
parseMethod · 0.90
parseMethod · 0.90

Calls 2

parse_timeFunction · 0.85
parse_user_infoFunction · 0.85

Tested by

no test coverage detected