解析推文数据
(data)
| 84 | |
| 85 | |
def parse_tweet_info(data):
    """
    Parse a raw tweet payload into a flat tweet dict.

    Args:
        data: Mapping describing one tweet. The keys ``mid``, ``mblogid``,
            ``created_at``, ``reposts_count``, ``comments_count``,
            ``attitudes_count``, ``source``, ``text_raw``, ``pic_num`` and
            ``user`` are required; every other key read here is optional.

    Returns:
        dict with the normalised tweet fields. ``video`` and
        ``video_online_numbers`` are only present for video tweets that
        carry media info, ``retweet_id`` only for retweets and
        ``reads_count`` only when the payload provides it.

    Raises:
        KeyError: if one of the required keys is missing from ``data``.
    """
    tweet = {
        "_id": str(data['mid']),
        "mblogid": data['mblogid'],
        "created_at": parse_time(data['created_at']),
        "geo": data.get('geo', None),
        "ip_location": data.get('region_name', None),
        "reposts_count": data['reposts_count'],
        "comments_count": data['comments_count'],
        "attitudes_count": data['attitudes_count'],
        "source": data['source'],
        # Strip zero-width spaces from the raw text.
        "content": data['text_raw'].replace('\u200b', ''),
        # `or []` also covers a payload where pic_ids is present but null.
        "pic_urls": ["https://wx1.sinaimg.cn/orj960/" + pic_id for pic_id in data.get('pic_ids') or []],
        "pic_num": data['pic_num'],
        'isLongText': False,
        'is_retweet': False,
        "user": parse_user_info(data['user']),
    }

    # The source may be wrapped in an anchor tag; keep only the link text.
    # If the pattern unexpectedly does not match, keep the raw value rather
    # than failing on a None match object.
    if '</a>' in tweet['source']:
        source_match = re.search(r'>(.*?)</a>', tweet['source'])
        if source_match:
            tweet['source'] = source_match.group(1)

    # Tolerate both a missing and a null page_info.
    page_info = data.get('page_info') or {}
    if page_info.get('object_type', '') == 'video':
        media_info = None
        if 'media_info' in page_info:
            media_info = page_info['media_info']
        else:
            # Fall back to the first card, guarding against an empty list.
            cards = page_info.get('cards') or []
            if cards and 'media_info' in cards[0]:
                media_info = cards[0]['media_info']
        if media_info:
            tweet['video'] = media_info['stream_url']
            # Video play count (taken from the `online_users_number` field).
            tweet['video_online_numbers'] = media_info.get('online_users_number', None)

    tweet['url'] = f"https://weibo.com/{tweet['user']['_id']}/{tweet['mblogid']}"

    # Flag as long text only when both markers are present; .get avoids a
    # KeyError when continue_tag is present without isLongText.
    if 'continue_tag' in data and data.get('isLongText'):
        tweet['isLongText'] = True
    if 'retweeted_status' in data:
        tweet['is_retweet'] = True
        tweet['retweet_id'] = data['retweeted_status']['mid']
    if 'reads_count' in data:
        tweet['reads_count'] = data['reads_count']
    return tweet
| 128 | |
| 129 | |
| 130 | def parse_long_tweet(response): |
no test coverage detected