基本信息
源码名称:爬取微博某话题微博数据
源码大小:4.98KB
文件格式:.py
开发语言:Python
更新时间:2021-03-27
×
请留下您的邮箱，我们将在2小时内将文件发送到您的邮箱。
源码介绍
爬取如上图所示话题的微博信息
def get_huati_data(url, page):
    """Fetch one result page of a topic and write one row per post.

    Requests ``url`` with ``&page=<page>`` appended, parses the HTML and, for
    every ``div.card-wrap`` element whose ``action-type`` is
    ``feed_list_item``, writes a row through the module-level ``writer`` and
    echoes the same row with ``print``.

    Row layout, in order: mid (with a trailing tab), nick name, post text,
    publish-time text, repost count, comment count, the text of the fourth
    ``card-act`` item (presumably the like count), and the post URL prefixed
    with ``https:``.  An empty count is written as the integer ``0``;
    a non-empty count is left as the string scraped from the page.

    Relies on names defined elsewhere in the file: ``requests``,
    ``BeautifulSoup``, ``headers``, ``writer`` (anything exposing
    ``writerow``) and ``save_huati_data`` (a list used as a scratch row
    buffer, always emptied again before the next post).

    Args:
        url: Base URL of the topic page.  ``&page=...`` is appended verbatim,
            so it is expected to already carry a query string.
        page: Page number to request.

    Returns:
        None.  Any exception raised while fetching, parsing or writing is
        caught, printed, and followed by a failure message for ``page``;
        posts after the failing one on that page are not processed.
    """
    try:
        url = url + '&page={}'.format(page)
        html = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(html.content, "html.parser")
        # NOTE(review): the result is never used, but the lookup raises when
        # the page has no ``ul.s-scroll`` element, which sends the page to
        # the failure handler below.  Kept as-is; confirm whether intended.
        page_numbers = soup.find('ul', class_="s-scroll").find_all('li')
        all_datas = soup.find_all(
            'div', class_="card-wrap", attrs={'action-type': 'feed_list_item'})
        for all_data in all_datas:
            # Trailing tab is part of the written value (presumably to keep
            # spreadsheet tools from reformatting the long numeric id).
            mid = all_data['mid'] + '\t'
            p = all_data.find('p', attrs={'node-type': 'feed_list_content'})
            name = p['nick-name']
            txt = p.text.strip()

            # First link inside <p class="from"> supplies both the publish
            # time text and the fallback post URL.
            from_link = all_data.find('p', class_='from').find_all('a')[0]
            con_time = from_link.text.strip()

            # Prefer the "unfold" link when the post has one.
            href = all_data.find('a', attrs={'action-type': 'fl_unfold'})
            if href is None:
                href = from_link['href']
            else:
                href = href['href']

            lis = all_data.find('div', class_="card-act").find_all('li')
            # Strip the button label; an empty remainder means "no count".
            zhuanfa = lis[1].text.strip().replace('转发', '') or 0
            comment = lis[2].text.strip().replace('评论', '') or 0
            click = lis[3].text.strip() or 0

            url1 = 'https:' + href

            save_huati_data.extend(
                [mid, name, txt, con_time, zhuanfa, comment, click, url1])
            try:
                writer.writerow(save_huati_data)
                print(save_huati_data)
            finally:
                # Always empty the shared buffer, even if the write failed,
                # so stale fields cannot leak into the next row or call.
                save_huati_data.clear()
    except Exception as e:
        print(e)
        print("第" + str(page) + "页获取(话题-文章-mid-url-zan)失败")