Python 爬取豆瓣

日期：2022-04-06 | 栏目：程序人生 | 浏览：次

"""Crawl the Douban Top 250 movie list and print rank, rating and title."""

import time
import urllib.request

from bs4 import BeautifulSoup

# First page of the list; each page is addressed by its zero-based start offset.
url = "https://movie.douban.com/top250?start=0&filter="

# Number of movies listed per page and the rank of the final entry.
PAGE_SIZE = 25
LAST_RANK = 250


def url_open(url):
    """Open *url* and return the HTTP response object.

    The caller is responsible for closing the returned response (it can be
    used as a context manager).

    NOTE(review): the request is sent with urllib's default headers; the site
    may reject clients without a browser-like User-Agent -- confirm.
    """
    return urllib.request.urlopen(url)


def parse_html(response):
    """Print every movie found in *response* and return the next page URL.

    Args:
        response: A readable object whose ``read()`` yields the page HTML.

    Returns:
        The URL of the next page once the last entry of the current page
        (a rank divisible by ``PAGE_SIZE``) has been printed, or ``None``
        when the final rank has been reached or the page holds no movies.
    """
    html_soup = BeautifulSoup(
        response.read(), 'html.parser', from_encoding='utf-8'
    )

    for li in html_soup.find_all('li'):
        titles = li.find_all('span', class_='title')
        if not titles:
            # Not a movie entry (navigation items and the like).
            continue

        em = li.find('em')
        rating = li.find('span', class_='rating_num')
        if em is None or rating is None:
            # Entry is missing its rank or rating; skip it instead of crashing.
            continue

        rank = em.get_text()
        print(
            "排名:" + rank + "------评分:" + str(rating.get_text())
            + "-------" + titles[0].get_text()
        )

        # The rank is text in the page; compare it as a number so that the
        # end-of-list check can actually succeed.
        rank_number = int(rank)
        if rank_number >= LAST_RANK:
            return None
        if rank_number % PAGE_SIZE == 0:
            return (
                "https://movie.douban.com/top250?start="
                + str(rank_number) + "&filter="
            )

    return None


def main():
    """Walk through all pages starting at ``url`` and report the elapsed time."""
    # Start timing before the first request so the total covers every fetch.
    start_time = time.time()
    print("开始：" + str(start_time))

    page_url = url
    while page_url is not None:
        # Close each response as soon as its page has been parsed.
        with url_open(page_url) as response:
            page_url = parse_html(response)

    end_time = time.time()
    print("结束:" + str(end_time))
    print("一共用了：" + str(end_time - start_time) + "秒")


if __name__ == '__main__':
    main()

转载注明出处：https://www.heiqu.com/zzgzfp.html

Python 爬取 豆瓣

相关推荐

Python 爬取豆瓣