Python 爬取豆瓣电影 Top 250

"""Crawl the Douban movie Top 250 list and print rank, rating and title."""

import time
import urllib.request

from bs4 import BeautifulSoup

# Each listing page ends on a rank divisible by this value; that rank is also
# the ``start`` offset of the following page.
PAGE_SIZE = 25
# Crawling stops once this rank has been printed.
LAST_RANK = 250
# NOTE(review): Douban reportedly rejects urllib's default User-Agent
# (HTTP 418), hence a browser-like value is sent -- confirm against the site.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
)

# First page of the list; the crawl starts here.
url = "https://movie.douban.com/top250?start=0&filter="


def url_open(url, timeout=10):
    """Open *url* and return the HTTP response object.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the crawl forever.

    Returns:
        The response returned by ``urllib.request.urlopen``. The caller is
        responsible for closing it (it can be used as a context manager).

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for
            non-success HTTP status codes).
    """
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    return urllib.request.urlopen(request, timeout=timeout)


def parse_html(response):
    """Print every ranked movie found in *response* and return the next URL.

    Args:
        response: Object whose ``read()`` returns the page HTML as UTF-8
            bytes, e.g. the result of :func:`url_open`.

    Returns:
        The URL of the following page once an entry whose rank is a multiple
        of ``PAGE_SIZE`` has been printed, or ``None`` when ``LAST_RANK`` has
        been reached or the page contains no such entry.
    """
    html_soup = BeautifulSoup(
        response.read(), "html.parser", from_encoding="utf-8"
    )
    for li in html_soup.find_all("li"):
        title = li.find("span", class_="title")
        if title is None:
            # Not a movie entry (the page has other <li> elements too).
            continue
        em = li.find("em")
        rating = li.find("span", class_="rating_num")
        if em is None or rating is None:
            # Incomplete entry: skip it instead of failing on ``None``.
            continue

        rank = em.get_text()
        print(
            "排名:" + rank
            + "------评分:" + str(rating.get_text())
            + "-------" + title.get_text()
        )

        # ``rank`` is text; compare as an integer so the end of the list is
        # actually detected (comparing the string to 250 is never true).
        rank_number = int(rank)
        if rank_number >= LAST_RANK:
            return None
        if rank_number % PAGE_SIZE == 0:
            return (
                "https://movie.douban.com/top250?start=" + rank + "&filter="
            )
    return None


def main():
    """Crawl all pages starting at ``url`` and report the elapsed time."""
    # Start the clock before the first request so it is included in the total.
    start_time = time.time()
    print("开始:" + str(start_time))

    next_url = url
    while next_url is not None:
        # The context manager closes each response as soon as it is parsed.
        with url_open(next_url) as response:
            next_url = parse_html(response)

    end_time = time.time()
    print("结束:" + str(end_time))
    print("一共用了:" + str(end_time - start_time) + "")


if __name__ == "__main__":
    main()

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/zzgzfp.html