赞
踩
每页地址变化
http://bang.dangdang.com/books/bestsellers/01.00.00.00.00.00-24hours-0-0-1-1
http://bang.dangdang.com/books/bestsellers/01.00.00.00.00.00-24hours-0-0-1-2
使用开发者工具查看:书名、评论数等信息位于网页源代码的 ul 列表下
import csv
import time

import parsel
import requests

# Output file is opened in append mode so repeated runs accumulate rows.
f = open('畅销书排行.csv', mode='a', encoding='utf-8', newline='')
csv_writer = csv.DictWriter(f, fieldnames=[
    '标题',
    '评论',
    '推荐',
    '作者',
    '出版日期',
    '出版社',
    '原价',
    '售价',
    '电子书价格',
    '详情页',
])
# In append mode the stream starts at end-of-file, so position 0 means the
# file is new/empty. Only then write the header; otherwise every re-run would
# insert a duplicate header row in the middle of the data.
if f.tell() == 0:
    csv_writer.writeheader()


def onepage(url):
    """Fetch one bestseller list page and append each book to the CSV.

    Every ``li`` under ``ul.bang_list`` is treated as one book. Fields whose
    selector matches nothing are written as empty values (``.get()`` returns
    ``None``). Each row is also printed to stdout.

    Args:
        url: Address of the list page to download.

    Raises:
        requests.RequestException: If the request fails or exceeds the timeout.
    """
    headers = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3861.400 QQBrowser/10.7.4313.400'}
    # A timeout keeps a stalled connection from hanging the whole crawl.
    resp = requests.get(url=url, headers=headers, timeout=10)
    selector = parsel.Selector(resp.text)

    for li in selector.css('ul.bang_list li'):
        row = {
            '标题': li.css('.name a::text').get(),
            '评论': li.css('.star a::text').get(),
            '推荐': li.css('.tuijian::text').get(),
            # Author and publisher info are located by position in the item.
            '作者': li.css('div:nth-child(5) a::attr(title)').get(),
            '出版日期': li.css('div:nth-child(6) span::text').get(),
            '出版社': li.css('div:nth-child(6) a::text').get(),
            '原价': li.css('.price .price_r::text').get(),
            '售价': li.css('.price .price_n::text').get(),
            # Class selector needs the leading dot; without it the selector
            # looks for a <price_e> element and never matches.
            '电子书价格': li.css('.price_e span::text').get(),
            '详情页': li.css('.name a::attr(href)').get(),
        }
        print(row)
        csv_writer.writerow(row)


try:
    for page in range(1, 26):
        print(f'正在爬取第{page}页')
        # Pause between requests to avoid hammering the server.
        time.sleep(1)
        url = f'http://bang.dangdang.com/books/bestsellers/01.00.00.00.00.00-24hours-0-0-1-{page}'
        onepage(url)
finally:
    # Always flush and close the CSV, even if a page fails mid-crawl.
    f.close()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。