赞
踩
爬取淘宝热卖商品“泡面”的信息，包括商品名称、店铺、链接、付款人数、价格等，并用 CSV 文件保存。
# Scrape Taobao "hot sale" search results for the keyword 泡面 (instant
# noodles) and append one CSV row per product: name, shop, price, number of
# payers and product link.
import csv
import os
import time

import parsel
import requests

# Search URL; ``{a}`` is the page-number placeholder filled in by main().
SEARCH_URL_TEMPLATE = (
    'https://re.taobao.com/search?refpid=420435_1006'
    '&keyword=%E6%B3%A1%E9%9D%A2&_input_charset=utf-8&page={a}&isinner=0'
)

# Column order of the output CSV; also the keys of every row dict.
FIELDNAMES = ['商品名称', '店铺', '商品价格', '付款人数', '商品链接']


def get_url(url):
    """Fetch *url* and return the response body as text.

    Returns ``None`` when the request fails (timeout, connection error) or
    the server answers with a status code other than 200.
    """
    # Placeholders: replace with your own browser's values before running.
    headers = {
        'user-agent': '自己的user-agent',
        'cookie': '自己的cookir',
    }
    try:
        response = requests.get(url=url, headers=headers, timeout=3)
    except requests.RequestException as e:
        # Treat network errors like a non-200 answer instead of aborting
        # the whole run.
        print(e)
        return None
    print('调用get')
    if response.status_code == 200:
        return response.text
    return None


def _extract_row(li):
    """Build the CSV row dict for one ``.item`` selector.

    Returns ``None`` when the item has no title text, so the caller can skip
    it instead of writing stale or undefined values.
    """
    # Chained .css() calls are kept as in the original: each step matches
    # relative to the previous selection.
    name = li.css('.info').css('span').css('.title::text').get()
    if not name:
        return None

    pay_text = li.css('.payNum::text').get()
    # The pay-count node may be missing; only strip the suffix when present.
    num = pay_text.replace('人付款', '') if pay_text is not None else None

    return {
        '商品名称': name,
        '店铺': li.css('.shopNick::text').get(),
        '商品价格': li.css('strong::text').get(),
        '付款人数': num,
        '商品链接': li.css('a::attr(href)').get(),
    }


def parse_url(page_url=None):
    """Download one result page, extract its products and save each row.

    *page_url* defaults to the module-level ``url`` set by main(), which keeps
    the original no-argument call working.
    """
    global dit
    print('调用parse')
    if page_url is None:
        page_url = url

    htmltext = get_url(page_url)
    if htmltext is None:
        # Nothing to parse: the download failed or returned a non-200 status.
        return

    selector = parsel.Selector(htmltext)
    content = selector.css('.item')
    print(type(content))

    # The last 7 ``.item`` nodes are skipped, as in the original script.
    # NOTE(review): presumably these are non-product entries - confirm
    # against the live page.
    for li in content[0:-7]:
        row = _extract_row(li)
        if row is None:
            print('NONE')
            continue
        dit = row  # kept for code that still reads the global
        print(row)
        save(row)


def save(row=None):
    """Write *row* to the CSV via the module-level ``csv_writer``.

    *row* defaults to the module-level ``dit`` for backward compatibility.
    """
    if row is None:
        row = dit
    csv_writer.writerow(row)
    print('调用save')


def main(i):
    """Scrape result page number *i*."""
    global url
    # Fill in the page placeholder; the original left ``{a}`` literally in the
    # URL, so every call requested the same address.
    # NOTE(review): the caller passes 0, 1, 2 - confirm whether the site's
    # ``page`` parameter is 0- or 1-based.
    url = SEARCH_URL_TEMPLATE.format(a=i)
    parse_url(url)


if __name__ == '__main__':
    # ``with`` guarantees the file is flushed and closed, even on error.
    with open('D://taobaopaomian.csv', 'a+', encoding='utf-8', newline='') as f:
        csv_writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        # The file is opened in append mode: only write the header when the
        # file is still empty, so repeated runs do not duplicate it.
        f.seek(0, os.SEEK_END)
        if f.tell() == 0:
            csv_writer.writeheader()
        for a in range(3):
            main(a)
            time.sleep(2)  # be polite between page requests
    print('下载完成')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。