赞
踩
# -*- coding: utf-8 -*-
import scrapy
class EduspiderSpider(scrapy.Spider):
    """Spider that saves the image links found on the start page to a file."""

    name = 'eduSpider'
    allowed_domains = ['127.0.0.1']
    start_urls = ['http://127.0.0.1:8080/imgs/']

    def parse(self, response):
        """Write every matched image ``src`` value to ``images.txt``.

        Args:
            response: The downloaded page for one of ``start_urls``.

        The file is opened in ``'w'`` mode, so each call overwrites the
        previous content. One link is written per line.
        """
        # .extract() converts the SelectorList into plain strings; formatting
        # the SelectorList itself would write Selector reprs, not the links.
        img_srcs = response.xpath(
            "//div[@class='box']/div/a/img/@src"
        ).extract()

        with open('images.txt', 'w', encoding='utf-8') as f:
            f.writelines("{}\n".format(img_src) for img_src in img_srcs)
#items.py
import scrapy
class ImgprojectItem(scrapy.Item):
    """Item describing one scraped image: its link and its name."""

    # Stores the image link.
    img_urls = scrapy.Field()

    # Stores the image name.
    images = scrapy.Field()
#pipelines.py
import os

import requests

from ImgProject import settings


class ImgprojectPipeline(object):
    """Item pipeline that downloads each image into ``settings.IMAGES_STORE``."""

    def process_item(self, item, spider):
        """Download the image referenced by *item* and save it as a ``.jpg``.

        Args:
            item: Mapping providing ``'img_urls'`` (the URL to request) and
                ``'images'`` (the file name, without extension).
            spider: The spider that yielded the item; not used here.

        Returns:
            The unchanged item, so that any later pipeline stage still
            receives it.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an error status.
        """
        # Folder path: taken from the path defined in the project settings.
        dir_path = '{}'.format(settings.IMAGES_STORE)
        os.makedirs(dir_path, exist_ok=True)

        # Final storage path of the image.
        img_path = os.path.join(dir_path, item['images'] + '.jpg')

        # Request the image URL; the timeout keeps a stalled server from
        # blocking the crawl forever.
        img = requests.get(
            item['img_urls'],
            headers=settings.DEFAULT_REQUEST_HEADERS,
            timeout=30,
        )
        # Fail loudly instead of saving an error page under a .jpg name.
        img.raise_for_status()

        # Binary mode, because the payload is raw image bytes.
        with open(img_path, 'wb') as f:
            f.write(img.content)

        # Scrapy requires process_item to hand the item back (or drop it).
        return item
#imgspider.py
#-*- coding: utf-8 -*-
import scrapy

from ImgProject.items import ImgprojectItem


class ImgspierSpider(scrapy.Spider):
    """Spider that yields one ``ImgprojectItem`` per image on the start page."""

    name = 'imgspier'
    allowed_domains = ['127.0.0.1']
    start_urls = ['http://127.0.0.1:8080/imgs/']

    def parse(self, response):
        """Yield an item holding the absolute URL and the name of each image.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            ImgprojectItem: ``'img_urls'`` is the image's absolute URL and
            ``'images'`` is the last path segment of its ``src`` up to the
            first dot.
        """
        img_srcs = response.xpath('//div/a/img/@src').extract()
        for img_src in img_srcs:
            # Image name: last path segment, cut at the first dot.
            name = img_src.split('/')[-1].split('.')[0]

            item = ImgprojectItem()
            # Resolve against the page URL rather than prepending a fixed
            # host, so relative and already-absolute src values also work.
            item['img_urls'] = response.urljoin(img_src)
            item['images'] = name
            # Hand the item over to the item pipeline.
            yield item
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。