赞
踩
目标网站:彼岸图网
首先导入所需包
import os
import time
import requests
from lxml import etree
做好伪装(F12获取信息)
# Request headers sent with every HTTP call so the scraper resembles a browser.
# The values are left blank here; fill them in with the ones shown in the
# browser's developer tools (F12) for the target site.
headers = {
    "User-Agent": "",
    "Referer": "",
    "Cookie": "",
}
获取总页数(非固定)
# Get the total number of pages
def get_last_page():
    """Fetch the site's home page and return its total page count.

    The count is read from the pagination bar (``div.page``) of
    http://pic.netbian.com/ and is also printed to stdout.

    Returns:
        str: The concatenated text of the matched pagination link. This is
        an empty string when the XPath matches nothing, so callers that
        convert it with ``int()`` must be prepared for ``ValueError``.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server does not answer within the timeout.
    """
    # A timeout is required: without one, requests waits indefinitely on a
    # stalled connection and the whole crawl hangs.
    response = requests.get(
        'http://pic.netbian.com/', headers=headers, timeout=10
    )
    # Decode the body as GBK instead of the encoding requests guesses.
    response.encoding = "GBK"
    html = etree.HTML(response.text)
    # NOTE(review): this picks the 10th <a> in the pagination bar, which
    # presumably is the "last page" link -- it is position-dependent and
    # will break if the site changes the number of pagination links.
    last_page = html.xpath('//div[@class="page"]/a[10]/text()')
    last = ''.join(last_page)
    print("本站一共有{0}页\n".format(last))
    return last
由于彼岸图网的高清图片需要再进入一个网页才能拿到,所以先获取小图链接
首页的链接并不规则,须单独设置
def get_main_page():
    """Crawl every listing page and pass each thumbnail link to get_img_link.

    The number of listing pages comes from ``get_last_page()``. The first
    listing page is the site root; later pages live at ``index_<n>.html``.
    Pages that do not answer with HTTP 200 are skipped.

    Returns:
        list: The thumbnail hrefs found on the last listing page that was
        parsed successfully, or an empty list if no page was parsed.

    Raises:
        ValueError: If ``get_last_page()`` returns text that ``int()``
            cannot parse (for example an empty string).
        requests.exceptions.RequestException: If a request fails or the
            server does not answer within the timeout.
    """
    # Local import keeps this block self-contained; urljoin builds URLs
    # without the accidental double slashes that string concatenation gives.
    from urllib.parse import urljoin

    base_url = 'http://pic.netbian.com/'
    last = get_last_page()
    # Initialised up front so the final return cannot hit an unbound name
    # when there are zero pages or every request is rejected.
    biglink_list = []
    for page in range(1, int(last) + 1):
        print('\n第{0}页'.format(page))
        # The first page has an irregular URL (the site root itself).
        if page == 1:
            url = base_url
        else:
            url = urljoin(base_url, 'index_{0}.html'.format(page))
        # Timeout prevents one stalled page from hanging the whole crawl.
        response = requests.get(url, headers=headers, timeout=10)
        response.encoding = "GBK"
        if response.status_code != 200:
            continue
        html = etree.HTML(response.text)
        biglink_list = html.xpath('//div[@class="slist"]//li/a/@href')
        for link in biglink_list:
            # urljoin resolves the href correctly whether or not it starts
            # with a slash.
            get_img_link(urljoin(base_url, link))
    return biglink_list
进一步访问小图链接,解析高清图链接并输出图片名称和链接
# 获取高清图链接
def get_img_link(link):
global pic_sum
try:
response = requests.get(link, headers=headers)
response.encoding = "GBK"
if response.status_code == 200:
html = etree.HTML(response.text)
img_list = html.xpath('//div[@class="photo-pic"]/a/img/@src')
title_list = html.xpath('//div[@class="photo-pic"]/a/img/@title')
for title, img in zip(title_list, img_list):
pic_sum = pic_sum+1
print('{0}:{1}声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/weixin_40725706/article/detail/460848
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。