赞
踩
网站介绍:这是一个 Cos 网站,该类网站很容易在互联网中消失,为了把数据保存下来,我们盘它。
源代码:
import os
import re
import urllib.request
from urllib.parse import urljoin

import requests
from lxml import etree
# Crawler for the "chinacos" gallery of www.cosplay8.com.
#
# For every listing page it collects the links to the detail pages, walks the
# numbered sub-pages of each detail page, extracts the large image shown on
# that sub-page and stores it under OUTPUT_DIR, named after the page title.

BASE_URL = "http://www.cosplay8.com"
LIST_URL_TEMPLATE = BASE_URL + "/pic/chinacos/list_22_{page}.html"
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'}

# Change the output directory as needed.
OUTPUT_DIR = "cosplay"

# Inclusive page ranges; same pages the original loops visited.
FIRST_LIST_PAGE = 9
LAST_LIST_PAGE = 98
# NOTE(review): sub-pages start at "_2"; the unsuffixed first page of each
# detail entry is not fetched (unchanged from the original) - confirm whether
# that is intended.
FIRST_DETAIL_PAGE = 2
LAST_DETAIL_PAGE = 10

# Timeouts in seconds for the three kinds of request.
LIST_TIMEOUT = 1000
DETAIL_TIMEOUT = 100
IMAGE_TIMEOUT = 5

# Detail-page links on a listing page, captured without the ".html" suffix.
DETAIL_LINK_RE = re.compile(r'<li><a href="(.*?)\.html">')
# Source of the large image on a detail sub-page.
IMAGE_RE = re.compile(r"<img src='(.*?)' id='bigimg'", re.S)
# Page title, used as the file name of the saved image.
TITLE_RE = re.compile(r'<title>(.*?)</title>')
# Characters that are not allowed in file names on common file systems.
INVALID_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def build_list_url(page):
    """Return the URL of listing page number *page*."""
    return LIST_URL_TEMPLATE.format(page=page)


def build_detail_url(page_url, stem, page):
    """Return the URL of sub-page *page* of a detail entry.

    *stem* is the link captured by DETAIL_LINK_RE (no ".html" suffix); it is
    resolved against *page_url*, the page on which the link was found, so
    root-relative, relative and absolute links all work.
    """
    return urljoin(page_url, stem + "_" + str(page) + ".html")


def fetch_bytes(url, timeout):
    """Download *url* with the crawler's headers and return the raw body.

    Raises whatever ``urllib.request.urlopen`` raises (for example on HTTP
    error statuses or timeouts); the response is always closed.
    """
    request = urllib.request.Request(url=url, headers=HEADERS)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read()


def fetch_text(url, timeout):
    """Download *url* and decode it as UTF-8, replacing undecodable bytes."""
    return fetch_bytes(url, timeout).decode("utf-8", errors="replace")


def safe_filename(title):
    """Turn a page title into a string that is safe to use as a file name.

    Invalid characters become "_", surrounding whitespace and trailing dots
    are dropped, and an empty result falls back to "untitled".
    """
    cleaned = INVALID_FILENAME_CHARS_RE.sub("_", title).strip().rstrip(". ")
    return cleaned or "untitled"


def save_image(data, title):
    """Write *data* to ``OUTPUT_DIR/<title>.jpg`` and return the path."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    path = os.path.join(OUTPUT_DIR, safe_filename(title) + ".jpg")
    with open(path, mode="wb") as image_file:
        image_file.write(data)
    return path


def download_detail_page(detail_url, page):
    """Save every large image found on one detail sub-page.

    *page* is only used in the progress message. Failures of a single image
    are reported and skipped; a failure to fetch the sub-page itself
    propagates to the caller.
    """
    html = fetch_text(detail_url, DETAIL_TIMEOUT)
    image_sources = IMAGE_RE.findall(html)
    titles = TITLE_RE.findall(html)

    for image_source in image_sources:
        image_url = urljoin(detail_url, image_source)
        print(image_url)
        try:
            # Fetch once per image, not once per title.
            data = fetch_bytes(image_url, IMAGE_TIMEOUT)
        except Exception as err:  # best-effort crawl: report and move on
            print("出现异常" + str(err))
            continue  # never write stale data from a previous download

        for title in titles:
            try:
                save_image(data, title)
            except OSError as err:
                print("出现异常" + str(err))
                continue
            print('正在保存壁纸')
            print('图片下载' + str(page) + '张')


def main():
    """Crawl all configured listing pages and download their images."""
    for list_page in range(FIRST_LIST_PAGE, LAST_LIST_PAGE + 1):
        list_url = build_list_url(list_page)
        try:
            listing_html = fetch_text(list_url, LIST_TIMEOUT)
        except Exception as err:  # best-effort crawl: report and move on
            print("出现异常" + str(err))
            continue  # do not re-process the previous page's HTML

        # All detail-page links found on this listing page.
        for stem in DETAIL_LINK_RE.findall(listing_html):
            for detail_page in range(FIRST_DETAIL_PAGE, LAST_DETAIL_PAGE + 1):
                detail_url = build_detail_url(list_url, stem, detail_page)
                try:
                    download_detail_page(detail_url, detail_page)
                except Exception as err:  # e.g. sub-page does not exist
                    print("出现异常" + str(err))
            print('图片下载结束')
        print('打印' + str(list_page) + '页')


if __name__ == "__main__":
    main()
![](s3://crabby-images/deb9d/deb9d52e6c78f73fbfaadc6e519fd00d286664e1)
下面展示一些成果:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。