赞
踩
- 淘票票电影热榜网址:
- https://dianying.taobao.com/showList.htm?spm=a1z21.6646273.city.2.4ed46d6ekOc3wH&n_s=new&city=310100
网站截图:
很多人学习python,不知道从何学起。
很多人学习python,掌握了基本语法过后,不知道在哪里寻找案例上手。
很多已经做过案例的人,却不知道如何去学习更加高深的知识。
那么针对这三类人,我给大家提供一个好的学习平台,免费领取视频教程,电子书籍,以及课程的源代码!
QQ群:961562169
spider4taopiaopiao.py
爬取网站电影排行榜
- import requests
- import re
- import os
- import time
- import json
-
def mySpider():
    """Scrape the Taopiaopiao movie list page and download the posters.

    Side effects:
        * writes the complete page HTML to ``result.txt``;
        * writes the parsed records to ``items.json``;
        * saves every poster as ``./images/<index>.jpg`` (index starts at 0,
          in page order).

    Returns:
        A list with one record (a list of 9 strings) per movie, in the
        order: poster URL, title, rating, director, cast, genre, region,
        language, runtime.  A field for which nothing was captured is
        replaced by the placeholder "暂无信息".

    Raises:
        requests.RequestException: if the page or a poster cannot be
            fetched (connection error, timeout or HTTP error status).
    """
    # Imported locally because the local page text used to be called
    # ``html``; the captured fields are raw HTML text, so entities such as
    # ``&middot;`` in names must be decoded.
    from html import unescape

    # Send a browser-like User-Agent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    # Without a timeout a stalled connection would block forever.
    request_timeout = 10

    print("网页请求中...")
    time.sleep(0.5)
    url = "https://dianying.taobao.com/showList.htm?spm=a1z21.6646273.city.2.4ed46d6ekOc3wH&n_s=new&city=310100"
    response = requests.get(url, headers=headers, timeout=request_timeout)
    response.raise_for_status()
    page = response.text
    print("网页信息已获取...")
    time.sleep(0.5)

    # Keep a copy of the full page for inspection.
    with open("result.txt", "w", encoding='utf-8') as fd:
        fd.write(page)

    # Only parse what precedes the "coming soon" marker, when it is present.
    end = page.find('<!-- 即将热映 -->')
    if end != -1:
        page = page[:end]

    # One capture group per field; re.S lets ".*?" span line breaks.
    # The rating group is optional, so unrated movies yield "".
    pattern = re.compile(
        r'<img width="160" height="224" data-src="(.*?)" src='
        r'.*?<span class="bt-l">(.+?)</span>.*?<span class="bt-r">(\d\.\d)?</span>'
        r'.*?<span>导演:(.*?)</span>'
        r'.*?<span>主演:(.*?)</span>'
        r'.*?<span>类型:(.*?)</span>'
        r'.*?<span>地区:(.*?)</span>'
        r'.*?<span>语言:(.*?)</span>'
        r'.*?<span>片长:(.*?)</span>',
        re.S,
    )
    items = [
        [unescape(field) if field != "" else "暂无信息" for field in record]
        for record in pattern.findall(page)
    ]

    with open("items.json", "w", encoding='utf-8') as fd:
        json.dump(items, fd)

    # Download directory for the posters.
    dir_name = "./images"
    os.makedirs(dir_name, exist_ok=True)

    for index, item in enumerate(items):
        file_name = str(index) + ".jpg"
        response = requests.get(item[0], headers=headers, timeout=request_timeout)
        # Fail loudly instead of saving an error page as a .jpg file.
        response.raise_for_status()
        with open(dir_name + "/" + file_name, 'wb') as f:
            f.write(response.content)
        print("图片文件: {0:25}{1}".format(file_name, " 成功下载..."))

    return items
-
-
-
if __name__ == "__main__":
    # Run the scraper only when this file is executed as a script.
    mySpider()
运行结果展示:
GUI4Spider.py
制作简易的tkinter GUI 图形化用户交互界面
- from spider4taopiaopiao import mySpider
- from tkinter import *
- import time
- from PIL import Image,ImageTk
- import json
-
- # sourcePath = "items.json"
- # fs = open(sourcePath,"r",encoding='utf-8')
- # items = json.load(fs)
- # fs.close()
-
-
- # print(len(items))
- # print(items)
# Scrape fresh data at start-up.  Each record is indexed as:
# 0 poster URL, 1 title, 2 rating, 3 director, 4 cast, 5 genre,
# 6 region, 7 language, 8 runtime.
items = mySpider()

# Caption shown in front of each field, keyed by the field's index.
infoMap = dict(enumerate((
    "图片链接:", "电影名:", "评分:", "导演:",
    "主演:", "类型:", "地区:", "语言:", "片长:",
)))

# Rank of the movie currently on screen (1-based) and the number of movies.
current_rank = 1
total_rank = len(items)

root = Tk()
# strftime() formats the current local time when no time tuple is passed.
root.title("淘票票电影热映排行榜,更新时间:" + time.strftime("%Y-%m-%d %H:%M:%S"))
root.geometry('800x800')
root.iconbitmap("movie.ico")
-
def showPre():
    """Step back one movie (never below rank 1) and refresh the display."""
    global current_rank
    current_rank = max(current_rank - 1, 1)
    print("显示前一部电影...", current_rank)
    labimgconfig()
    labInfoConfig()
-
def showNxt():
    """Step forward one movie (never past total_rank) and refresh the display."""
    global current_rank
    current_rank = min(current_rank + 1, total_rank)
    print("显示后一部电影...", current_rank)
    labimgconfig()
    labInfoConfig()
-
def labimgconfig():
    """Show the poster of the current movie in the image label."""
    # Stored in a module-level name so the PhotoImage stays referenced after
    # this function returns (presumably to keep Tk from dropping the image).
    global newImage
    poster_path = "images/" + str(current_rank - 1) + ".jpg"
    newImage = getImage(poster_path)
    labimg.config(image=newImage)
-
def getImage(filename):
    """Open the image file *filename* and return it as an ImageTk.PhotoImage."""
    return ImageTk.PhotoImage(Image.open(filename))
-
def labInfoConfig():
    """Fill the info labels and the rank label with the current movie's data."""
    record = items[current_rank - 1]
    # Field 0 is the poster URL and is not shown, so label k displays field k+1.
    for field_index, label in enumerate(labInfo, start=1):
        label.config(text=infoMap[field_index] + record[field_index])
    labRank.config(text="排名:#" + str(current_rank))
-
-
# Poster label, initially showing the first downloaded poster.
image = getImage("images/0.jpg")
labimg = Label(root, image=image)
labimg.pack()

# One coloured label per displayed field (title ... runtime), packed top-down.
colors = ["Red", "Orange", "Yellow", "Green", "Blue", "Violet", "Purple", "Chocolate"]
labInfo = [
    Label(root, bg=color, width=200, height=3, wraplength=1000)
    for color in colors
]
for info_label in labInfo:
    info_label.pack()

labRank = Label(root, bg="Red", width=9, height=3, text="排名:#" + str(current_rank))
labRank.pack()

# Navigation buttons, anchored at the bottom-left and bottom-right.
btnPre = Button(root, width=15, height=5, text="显示前一个", command=showPre)
btnPre.pack(side=LEFT, anchor=S)
btnNxt = Button(root, width=15, height=5, text="显示后一个", command=showNxt)
btnNxt.pack(side=RIGHT, anchor=S)

# current_rank is already 1, so this keeps the rank and just fills the labels.
showPre()

root.mainloop()
-
运行结果如下:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。