赞
踩
1、爬取百度
import requests

# Submit a Baidu search for `keyword` and dump the result page.
keyword = "Python"
try:
    # Pass the query through `params` so requests URL-encodes it.
    kv = {'wd': keyword}
    r = requests.get("http://www.baidu.com/s", params=kv)
    print(r.request.url)  # show the final URL actually requested
    r.raise_for_status()  # raise on 4xx/5xx status codes
    print(r.text)
except requests.RequestException:
    # Catch only network/HTTP errors; the original bare `except:` also
    # swallowed KeyboardInterrupt and genuine programming bugs.
    print("爬取失败")
2、查询IP地址
import requests

# Query ip138.com for the location of one IP address.
url = "https://ip138.com/iplookup.asp?ip="
ip = '202.204.80.112'
kv = {'user-agent': 'Mozilla/5.0'}  # browser-like UA; the site may reject the default
try:
    # Bug fix: the IP must be interpolated into the URL. The original code
    # appended the literal string 'ip' and passed the address as the
    # positional `params` argument, producing a malformed query string.
    r = requests.get(url + ip + '&action=2', headers=kv)
    r.raise_for_status()
    r.encoding = r.apparent_encoding  # let requests guess the encoding from the body
    print(r.text[-500:])
except requests.RequestException:
    print("失败")
3、股票爬取
import re
import time
import traceback

import requests
from bs4 import BeautifulSoup
def getHTMLText(url):
    """Fetch *url* and return its text decoded with the apparent encoding.

    Returns "" on any request/HTTP error.
    """
    try:
        r = requests.get(url)
        # Bug fix: was misspelled `raise_for_ststus`, which raised
        # AttributeError; the bare except then silently returned "" on
        # EVERY call, so no page was ever parsed downstream.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return ""
def getStockList(lst, stockURL):
    """Append every stock code (sh/sz + 6 digits) found in <a href> links on
    *stockURL* to *lst*. Returns "" (kept for call-site compatibility)."""
    html = getHTMLText(stockURL)
    soup = BeautifulSoup(html, 'html.parser')
    for a in soup.find_all('a'):
        try:
            href = a.attrs['href']
            # Bug fix: `re.finall` is not a real function; the bare except
            # discarded the resulting AttributeError, so lst stayed empty.
            lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
        except (KeyError, IndexError):
            # anchor without an href, or an href with no stock code in it
            continue
    return ""
def getStockInfo(lst, stockURL, fpath):
    """For each code in *lst*, fetch its page from *stockURL*, parse the stock
    name plus every <dt>/<dd> field pair, and append one dict per line to the
    file at *fpath*. Prints a progress/elapsed-time line as it goes.

    Returns "" (kept for call-site compatibility).
    Requires `import time` at module level.
    """
    count = 0
    # Bug fix: the original assigned `timel` (lowercase L) but read back
    # `time1`, raising NameError on the first progress print.
    time1 = time.time()
    for stock in lst:
        url = stockURL + stock
        html = getHTMLText(url)
        try:
            if html == "":
                continue  # fetch failed; skip this stock
            infoDict = {}
            soup = BeautifulSoup(html, 'html.parser')
            stockInfo = soup.find('header', attrs={'class': 'stock_title'})

            name = stockInfo.find_all(attrs={'h1': ''})[0]
            infoDict.update({'股票名称': name.text.split('</h1>')[0]})

            stockInfo = soup.find('div', attrs={'class': 'stock_top clearfix'})

            # <dt> holds the field label, the matching <dd> holds the value.
            keyList = stockInfo.find_all('dt')
            valueList = stockInfo.find_all('dd')
            for i in range(len(keyList)):
                infoDict[keyList[i].text] = valueList[i].text
            with open(fpath, 'a', encoding='utf-8') as f:
                f.write(str(infoDict) + '\n')
            count = count + 1
            time2 = time.time() - time1
            print('\r当前速度:{:.2f}%\t用时:{:.2f}秒'.format((count * 100 / len(lst)), time2), end='')
        except Exception:
            # Page layout mismatch etc.: log the traceback, keep the
            # progress counter moving, and continue with the next stock.
            count = count + 1
            time2 = time.time() - time1
            print('\r当前速度:{:.2f}%\t用时:{:.2f}秒'.format((count * 100 / len(lst)), time2), end='')
            traceback.print_exc()
            continue
    return ""
def main():
    """Crawl the stock-code list page, then scrape every stock's detail
    page into a local text file."""
    list_url = 'https://hq.gucheng.com/gpdmylb.html'
    info_url = 'https://hq.gucheng.com/'
    out_path = 'C:\\StockInfo.txt'
    codes = []
    getStockList(codes, list_url)
    getStockInfo(codes, info_url, out_path)

main()

4、爬取京东商品
import requests

# Fetch one JD.com product page and show the first 1000 characters.
url = "http://item.jd.com/2967929.html"
try:
    r = requests.get(url)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    print(r.text[:1000])
except requests.RequestException:
    # Narrowed from a bare `except:` so only network/HTTP failures are
    # reported as "爬取失败" — real bugs still surface.
    print("爬取失败")
5、爬取图片
import requests
import os

# Download one image into the local Pictures folder, skipping if it exists.
url = "http://pic.87g.com/upload/2020/0102/20200102093322295.jpg"
root = "C://Users//15133//Pictures//Camera Roll"
# Bug fix: the original concatenated root + filename with no separator,
# writing ".../Camera Roll20200102....jpg" NEXT TO the target folder.
path = os.path.join(root, url.split('/')[-1])
try:
    if not os.path.exists(root):
        os.mkdir(root)
    if not os.path.exists(path):
        r = requests.get(url)
        r.raise_for_status()  # don't save an HTTP error page as an image
        # `with` closes the file itself; the original's extra f.close()
        # inside the with-block was redundant.
        with open(path, 'wb') as f:
            f.write(r.content)
        print("成功")
    else:
        print("存在")
except (requests.RequestException, OSError):
    print("失败")
-

6、图表库练习
import matplotlib.pyplot as plt

# Pie-chart demo: four categories, with the "Hogs" wedge pulled out slightly.
labels = ('Frogs', 'Hogs', 'Dogs', 'Logs')
sizes = [15, 30, 45, 10]
explode = (0, 0.1, 0, 0)  # offset only the second wedge
plt.pie(
    sizes,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',  # one-decimal percentage label on each wedge
    shadow=False,
    startangle=90,
)
plt.show()
7、图片转水墨画
from PIL import Image
import numpy as np

# Render a photo as an "ink painting": treat the grayscale image as a height
# field and shade it with a single directional light.
# Bug fix: paths must be raw strings — in a normal string literal "\U"
# starts a unicode escape, so 'C:\Users\...' is a SyntaxError in Python 3.
src_path = r'C:\Users\15133\Pictures\Saved Pictures\QQ图片20191029201945.jpg'
# Bug fix: the original saved over src_path, destroying the source photo;
# write the rendering to a separate file instead.
dst_path = r'C:\Users\15133\Pictures\Saved Pictures\QQ图片20191029201945_ink.jpg'

a = np.asarray(Image.open(src_path).convert('L')).astype('float')

depth = 10.  # simulated relief depth (0-100)
grad = np.gradient(a)            # gray-level gradient of the image
grad_x, grad_y = grad            # horizontal / vertical gradient components
grad_x = grad_x * depth / 100.
grad_y = grad_y * depth / 100.
A = np.sqrt(grad_x ** 2 + grad_y ** 2 + 1.)
uni_x = grad_x / A               # unit surface-normal components
uni_y = grad_y / A
uni_z = 1. / A

vec_el = np.pi / 2.2             # light-source elevation angle, radians
vec_az = np.pi / 4.              # light-source azimuth angle, radians
dx = np.cos(vec_el) * np.cos(vec_az)  # light influence on the x axis
dy = np.cos(vec_el) * np.sin(vec_az)  # light influence on the y axis
dz = np.sin(vec_el)                   # light influence on the z axis

b = 255 * (dx * uni_x + dy * uni_y + dz * uni_z)  # normal · light, scaled to 0-255
b = b.clip(0, 255)

im = Image.fromarray(b.astype('uint8'))  # rebuild the image from the array
im.save(dst_path)
-

8、爬取高校排名
- import requests
- from bs4 import BeautifulSoup
- import bs4
def getHTMLText(url):
    """Download *url* with a 30 s timeout and return its text decoded with
    the apparent encoding; return "" if the request fails."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Narrowed from a bare `except:` — only request/HTTP failures are
        # converted to "", everything else still propagates.
        return ""
-
def fillUnivList(ulist, html):
    """Parse the ranking table in *html* and append one
    [rank, name, score] row per university to *ulist*."""
    soup = BeautifulSoup(html, "html.parser")
    for row in soup.find('tbody').children:
        # skip the NavigableString whitespace nodes between <tr> tags
        if not isinstance(row, bs4.element.Tag):
            continue
        cells = row('td')  # calling a Tag is shorthand for find_all
        ulist.append([cells[0].string, cells[1].string, cells[3].string])
def printUnivList(ulist, num):
    """Print the first *num* rows of *ulist* as an aligned
    rank / school / score table (header row included)."""
    row_fmt = "{:^10}\t{:^6}\t{:^10}"
    print(row_fmt.format("排名", "学校", "总分"))
    for i in range(num):
        row = ulist[i]
        print(row_fmt.format(row[0], row[1], row[2]))
def main():
    """Fetch the 2019 best-universities ranking page and print the top 20."""
    records = []
    page = getHTMLText('http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html')
    fillUnivList(records, page)
    printUnivList(records, 20)  # show the top 20 universities

main()
-

9、亚马逊商品爬取
import requests

# Fetch one Amazon.cn product page and show a slice of the HTML.
url = "https://www.amazon.cn/gp/product/B01M8L5Z3Y"
try:
    # Browser-like UA: the default python-requests agent is commonly
    # rejected by this site.
    kv = {'user-agent': 'Mozilla/5.0'}
    r = requests.get(url, headers=kv)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    print(r.text[1000:2000])
except requests.RequestException:
    # Narrowed from a bare `except:` so only network/HTTP failures are
    # reported; programming errors still surface.
    print("爬取失败")
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。