赞
踩
Python 爬虫主要使用标准库 urllib(包括 urllib.request、urllib.parse 和 urllib.error 子模块)。
方法1
import ssl
import urllib.parse
import urllib.request

# NOTE(review): globally disabling certificate verification is insecure.
# Keep it only if the target host genuinely has a broken certificate chain.
ssl._create_default_https_context = ssl._create_unverified_context


def fetch_url(url, headers=None, coding="utf-8"):
    """Fetch *url* with optional request *headers* and return the body
    decoded with *coding*.

    The original snippet was a loose fragment (bare ``return`` outside a
    function, undefined ``url``/``headers``/``coding``, and two statements
    fused onto one line); it is wrapped here into a reusable function.

    :param url: URL to request.
    :param headers: optional dict of HTTP request headers.
    :param coding: text encoding used to decode the response body.
    :return: decoded response body as ``str``.
    :raises urllib.error.URLError: on network failure.
    """
    req = urllib.request.Request(url=url, headers=headers or {})
    # Close the connection deterministically instead of leaking it.
    with urllib.request.urlopen(req) as response:
        return response.read().decode(coding)
方法2
- #导入包
-
- import urllib.request
-
- #函数
-
def main():
    """Script entry point: fetch the demo page and print its contents."""
    preservation()
-
def gethtml_http(url):
    """Return the UTF-8 decoded body of *url*, or ``None`` on failure.

    Uses a 5-second timeout. Network errors (including timeouts) are
    reported by printing a message instead of propagating.

    :param url: HTTP URL to fetch.
    :return: decoded page text, or ``None`` when the request failed.
    """
    # Bug fix: the original left ``htmlfile`` unbound when urlopen failed,
    # so the final ``return`` raised NameError instead of reporting the error.
    htmlfile = None
    try:
        # ``with`` closes the response even if read/decode fails.
        with urllib.request.urlopen(url, timeout=5) as response:
            htmlfile = response.read().decode("utf-8")
    except urllib.error.URLError:
        print("超时")
    return htmlfile
-
def preservation():
    """Fetch the Baidu homepage and print it.

    Prints ``None`` when the fetch fails (``gethtml_http`` swallows
    network errors and returns ``None``).
    """
    # Commented-out ``os.rename`` experiment removed as dead code.
    page = gethtml_http("http://www.baidu.com")
    print(page)
-
# Run the demo only when executed directly, not when imported.
if __name__ == "__main__":
    main()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。