赞
踩
使用 Python 下载超大文件时,如果一次性把全部内容读入内存,文件过大可能会造成内存不足;这时应使用 requests 的 stream 模式(stream=True),分块读取并写入磁盘。
主要代码如下
iter_content:一块一块地遍历要下载的内容(块大小由 chunk_size 指定)
iter_lines:一行一行地遍历要下载的内容
def download_file(url, file_pname, chunk_size=1024*4):
    """Stream the resource at ``url`` to ``file_pname`` chunk by chunk.

    The body is never held in memory as a whole: it is read from the
    response in pieces of ``chunk_size`` bytes and written straight to disk.

    Args:
        url: URL of the file to download.
        file_pname: Path the downloaded bytes are written to (opened in
            binary write mode, so an existing file is overwritten).
        chunk_size: Number of bytes requested from the response per
            iteration.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # stream=True defers fetching the body until iter_content() is consumed.
    # Using the response as a context manager releases the connection even
    # when writing fails part-way through.
    with requests.get(url, stream=True) as req:
        # Fail loudly instead of saving an HTTP error page as the file.
        req.raise_for_status()
        with open(file_pname, 'wb') as f:
            for chunk in req.iter_content(chunk_size=chunk_size):
                if chunk:  # ignore empty chunks
                    f.write(chunk)
-
# Practical example: download a large file while showing a progress bar.
def Big_Download(session, url_inquire, headers, form_data):
    """POST to ``url_inquire`` and stream the response into ``Expense.json``.

    Progress is reported through a ``tqdm`` bar sized from the response's
    ``Content-Length`` header when the server provides one.

    Args:
        session: Object exposing a ``post`` method with the ``requests``
            session signature (presumably a ``requests.Session``).
        url_inquire: URL the form is posted to.
        headers: HTTP headers sent with the request.
        form_data: Form payload sent as the request body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Send the POST exactly once and reuse that response for both the size
    # lookup and the body; a second POST would repeat the server-side work
    # and leave the first streamed connection open.
    # NOTE(review): verify=False disables TLS certificate verification;
    # kept as in the original, but confirm this is intended.
    with session.post(url=url_inquire, data=form_data, headers=headers,
                      verify=False, stream=True) as r:
        r.raise_for_status()
        # Content-Length may be missing (e.g. chunked transfer encoding);
        # total=None makes tqdm show a counter without a percentage.
        file_size = int(r.headers.get('content-length', 0)) or None
        with tqdm(total=file_size, unit='B', unit_scale=True,
                  unit_divisor=1024, ascii=True, desc='Expense.json') as bar:
            with open('Expense.json', 'wb') as fp:
                for chunk in r.iter_content(chunk_size=512):
                    if chunk:  # ignore empty chunks
                        fp.write(chunk)
                        bar.update(len(chunk))
- #批量文件下载
- import requests
- from bs4 import BeautifulSoup
-
# Directory listing page that is scanned for video links.
archive_url = "http://www-personal.umich.edu/~csev/books/py4inf/media/"


def get_links():
    """Return the URLs of all links on ``archive_url`` that end in ``mp4``.

    Returns:
        A list of absolute URLs, one per matching ``<a>`` element, in
        document order.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    r = requests.get(archive_url)
    soup = BeautifulSoup(r.content, 'html5lib')
    # href=True skips anchors without an href attribute, which would
    # otherwise raise KeyError on link['href'].
    links = soup.find_all('a', href=True)
    # urljoin gives the same result as concatenation for plain relative
    # file names and also handles absolute or root-relative hrefs.
    video_links = [
        urljoin(archive_url, link['href'])
        for link in links
        if link['href'].endswith('mp4')
    ]

    return video_links
-
def download_series(video_links):
    """Download every URL in ``video_links`` into the current directory.

    Each file is saved under the last path segment of its URL and is
    streamed to disk in 1 MiB chunks.

    Args:
        video_links: Iterable of URL strings to download.

    Raises:
        requests.HTTPError: If a server answers with a 4xx/5xx status; the
            remaining links are then not attempted.
    """
    for link in video_links:
        # The text after the final '/' is used as the local file name.
        file_name = link.split('/')[-1]
        print("Downloading file:%s" % file_name)
        # The context manager closes the streamed connection after each
        # file instead of leaking one connection per download.
        with requests.get(link, stream=True) as r:
            # Avoid saving an HTTP error page under a video file name.
            r.raise_for_status()
            with open(file_name, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:  # ignore empty chunks
                        f.write(chunk)
        print("%s downloaded!\n" % file_name)
    print("All videos downloaded!")
-
if __name__ == "__main__":
    # Script entry point: collect the video links, then download them all.
    download_series(get_links())
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。