赞
踩
一个爬虫案例
"""Web-crawler example: fetches pages and records parsed fields into Excel sheets.

NOTE(review): this module was recovered from a web scrape with all code
collapsed onto a single line; it has been reformatted here. The final
function is truncated in the scraped source — see the note at the bottom.
"""

import datetime
import json
import os
import random
import re
import time

import requests
import xlrd
import xlwt
from bs4 import BeautifulSoup
from xlutils.copy import copy

IS_FIRST = True
ROOT_PATH = os.path.abspath('..')  # set the project root directory
File = ''

# Headers for plain GET requests (an HTML response is expected).
headers_get = {
    'Accept': 'text/html, application/xhtml+xml, */*',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
}
# Headers for form-encoded POST requests (a JSON response is expected).
headers_post = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'
}

ITEM1 = []  # column headers for sheet 0 of the Excel workbook
ITEM2 = ['zcxh', 'djcps', 'ggxh']  # column headers for sheet 1 of the Excel workbook
DEFAULT_TITLE = 'yydmhc'  # default header for the first column of each sheet


def generate_random_num(randomlength=7):
    """Return a pseudo-random numeric code string.

    The result is the fixed prefix ``'158808'`` followed by *randomlength*
    randomly chosen decimal digits.

    :param randomlength: number of random digits appended to the prefix
    :return: the generated code string
    """
    random_str = '158808'
    base_str = '0123456789'
    # random.choice is the idiomatic equivalent of indexing with
    # random.randint(0, len(base_str) - 1).
    for _ in range(randomlength):
        random_str += random.choice(base_str)
    return random_str


def gain_legal_info(code):
    """Fetch info (the request returns HTML; content is extracted by parsing tags).

    :param code: lookup code
    :return: unknown — the source for this function is truncated
    """
    start_time = time.time()
    url = 'xxxx'  # request address (redacted in the original post)
    # NOTE(review): the scraped source is truncated mid-statement here
    # ("headers=he"). Completed to headers_get since this is a GET request
    # and headers_get is the module's GET header dict — TODO confirm against
    # the original article. The remainder of this function is missing.
    html = requests.get(url, headers=headers_get)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。