&&&&&
- # -*- coding: utf-8 -*
- from xlwt import Workbook
- import requests
- from bs4 import BeautifulSoup
- import sys
- reload(sys)
- sys.setdefaultencoding('utf8')
- import time
-
def beida(page, timeout=30):
    """Fetch one page of the enterprise search and return its table rows.

    Sends a POST with an otherwise empty query form to the ``searchCompy``
    endpoint, parses the HTML reply and extracts the text of every ``<td>``
    found inside the ``div.m-cont`` element.

    Args:
        page: Page number, sent as the ``page.currentPage`` form field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        A list with one entry per table row after the first (the first
        ``<tr>`` is treated as the header and dropped). Each entry is the
        list of that row's cell texts with all whitespace removed.

    Raises:
        requests.exceptions.RequestException: On timeout, connection failure
            or a non-success HTTP status.
        ValueError: If the reply contains no ``div.m-cont`` table rows.
    """
    url = 'http://162.105.134.150/searchCompy'
    form = {
        'eventId': '',
        'loginName': '',
        'keyWords': '',
        'page.currentPage': page,
        'qc.coName': '',
        'qc.year': '0',
        'qc.lp': '',
        'qc.province': '',
        'qc.co39': '0',
        'qc.co42': '0',
        'qc.co_data_15': '0',
        'qc.co35': '0',
        'qc.co_data_12': '0',
        'qc.co_data_16': '0',
        'qc.co34': '0',
        'qc.active': '',
    }
    response = requests.post(url, data=form, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    container = soup.find('div', class_='m-cont')
    rows = container.find_all('tr') if container is not None else []
    if not rows:
        # Still an exception (as before), so a retrying caller keeps working,
        # but with a message that says what actually went wrong.
        raise ValueError(
            'page %s: reply contains no result table rows' % page)

    # rows[0] is the header row; every remaining row becomes a list of
    # whitespace-stripped cell texts.
    return [
        [''.join(cell.text.split()) for cell in row.find_all('td')]
        for row in rows[1:]
    ]
-
-
-
-
def saveToExecl(start, end):
    """Download a range of result pages and save them to an .xls workbook.

    Pages ``start`` .. ``end - 1`` are fetched with ``beida`` (``end`` itself
    is NOT downloaded, because the range is half-open). A page that fails is
    retried once after a 3 second pause; if the retry fails too, the
    exception propagates. All rows are then written below a fixed header row
    and the workbook is saved in the current directory under a name built
    from ``start`` and ``end``.

    Args:
        start: First page number (int or numeric string).
        end: Page number at which to stop, exclusive (int or numeric string).

    Raises:
        ValueError: If ``start`` or ``end`` cannot be converted to ``int``.
    """
    # Convert first so bad input fails before any download starts.
    first_page = int(start)
    stop_page = int(end)

    headers = (
        u'序号',
        u'法人单位名称',
        u'法人',
        u'省(自治区、直辖市)',
        u'街道',
        u'年份',
        u'组织机构代码',
        u'主要业务活动',
        u'行业',
        u'登记注册类型',
        u'企业控股情况',
        u'隶属关系',
        u'企业营业状态',
        u'机构类型',
        u'营业收入(元)',
        u'企业规模',
        u'轻重工业',
    )

    book = Workbook(encoding='utf-8')  # workbook text encoding
    sheet1 = book.add_sheet('Sheet 1')
    for column, title in enumerate(headers):
        sheet1.write(0, column, title)

    collected = []
    print("The number of pages being downloaded now....")
    for page in range(first_page, stop_page):
        try:
            rows = beida(page)
        except Exception:
            # Not a bare ``except``: Ctrl-C / SystemExit must still be able
            # to stop the download. One retry after a short pause.
            print(page)
            time.sleep(3)
            rows = beida(page)
        else:
            print(page)
        collected.extend(rows)

    # Write every downloaded row below the header. Only the first
    # len(headers) cells are used; a shorter row is written as far as it
    # goes instead of aborting the export with an IndexError.
    width = len(headers)
    for row_index, cells in enumerate(collected, 1):
        for column, value in enumerate(cells[:width]):
            sheet1.write(row_index, column, value)

    # Unicode literal: no implicit byte-string decode, so the name does not
    # depend on the interpreter's default encoding.
    file_name = u'中国工业企业数据库%s-%s.xls' % (start, end)
    book.save(file_name)
-
-
if __name__ == "__main__":
    # Script entry point: show the banner, ask for the page range on the
    # console, then download and export that range.
    print("*********************Chinese industrial enterprise database download program*********************")
    start = raw_input("please input start page number: ")
    end = raw_input("please input end page number: ")
    saveToExecl(start, end)
&&&&&