&&&&&

# -*- coding: utf-8 -*-
  2. from xlwt import Workbook
  3. import requests
  4. from bs4 import  BeautifulSoup
  5. import sys
  6. reload(sys)
  7. sys.setdefaultencoding('utf8')
  8. import time
  9. def  beida(page):
  10.     url = 'http://162.105.134.150/searchCompy'
  11.     data ={
  12.         'eventId':'',
  13.         'loginName':'',
  14.         'keyWords':'',
  15.         'page.currentPage':page,
  16.         'qc.coName''',
  17.         'qc.year''0',
  18.         'qc.lp''',
  19.         'qc.province''',
  20.         'qc.co39''0',
  21.         'qc.co42''0',
  22.         'qc.co_data_15''0',
  23.         'qc.co35''0',
  24.         'qc.co_data_12''0',
  25.         'qc.co_data_16''0',
  26.         'qc.co34''0',
  27.         'qc.active'''
  28.     }
  29.     response =requests.post(url,data=data)
  30.     soup =BeautifulSoup(response.text,'lxml')
  31.     tableList = soup.find('div',class_='m-cont').find_all('tr')
  32.     tableList.pop(0)
  33.     dataInforList =[]
  34.     for data in tableList:
  35.         inforList = data.find_all('td')
  36.         inforData = []
  37.         for info in inforList:
  38.             inforData.append( ''.join(info.text.split()))
  39.         dataInforList.append(inforData)
  40.     return dataInforList
  41. def  saveToExecl(start,end):
  42.     book = Workbook(encoding='utf-8')  # 设置execl编码格式
  43.     sheet1 = book.add_sheet('Sheet 1')  # 操作execl表格
  44.     sheet1.write(00u'序号')
  45.     sheet1.write(01u'法人单位名称')
  46.     sheet1.write(02u'法人')
  47.     sheet1.write(03u'省(自治区、直辖市)')
  48.     sheet1.write(04u'街道')
  49.     sheet1.write(05u'年份')
  50.     sheet1.write(06u'组织机构代码')
  51.     sheet1.write(07u'主要业务活动')
  52.     sheet1.write(08u'行业')
  53.     sheet1.write(09u'登记注册类型')
  54.     sheet1.write(010u'企业控股情况')
  55.     sheet1.write(011u'隶属关系')
  56.     sheet1.write(012u'企业营业状态')
  57.     sheet1.write(013u'机构类型')
  58.     sheet1.write(014u'营业收入(元)')
  59.     sheet1.write(015u'企业规模')
  60.     sheet1.write(016u'轻重工业')
  61.     writeDataList = []
  62.     print "The number of pages being downloaded now...."
  63.     for page in range(int(start), int(end)):
  64.         try:
  65.             writeDataList += beida(page)
  66.             print page
  67.         except:
  68.             print page
  69.             time.sleep(3)
  70.             writeDataList += beida(page)
  71.     datalist = writeDataList
  72.     for data in range(0len(datalist)):  # 遍历数据列表,然后把数据写入表格中
  73.         line01 = datalist[data][0]
  74.         line02 = datalist[data][1]
  75.         line03 = datalist[data][2]
  76.         line04 = datalist[data][3]
  77.         line05 = datalist[data][4]
  78.         line06 = datalist[data][5]
  79.         line07 = datalist[data][6]
  80.         line08 = datalist[data][7]
  81.         line09 = datalist[data][8]
  82.         line10 = datalist[data][9]
  83.         line11 = datalist[data][10]
  84.         line12 = datalist[data][11]
  85.         line13 = datalist[data][12]
  86.         line14 = datalist[data][13]
  87.         line15 = datalist[data][14]
  88.         line16 = datalist[data][15]
  89.         line17 = datalist[data][16]
  90.         sheet1.write(data + 10, line01)
  91.         sheet1.write(data + 11, line02)
  92.         sheet1.write(data + 12, line03)
  93.         sheet1.write(data + 13, line04)
  94.         sheet1.write(data + 14, line05)
  95.         sheet1.write(data + 15, line06)
  96.         sheet1.write(data + 16, line07)
  97.         sheet1.write(data + 17, line08)
  98.         sheet1.write(data + 18, line09)
  99.         sheet1.write(data + 19, line10)
  100.         sheet1.write(data + 110, line11)
  101.         sheet1.write(data + 111, line12)
  102.         sheet1.write(data + 112, line13)
  103.         sheet1.write(data + 113, line14)
  104.         sheet1.write(data + 114, line15)
  105.         sheet1.write(data + 115, line16)
  106.         sheet1.write(data + 116, line17)
  107.     fileName = '中国工业企业数据库'str(start) +'-'str(end) +'.xls'
  108.     book.save(u"%s" % fileName)
  109. if __name__ == "__main__":
  110.     print "*********************Chinese industrial enterprise database download program*********************"
  111.     start = raw_input("please input start page number: ")
  112.     end = raw_input("please input end page number: ")
  113.     saveToExecl(start,end)


中国工业企业数据库.png


&&&&&