赞
踩
知识点:多线程,读取csv,xpath
- import json
- import csv
- import requests
- import threading
- import lxml
- import lxml.etree
-
# Re-entrant lock created once at import time and shared module-wide.
rLock = threading.RLock()

# Headers passed to requests.get; the User-Agent string identifies the
# client as desktop Firefox 43 on Windows.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0',
}
-
def getAreaList(url, timeout=10):
    """Download the page at *url* and map each area name to its listing URL.

    Every ``<a>`` element nested inside a ``<div data-role="ershoufang">``
    is treated as one area link. Each name/URL pair is also printed to
    stdout as it is collected.

    Args:
        url: Address of the page that contains the area links.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        A dict mapping the first text node of each link to an absolute URL
        resolved against ``https://gz.lianjia.com``. When two links share
        the same text, the later one wins.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    html = requests.get(url, headers=headers, timeout=timeout).text
    mytree = lxml.etree.HTML(html)

    areaDict = {}
    for area in mytree.xpath('//div[@data-role="ershoufang"]//a'):
        names = area.xpath('./text()')
        hrefs = area.xpath('./@href')
        # Skip anchors without direct text or without an href instead of
        # aborting the whole scrape with an IndexError.
        if not names or not hrefs:
            continue
        areaName = names[0]
        # urljoin yields the same result as plain concatenation for
        # root-relative hrefs, and leaves already-absolute hrefs intact.
        areaUrl = urljoin('https://gz.lianjia.com', hrefs[0])
        areaDict[areaName] = areaUrl
        print(areaName, areaUrl)
    return areaDict
-
-
- #获取区域页数
- def getAreaPage(areaUrl,areaName):
- html = requests.get(are
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。