赞
踩
#! /usr/bin/python
# -*- coding: utf-8 -*-
# @Author : declan
# @Time : 2020/06/14 22:59
# @File : lianjia.py
# @Software: PyCharm
"""Scrape second-hand housing listings from su.lianjia.com into MySQL.

Listing index pages 1 through 9 are fetched; for every listing found, its
title and total price are inserted into the ``houseprice`` table of the
``lianjiadb`` database (both are created on start-up if missing).
"""
import warnings

import pymysql
import requests
from fake_useragent import UserAgent
from lxml import etree

# Seconds to wait on the server before a page request is abandoned; without
# a timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

# Keyword arguments are used because PyMySQL 1.0+ no longer accepts
# positional connection parameters.
db = pymysql.connect(
    host="localhost",
    user="root",
    password="password",
    charset='utf8',
)
cursor = db.cursor()

create_db = "create database if not exists lianjiadb character set utf8;"
use_db = "use lianjiadb"
create_tab = (
    "create table if not exists houseprice("
    "id INT PRIMARY KEY auto_increment,"
    "housename VARCHAR(50),"
    "totalprice VARCHAR(50)) charset=utf8;"
)

# Suppress all warnings from here on; presumably meant to hide the notices
# MySQL emits when the "if not exists" objects are already present.
warnings.filterwarnings("ignore")
cursor.execute(create_db)
cursor.execute(use_db)
cursor.execute(create_tab)

# A single random User-Agent is picked once and reused for every request.
headers = {'User-Agent': UserAgent().random}
base_url = 'https://su.lianjia.com/ershoufang/pg{}/'


def load_page(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Absolute URL of the listing page to download.

    Returns:
        The page text when the server answers with HTTP 200, otherwise
        ``None`` (request failure, timeout, or any other status code).
    """
    try:
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Only network-level failures are handled here; programming errors
        # and KeyboardInterrupt are allowed to propagate.
        print('网络访问错误')
        return None
    if res.status_code == 200:
        print('页面请求完毕')
        return res.text
    return None


# Parse the HTML and extract the wanted data.
def parse_page(html):
    """Extract (title, total price) pairs from *html* and store them.

    Args:
        html: Page text as returned by ``load_page``. ``None`` or an empty
            string is ignored so a failed download does not crash the run.
    """
    if not html:
        return
    xpath_content = etree.HTML(html)
    if xpath_content is None:
        # Defensive: nothing to query if parsing produced no root element.
        return
    xpath_datas = xpath_content.xpath('//*[@class="info clear"]')
    for data in xpath_datas:
        title = data.xpath('./div[1]/a/text()')
        price = data.xpath('./div[6]//div[@class="totalPrice"]/span/text()')
        print(price)
        # zip stops at the shorter list, so a listing with a missing title
        # or price yields no row.
        for tup_data in zip(title, price):
            insert_mysql(tup_data)
    print('数据写入完毕')


def insert_mysql(data):
    """Insert one ``(housename, totalprice)`` row and commit it.

    Args:
        data: Two-item sequence holding the listing title and its total
            price. An empty value is skipped.
    """
    insert_data = "insert into houseprice(housename,totalprice) values(%s,%s)"
    if data:
        # Values are passed separately so the driver handles the escaping.
        cursor.execute(insert_data, list(data))
        db.commit()


def main():
    """Download listing pages 1-9 and persist every listing found."""
    for i in range(1, 10):
        url = base_url.format(str(i))
        html = load_page(url)
        if html is None:
            # Skip pages that could not be fetched instead of parsing None.
            continue
        parse_page(html)


if __name__ == '__main__':
    try:
        main()
    finally:
        # Release the database resources even when the scrape aborts.
        cursor.close()
        db.close()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。