当前位置:   article > 正文

使用 Python 爬取链家房产信息并存入 MySQL 数据库中_爬取链家并保存在数据库

爬取链家并保存在数据库
#! /usr/bin/python
# -*- coding: utf-8 -*-
# @Author  : declan
# @Time    : 2020/06/14 22:59
# @File    : lianjia.py
# @Software: PyCharm
import requests
from lxml import etree
from fake_useragent import UserAgent
import pymysql,warnings

# Open the connection to the local MySQL server. Keyword arguments are used
# because PyMySQL 1.0+ no longer accepts host/user/password positionally.
db = pymysql.connect(host="localhost", user="root", password="password", charset='utf8')
cursor = db.cursor()

# Schema bootstrap statements: create the database and the target table only
# when they do not exist yet, so the script can be re-run safely.
create_db = "create database if not exists lianjiadb character set utf8;"
use_db = "use lianjiadb"
create_tab = "create table if not exists houseprice(id INT PRIMARY KEY auto_increment,housename VARCHAR(50),totalprice VARCHAR(50)) charset=utf8;"

# Silence all Python warnings from this point on.
# NOTE(review): presumably meant to hide the "already exists" notes that the
# "if not exists" statements can produce -- confirm.
warnings.filterwarnings("ignore")
cursor.execute(create_db)
cursor.execute(use_db)
cursor.execute(create_tab)

# One randomly chosen User-Agent, picked once at start-up and reused for
# every request.
headers = {'User-Agent': UserAgent().random}

# Listing URL template; "{}" is replaced with the page number.
base_url = 'https://su.lianjia.com/ershoufang/pg{}/'
def load_page(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawler forever.

    Returns:
        The page text when the server answers with HTTP 200, otherwise
        ``None`` (network error or any other status code).
    """
    try:
        res = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Only network-level failures are handled here; programming errors
        # and KeyboardInterrupt are allowed to propagate.
        print('网络访问错误')
        return None

    if res.status_code != 200:
        # Report the failure instead of silently returning None.
        print('页面请求失败, 状态码: {}'.format(res.status_code))
        return None

    print('页面请求完毕')
    return res.text
def parse_page(html):
    """Parse a listing page and store every (title, price) pair found.

    Each element whose class is exactly "info clear" is treated as one
    listing; its title texts and total-price texts are paired up with
    ``zip`` and each pair is handed to ``insert_mysql``.

    Args:
        html: Page source as text. ``None`` or an empty string (what the
            fetch step yields on failure) is skipped instead of crashing
            inside the HTML parser.

    Returns:
        None.
    """
    if not html:
        print('页面内容为空, 跳过解析')
        return

    xpath_content = etree.HTML(html)
    xpath_datas = xpath_content.xpath('//*[@class="info clear"]')
    for data in xpath_datas:
        title = data.xpath('./div[1]/a/text()')
        price = data.xpath('./div[6]//div[@class="totalPrice"]/span/text()')
        print(price)
        # zip() stops at the shorter list, so a listing with a missing
        # title or price produces no row at all.
        for tup_data in zip(title, price):
            insert_mysql(tup_data)
            print('数据写入完毕')
def insert_mysql(data):
    """Insert one (housename, totalprice) row into ``houseprice`` and commit.

    Args:
        data: Iterable holding the house name and the total price, in that
            order. Falsy values (``None``, empty tuple/list) are ignored.

    Returns:
        None.
    """
    if not data:
        return

    row_values = list(data)
    statement = "insert into houseprice(housename,totalprice) values(%s,%s)"
    # Values are passed separately so the driver does the escaping.
    cursor.execute(statement, row_values)
    db.commit()
def main(first_page=1, last_page=9):
    """Crawl the listing pages and store their contents.

    Args:
        first_page: Number of the first listing page to fetch (inclusive).
        last_page: Number of the last listing page to fetch (inclusive).
            The defaults reproduce the original behaviour of pages 1..9.

    Returns:
        None.
    """
    for page_no in range(first_page, last_page + 1):
        url = base_url.format(page_no)
        html = load_page(url)
        if not html:
            # A failed download must not be handed to the parser; move on
            # to the next page instead of aborting the whole crawl.
            print('第{}页获取失败, 已跳过'.format(page_no))
            continue
        parse_page(html)
if __name__ == '__main__':
    try:
        main()
    finally:
        # Release the database resources even when the crawl fails midway.
        cursor.close()
        db.close()
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/很楠不爱3/article/detail/196543
推荐阅读
相关标签
  

闽ICP备14008679号