赞
踩
Result table schema:

| column           | type         |
|------------------|--------------|
| id               | int          |
| title            | varchar(255) |
| area             | varchar(255) |
| salary           | varchar(255) |
| edu_list         | varchar(255) |
| company_name     | varchar(255) |
| company_tag_list | varchar(255) |
| info_desc        | varchar(255) |
| href             | varchar(255) |
- import time
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- import pymysql
- from urllib import parse
-
-
# Search keyword entered by the user; it is also used as the MySQL table name.
KeyWord = input("请输入你要搜索的岗位关键字:")
# Double percent-encoding -- presumably the target site expects the keyword
# encoded twice inside its URL; TODO confirm (KeyWords is not referenced in
# the visible portion of this file).
KeyWords = parse.quote(parse.quote(KeyWord))
-
-
def con():
    """Create the result table (named after the search keyword) if absent.

    Connects to MySQL, issues a CREATE TABLE for the job-listing columns,
    and always closes the connection.  Returns None.

    NOTE(review): the table name comes straight from user input; wrapping it
    in backticks makes ordinary keywords safe, but a keyword containing a
    backtick could still break the statement -- validate KeyWord if this
    ever faces untrusted users.
    """
    db = pymysql.connect(
        port=3306,
        user='root',
        password='输入密码',
        db='输入数据库名',
        charset='utf8'
    )
    # IF NOT EXISTS: without it the original raised on every run after the
    # first (table already created), killing the script before any scraping.
    sql = (
        'CREATE TABLE IF NOT EXISTS `{}`('
        '`id` int(11) NOT NULL AUTO_INCREMENT,'
        '`title` VARCHAR(255) DEFAULT NULL,'
        '`area` VARCHAR(255) DEFAULT NULL,'
        '`salary` VARCHAR(255) DEFAULT NULL,'
        '`edu_list` VARCHAR(255) DEFAULT NULL,'
        '`company_name` VARCHAR(255) DEFAULT NULL,'
        '`company_tag_list` VARCHAR(255) DEFAULT NULL,'
        '`info_desc` VARCHAR(255) DEFAULT NULL,'
        '`href` VARCHAR(500) DEFAULT NULL,'
        'PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;'
    ).format(KeyWord)
    # try/finally so the connection is released even if execute() raises
    # (the original leaked it on any error).
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        cursor.close()
    finally:
        db.close()
-
-
class save:
    """Namespace holding the shared DB connection, cursor, and INSERT SQL.

    Used as a plain namespace (never instantiated): the body runs at class
    definition time, so merely importing this module creates the table and
    opens a database connection as a side effect.  `get_job_info()` reads
    `save.cursor`, `save.db`, and `save.sql_1`.
    """
    con()  # ensure the per-keyword table exists before any insert
    db = pymysql.connect(
        port=3306,
        user='root',
        password='输入密码',
        db='输入数据库名',
        charset='utf8'
    )
    cursor = db.cursor()
    # Row values are bound via %s placeholders (parameterized by the driver);
    # only the table name is interpolated, backtick-quoted so ordinary
    # keywords stay valid identifiers.
    # The original wrapped this .format() in a bare try/except -- a string
    # format with a matching placeholder cannot fail that way, and swallowing
    # an error here would only have deferred it to a NameError on save.sql_1
    # later, so the guard was removed.
    sql_1 = (
        'insert into `{}`(title,area,salary,edu_list,company_name,'
        'company_tag_list,info_desc,href) '
        'values(%s, %s, %s, %s, %s, %s, %s, %s)'
    ).format(KeyWord)
-
-
def get_job_info():
    """Scrape every job card on the currently loaded page and insert one row
    per card into the keyword table.

    Reads the module-global Selenium `driver` for the page content and writes
    through the shared connection held on `save`.  Commits after each row so
    a failure mid-page keeps the rows scraped so far.
    """
    cards = driver.find_elements(by=By.CSS_SELECTOR, value='.job-card-wrapper')
    for card in cards:
        # find_element raises NoSuchElementException on a malformed card;
        # the caller's per-page handler is expected to deal with that.
        title = card.find_element(by=By.CSS_SELECTOR, value='.job-name ').text
        area = card.find_element(by=By.CSS_SELECTOR, value='.job-area').text
        salary = card.find_element(by=By.CSS_SELECTOR, value='.salary').text
        edu_list = card.find_element(by=By.CSS_SELECTOR, value='.tag-list').text
        company_name = card.find_element(by=By.CSS_SELECTOR, value='.company-name').text
        company_tag_list = card.find_element(by=By.CSS_SELECTOR, value='.company-tag-list').text
        info_desc = card.find_element(by=By.CSS_SELECTOR, value='.info-desc').text
        href = card.find_element(by=By.CSS_SELECTOR, value='.job-card-left').get_attribute('href')
        # The original packed these values into a dict and immediately read
        # them back out into the same variables -- a pure no-op round trip,
        # removed.
        save.cursor.execute(
            save.sql_1,
            (title, area, salary, edu_list, company_name, company_tag_list,
             info_desc, href),
        )
        save.db.commit()
-
- # print(dit)
-
-
if __name__ == '__main__':
    driver = webdriver.Chrome()
    # Implicit wait: every find_element polls up to 10s before raising.
    driver.implicitly_wait(10)
    try:
        for page in range(1, 11):
            try:
                print(f'正在获取{page}页的数据')
                driver.get('xxxxxxxxxxxxxxxxxxxxxx')  # target listing URL (placeholder)
                time.sleep(1)  # let dynamically rendered job cards appear
                get_job_info()
                # find_element raises NoSuchElementException when the link is
                # missing, so the original `if next_page:` branch was dead
                # code -- find_element never returns a falsy value.
                next_page = driver.find_element(by=By.CSS_SELECTOR,
                                                value='.options-pages a:nth-child(10)')
                next_page.click()
            except Exception as e:
                # Was a bare `except: pass`, which silently hid every failure
                # (including KeyboardInterrupt).  Log and continue with the
                # next page instead.
                print(f'第{page}页抓取失败: {e}')
    finally:
        # quit() even if the loop itself blows up, so no orphaned browser.
        driver.quit()

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。