
Scraping job-posting data with Selenium and storing it in MySQL

Using Selenium to extract the listing data and insert it into a database

1. Table structure

 

Column             Type
id                 int, AUTO_INCREMENT, PRIMARY KEY
title              varchar(255)
area               varchar(255)
salary             varchar(255)
edu_list           varchar(255)
company_name       varchar(255)
company_tag_list   varchar(255)
info_desc          varchar(255)
href               varchar(500)
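For reference, the script below creates this table on the fly and names it after the search keyword. A minimal standalone sketch of the same structure, assuming a local MySQL server and using a fixed, hypothetical table name `jobs` in place of the keyword-based name (the connection settings are placeholders):

import pymysql

# Hypothetical standalone version: fixed table name `jobs` instead of the
# keyword-based name the script uses; connection settings are placeholders.
db = pymysql.connect(host='localhost', port=3306, user='root',
                     password='your_password', db='your_database', charset='utf8')
cursor = db.cursor()
cursor.execute("""
    CREATE TABLE IF NOT EXISTS `jobs` (
        `id`               INT(11) NOT NULL AUTO_INCREMENT,
        `title`            VARCHAR(255) DEFAULT NULL,
        `area`             VARCHAR(255) DEFAULT NULL,
        `salary`           VARCHAR(255) DEFAULT NULL,
        `edu_list`         VARCHAR(255) DEFAULT NULL,
        `company_name`     VARCHAR(255) DEFAULT NULL,
        `company_tag_list` VARCHAR(255) DEFAULT NULL,
        `info_desc`        VARCHAR(255) DEFAULT NULL,
        `href`             VARCHAR(500) DEFAULT NULL,
        PRIMARY KEY (`id`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
""")
cursor.close()
db.close()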

2. Code

import time
from selenium import webdriver
from selenium.webdriver.common.by import By
import pymysql
from urllib import parse

KeyWord = input("Enter the job keyword to search for: ")
# Double URL-encode the keyword for use in the site's search URL
KeyWords = parse.quote(parse.quote(KeyWord))


def con():
    """Create a table named after the search keyword to hold the scraped rows."""
    db = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        password='your_password',    # your MySQL password
        db='your_database',          # your database name
        charset='utf8'
    )
    sql = 'CREATE TABLE IF NOT EXISTS `{}`(`id` int(11) NOT NULL AUTO_INCREMENT,' \
          '`title` VARCHAR(255) DEFAULT NULL,`area` VARCHAR(255) DEFAULT NULL,' \
          '`salary` VARCHAR(255) DEFAULT NULL,`edu_list` VARCHAR(255) DEFAULT NULL,' \
          '`company_name` VARCHAR(255) DEFAULT NULL,`company_tag_list` VARCHAR(255) DEFAULT NULL,' \
          '`info_desc` VARCHAR(255) DEFAULT NULL,`href` VARCHAR(500) DEFAULT NULL,' \
          'PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;'.format(KeyWord)
    cursor = db.cursor()
    cursor.execute(sql)
    cursor.close()
    db.close()


class save:
    # The class body runs once when the class is defined; it is used here as a
    # simple namespace holding the shared connection, cursor and INSERT statement.
    con()
    db = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        password='your_password',    # your MySQL password
        db='your_database',          # your database name
        charset='utf8'
    )
    cursor = db.cursor()
    try:
        sql_1 = """insert into `{}`(title,area,salary,edu_list,company_name,company_tag_list,info_desc,
        href) values(%s, %s, %s, %s, %s, %s, %s, %s)""".format(KeyWord)
    except Exception:
        print("Problem building the INSERT statement")


def get_job_info():
    # Each job posting on the listing page is wrapped in a .job-card-wrapper element
    lit = driver.find_elements(by=By.CSS_SELECTOR, value='.job-card-wrapper')
    # print(lit)
    for li in lit:
        title = li.find_element(by=By.CSS_SELECTOR, value='.job-name').text
        area = li.find_element(by=By.CSS_SELECTOR, value='.job-area').text
        salary = li.find_element(by=By.CSS_SELECTOR, value='.salary').text
        edu_list = li.find_element(by=By.CSS_SELECTOR, value='.tag-list').text
        company_name = li.find_element(by=By.CSS_SELECTOR, value='.company-name').text
        company_tag_list = li.find_element(by=By.CSS_SELECTOR, value='.company-tag-list').text
        info_desc = li.find_element(by=By.CSS_SELECTOR, value='.info-desc').text
        href = li.find_element(by=By.CSS_SELECTOR, value='.job-card-left').get_attribute('href')
        dit = {
            'title': title,
            'area': area,
            'salary': salary,
            'experience': edu_list,
            'company name': company_name,
            'company field': company_tag_list,
            'benefits': info_desc,
            'detail page': href,
        }
        title = dit.get('title')
        area = dit.get('area')
        salary = dit.get('salary')
        edu_list = dit.get('experience')
        company_name = dit.get('company name')
        company_tag_list = dit.get('company field')
        info_desc = dit.get('benefits')
        href = dit.get('detail page')
        # print(title)
        save.cursor.execute(save.sql_1,
                            (title, area, salary, edu_list, company_name, company_tag_list, info_desc, href))
        save.db.commit()
        # print(dit)


if __name__ == '__main__':
    driver = webdriver.Chrome()
    driver.implicitly_wait(10)
    for page in range(1, 11):
        try:
            print(f'Fetching data for page {page}')
            driver.get('xxxxxxxxxxxxxxxxxxxxxx')  # the listing-page URL
            time.sleep(1)
            get_job_info()
            next_page = driver.find_element(by=By.CSS_SELECTOR,
                                            value='.options-pages a:nth-child(10)')
            if next_page:
                next_page.click()
            else:
                print("No data")
        except Exception:
            pass
    driver.quit()

3. Results

(Screenshot: the scraped job data stored in the MySQL table.)
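Besides viewing the table in a GUI client, the stored rows can be checked directly with pymysql. A minimal sketch, assuming the same placeholder connection settings as above and that the keyword typed at the prompt (and therefore the table name) was `python`:

import pymysql

# Hypothetical check: the table name `python` must match the keyword entered at the prompt.
db = pymysql.connect(host='localhost', port=3306, user='root',
                     password='your_password', db='your_database', charset='utf8')
cursor = db.cursor()
cursor.execute('SELECT COUNT(*) FROM `python`')
print('rows stored:', cursor.fetchone()[0])
cursor.execute('SELECT title, area, salary, company_name FROM `python` LIMIT 5')
for row in cursor.fetchall():
    print(row)
cursor.close()
db.close()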
