赞
踩
- # -*- coding: utf-8 -*-
- """
- Created on Mon Mar 21 11:45:12 2022
- @author: lenovo
- """
-
- #书香门第
- # -*- coding: utf-8 -*-
-
-
-
- from selenium import webdriver
- import time
- import re
- from selenium.webdriver.common.keys import Keys
-
class novel(object):
    """Crawl thread links from a forum board and post a canned reply in each.

    A Chrome browser is driven through Selenium.  Thread URLs found on the
    board's listing pages are accumulated in ``self.novellist`` and are then
    visited one by one by :meth:`run`.
    """

    def __init__(self):
        # URL of the first board listing page to crawl.
        self.start_url = "http://www.txtnovel.top/forum-95-3.html"
        # Chrome options object used to configure the browser session.
        option = webdriver.ChromeOptions()
        # Point Chrome at a profile directory so the saved login is reused.
        # The stock install usually keeps it under
        # C:\Users\lenovo\AppData\Local\Google\Chrome\User Data
        option.add_argument(r"user-data-dir=C:\Users\lenovo\AppData\Local\Google\Chrome\UserDefault")
        # Start the Chrome session used by every other method.
        self.driver = webdriver.Chrome(options=option)
        # Thread URLs collected so far.
        self.novellist = []

    def __del__(self):
        # ``driver`` is missing when __init__ failed before creating the
        # browser (or on a bare instance), so look it up defensively.
        driver = getattr(self, "driver", None)
        if driver is not None:
            driver.quit()

    def _find_xpath(self, xpath):
        """Return the first element on the current page matching *xpath*."""
        # Local import: ``By`` is not among the file's top-level imports.
        from selenium.webdriver.common.by import By

        return self.driver.find_element(By.XPATH, xpath)

    def login(self):
        """Fill in the quick-login form and submit it (best effort).

        The original author notes that this step is unnecessary when the
        Chrome profile option is in use.  The account name and password sent
        here are empty strings.  Failures are reported on stdout rather than
        raised.
        """
        try:
            # Locate the login inputs and type the credentials.
            self._find_xpath("//*[@id='ls_username']").send_keys("")
            self._find_xpath("//*[@id='ls_password']").send_keys("")
            self._find_xpath("//*[@id='ls_cookietime']").click()

            # Click the login button.
            self._find_xpath("//*[@id='lsform']/div/div/table/tbody/tr[2]/td[3]/button").click()
            print("登录成功")
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C still works.
            print("登录失败")

    def get_content(self):
        """Collect thread URLs from the current listing page.

        Returns:
            The '下一页' (next page) link element, or the string ``'false'``
            when the page has no such link.
        """
        # Local imports: these names are not among the file's top-level imports.
        from selenium.common.exceptions import NoSuchElementException
        from selenium.webdriver.common.by import By

        # Give the page two seconds to finish loading.
        time.sleep(2)
        # Look up the next-page link; fall back to the sentinel that run()
        # checks for instead of letting the lookup raise on the last page.
        try:
            next_page = self.driver.find_element(By.LINK_TEXT, '下一页')
        except NoSuchElementException:
            next_page = 'false'
        # Extract the first link of every normal-thread row.
        try:
            elements = self.driver.find_elements(
                By.XPATH, "//tbody[starts-with(@id,'normalthread')]/tr/th/a[1]")
            for element in elements:
                self.novellist.append(element.get_attribute('href'))
        except Exception:
            print("爬取"+str(self.driver.current_url)+"小说列表失败")
        return next_page

    def reply(self):
        """Post the canned reply in the thread that is currently open."""
        self._find_xpath("//*[@id='fastpostmessage']").send_keys("谢谢楼主分享")
        self._find_xpath("//*[@id='fastpostsubmit']").click()
        # Wait before the caller navigates elsewhere.
        time.sleep(2)

    def run(self, max_pages=3):
        """Crawl up to *max_pages* listing pages, then reply in every thread.

        Args:
            max_pages: Maximum number of listing pages to read, starting at
                ``self.start_url``.  Defaults to 3.
        """
        # Open the first listing page.
        self.driver.get(self.start_url)
        time.sleep(3)
        page = 0
        while True:
            page += 1
            # Harvest this page and get the link to the next one.
            next_page = self.get_content()
            if page >= max_pages or next_page == 'false':
                break
            # Move on to the next listing page.
            next_page.click()
        for url in self.novellist:
            self.driver.get(url)
            time.sleep(18)
            # Reuse reply() so the post-submit wait happens before the next
            # navigation.
            self.reply()
-
-
-
def test():
    """Post one canned reply to a fixed thread as a manual smoke test.

    Opens Chrome with the saved profile, loads a hard-coded thread URL, types
    the reply into the quick-reply box, submits it, waits 20 seconds and then
    closes the browser.
    """
    # Local import: ``By`` is not among the file's top-level imports.
    from selenium.webdriver.common.by import By

    # Chrome options object used to configure the browser session.
    option = webdriver.ChromeOptions()
    # Point Chrome at a profile directory so the saved login is reused.
    # The stock install usually keeps it under
    # C:\Users\lenovo\AppData\Local\Google\Chrome\User Data
    option.add_argument(r"user-data-dir=C:\Users\lenovo\AppData\Local\Google\Chrome\UserDefault")
    # Start the Chrome session.
    driver = webdriver.Chrome(options=option)
    try:
        driver.get("http://www.txtnovel.top/thread-4010493-1-1.html")
        time.sleep(2)
        driver.find_element(By.XPATH, "//*[@id='fastpostmessage']").send_keys("谢谢楼主分享")
        time.sleep(1)
        driver.find_element(By.XPATH, "//*[@id='fastpostsubmit']").click()
        time.sleep(20)
    finally:
        # Always release the browser, even when a step above fails.
        driver.quit()
-
-
if __name__ == '__main__':
    # Script entry point: build the crawler and start it.
    # Call test() here instead to post a single reply as a quick manual check.
    spider = novel()
    spider.run()
-
-
-
第二版:加入了每小时最多回复 50 次的限制。
- # -*- coding: utf-8 -*-
- """
- Created on Mon Mar 21 11:45:12 2022
- @author: lenovo
- """
-
- #书香门第刷金币
- #每小时限制发帖50个
- # -*- coding: utf-8 -*-
-
-
-
- from selenium import webdriver
- import time
- import re
- import os
- from selenium.webdriver.common.keys import Keys
-
class novel(object):
    """Crawl thread links from a forum board and reply to them in hourly batches.

    A Chrome browser is driven through Selenium.  Thread URLs still waiting
    for a reply are kept in ``self.novellist`` and mirrored to the file
    ``novellist.txt`` (stored as the ``str()`` of a Python list) in the
    current working directory.
    """

    def __init__(self):
        # Board listing URL without the page number and '.html' suffix.
        self.start_url = "http://www.txtnovel.top/forum-95-"
        # Number of the next listing page to crawl.
        self.start_page = 5

        # Chrome options object used to configure the browser session.
        option = webdriver.ChromeOptions()
        # Point Chrome at a profile directory so the saved login is reused.
        # The stock install usually keeps it under
        # C:\Users\lenovo\AppData\Local\Google\Chrome\User Data
        option.add_argument(r"user-data-dir=C:\Users\lenovo\AppData\Local\Google\Chrome\UserDefault")
        # Start the Chrome session used by every other method.
        self.driver = webdriver.Chrome(options=option)
        # Thread URLs that still need a reply.
        self.novellist = []

    def __del__(self):
        # ``driver`` is missing when __init__ failed before creating the
        # browser (or on a bare instance), so look it up defensively.
        driver = getattr(self, "driver", None)
        if driver is not None:
            driver.quit()

    def _find_xpath(self, xpath):
        """Return the first element on the current page matching *xpath*."""
        # Local import: ``By`` is not among the file's top-level imports.
        from selenium.webdriver.common.by import By

        return self.driver.find_element(By.XPATH, xpath)

    def _load_list(self):
        """Read and return the pending URL list stored in ``novellist.txt``.

        Raises:
            ValueError: If the file content is not a plain Python list literal.
        """
        import ast

        with open('novellist.txt', 'r', encoding='utf-8') as list_file:
            # literal_eval only accepts literals, so a tampered file cannot
            # execute code the way eval() would.
            loaded = ast.literal_eval(list_file.read())
        if not isinstance(loaded, list):
            raise ValueError("novellist.txt does not contain a list of URLs")
        return loaded

    def _save_list(self):
        """Overwrite ``novellist.txt`` with the current pending URL list."""
        with open('novellist.txt', 'w', encoding='utf8') as list_file:
            list_file.write(str(self.novellist))

    def login(self):
        """Fill in the quick-login form and submit it (best effort).

        The account name and password sent here are empty strings.  Failures
        are reported on stdout rather than raised.
        """
        try:
            # Locate the login inputs and type the credentials.
            self._find_xpath("//*[@id='ls_username']").send_keys("")
            self._find_xpath("//*[@id='ls_password']").send_keys("")
            self._find_xpath("//*[@id='ls_cookietime']").click()

            # Click the login button.
            self._find_xpath("//*[@id='lsform']/div/div/table/tbody/tr[2]/td[3]/button").click()
            print("登录成功")
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C still works.
            print("登录失败")

    def get_content(self):
        """Collect thread URLs from the current listing page.

        Returns:
            The '下一页' (next page) link element, or the string ``'false'``
            when the page has no such link.
        """
        # Local imports: these names are not among the file's top-level imports.
        from selenium.common.exceptions import NoSuchElementException
        from selenium.webdriver.common.by import By

        # Give the page two seconds to finish loading.
        time.sleep(2)
        # Look up the next-page link; fall back to the sentinel that geturl()
        # checks for instead of letting the lookup raise on the last page.
        try:
            next_page = self.driver.find_element(By.LINK_TEXT, '下一页')
        except NoSuchElementException:
            next_page = 'false'
        # Extract the first link of every normal-thread row.
        try:
            elements = self.driver.find_elements(
                By.XPATH, "//tbody[starts-with(@id,'normalthread')]/tr/th/a[1]")
            for element in elements:
                self.novellist.append(element.get_attribute('href'))
        except Exception:
            print("爬取"+str(self.driver.current_url)+"小说列表失败")
        return next_page

    def reply(self, batch_size=50):
        """Reply to at most *batch_size* pending threads and persist the rest.

        The pending list is reloaded from ``novellist.txt``, the first
        *batch_size* URLs (fewer if the list is shorter) each receive the
        canned reply, and the remaining URLs are written back to the file.

        Args:
            batch_size: Maximum number of replies to post in this call.
                Defaults to 50, the hourly limit noted by the original author.
        """
        self.novellist = self._load_list()
        # Slicing copes with lists shorter than the batch size.
        batch = self.novellist[:batch_size]
        for url in batch:
            self.driver.get(url)
            time.sleep(15)
            self._find_xpath("//*[@id='fastpostmessage']").send_keys("谢谢楼主分享")
            self._find_xpath("//*[@id='fastpostsubmit']").click()
            time.sleep(5)
        del self.novellist[:len(batch)]
        self._save_list()

    def geturl(self, pages=3):
        """Crawl up to *pages* listing pages and save the collected URLs.

        Crawling starts at listing page ``self.start_page``; the counter is
        advanced once per page read so the next call continues after it.

        Args:
            pages: Maximum number of listing pages to read.  Defaults to 3.
        """
        # Open the listing page to start from.
        self.driver.get(self.start_url + str(self.start_page) + '.html')
        time.sleep(3)
        pagenum = 0
        while True:
            # Harvest this page and get the link to the next one.
            next_page = self.get_content()
            pagenum += 1
            self.start_page += 1
            if pagenum >= pages or next_page == 'false':
                break
            # Move on to the next listing page.
            next_page.click()
        # Save on every exit path so reply() never reads a stale or missing
        # file when the board runs out of pages early.
        self._save_list()

    def run(self):
        """Loop forever: top up the URL list, post one batch, sleep an hour."""
        while True:
            if len(self.novellist) < 50:
                self.geturl()
            self.reply()
            time.sleep(60*60)
-
def test():
    """Post one canned reply to a fixed thread as a manual smoke test.

    Opens Chrome with the saved profile, loads a hard-coded thread URL, types
    the reply into the quick-reply box, submits it, waits 20 seconds and then
    closes the browser.
    """
    # Local import: ``By`` is not among the file's top-level imports.
    from selenium.webdriver.common.by import By

    # Chrome options object used to configure the browser session.
    option = webdriver.ChromeOptions()
    # Point Chrome at a profile directory so the saved login is reused.
    # The stock install usually keeps it under
    # C:\Users\lenovo\AppData\Local\Google\Chrome\User Data
    option.add_argument(r"user-data-dir=C:\Users\lenovo\AppData\Local\Google\Chrome\UserDefault")
    # Start the Chrome session.
    driver = webdriver.Chrome(options=option)
    try:
        driver.get("http://www.txtnovel.top/thread-4010493-1-1.html")
        time.sleep(2)
        driver.find_element(By.XPATH, "//*[@id='fastpostmessage']").send_keys("谢谢楼主分享")
        time.sleep(2)
        driver.find_element(By.XPATH, "//*[@id='fastpostsubmit']").click()
        time.sleep(20)
    finally:
        # Always release the browser, even when a step above fails.
        driver.quit()
-
-
if __name__ == '__main__':
    # Script entry point: build the crawler and start its hourly loop.
    # Call test() here instead to post a single reply as a quick manual check.
    spider = novel()
    spider.run()
-
-
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。