赞
踩
第一篇:用 Selenium + BeautifulSoup 爬取 B 站视频的标题、播放量、弹幕数与评论
# Scrape a single Bilibili video page with Selenium, then parse out the
# title, view count, danmaku count, publish date and the visible comments
# with BeautifulSoup, printing everything at the end.
from selenium import webdriver
from bs4 import BeautifulSoup
import time

url = 'https://www.bilibili.com/video/BV1iN4y1a7KJ'

options = webdriver.ChromeOptions()
# Keep the browser window open after the script finishes.
options.add_experimental_option('detach', True)
driver = webdriver.Chrome(options=options)
driver.get(url)
# Comments are rendered by JavaScript; wait for them to load before
# snapshotting the DOM.
time.sleep(5)
html = driver.page_source

soup = BeautifulSoup(html, 'lxml')
title = soup.find('h1', class_="video-title")
count = soup.find('span', class_="view item")
dm = soup.find('span', class_="dm item")
# Renamed from `datetime` so the variable does not shadow the stdlib module.
pub_date = soup.find('span', class_="pubdate-text")

# Collect commenter name + comment text from each comment card.
comments = soup.find_all('div', class_="content-warp")
comments_text = []
for comment in comments:
    name = comment.find('div', class_="user-info").text
    text = comment.find('span', class_="reply-content").text
    comments_text.append({
        'name': name,
        'text': text,
    })

# 输出结果
print(f"标题:{title.text},播放量:{count.text.strip()},弹幕数:{dm.text.strip()}")
for comment in comments_text:
    print(f"评论:\nID:{comment['name']},评论内容:{comment['text']}")
driver.close()
第二篇:用 Selenium(Edge 浏览器)爬取 B 站热门榜的标题、UP 主与播放量
# Scrape the Bilibili "popular" page with Selenium driving Edge and print
# title / uploader / play count for every video card found.
from selenium import webdriver
from bs4 import BeautifulSoup
import time

url = 'https://www.bilibili.com/v/popular/all/'

# Use EdgeOptions to match the ChromiumEdge driver below (the original
# mixed ChromeOptions with an Edge driver, which is inconsistent).
options = webdriver.EdgeOptions()
# Keep the browser window open after the script finishes.
options.add_experimental_option('detach', True)
driver = webdriver.ChromiumEdge(options=options)
driver.get(url)
# The card list is rendered by JavaScript; give it a moment to load
# before snapshotting the DOM, otherwise find_all returns nothing.
time.sleep(3)
html = driver.page_source

soup = BeautifulSoup(html, 'lxml')
for card in soup.find_all('div', class_='video-card'):
    title = card.find('p', class_='video-name')
    up = card.find('span', class_='up-name__text')
    play = card.find('span', class_='play-text')
    print(f'标题: {title.text.strip()}, Up: {up.text.strip()}, 播放量: {play.text.strip()}')
第三篇:用 requests + BeautifulSoup 抓取湖南师范大学研究生院招生页并筛选 2024 招生简章链接
# Download the graduate-admissions notice list page, print every outbound
# link's text, then filter for the 2024 admission brochure (招生简章).
import requests
from bs4 import BeautifulSoup

url = 'https://yjsy.hunnu.edu.cn/zsks/sszk1.htm'
ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
headers = {
    'User-Agent': ua
}

response = requests.get(url, headers=headers)
# Fail fast on HTTP errors instead of parsing an error page.
response.raise_for_status()
# The site serves Chinese text; force UTF-8 so it decodes correctly.
response.encoding = 'utf-8'
print(response.text)

soup = BeautifulSoup(response.text, 'lxml')
# Notice links all open in a new tab (target="_blank").
result = soup.find_all('a', target="_blank")

# First print every link text found on the page ...
for item in result:
    print(item.text)

# ... then only the ones that mention the 2024 admission brochure.
for item in result:
    if '2024' in item.text and '招生简章' in item.text:
        print(item.text)
第四篇:用 requests + BeautifulSoup 抓取同一页面并打印含“2024”的链接
# Fetch the admissions notice list page with requests and print the text
# of every new-tab link that mentions 2024.
from bs4 import BeautifulSoup
import requests

url = 'https://yjsy.hunnu.edu.cn/zsks/sszk1.htm'
ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0'
headers = {
    'User-Agent': ua
}

# Renamed from `html` — requests.get returns a Response, not an HTML string.
resp = requests.get(url, headers=headers)
# Force UTF-8 so the Chinese link text decodes correctly.
resp.encoding = 'utf-8'

soup = BeautifulSoup(resp.text, 'lxml')
for item in soup.find_all('a', target='_blank'):
    if '2024' in item.text:
        print(item.text)
第五篇:用 Selenium 的 ActionChains 模拟鼠标悬停并点击二级菜单,再解析页面链接
# Drive the Hunan Normal University graduate-school site with Selenium:
# hover over the 4th top-level menu entry to open its dropdown, click the
# 2nd submenu item, then parse the resulting page for 2024 links.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from bs4 import BeautifulSoup
import time

# Use EdgeOptions to match the ChromiumEdge driver below (the original
# mixed ChromeOptions with an Edge driver, which is inconsistent).
options = webdriver.EdgeOptions()
# Keep the browser window open after the script finishes.
options.add_experimental_option('detach', True)
driver = webdriver.ChromiumEdge(options=options)
driver.get('https://yjsy.hunnu.edu.cn/')

# Hover over the 4th top-level menu entry so its dropdown appears.
menu_xpath = "//ul[@class='menu']/li[4]/a"
menu = driver.find_element(By.XPATH, menu_xpath)
ActionChains(driver).move_to_element(menu).perform()

# Hover over the 2nd dropdown item, wait for the menu animation to
# settle, then click it.
submenu_xpath = "//ul[@class='menu']/li[4]/ul/li[2]"
submenu = driver.find_element(By.XPATH, submenu_xpath)
ActionChains(driver).move_to_element(submenu).perform()
time.sleep(3)
ActionChains(driver).move_to_element(submenu).click(submenu).perform()

html = driver.page_source
print(html)

soup = BeautifulSoup(html, 'lxml')
for item in soup.find_all('a', target='_blank'):
    if '2024' in item.text:
        print(item.text)
第六篇:用 requests 抓取页面源代码,并用 lxml 的 XPath 筛选目标链接
# Fetch the admissions page source with requests, then select all
# new-tab anchors with an XPath query and filter for the 2024 brochure.
import requests
from lxml import etree

ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
# HTTP header names are case-insensitive, but use the canonical
# 'User-Agent' spelling (the original had 'user-Agent').
resp = requests.get('https://yjsy.hunnu.edu.cn/zsks/sszk1.htm',
                    headers={'User-Agent': ua})
# Force UTF-8 so the Chinese text decodes correctly.
resp.encoding = 'utf-8'
print(resp.text)

# Select every anchor that opens in a new tab.
xpath = "//a[@target='_blank']"
page = etree.HTML(resp.text)
result = page.xpath(xpath)

# Print every match first...
for item in result:
    print(item.text)

# ...then refine to the links of interest. Guard against item.text being
# None (anchors wrapping only images), which would make `in` raise.
print()
print("符合条件的结果有")
for item in result:
    if item.text and '2024' in item.text and '招生简章' in item.text:
        print(item.text)
第七篇:用 Selenium(Edge 浏览器)在百度搜索关键词并对结果页截图
# Open Baidu with Selenium driving Edge, type a search query, click the
# search button, and save a screenshot of the results page.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Use EdgeOptions to match the ChromiumEdge driver below (the original
# mixed ChromeOptions with an Edge driver, which is inconsistent).
options = webdriver.EdgeOptions()
# Keep the browser window open after the script finishes.
options.add_experimental_option('detach', True)
driver = webdriver.ChromiumEdge(options=options)
driver.get('https://www.baidu.com')
# Wait for the page to finish loading before interacting with it.
time.sleep(3)

# Type the query into the search box (id='kw'), then click the
# "Baidu it" button (id='su').
driver.find_element(By.ID, 'kw').send_keys("湘潭理工学院")
time.sleep(3)
driver.find_element(By.ID, 'su').click()

driver.save_screenshot('baidu.png')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。