
Scraping Novels from 笔趣阁 (biquge) with Python

1. First, import the modules the scraper needs:

# Import the required modules
import requests
from bs4 import BeautifulSoup
import os
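(If either third-party module is missing from your environment, `pip install requests beautifulsoup4` installs both; os is part of the standard library.)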

2. Use the os library to create a folder to save the novel in:

# Create a directory for the novel
if not os.path.exists("GT病毒进化者"):
    os.mkdir("GT病毒进化者")
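An equivalent one-liner, if you prefer it, is os.makedirs with exist_ok=True, which creates the directory and silently skips the step when it already exists:

# Create the directory, ignoring the case where it already exists
os.makedirs("GT病毒进化者", exist_ok=True)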

3. Set the request headers (a browser User-Agent helps the requests pass for normal page visits):

# Set the request headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
}

4. Request the novel's table-of-contents page by its URL:

# Request the novel's table-of-contents page
url = "https://www.xxbqg5200.com/shu/13515/"
res = requests.get(url, headers=headers)
res.encoding = "gbk"
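The encoding is forced to "gbk" because this site serves GBK-encoded pages; without it the Chinese text comes out garbled. If you adapt the script to another site and are unsure of its encoding, requests can guess it from the response body (an optional alternative, not what the script above relies on):

# Let requests detect the encoding from the response content
res.encoding = res.apparent_encoding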

5. Parse all the chapter links out of the page:

url2 = "https://www.xxbqg5200.com"
# Parse the table-of-contents page and collect all chapter links
soup = BeautifulSoup(res.text, "html.parser")
chapters = soup.find("div", id="list").find_all("a")
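Joining url2 and each href by plain concatenation works as long as the hrefs are site-absolute paths. A sturdier way to build the chapter URLs is urllib.parse.urljoin, which also copes with relative hrefs (a sketch; the chapter_links name is ours, not from the original post):

from urllib.parse import urljoin

# Build absolute chapter URLs that work for both absolute and relative hrefs
chapter_links = [urljoin(url, a["href"]) for a in chapters]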

6. Iterate over each chapter link, request the page, and save the content into the directory created above:

for chapter in chapters:
    chapter_url = url2 + chapter["href"]
    # print(chapter['href'])
    # print(chapter_url)
    chapter_res = requests.get(chapter_url, headers=headers)
    chapter_res.encoding = "gbk"
    chapter_soup = BeautifulSoup(chapter_res.text, "html.parser")
    # print(chapter_soup)
    chapter_title = chapter_soup.find("div", class_="bookname").h1.text
    chapter_content = chapter_soup.find("div", id="content").text.strip()
    with open(f"GT病毒进化者/{chapter_title}.txt", "w", encoding="utf-8") as f:
        f.write(chapter_content)
    print(f"{chapter_title} saved successfully!")
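Two practical caveats before running this loop on a long novel: chapter titles can contain characters that are illegal in filenames (such as ? or : on Windows), and firing requests back-to-back may get the scraper blocked. A hedged sketch of both fixes, using only the standard library (the sanitize_filename helper is our addition, not part of the original post):

import re
import time

def sanitize_filename(name):
    # Replace characters that Windows forbids in filenames with underscores
    return re.sub(r'[\\/:*?"<>|]', "_", name)

# Inside the loop: open(f"GT病毒进化者/{sanitize_filename(chapter_title)}.txt", ...)
# and after each chapter: time.sleep(0.5) to space out the requests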

The complete code:

# Import the required modules
import requests
from bs4 import BeautifulSoup
import os

# Create a directory for the novel
if not os.path.exists("GT病毒进化者"):
    os.mkdir("GT病毒进化者")

# Set the request headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"
}

# Request the novel's table-of-contents page
url = "https://www.xxbqg5200.com/shu/13515/"
res = requests.get(url, headers=headers)
res.encoding = "gbk"

url2 = "https://www.xxbqg5200.com"
# Parse the table-of-contents page and collect all chapter links
soup = BeautifulSoup(res.text, "html.parser")
chapters = soup.find("div", id="list").find_all("a")

# Iterate over each chapter link, request the page, and save the chapter
for chapter in chapters:
    chapter_url = url2 + chapter["href"]
    # print(chapter['href'])
    # print(chapter_url)
    chapter_res = requests.get(chapter_url, headers=headers)
    chapter_res.encoding = "gbk"
    chapter_soup = BeautifulSoup(chapter_res.text, "html.parser")
    # print(chapter_soup)
    chapter_title = chapter_soup.find("div", class_="bookname").h1.text
    chapter_content = chapter_soup.find("div", id="content").text.strip()
    with open(f"GT病毒进化者/{chapter_title}.txt", "w", encoding="utf-8") as f:
        f.write(chapter_content)
    print(f"{chapter_title} saved successfully!")
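As written, a single failed request or a page missing the expected div will abort the whole run with an exception. A minimal hardening sketch, wrapping each chapter request in try/except and adding a timeout (our addition, not part of the original script):

for chapter in chapters:
    chapter_url = url2 + chapter["href"]
    try:
        chapter_res = requests.get(chapter_url, headers=headers, timeout=10)
        chapter_res.raise_for_status()  # raise on HTTP 4xx/5xx
    except requests.RequestException as e:
        print(f"{chapter_url} failed: {e}")
        continue
    # ... parse and save the chapter as above ...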
