赞
踩
DrissionPage 是一个基于 python 的网页自动化工具。
它既能控制浏览器,也能收发数据包,还能把两者合而为一。
可兼顾浏览器自动化的便利性和 requests 的高效率。
它功能强大,内置无数人性化设计和便捷功能。
它的语法简洁而优雅,代码量少,对新手友好。
douyin.py:
- # ---encoding:utf-8---
- # @Time : 2024/1/13 16:43
- # @Author : stzz Wang
- # @Email :1050100468@qq.com
- # @Site :
- # @File : douyin.py
- # @Project : douyi_analysis
- # @Software: PyCharm
- import os
- import sys
-
- BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
- sys.path.append(BASE_DIR)
-
- from DrissionPage import ChromiumOptions, SessionOptions, WebPage
- from CODES.config.CONFIG import *
-
-
class DouYin:
    """Small wrapper around a DrissionPage ``WebPage`` used to capture traffic.

    The instance owns a single ``WebPage`` (``self.page``) and exposes helpers
    to start/stop its network listener and to navigate to a URL.
    """

    def __init__(self):
        """Build the ``WebPage`` from the ini file named in ``Config``."""
        # Browser options and session options are both read from the same
        # ini file.
        ini_path = Config.drission_page_init_file_path
        co = ChromiumOptions(ini_path=ini_path)
        so = SessionOptions(ini_path=ini_path)

        self.page = WebPage(chromium_options=co, session_or_options=so)

    def start_listen(self):
        """Start the page's network listener."""
        self.page.listen.start()

    def end_listen(self):
        """Pause the page's network listener, then stop it."""
        # NOTE(review): the positional True presumably asks pause() to clear
        # already-captured packets -- confirm against the DrissionPage docs.
        self.page.listen.pause(True)
        self.page.listen.stop()

    def load_page(self, url):
        """Navigate the page to ``url``."""
        self.page.get(url)
-
douyin_without_watermarker_analysis.py:
- # ---encoding:utf-8---
- # @Time : 2024/1/13 16:53
- # @Author : stzz Wang
- # @Email :1050100468@qq.com
- # @Site :
- # @File : douyin_without_watermarker_analysis.py
- # @Project : douyi_analysis
- # @Software: PyCharm
- import os
- import sys
- import time
-
- BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
- sys.path.append(BASE_DIR)
-
- from fastapi import APIRouter
- from CODES.controllers.model.douyin import *
- from CODES.config.CONFIG import *
- import json
- from pydantic import BaseModel
-
# Router on which this module's endpoint is registered.
douyin_wwa = APIRouter()

# A single DouYin instance, created when this module is imported and reused
# by the request handler below.
douyin_instance = DouYin()
-
-
class DouYinWithoutWatermarker(BaseModel):
    """Request body for the watermark-free analysis endpoint."""

    # URL of the page that the handler loads in the browser.
    url: str
-
-
@douyin_wwa.post("/douyin_without_watermarker_analysis")
async def douyin_without_watermarker_analysis(accept: DouYinWithoutWatermarker):
    """Load ``accept.url`` and return the videos listed in the post-list packet.

    The handler listens to the page's network traffic, waits for the first
    packet whose URL contains the ``/aweme/v1/web/aweme/post/`` endpoint,
    parses its body as JSON and extracts the title and play URLs of every
    entry in ``aweme_list``.

    Args:
        accept: Request body carrying the page URL to load.

    Returns:
        On success ``{"data": [{"title": ..., "urls": ...}, ...],
        "use_time": <seconds spent waiting for the packet>}``.
        On failure ``{"data": <error message>, "error_code": 500}``.
    """
    page = douyin_instance.page

    # Start capturing BEFORE navigating: requests fired while the page is
    # loading would otherwise be missed and the wait below could never end.
    douyin_instance.start_listen()
    try:
        douyin_instance.load_page(accept.url)
        start_time = time.time()

        # NOTE(review): wait() is called without a timeout and blocks the
        # event loop inside this ``async def``; consider a timeout.
        while True:
            packet = page.listen.wait()  # wait for and fetch one data packet
            if "https://www.douyin.com/aweme/v1/web/aweme/post/" in packet.url:
                break

        payload = json.loads(packet._raw_body)
        videos = [
            {
                "title": item["desc"],
                "urls": item["video"]["play_addr"]["url_list"],
            }
            for item in payload["aweme_list"]
        ]
        result = {
            "data": videos,
            "use_time": time.time() - start_time,
        }
    except Exception as e:
        # Return the message, not the exception object: the latter cannot be
        # serialised into the JSON response.
        result = {
            "data": str(e),
            "error_code": 500,
        }
    finally:
        # Always release the listener, whatever happened above.
        douyin_instance.end_listen()

    # Returning after (not inside) ``finally`` so that non-Exception errors
    # such as task cancellation are not silently swallowed.
    return result
完整代码在github上:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。