赞
踩
- import requests
- from bs4 import BeautifulSoup
- import time
- import pandas as pd
- import random
-
- # Setup: build the DataFrame up front so scraped data has somewhere to be stored.
- count = 1  # NOTE(review): not used in the visible code; presumably a running row number - confirm
- random_number = random.randint(10,20)  # random integer in [10, 20], both ends inclusive; its use is not visible here
- df = pd.DataFrame(columns = ['序号','攻略链接','简评','地点','出发时间','出游天数','人均花销','同行人','游玩方式'])  # empty frame with the nine output columns
-
- # 有些字段网页信息不全,为防止爬虫报错,做报错预处理
def arrs(x):
    """Return the stripped text of the first ``div.user_info h1 span.title`` match.

    Args:
        x: Parsed page object exposing a BeautifulSoup-style ``select()``.

    Returns:
        The text of the first matching element, or ``'缺失内容'`` when no
        element matches or ``x`` cannot be queried.
    """
    try:
        return x.select('div.user_info h1 span.title')[0].get_text(strip=True)
    except (AttributeError, IndexError, TypeError):
        # Some pages lack this field; fall back to a placeholder instead of
        # aborting. Unlike a bare ``except``, KeyboardInterrupt and SystemExit
        # still propagate so the crawl can be stopped.
        return '缺失内容'
-
def titles(x):
    """Return the stripped text of the second ``.b_crumb_cont a`` match.

    Args:
        x: Parsed page object exposing a BeautifulSoup-style ``select()``.

    Returns:
        The text of the element at index 1 of the match list, or
        ``'缺失内容'`` when fewer than two elements match or ``x`` cannot be
        queried.
    """
    try:
        return x.select('.b_crumb_cont a')[1].get_text(strip=True)
    except (AttributeError, IndexError, TypeError):
        # Some pages lack this field; fall back to a placeholder instead of
        # aborting. Unlike a bare ``except``, KeyboardInterrupt and SystemExit
        # still propagate so the crawl can be stopped.
        return '缺失内容'
def whns(x):
    """Return the stripped text of the first ``.when p span.data`` match.

    Args:
        x: Parsed page object exposing a BeautifulSoup-style ``select()``.

    Returns:
        The text of the first matching element as a ``str``, or the integer
        ``0`` when no element matches or ``x`` cannot be queried. Note the
        fallback is an ``int`` while the normal result is a ``str``.
    """
    try:
        return x.select('.when p span.data')[0].get_text(strip=True)
    except (AttributeError, IndexError, TypeError):
        # Some pages lack this field; fall back to 0 instead of aborting.
        # Unlike a bare ``except``, KeyboardInterrupt and SystemExit still
        # propagate so the crawl can be stopped.
        return 0
def longs(x):
    """Return the stripped text of the first ``.howlong p span.data`` match.

    Args:
        x: Parsed page object exposing a BeautifulSoup-style ``select()``.

    Returns:
        The text of the first matching element as a ``str``, or the integer
        ``0`` when no element matches or ``x`` cannot be queried. Note the
        fallback is an ``int`` while the normal result is a ``str``.
    """
    try:
        return x.select('.howlong p span.data')[0].get_text(strip=True)
    except (AttributeError, IndexError, TypeError):
        # Some pages lack this field; fall back to 0 instead of aborting.
        # Unlike a bare ``except``, KeyboardInterrupt and SystemExit still
        # propagate so the crawl can be stopped.
        return 0
def moneys(x):
    """Return the stripped text of the first ``.howmuch p span.data`` match.

    Args:
        x: Parsed page object exposing a BeautifulSoup-style ``select()``.

    Returns:
        The text of the first matching element as a ``str``, or the integer
        ``0`` when no element matches or ``x`` cannot be queried. Note the
        fallback is an ``int`` while the normal result is a ``str``.
    """
    try:
        return x.select('.howmuch p span.data')[0].get_text(strip=True)
    except (AttributeError, IndexError, TypeError):
        # Some pages lack this field; fall back to 0 instead of aborting.
        # Unlike a bare ``except``, KeyboardInterrupt and SystemExit still
        # propagate so the crawl can be stopped.
        return 0
def whos(x):
    """Return the stripped text of the first ``.who p span.data`` match.

    Args:
        x: Parsed page object exposing a BeautifulSoup-style ``select()``.

    Returns:
        The text of the first matching element, or ``'缺失内容'`` when no
        element matches or ``x`` cannot be queried.
    """
    try:
        return x.select('.who p span.data')[0].get_text(strip=True)
    except (AttributeError, IndexError, TypeError):
        # Some pages lack this field; fall back to a placeholder instead of
        # aborting. Unlike a bare ``except``, KeyboardInterrupt and SystemExit
        # still propagate so the crawl can be stopped.
        return '缺失内容'
def hows(x):
    """Return the stripped text of the first ``.how p span.data`` match.

    Args:
        x: Parsed page object exposing a BeautifulSoup-style ``select()``.

    Returns:
        The text of the first matching element, or ``'缺失内容'`` when no
        element matches or ``x`` cannot be queried.
    """
    try:
        return x.select('.how p span.data')[0].get_text(strip=True)
    except (AttributeError, IndexError, TypeError):
        # Some pages lack this field; fall back to a placeholder instead of
        # aborting. Unlike a bare ``except``, KeyboardInterrupt and SystemExit
        # still propagate so the crawl can be stopped.
        return '缺失内容'
-
- # 开始获取数据,并使用BS进行解析,解析语言选择CSS
- for i in range(1,201,):
- # 获取响应
- url = f"https://travel.qunar.com/travelbook/list.htm?page={i}&order=hot_heat"
- headers = {
- "user-Agent":"https://www.qunar.com/',headers='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
- }
- response =
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。