赞
踩
# Script: fetch the chapter list of "三国演义" (Romance of the Three Kingdoms)
# from gushicimingju.com, print every chapter title, and save the titles,
# one per line, to a UTF-8 text file in the current directory.
import requests
from lxml import etree

if __name__ == '__main__':
    # Browser-like User-Agent sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'
    }
    url = 'https://www.gushicimingju.com/novel/sanguoyanyi/'
    # A timeout keeps the script from blocking forever on an unresponsive
    # server (requests applies no timeout by default).
    page_text = requests.get(url=url, headers=headers, timeout=10).text
    tree = etree.HTML(page_text)
    # Locate the <li> elements that hold the chapter links.
    li_list = tree.xpath('//div[@class="main-content"]/ul/li')
    # The context manager guarantees the file is flushed and closed even if
    # a write or print fails part-way through the loop.
    with open('三国演义章节标题.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            texts = li.xpath('./a/text()')
            if not texts:
                # An <li> without a direct <a> text node has no title to
                # record; skip it instead of raising IndexError.
                continue
            title = texts[0]
            fp.write(title + '\n')
            print(title)
# Script: download every thumbnail listed on the pic.netbian.com "4kdongwu"
# page into a local "pictures" directory, naming each file after the
# image's alt text with a ".jpg" suffix.
import os
from urllib.parse import urljoin

import requests
# NOTE(review): `unicode` is not referenced anywhere in this script; the
# import looks unused -- confirm and remove it if nothing else needs it.
from idna import unicode
from lxml import etree

if __name__ == '__main__':
    site_root = 'https://pic.netbian.com/'
    url = 'https://pic.netbian.com/4kdongwu/'
    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'
    }
    # A timeout keeps the script from blocking forever on an unresponsive
    # server (requests applies no timeout by default).
    response = requests.get(url=url, headers=headers, timeout=10)
    page_text = response.text
    tree = etree.HTML(page_text)
    li_list = tree.xpath('//div[@class="slist"]/ul/li')
    print(li_list)

    # Create the output directory in one step; exist_ok avoids the
    # check-then-create race of os.path.exists() followed by os.mkdir().
    os.makedirs('pictures', exist_ok=True)

    for li in li_list:
        src_values = li.xpath('./a/img/@src')
        alt_values = li.xpath('./a/img/@alt')
        if not src_values or not alt_values:
            # Skip list items that do not wrap an <img> carrying both
            # attributes instead of raising IndexError.
            continue

        # urljoin copes with root-relative ("/uploads/..."), relative and
        # absolute src values without producing a doubled slash.
        img_src = urljoin(site_root, src_values[0])

        raw_name = alt_values[0] + '.jpg'
        try:
            # Presumably the page is GBK-encoded but was decoded as
            # ISO-8859-1; this round trip recovers the intended characters.
            img_name = raw_name.encode('iso-8859-1').decode('gbk')
        except (UnicodeEncodeError, UnicodeDecodeError):
            # The text was not mis-decoded in that way; keep it unchanged.
            img_name = raw_name
        # Path separators inside the alt text would point outside the
        # "pictures" directory or at a missing sub-directory.
        img_name = img_name.replace('/', '_').replace('\\', '_')
        print(img_name)

        img_data = requests.get(url=img_src, headers=headers, timeout=10).content
        img_path = os.path.join('pictures', img_name)
        with open(img_path, 'wb') as fp:
            fp.write(img_data)
    print('over!!!')
# Script: collect the city names listed on the aqistudy.cn history-data page
# (the "hot" cities first, then the remaining ones), printing each name as
# it is found, followed by the full list and its length.
from lxml import etree
import requests

if __name__ == '__main__':
    # Browser-like User-Agent sent with the request.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'
    }
    history_url = 'http://www.aqistudy.cn/historydata/'
    html = requests.get(url=history_url, headers=request_headers).text
    dom = etree.HTML(html)

    # Every city name found, in page order.
    all_cities = []

    # XPath expressions for the two <li> groups, hot cities first.
    item_queries = (
        '//div[@class="bottom"]/ul/li',
        '//ul[@class="unstyled"]/div/li',
    )
    for query in item_queries:
        for item in dom.xpath(query):
            # The city name is the text of the item's direct <a> child.
            name = item.xpath('./a/text()')[0]
            print(name)
            all_cities.append(name)

    print('-----------------')
    print(all_cities)
    print(len(all_cities))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。