About the blogger: ✌100K+ followers across platforms; a former software engineer at a major Internet company who founded a studio with a team of master's and PhD graduates. We have focused on hands-on graduation design projects for computer-related majors for six years; choosing us means a worry-free path to graduation ✌ If you are interested, bookmark this post first, then like and follow so you don't lose track of it ✌
Graduation projects: 1,000 computer science graduation design projects for 2023-2024 (bookmark recommended)
Graduation projects: the latest and most complete collection of computer science graduation design topics for 2023-2024
Tech stack:
Python, the Flask framework, web crawling, ECharts visualization, data from Dangdang (dangdang.com), HTML

This is a book-data crawling, analysis, and visualization system built with Python and the Flask framework. A web crawler collects book data from dangdang.com, and the ECharts charting library renders the collected data as charts. Users can enter a keyword in the system interface to search for books, and the system crawls the matching book information from dangdang.com.

The system mainly includes the following functions:
(1) Data visualization dashboard
(2) Top ten fiction authors
(3) Book data browsing
(4) Book bestseller chart
(5) Registration and login pages
(6) Back-end data management
(7) Data crawler

The system is developed mainly in Python: the Flask framework powers the web layer, the crawler collects book data from dangdang.com, the ECharts library renders the data as charts, and HTML implements the page templates. With its book search, data analysis, and data export functions, the system makes it easy to analyze and visualize book data.
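To make the Flask-to-ECharts hand-off concrete, here is a minimal sketch of a JSON endpoint that could back the "top ten authors" chart. The route name /api/top_authors, the aggregation over models.haoping_Books, and its assumed integer primary key id are illustrative assumptions; the project's real routes are not shown in this post.

# A minimal sketch of a Flask view feeding an ECharts bar chart.
# Assumptions: the route name /api/top_authors and the aggregation over
# models.haoping_Books (with an id primary key) are illustrative only.
from flask import Flask, jsonify
from sqlalchemy import func

from project import models

app = Flask(__name__)  # the real project would reuse its existing app


@app.route('/api/top_authors')
def top_authors():
    # Count books per author and keep the ten most frequent authors,
    # mirroring the "top ten authors" feature described above.
    rows = (models.db.session.query(
                models.haoping_Books.auth,
                func.count(models.haoping_Books.id))
            .group_by(models.haoping_Books.auth)
            .order_by(func.count(models.haoping_Books.id).desc())
            .limit(10)
            .all())
    # ECharts expects parallel arrays for the x axis and the series data.
    return jsonify({
        'authors': [r[0] for r in rows],
        'counts': [r[1] for r in rows],
    })

On the page, the two returned arrays plug straight into an ECharts bar-chart option: xAxis.data takes authors and series[0].data takes counts.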
import requests
import ssl
import time
import traceback

import urllib3
from bs4 import BeautifulSoup

from project import models

# The Dangdang endpoints are fetched with certificate verification disabled
# (verify=False below); silence the resulting InsecureRequestWarning noise.
ssl._create_default_https_context = ssl._create_unverified_context
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
}


def resou():
    """Crawl Dangdang's hot-search-word ranking (5 pages of 10 words)."""
    for i in range(1, 6):
        url = ('http://api.dangdang.com/community7/faxian/hot-search-word-bang'
               '?ct=iphone&cv=9.5.1&user_client=iphone&client_version=9.5.1'
               '&union_id=537-50&permanent_id=20180417200316522357078741534625445'
               '&udid=7891F89FFBFB287CF94E92A9765A1EFB'
               '&time_code=3cf2905b201653ef3197664e5bf4cf5e&timestamp=1560409597'
               '&global_province_id=111%20%20%20%20&a=hot-search-word-bang-for-pc'
               '&c=faxian&imageSize=h&catpath=01.00.00.00.00.00'
               '&page={}&pageSize=10').format(i)
        h = requests.get(url=url, headers=HEADERS, verify=False)
        print(h)
        for resu in h.json()['data']:
            # Insert a new search word, or refresh its page-view count
            # if it is already stored.
            existing = models.resou_Books.query.filter(
                models.resou_Books.word == resu['word']).all()
            if not existing:
                models.db.session.add(
                    models.resou_Books(word=resu['word'], pv=resu['pv']))
            else:
                existing[0].pv = resu['pv']
            models.db.session.commit()


def haoping():
    """Crawl the five-star (best-reviewed) book ranking, 24 pages."""
    for iz in range(1, 25):
        url = ('http://bang.dangdang.com/books/fivestars/'
               '01.00.00.00.00.00-recent30-0-0-2-{}').format(iz)
        req = requests.get(url=url, headers=HEADERS, verify=False)
        soup = BeautifulSoup(req.text, 'html.parser')
        for li in soup.select('ul.bang_list > li'):
            time.sleep(0.5)  # polite delay between items
            try:
                dicts = {}
                dicts['name'] = li.select('div.name')[0].text.strip()
                dicts['pinglun'] = li.select('div.star > a')[0].text.strip().replace('条评论', '')
                dicts['tujian'] = li.select('div.star > span')[1].text.replace('%推荐', '')
                # publisher_info reads "author / publication date / publisher"
                info = li.select('div.publisher_info')[0].text.split('/')
                dicts['auth'] = info[0].strip()
                dicts['shangshi'] = info[1].strip()
                dicts['chubanshe'] = info[2].strip()
                dicts['price_n'] = li.select('span.price_n')[0].text.replace('¥', '')
                dicts['price_r'] = li.select('span.price_r')[0].text.replace('¥', '')
                print(dicts)
                if not models.haoping_Books.query.filter(
                        models.haoping_Books.name == dicts['name']).all():
                    models.db.session.add(models.haoping_Books(
                        name=dicts['name'],
                        pinglun=dicts['pinglun'],
                        tujian=dicts['tujian'],
                        auth=dicts['auth'],
                        shangshi=dicts['shangshi'],
                        chubanshe=dicts['chubanshe'],
                        price_n=dicts['price_n'],
                        price_r=dicts['price_r'],
                    ))
                    models.db.session.commit()
            except Exception:
                print(traceback.format_exc())


def tushuchangxiao_count():
    """Crawl the overall bestseller ranking; resolve each book's category
    from the breadcrumb on its detail page."""
    for iz in range(1, 25):
        url = ('http://bang.dangdang.com/books/bestsellers/'
               '01.00.00.00.00.00-recent30-0-0-1-{}').format(iz)
        req = requests.get(url=url, headers=HEADERS, verify=False)
        soup = BeautifulSoup(req.text, 'html.parser')
        for li in soup.select('ul.bang_list.clearfix.bang_list_mode > li'):
            time.sleep(0.1)
            dicts = {}
            try:
                dicts['name'] = li.select('div.name')[0].text.strip()
                dicts['pinglun'] = li.select('div.star > a')[0].text.strip().replace('条评论', '')
                dicts['tujian'] = li.select('div.star > span')[1].text.replace('%推荐', '')
                dicts['publisher_info'] = li.select('div.publisher_info')[0].text
                dicts['publisher_info2'] = li.select('div.publisher_info')[1].text
                dicts['price_n'] = li.select('span.price_n')[0].text.replace('¥', '')
                dicts['price_r'] = li.select('span.price_r')[0].text.replace('¥', '')
                # Follow the detail page to read the category breadcrumb.
                detail_url = li.select('div.pic > a')[0].attrs.get('href')
                print(detail_url)
                req2 = requests.get(url=detail_url, headers=HEADERS, verify=False)
                soup2 = BeautifulSoup(req2.text, 'html.parser')
                dicts['itype'] = soup2.select('div#breadcrumb > a')[1].text.strip()
                if not models.changxiao_Books_count.query.filter(
                        models.changxiao_Books_count.name == dicts['name']).all():
                    models.db.session.add(models.changxiao_Books_count(
                        name=dicts['name'],
                        pinglun=dicts['pinglun'],
                        tujian=dicts['tujian'],
                        publisher_info=dicts['publisher_info'],
                        publisher_info2=dicts['publisher_info2'],
                        price_n=dicts['price_n'],
                        price_r=dicts['price_r'],
                        type=dicts['itype'],
                    ))
                    models.db.session.commit()
                print(dicts)
            except Exception as e:
                print(e)


def tushuchangxiao():
    """Crawl the bestseller rankings of three fixed categories
    (小说 fiction, 童书 children's books, 文学 literature)."""
    result = {
        "小说": "http://bang.dangdang.com/books/bestsellers/01.03.00.00.00.00-recent30-0-0-1-{}",
        "童书": "http://bang.dangdang.com/books/bestsellers/01.41.00.00.00.00-recent30-0-0-1-{}",
        "文学": "http://bang.dangdang.com/books/bestsellers/01.05.00.00.00.00-recent30-0-0-1-{}",
    }
    for key, _value in result.items():
        for iz in range(1, 25):
            url = _value.format(iz)
            print(url)
            req = requests.get(url=url, headers=HEADERS, verify=False)
            soup = BeautifulSoup(req.text, 'html.parser')
            for li in soup.select('ul.bang_list.clearfix.bang_list_mode > li'):
                time.sleep(0.1)
                dicts = {}
                try:
                    dicts['name'] = li.select('div.name')[0].text.strip()
                    dicts['pinglun'] = li.select('div.star > a')[0].text.strip().replace('条评论', '')
                    dicts['tujian'] = li.select('div.star > span')[1].text.replace('%推荐', '')
                    dicts['publisher_info'] = li.select('div.publisher_info')[0].text
                    dicts['publisher_info2'] = li.select('div.publisher_info')[1].text
                    dicts['price_n'] = li.select('span.price_n')[0].text.replace('¥', '')
                    dicts['price_r'] = li.select('span.price_r')[0].text.replace('¥', '')
                    if not models.changxiao_Books.query.filter(
                            models.changxiao_Books.name == dicts['name']).all():
                        models.db.session.add(models.changxiao_Books(
                            name=dicts['name'],
                            pinglun=dicts['pinglun'],
                            tujian=dicts['tujian'],
                            publisher_info=dicts['publisher_info'],
                            publisher_info2=dicts['publisher_info2'],
                            price_n=dicts['price_n'],
                            price_r=dicts['price_r'],
                            type=key,
                        ))
                        models.db.session.commit()
                    print(dicts)
                except Exception:
                    print(traceback.format_exc())


if __name__ == '__main__':
    resou()
    haoping()
    # tushuchangxiao()
    # tushuchangxiao_count()
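The crawler above imports models from a project package that this post does not include. Purely as a reading aid, here is a minimal Flask-SQLAlchemy sketch of what those models could look like, inferred from the fields the crawler writes; the column names match the crawler, while the types, lengths, and primary keys are assumptions.

# project/models.py -- a minimal sketch inferred from the crawler above.
# Column names match what the crawler writes; types/lengths are assumptions.
from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy()


class resou_Books(db.Model):
    """Hot search words and their page views."""
    id = db.Column(db.Integer, primary_key=True)
    word = db.Column(db.String(255))
    pv = db.Column(db.Integer)


class haoping_Books(db.Model):
    """Entries from the five-star (best-reviewed) ranking."""
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(255))
    pinglun = db.Column(db.String(64))     # comment count
    tujian = db.Column(db.String(64))      # recommendation percentage
    auth = db.Column(db.String(255))       # author
    shangshi = db.Column(db.String(64))    # publication date
    chubanshe = db.Column(db.String(255))  # publisher
    price_n = db.Column(db.String(32))     # current price
    price_r = db.Column(db.String(32))     # original price


class changxiao_Books(db.Model):
    """Entries from the per-category bestseller rankings."""
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(255))
    pinglun = db.Column(db.String(64))
    tujian = db.Column(db.String(64))
    publisher_info = db.Column(db.String(255))
    publisher_info2 = db.Column(db.String(255))
    price_n = db.Column(db.String(32))
    price_r = db.Column(db.String(32))
    type = db.Column(db.String(64))


class changxiao_Books_count(db.Model):
    """Bestseller entries with the category resolved from the detail page."""
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(255))
    pinglun = db.Column(db.String(64))
    tujian = db.Column(db.String(64))
    publisher_info = db.Column(db.String(255))
    publisher_info2 = db.Column(db.String(255))
    price_n = db.Column(db.String(32))
    price_r = db.Column(db.String(32))
    type = db.Column(db.String(64))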