"""Public-opinion analysis tool built on the Weibo trending list.

--Kevinwang

Classifies trending headlines by keyword, shows them (optionally filtered by
category) in a Tk window and draws a pie chart of the category distribution.
2020/6/24
"""

import datetime
import tkinter as tk
import tkinter.messagebox

import jieba
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup

# Keyword lists that drive the classification. Accuracy depends on these
# lists and on how well jieba segments the headline.
gov = ["卫生部", "教育部", "卫健委", '工信部', '法律', '民法典', '教育厅', '外交部', '禁毒', '毒品', '犯罪', '赵立坚']
entertaiment = ["彭于晏", "胡歌", "迪丽热巴", "宁静", "易烊千玺", "杨紫", '导演', '演员', '杨超越', '于正', '周杰伦', '张雨绮', '于蓝', '快乐大本营']
poltic = ["香港", "台湾", "人大", '常委', '两会', '俄罗斯', '阅兵', '全国人大']
battle = ["抗疫", "疫情", "境外输入", "核酸检测", "确诊", "病毒", "新发地", '健康码', '新冠', '疫苗']
tech = ["ios", "华为", "5G", '北斗', '卫星']

# Per-category hit counters, updated while headlines are classified.
gov_count = 0
entertaiment_count = 0
poltic_count = 0
battle_count = 0
tech_count = 0

# Display strings of all fetched headlines: rank, title, then category tags.
trending = []

# Menu choice typed by the user -> category tag appended to a headline.
_KIND_BY_CHOICE = {
    '1': '政务',
    '2': '娱乐',
    '3': '政治',
    '4': '疫情',
    '5': '科技',
}

# Index into the counter list [gov, entertainment, politics, epidemic, tech]
# -> banner shown when that category was hit at least twice.
_THEME_MESSAGES = {
    1: ' 今日主题:娱乐\nWe are fine today.',
    2: ' 今日主题:政治\nSomething is happening in the world.',
    3: ' 今日主题:疫情\nToday is still a hard day.',
}

# Register names so jieba keeps them as single tokens during segmentation.
jieba.add_word('周杰伦')
jieba.add_word('张雨绮')
jieba.add_word('快乐大本营')
jieba.add_word('于蓝')


def drawpic():
    """Draw a pie chart of how many keyword hits each category received.

    Categories with a count of zero are left out of the chart. When no
    category has any hit, an information dialog is shown instead of a chart.
    """
    plt.rcParams['font.sans-serif'] = 'SimHei'  # font able to render Chinese labels
    labels = ['政务', '娱乐', '政治', '疫情', '科技']
    counts = [gov_count, entertaiment_count, poltic_count, battle_count, tech_count]

    # Keep only the categories that were actually found.
    found = [(label, count) for label, count in zip(labels, counts) if count != 0]
    if not found:
        tk.messagebox.showinfo('Tip', 'No classified trending topics to plot.')
        return

    pie_labels = [label for label, _ in found]
    pie_values = [count for _, count in found]
    plt.title('用户关注分布')
    plt.pie(pie_values, explode=[0.01] * len(found), labels=pie_labels, autopct='%1.1f%%')
    plt.show()


def _tag_headline(text):
    """Return *text* with a category tag appended for each keyword it contains.

    The text is segmented with jieba; every segment found in a keyword list
    appends that category's tag and increments the matching module-level
    counter, so a headline containing several keywords is tagged and counted
    once per keyword. A segment is matched against the lists in the order
    entertainment, government, politics, epidemic, technology.
    """
    global gov_count, entertaiment_count, poltic_count, battle_count, tech_count
    for word in jieba.lcut(text):
        if word in entertaiment:
            text += " 娱乐"
            entertaiment_count += 1
        elif word in gov:
            text += " 政务"
            gov_count += 1
        elif word in poltic:
            text += " 政治"
            poltic_count += 1
        elif word in battle:
            text += " 疫情"
            battle_count += 1
        elif word in tech:
            text += " 科技"
            tech_count += 1
    return text


def _headline_title(row):
    """Return the link text of a table row's ``td-02`` cell, or None if absent."""
    cell = row.find(class_="td-02")
    if cell is None or cell.a is None:
        return None
    return cell.a.string


def getTrending():
    """Fetch the Weibo trending list, classify each headline and store it.

    Appends one display string per headline to the module-level ``trending``
    list and updates the category counters. The second table row is stored
    with the prefix '置顶'; the rows after it are prefixed with their rank.

    Raises:
        requests.RequestException: if the request fails, times out or
            returns an HTTP error status.
        RuntimeError: if the page does not contain the expected table.
    """
    response = requests.get("https://s.weibo.com/top/summary", timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    table = soup.find(class_="data")
    if table is None:
        raise RuntimeError('Trending table (class "data") not found in the response.')
    rows = table.find_all(name="tr")
    if len(rows) < 2:
        return  # only a header row (or nothing): no headlines to classify

    # rows[0] is skipped; rows[1] is handled as the pinned entry.
    pinned_title = _headline_title(rows[1])
    if pinned_title is not None:
        trending.append(_tag_headline('置顶' + pinned_title))

    for row in rows[2:]:
        rank_cell = row.find(class_="td-01 ranktop")
        title = _headline_title(row)
        # Rows without a plain-text rank or title cannot be formatted; skip them.
        if rank_cell is None or rank_cell.string is None or title is None:
            continue
        trending.append(_tag_headline(rank_cell.string.ljust(4, ' ') + title))


def filter(num):  # NOTE: shadows the builtin ``filter``; name kept for callers.
    """Show the headlines of one category in the text widget.

    Args:
        num: menu choice as a string. '0' lists every headline; '1'-'5'
            list only headlines whose last tag is 政务, 娱乐, 政治, 疫情 or
            科技 respectively.

    The widget is cleared first, then the current time, any theme banners
    (categories with at least two hits) and the column header are written
    before the headlines. A dialog is shown when nothing matched.
    """
    show.delete('1.0', 'end')
    # Portable spelling of the former '%F %T' format.
    show.insert('1.0', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    for index, count in enumerate(t):
        if count >= 2 and index in _THEME_MESSAGES:
            show.insert('end', _THEME_MESSAGES[index])
    show.insert('end', '\n热搜排名 热搜内容')

    if num == '0':
        matches = trending
    else:
        kind = _KIND_BY_CHOICE.get(num)
        # Only the last tag of a headline is considered, i.e. its final two characters.
        matches = [] if kind is None else [entry for entry in trending if entry.endswith(kind)]

    for entry in matches:
        show.insert('end', '\n' + entry)
    if not matches:
        tk.messagebox.showinfo('Tip', 'The kind of message did not appear in the trending!')


def confirm1(event=None):
    """Handle the confirm button and the Return key.

    Args:
        event: Tk event object when triggered through a key binding; absent
            when triggered through the button's ``command`` callback, hence
            the default.

    'quit' closes the window after confirmation, '0'-'5' display the
    corresponding category, anything else shows a hint. The entry is cleared
    afterwards unless the window was closed.
    """
    choice = select.get()
    if choice == 'quit':
        if tk.messagebox.askquestion('Confirm', 'Are you sure to leave') == tk.messagebox.YES:
            form.destroy()
            return  # the widgets no longer exist, nothing left to clear
    elif choice != '0' and choice not in _KIND_BY_CHOICE:
        tk.messagebox.showinfo('Tip', 'Please input a number from zero to five')
    else:
        filter(choice)
    select.delete(0, 'end')


if __name__ == '__main__':  # build the GUI only when run as a script
    getTrending()
    # Snapshot of the counters, read by filter() for the theme banners.
    t = [gov_count, entertaiment_count, poltic_count, battle_count, tech_count]

    form = tk.Tk()
    form.title('Public opinion analyze system --KevinWang')
    width = 500
    height = 500
    form.resizable(width=False, height=False)

    # Centre the window on the screen.
    screenwidth = form.winfo_screenwidth()
    screenheight = form.winfo_screenheight()
    alignstr = '%dx%d+%d+%d' % (width, height, (screenwidth - width) / 2, (screenheight - height) / 2)
    form.geometry(alignstr)

    w = tk.Label(form, text='0.all message 1.gov 2.entermaintent 3.politic 4.coronavirus 5.tech\n Press quit to leave\n Select the type you want to see')
    select = tk.Entry(form, bd=3)
    confirm = tk.Button(form, text='确认', command=confirm1, width=8, height=2)
    confirm.bind_all('<Return>', confirm1)  # Return key acts like the confirm button
    analyze = tk.Button(form, text='舆情分析', command=drawpic, width=8, height=2)
    show = tk.Text(form, width=70, height=28)

    w.pack()
    select.pack()
    show.pack()
    confirm.pack(side='right')
    analyze.pack(side='right')
    form.mainloop()
