赞
踩
需要的资源:
字体文件,防止中文乱码
背景图片,好像设置了也没什么作用,有空再研究下
"""Download the danmaku (bullet comments) of a bilibili video and draw a word cloud."""
import os
import re

import matplotlib.pyplot as plt
import requests
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Text file the danmaku are saved to and the word cloud is built from.
BARRAGE_FILE = 'Barrage.txt'

# Default resources: a font with CJK glyphs (otherwise Chinese text renders
# as empty boxes) and the image used as the word-cloud mask.
DEFAULT_FONT_PATH = r'D:\Users\GengKY\PycharmProjects\day01\getbilibilixml\AliHYAiHei-Beta.ttf'
DEFAULT_MASK_PATH = r'D:\Users\GengKY\PycharmProjects\day01\getbilibilixml\_28170.png'

# Seconds to wait for a server before giving up; without a timeout
# requests can block forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def open_url(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36'}
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text


def getdanmu(avurl):
    """Download the danmaku of the video at *avurl* into ``Barrage.txt``.

    The video page is searched for ``cid=<digits>&``; that id selects the
    comment XML, from which the text of every ``<d>`` element is written
    to the file, one comment per line. An existing file is overwritten.

    Raises:
        ValueError: if no cid can be found in the video page.
        requests.RequestException: if either HTTP request fails.
    """
    page_html = open_url(avurl)
    cid_match = re.search(r'cid=(\d+)&', page_html)
    if cid_match is None:
        raise ValueError('no cid found in page: {}'.format(avurl))
    danmu_id = cid_match.group(1)
    danmu_url = 'http://comment.bilibili.com/{}.xml'.format(danmu_id)
    print("cid是:", danmu_id)

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}
    # Certificate verification is left at the requests default (enabled),
    # which matters if the server redirects this request to HTTPS.
    response = requests.get(url=danmu_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    content = response.content.decode('utf-8')
    data_list = re.findall(r'<d.*?>(.*?)</d>', content)
    print(data_list)

    # Mode 'w' truncates any previous run's file, and the file is opened
    # once for all comments instead of once per comment.
    with open(BARRAGE_FILE, 'w', encoding='utf-8') as fp:
        fp.writelines(li + '\n' for li in data_list)


def ciyun(font_path=DEFAULT_FONT_PATH, mask_path=DEFAULT_MASK_PATH):
    """Render the text in ``Barrage.txt`` as a word cloud and display it.

    Args:
        font_path: Font file used to draw the words; it must contain CJK
            glyphs for Chinese comments to render.
        mask_path: Image used as the mask; pure-white pixels are kept free
            of words.
    """
    with open(BARRAGE_FILE, 'r', encoding='utf-8') as fp:
        text = fp.read()

    mask_image = plt.imread(mask_path)
    # matplotlib returns PNG data as floats in [0, 1], while WordCloud only
    # masks out pixels equal to 255. Rescale to 8-bit so the white
    # background of the mask actually shapes the cloud.
    if mask_image.dtype.kind == 'f':
        mask_image = (mask_image * 255).round().astype('uint8')

    wordcloud = WordCloud(
        width=1000,                # canvas width in pixels
        height=800,                # canvas height in pixels
        margin=2,                  # blank margin around words
        background_color='white',  # the library default is black
        font_path=font_path,
        mask=mask_image,
        max_font_size=200,
    ).generate(text)

    plt.imshow(wordcloud)
    plt.axis('off')  # hide the axes around the image
    plt.show()


if __name__ == '__main__':
    avurl = input("请输入bilibili视频连接: ")
    getdanmu(avurl)
    ciyun()
测试:
https://www.bilibili.com/video/BV1cr4y1F7XF
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。