赞
踩
目录
- import requests, re, json, pandas as pd, time
- from selenium import webdriver # selenium2.48.0 支持phantomjs
- from lxml import etree
- from tqdm import tqdm
- from snownlp import SnowNLP
- import time
# Scrape post titles and dates from the Eastmoney "guba" board for the
# Shanghai Composite index (zssh000001), score every title with SnowNLP and
# append one record per post to a text file.
#
# The original author pinned selenium 2.48.0 because that release supports
# PhantomJS:
#   pip install selenium==2.48.0 -i https://mirror.baidu.com/pypi/simple
#
# NOTE(review): the original carried a "state who I am??" remark here,
# presumably a reminder to set a User-Agent; no header is actually set.

# Location of the PhantomJS binary on the original author's machine.
PHANTOMJS_PATH = r'C:\Users\wang\Desktop\phantomjs-2.1.1-windows (1)\bin\phantomjs.exe'
OUTPUT_PATH = "data_time_txt"
FIELD_SEPARATOR = "__****__"
LIST_URL_TEMPLATE = "http://guba.eastmoney.com/list,zssh000001_{page}.html"
TITLE_XPATH = './/div[@class="articleh normal_post odd"]/span[@class="l3 a3"]/a/@title'
TIME_XPATH = './/div[@class="articleh normal_post odd"]/span[@class="l5 a5"]/text()'
FIRST_PAGE = 1
LAST_PAGE = 1999
PAGE_DELAY_SECONDS = 3


def build_page_url(page):
    """Return the board listing URL for the given 1-based page number."""
    return LIST_URL_TEMPLATE.format(page=page)


def format_record(title, post_date, sentiment):
    """Return one output line: title, date and score joined by the separator."""
    fields = (str(title), str(post_date), str(sentiment))
    return FIELD_SEPARATOR.join(fields) + "\n"


def extract_posts(page_source):
    """Return ``(titles, timestamps)`` extracted from a listing page's HTML.

    Both lists are empty when the page source cannot be parsed at all
    (``etree.HTML`` returns ``None`` for empty input).
    """
    tree = etree.HTML(page_source)
    if tree is None:
        return [], []
    return tree.xpath(TITLE_XPATH), tree.xpath(TIME_XPATH)


def scrape(driver, out_file, pages, delay=PAGE_DELAY_SECONDS):
    """Fetch every page in *pages* and append one record per post to *out_file*.

    A page is skipped unless it yields the same number of titles and
    timestamps and more than two posts; this is the original sanity check
    against partially rendered pages.
    """
    for page in tqdm(pages):
        driver.get(url=build_page_url(page))
        titles, timestamps = extract_posts(driver.page_source)
        print(titles)
        print(timestamps)
        if len(titles) == len(timestamps) and len(timestamps) > 2:
            for title, stamp in zip(titles, timestamps):
                # Keep only the part before the first space (the date).
                post_date = str(stamp).split(" ")[0]
                sentiment = SnowNLP(str(title)).sentiments
                out_file.write(format_record(title, post_date, sentiment))
        # Pause between page loads so the site is not hammered.
        time.sleep(delay)


def main():
    """Run the scrape, always shutting the browser down afterwards."""
    driver = webdriver.PhantomJS(executable_path=PHANTOMJS_PATH)
    try:
        with open(OUTPUT_PATH, "a+", encoding="utf-8") as f:
            scrape(driver, f, range(FIRST_PAGE, LAST_PAGE + 1))
    finally:
        # Without this the PhantomJS process outlives the script.
        driver.quit()


if __name__ == "__main__":
    main()
-
将结果写入 txt 文件,每条记录的字段之间用 `__****__` 分隔;情感得分使用开源库 SnowNLP 计算。输出示例如下:
$上证指数(SH000001)$天天就知道拉证券__****__06-10__****__0.7380620377395649 来来来,冲鸭,为了牛市,全仓梭哈__****__06-10__****__0.32144879323501274 该死的狐狸,我彻底踏空了,你心真坏,什么鱼尾诱多行情,纯粹,现在我也不敢__****__06-10__****__0.1948958915690776 昨天调整给你们机会你们把握不住,怪谁[大笑]__****__06-10__****__0.8689312767645354 接刀行情__****__06-10__****__0.890142453148024 无量反弹继续跌__****__06-10__****__0.1925025397374015 缩量反抽发套中。小心接飞刀,随时跳水。__****__06-10__****__0.1587406976493484 $上证指数(SH000001)$等你跳水__****__06-10__****__0.5348199281758914 为什么叫A股__****__06-10__****__0.31041609369117307 $上证指数(SH000001)$我是空狗,说实话,大盘长的心好慌__****__06-10__****__0.7589232220245196 $上证指数(SH000001)$散户真正亏夲牛市开启__****__06-10__****__0.7707121238079945 $上证指数(SH000001)$东南亚四国光伏豁免只是面子操作,取消中国关税才是__****__06-10__****__0.7173039202475434 【转载】碳达峰碳中和是国家中长期战略的重要组成部分__****__06-10__****__0.9995428587322593 比亚迪加油,争取创下万亿市值公司炒到一千倍市盈率历史记录,让世界看看我大A的霸气__****__06-10__****__0.9990492400663739 别等了,上车看风景[微笑]__****__06-10__****__0.9463208775361536 $上证指数(SH000001)$量化交易有力度__****__06-10__****__0.10875990306767358 最近大a真牛逼__****__06-10__****__0.5586189117390599 $上证指数(SH000001)$不要轻视两特大城市疫情对上市公司的影响,中报会很__****__06-10__****__0.31133644182461606 $上证指数(SH000001)$低开洗盘,己明确上攻趋势[大笑][大笑][大笑]__****__06-10__****__0.8614209827857494 诱多__****__06-10__****__0.875
根据每天的评论数据,计算并绘制股票评论的每日平均情感指数:
import numpy as np
import pandas as pd

# Aggregate the scraped sentiment scores per day of month and plot the daily
# mean. Input is the text file written by the scraper: one record per line,
# "<title>__****__<MM-DD>__****__<score>".

DATA_PATH = "data_time_txt"
FIELD_SEPARATOR = "__****__"
# Number of day-of-month buckets (days 1..NUM_DAYS) that get plotted.
NUM_DAYS = 10


def parse_record(line):
    """Return ``(day_of_month, score)`` for one record line, or ``None``.

    The line is split from the right so a title that itself contains the
    separator cannot shift the date and score fields. Blank or malformed
    lines yield ``None`` instead of raising.
    """
    parts = line.rstrip("\n").rsplit(FIELD_SEPARATOR, 2)
    if len(parts) != 3:
        return None
    _, date_field, score_field = parts
    try:
        day = int(date_field.rsplit("-", 1)[-1])
        score = float(score_field)
    except ValueError:
        return None
    return day, score


def daily_mean_scores(lines, num_days=NUM_DAYS):
    """Return the mean sentiment score for each day ``1..num_days``.

    Args:
        lines: iterable of record lines (an open text file works).
        num_days: number of day-of-month buckets to produce.

    Returns:
        A list of ``num_days`` floats; a day without any record is NaN.
        Records whose day falls outside ``1..num_days`` are ignored.

    Only the day of month is used, so input spanning several months merges
    equal day numbers into the same bucket.
    """
    buckets = [[] for _ in range(num_days)]
    for line in lines:
        record = parse_record(line)
        if record is None:
            continue
        day, score = record
        if 1 <= day <= num_days:
            buckets[day - 1].append(score)
    # np.mean([]) warns and returns NaN; produce the NaN explicitly instead.
    return [float(np.mean(b)) if b else float("nan") for b in buckets]


def plot_daily_sentiment(scores):
    """Plot *scores* against their 1-based day of month and show the figure."""
    # Imported here so the aggregation helpers stay usable without matplotlib.
    import matplotlib.pyplot as plt

    # SimHei renders the Chinese labels; the second setting keeps the minus
    # sign displayable with that font.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    days = list(range(1, len(scores) + 1))
    plt.xlabel('日期', fontsize=8)
    plt.ylabel('情感指数', fontsize=8)
    plt.plot(days, scores, label='时间情感指数', color="r")
    plt.title("上证指数股票评论—情感分析")
    # The series label is only visible once a legend is drawn.
    plt.legend()
    plt.show()


def main():
    """Read the scraped records, aggregate them per day and plot the result."""
    with open(DATA_PATH, "r", encoding="utf-8") as f:
        scores = daily_mean_scores(f)
    plot_daily_sentiment(scores)


if __name__ == "__main__":
    main()
所有的代码数据:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。