赞
踩
import numpy as np
import pandas as pd
from gensim.models import word2vec
# Train a Word2Vec embedding on the 'text' column of the train and test
# splits, save it, reload it, and print the nearest neighbours of one token.

# Both splits are read as tab-separated files.
train_df = pd.read_csv('../data/train_set.csv', sep='\t')
test_df = pd.read_csv('../data/test_a.csv', sep='\t')

# Corpus = every 'text' entry from both splits (train first, then test),
# each split on single spaces into a list of tokens.
list_data = list(
    pd.concat((train_df['text'], test_df['text']), axis=0, ignore_index=True)
    .map(lambda x: x.split(' '))
)

# NOTE(review): `size=` is the gensim 3.x keyword; gensim >= 4.0 renamed it to
# `vector_size` -- confirm against the installed gensim version.
model = word2vec.Word2Vec(list_data, min_count=1, window=10, size=120, seed=7)

# NOTE(review): gensim documents init_sims(replace=True) as overwriting the
# raw vectors with their L2-normalised versions, after which the model can no
# longer be trained; the call is deprecated in gensim 4 -- confirm it is
# still wanted here.
model.init_sims(replace=True)

# Save the model for later use.
# NOTE(review): the file is written by gensim's own save(); the ".h5" suffix
# is only a name and presumably not an HDF5 file -- verify before opening it
# with other tools.
model.save("../emb/word2vec.h5")
model = word2vec.Word2Vec.load("../emb/word2vec.h5")

# Print the 10 entries most similar to the token "3370".
for neighbor in model.wv.similar_by_word('3370', topn=10):
    print(neighbor)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。