赞
踩
from gensim.models import KeyedVectors
from annoy import AnnoyIndex
from numpy import dot
from gensim import utils, matutils
import numpy as np
keyword_type = {}
# Plain dict copy of the embedding table: word -> vector.
word_emb = {}

# Load the embedding from its text (non-binary) word2vec-format file.
wv_from_text = KeyedVectors.load_word2vec_format(
    "Tencent_AILab_ChineseEmbedding.txt", binary=False
)

# gensim >= 4.0 exposes the vocabulary as `key_to_index`; older releases
# only have `vocab`. Both are mappings keyed by word.
try:
    _vocabulary = wv_from_text.key_to_index
except AttributeError:
    _vocabulary = wv_from_text.vocab

for key in _vocabulary:
    word_emb[key] = wv_from_text[key]


def word_similarity(w1, w2):
    """Return the cosine similarity between the vectors of two words.

    Each vector is normalized with ``matutils.unitvec`` and the dot product
    of the two normalized vectors is returned.

    Args:
        w1: First word; looked up in ``wv_from_text``.
        w2: Second word; looked up in ``wv_from_text``.

    Returns:
        The dot product of the two normalized vectors.

    Raises:
        KeyError: If either word is missing from the loaded embedding.
    """
    return dot(
        matutils.unitvec(wv_from_text[w1]),
        matutils.unitvec(wv_from_text[w2]),
    )
# matutils.unitvec normalizes a one-dimensional array to unit length.
# Demo input: ten values drawn uniformly from [0, 1), for example
# array([0.39128945, 0.45510596, 0.42401018, 0.33814469, 0.31290301,
#        0.15597097, 0.74319954, 0.60262496, 0.30601651, 0.53778863])
input_vector = np.random.uniform(low=0.0, high=1.0, size=10)
# Pass the sample through gensim's unitvec helper to get its normalized form.
unit_vector = matutils.unitvec(input_vector)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。