赞
踩
使用 K-means 做词聚类之前,需要先用 word2vec 对词进行向量化预处理。
# @Author : LinYimeng
代码传送门:
- # -*- coding: utf-8 -*-
- # @Author : LinYimeng
- import multiprocessing
- import gensim
- from gensim.test.utils import common_texts, get_tmpfile
- from gensim.models import word2vec,Word2Vec
- from gensim.models import KeyedVectors
- # import logging
- import os
-
- # logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
- sentences = word2vec.LineSentence('one.txt')
- model = Word2Vec(sentences,size = 256, min_count=1, window=5,sg=0,workers=multiprocessing.cpu_count())
- model.save("w2v_model1.bin")
- #model.wv.save_word2vec_format('w2v_model1.txt',binary = False)
- #模型储存与加载
-
- #计算一个词的最近似的词:
- gensim.models.Word2Vec.load("w2v_model1.bin")
- for key in model.similar_by_word('广告',topn=10):
- print(key)
-
- #计算两个词的相似度:
- p
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。