
TV Program Recommendation Based on a Convolutional Neural Network (a CNN-based movie recommendation system)

 

This article uses a text convolutional neural network, together with our own TV program dataset, to build a movie recommendation system.

The work is based on https://blog.csdn.net/chengcheng1394/article/details/78820529; I adapted the code to my own dataset and finally got it running. That said, I still do not fully understand how the network is built or the details of how it works.

 

Requirements: TensorFlow 1.0 and Python 3.5.

Model design: the figure is from https://blog.csdn.net/chengcheng1394/article/details/78820529

Unlike the figure above, the dataset used here has no gender, age, or occupation code among the user attributes; it only has the user ID.

The movie IDs, user IDs, and ratings need no conversion, but the movie titles and program genres have to be converted into corresponding numbers before they can be processed.
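
To make this conversion concrete, here is a minimal, runnable sketch of the genre mapping (titles are handled the same way). The genre strings and the padded length of 4 used here are made-up illustration values; the actual preprocessing code below pads genres to length 18 and titles to length 15.

# Hypothetical, self-contained illustration of converting genre strings
# into equal-length lists of integer IDs (not the project's real data).
genres = ['剧情/爱情', '剧情/悬疑/犯罪']

# Build the genre-to-integer dictionary, including a '<PAD>' token
genres_set = set()
for val in genres:
    genres_set.update(val.split('/'))
genres_set.add('<PAD>')
genres2int = {val: ii for ii, val in enumerate(genres_set)}

# Pad every genre list to the same length with the '<PAD>' ID
genres_count = 4  # illustration only; the real code uses 18
genres_map = {}
for val in genres:
    ids = [genres2int[g] for g in val.split('/')]
    ids += [genres2int['<PAD>']] * (genres_count - len(ids))
    genres_map[val] = ids

print(genres_map)  # each genre string now maps to an equal-length list of integers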

The structure of the text convolutional neural network is shown below.

Figure from Kim Yoon's paper: Convolutional Neural Networks for Sentence Classification

http://www.wildml.com/2015/11/understanding-convolutional-neural-networks-for-nlp/

# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from collections import Counter
import tensorflow as tf
import os
import pickle
from tensorflow.python.ops import math_ops
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm


def load_data():
    """
    Load Dataset from File
    """
    os.chdir('E:/广电大数据营销推荐项目案例/数据清洗/电视节目信息数据预处理')

    # Read the user data (only a user ID column)
    users = pd.read_table('./wordsbag/dataprocess/data/week/mydata/data1_users.csv', sep=',', header='infer', engine='python')
    users_orig = users.values

    # Read the movie (TV program) data
    # movies_title = ['MovieID', 'Title', 'Genres']
    movies = pd.read_table('./wordsbag/dataprocess/data/week/mydata/data1_tv.csv', sep=',', header='infer', engine='python')
    movies = movies.filter(regex='program_id|program_title|genres_good')
    movies_orig = movies.values

    # Build the genre-to-integer dictionary
    genres_set = set()
    for val in movies['genres_good'].str.split('/'):
        genres_set.update(val)
    genres_set.add('<PAD>')
    genres2int = {val: ii for ii, val in enumerate(genres_set)}

    # Convert each genre string into a fixed-length list of integers (length 18)
    genres_count = 18
    genres_map = {val: [genres2int[row] for row in val.split('/')] for ii, val in enumerate(set(movies['genres_good']))}
    for key in genres_map:
        for cnt in range(genres_count - len(genres_map[key])):
            genres_map[key].insert(len(genres_map[key]) + cnt, genres2int['<PAD>'])
    movies['genres_good'] = movies['genres_good'].map(genres_map)

    # Build the title-word-to-integer dictionary
    title_set = set()
    for val in movies['program_title'].str.split():
        title_set.update(val)
    title_set.add('<PAD>')
    title2int = {val: ii for ii, val in enumerate(title_set)}

    # Convert each title into a fixed-length list of integers (length 15)
    title_count = 15
    title_map = {val: [title2int[row] for row in val.split()] for ii, val in enumerate(set(movies['program_title']))}
    for key in title_map:
        for cnt in range(title_count - len(title_map[key])):
            title_map[key].insert(len(title_map[key]) + cnt, title2int['<PAD>'])
    movies['program_title'] = movies['program_title'].map(title_map)

    # Read the ratings data (ratings range from 1 to 10)
    ratings = pd.read_table('./wordsbag/dataprocess/data/week/mydata/data1_ratings.csv', sep=',', header='infer', engine='python')
    ratings = ratings.filter(regex='user_id|program_id|rating')

    # Merge the tables
    data = pd.merge(ratings, movies)

    # Split the data into a feature table X and a target table y
    target_fields = ['rating']
    features_pd, targets_pd = data.drop(target_fields, axis=1), data[target_fields]
    features = features_pd.values
    targets_values = targets_pd.values

    return title_count, title_set, genres_count, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig


title_count, title_set, genres_count, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig = load_data()

pickle.dump((title_count, title_set, genres_count, genres2int, features, targets_values, ratings, movies, data, movies_orig, users_orig), open('./wordsbag/dataprocess/data/week/mydata/preprocess.p', 'wb'))

# After preprocessing
# users.head()
movies.head()
movies.values[0]

title_count, title_set, genres_count, genres2int, features, targets_values, ratings, movies, data, movies_orig, users_orig = pickle.load(open('./wordsbag/dataprocess/data/week/mydata/preprocess.p', mode='rb'))
import tensorflow as tf
import os
import pickle


def save_params(params):
    """
    Save parameters to file
    """
    pickle.dump(params, open('./wordsbag/dataprocess/data/week/mydata/params.p', 'wb'))


def load_params():
    """
    Load parameters from file
    """
    return pickle.load(open('./wordsbag/dataprocess/data/week/mydata/params.p', mode='rb'))


# Implementation
# Dimension of the embedding matrices
embed_dim = 32
# Number of user IDs
uid_max = max(features.take(0, 1)) + 1  # 1966 + 1 = 1967
print(uid_max)
# Number of movie IDs
movie_id_max = max(features.take(1, 1)) + 1  # 995 + 1 = 996
print(movie_id_max)
# Number of movie genres
movie_categories_max = max(genres2int.values()) + 1  # 104 (why are there so many duplicate values?)
print(movie_categories_max)
# Number of words in movie titles
movie_title_max = len(title_set)  # 501 + 1 = 502
print(movie_title_max)
# How to combine a movie's genre embedding vectors; "mean" was considered but not implemented, so "sum" is used
combiner = "sum"
# Title length
sentences_size = title_count  # = 15
# Sliding window sizes for the text convolution: 2, 3, 4 and 5 words
window_sizes = {2, 3, 4, 5}
# Number of convolution filters
filter_num = 8
# Dictionary mapping a movie ID to its row index; in the dataset the movie ID does not match
# the row index, e.g. the movie in row 5 does not necessarily have ID 5
movieid2idx = {val[0]: i for i, val in enumerate(movies.values)}
print(movieid2idx)

# Hyperparameters
# Number of Epochs
num_epochs = 5
# Batch Size
batch_size = 256
dropout_keep = 0.5
# Learning Rate
learning_rate = 0.0001
# Show stats for every n number of batches
show_every_n_batches = 20
save_dir = './wordsbag/dataprocess/data/week/mydata/save2'
# Input placeholders
def get_inputs():
    uid = tf.placeholder(tf.int32, [None, 1], name="uid")
    movie_id = tf.placeholder(tf.int32, [None, 1], name="movie_id")
    movie_categories = tf.placeholder(tf.int32, [None, 18], name="movie_categories")
    movie_titles = tf.placeholder(tf.int32, [None, 15], name="movie_titles")
    targets = tf.placeholder(tf.int32, [None, 1], name="targets")
    LearningRate = tf.placeholder(tf.float32, name="LearningRate")
    dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    return uid, movie_id, movie_categories, movie_titles, targets, LearningRate, dropout_keep_prob
# Build the network
def get_user_embedding(uid):
    with tf.name_scope("user_embedding"):
        uid_embed_matrix = tf.Variable(tf.random_uniform([uid_max, embed_dim], -1, 1), name="uid_embed_matrix")
        uid_embed_layer = tf.nn.embedding_lookup(uid_embed_matrix, uid, name="uid_embed_layer")
    return uid_embed_layer


# Fully connect the user embedding to produce the user feature
def get_user_feature_layer(uid_embed_layer):
    with tf.name_scope("user_fc"):
        # First fully connected layer
        uid_fc_layer = tf.layers.dense(uid_embed_layer, embed_dim, name="uid_fc_layer", activation=tf.nn.relu)
        # Second fully connected layer
        user_combine_layer = tf.concat([uid_fc_layer], 2)  # (?, 1, 32)
        user_combine_layer = tf.contrib.layers.fully_connected(user_combine_layer, 200, tf.tanh)  # (?, 1, 200)
        user_combine_layer_flat = tf.reshape(user_combine_layer, [-1, 200])
    return user_combine_layer, user_combine_layer_flat


# Embedding matrix for the Movie ID
def get_movie_id_embed_layer(movie_id):
    with tf.name_scope("movie_embedding"):
        movie_id_embed_matrix = tf.Variable(tf.random_uniform([movie_id_max, embed_dim], -1, 1), name="movie_id_embed_matrix")
        movie_id_embed_layer = tf.nn.embedding_lookup(movie_id_embed_matrix, movie_id, name="movie_id_embed_layer")
    return movie_id_embed_layer


# Sum a movie's multiple genre embedding vectors
def get_movie_categories_layers(movie_categories):
    with tf.name_scope("movie_categories_layers"):
        movie_categories_embed_matrix = tf.Variable(tf.random_uniform([movie_categories_max, embed_dim], -1, 1), name="movie_categories_embed_matrix")
        movie_categories_embed_layer = tf.nn.embedding_lookup(movie_categories_embed_matrix, movie_categories, name="movie_categories_embed_layer")
        if combiner == "sum":
            movie_categories_embed_layer = tf.reduce_sum(movie_categories_embed_layer, axis=1, keep_dims=True)
        # elif combiner == "mean":
    return movie_categories_embed_layer


# Text convolutional network for the Movie Title
def get_movie_cnn_layer(movie_titles):
    # Look up the embedding vector of each word in the title
    with tf.name_scope("movie_embedding"):
        movie_title_embed_matrix = tf.Variable(tf.random_uniform([movie_title_max, embed_dim], -1, 1), name="movie_title_embed_matrix")
        movie_title_embed_layer = tf.nn.embedding_lookup(movie_title_embed_matrix, movie_titles, name="movie_title_embed_layer")
        movie_title_embed_layer_expand = tf.expand_dims(movie_title_embed_layer, -1)

    # Convolution and max pooling over the embedded text with filters of different sizes
    pool_layer_lst = []
    for window_size in window_sizes:
        with tf.name_scope("movie_txt_conv_maxpool_{}".format(window_size)):
            filter_weights = tf.Variable(tf.truncated_normal([window_size, embed_dim, 1, filter_num], stddev=0.1), name="filter_weights")
            filter_bias = tf.Variable(tf.constant(0.1, shape=[filter_num]), name="filter_bias")
            conv_layer = tf.nn.conv2d(movie_title_embed_layer_expand, filter_weights, [1, 1, 1, 1], padding="VALID", name="conv_layer")
            relu_layer = tf.nn.relu(tf.nn.bias_add(conv_layer, filter_bias), name="relu_layer")
            maxpool_layer = tf.nn.max_pool(relu_layer, [1, sentences_size - window_size + 1, 1, 1], [1, 1, 1, 1], padding="VALID", name="maxpool_layer")
            pool_layer_lst.append(maxpool_layer)

    # Dropout layer
    with tf.name_scope("pool_dropout"):
        pool_layer = tf.concat(pool_layer_lst, 3, name="pool_layer")
        max_num = len(window_sizes) * filter_num
        pool_layer_flat = tf.reshape(pool_layer, [-1, 1, max_num], name="pool_layer_flat")
        dropout_layer = tf.nn.dropout(pool_layer_flat, dropout_keep_prob, name="dropout_layer")
    return pool_layer_flat, dropout_layer


# Fully connect all of the movie layers together
def get_movie_feature_layer(movie_id_embed_layer, movie_categories_embed_layer, dropout_layer):
    with tf.name_scope("movie_fc"):
        # First fully connected layer
        movie_id_fc_layer = tf.layers.dense(movie_id_embed_layer, embed_dim, name="movie_id_fc_layer", activation=tf.nn.relu)
        movie_categories_fc_layer = tf.layers.dense(movie_categories_embed_layer, embed_dim, name="movie_categories_fc_layer", activation=tf.nn.relu)
        # Second fully connected layer
        movie_combine_layer = tf.concat([movie_id_fc_layer, movie_categories_fc_layer, dropout_layer], 2)  # (?, 1, 96)
        movie_combine_layer = tf.contrib.layers.fully_connected(movie_combine_layer, 200, tf.tanh)  # (?, 1, 200)
        movie_combine_layer_flat = tf.reshape(movie_combine_layer, [-1, 200])
    return movie_combine_layer, movie_combine_layer_flat
# Build the computation graph
tf.reset_default_graph()
train_graph = tf.Graph()
with train_graph.as_default():
    # Input placeholders
    uid, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob = get_inputs()
    # User embedding vector (this dataset only has the user ID)
    uid_embed_layer = get_user_embedding(uid)
    # User feature
    user_combine_layer, user_combine_layer_flat = get_user_feature_layer(uid_embed_layer)
    # Movie ID embedding vector
    movie_id_embed_layer = get_movie_id_embed_layer(movie_id)
    # Movie genre embedding vectors
    movie_categories_embed_layer = get_movie_categories_layers(movie_categories)
    # Movie title feature vector
    pool_layer_flat, dropout_layer = get_movie_cnn_layer(movie_titles)
    # Movie feature
    movie_combine_layer, movie_combine_layer_flat = get_movie_feature_layer(movie_id_embed_layer,
                                                                            movie_categories_embed_layer,
                                                                            dropout_layer)
    # Compute the predicted rating. Note that the two alternative schemes give the inference
    # tensor different names (name values); the name is needed later to fetch the tensor when making recommendations.
    with tf.name_scope("inference"):
        # Alternative scheme: feed the user and movie features through a fully connected layer that outputs a single value
        # Here we simply take the dot product of the user feature and the movie feature as the predicted rating
        inference = tf.reduce_sum(user_combine_layer_flat * movie_combine_layer_flat, axis=1)
        inference = tf.expand_dims(inference, axis=1)

    with tf.name_scope("loss"):
        # MSE loss: regress the prediction towards the rating
        cost = tf.losses.mean_squared_error(targets, inference)
        loss = tf.reduce_mean(cost)

    # Optimize the loss
    # train_op = tf.train.AdamOptimizer(lr).minimize(loss)  # cost
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(lr)
    gradients = optimizer.compute_gradients(loss)  # cost
    train_op = optimizer.apply_gradients(gradients, global_step=global_step)
# Generate batches
def get_batches(Xs, ys, batch_size):
    for start in range(0, len(Xs), batch_size):
        end = min(start + batch_size, len(Xs))
        yield Xs[start:end], ys[start:end]
# Train the network
# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import time
import datetime

losses = {'train': [], 'test': []}

with tf.Session(graph=train_graph) as sess:
    # Collect summaries for TensorBoard
    # Keep track of gradient values and sparsity
    grad_summaries = []
    for g, v in gradients:
        if g is not None:
            grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name.replace(':', '_')), g)
            sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name.replace(':', '_')), tf.nn.zero_fraction(g))
            grad_summaries.append(grad_hist_summary)
            grad_summaries.append(sparsity_summary)
    grad_summaries_merged = tf.summary.merge(grad_summaries)

    # Output directory for models and summaries
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))

    # Summaries for loss and accuracy
    loss_summary = tf.summary.scalar("loss", loss)

    # Train Summaries
    train_summary_op = tf.summary.merge([loss_summary, grad_summaries_merged])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

    # Inference summaries
    inference_summary_op = tf.summary.merge([loss_summary])
    inference_summary_dir = os.path.join(out_dir, "summaries", "inference")
    inference_summary_writer = tf.summary.FileWriter(inference_summary_dir, sess.graph)

    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    for epoch_i in range(num_epochs):
        # Split the dataset into training and test sets
        train_X, test_X, train_y, test_y = train_test_split(features,
                                                            targets_values,
                                                            test_size=0.2,
                                                            random_state=0)
        train_batches = get_batches(train_X, train_y, batch_size)
        test_batches = get_batches(test_X, test_y, batch_size)

        # Training iterations; record the training loss
        for batch_i in range(len(train_X) // batch_size):
            x, y = next(train_batches)

            categories = np.zeros([batch_size, 18])
            for i in range(batch_size):
                categories[i] = x.take(3, 1)[i]
                # categories[i] = x.take(6, 1)[i]

            titles = np.zeros([batch_size, sentences_size])
            for i in range(batch_size):
                titles[i] = x.take(2, 1)[i]

            feed = {
                uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                movie_categories: categories,  # x.take(3,1)
                movie_titles: titles,  # x.take(2,1)
                targets: np.reshape(y, [batch_size, 1]),
                dropout_keep_prob: dropout_keep,  # dropout_keep
                lr: learning_rate}

            step, train_loss, summaries, _ = sess.run([global_step, loss, train_summary_op, train_op], feed)  # cost
            losses['train'].append(train_loss)
            train_summary_writer.add_summary(summaries, step)

            # Show every <show_every_n_batches> batches
            if (epoch_i * (len(train_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                time_str = datetime.datetime.now().isoformat()
                print('{}: Epoch {:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(
                    time_str,
                    epoch_i,
                    batch_i,
                    (len(train_X) // batch_size),
                    train_loss))

        # Iterate over the test data
        for batch_i in range(len(test_X) // batch_size):
            x, y = next(test_batches)

            categories = np.zeros([batch_size, 18])
            for i in range(batch_size):
                categories[i] = x.take(3, 1)[i]  # the 3 in x.take(3,1) must be changed to match your own data

            titles = np.zeros([batch_size, sentences_size])
            for i in range(batch_size):
                titles[i] = x.take(2, 1)[i]  # the 2 in x.take(2,1) must be changed to match your own data

            feed = {
                uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                movie_categories: categories,  # x.take(3,1)
                movie_titles: titles,  # x.take(2,1)
                targets: np.reshape(y, [batch_size, 1]),
                dropout_keep_prob: 1,
                lr: learning_rate}

            step, test_loss, summaries = sess.run([global_step, loss, inference_summary_op], feed)  # cost

            # Record the test loss
            losses['test'].append(test_loss)
            inference_summary_writer.add_summary(summaries, step)

            time_str = datetime.datetime.now().isoformat()
            if (epoch_i * (len(test_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                print('{}: Epoch {:>3} Batch {:>4}/{} test_loss = {:.3f}'.format(
                    time_str,
                    epoch_i,
                    batch_i,
                    (len(test_X) // batch_size),
                    test_loss))

    # Save Model
    saver.save(sess, save_dir)  # , global_step=epoch_i
    print('Model Trained and Saved')

# Save parameters
save_params((save_dir))
load_dir = load_params()

# Plot the training loss
plt.plot(losses['train'], label='Training loss')
plt.legend()
_ = plt.ylim()

# Plot the test loss
plt.plot(losses['test'], label='Test loss')
plt.legend()
_ = plt.ylim()
# Fetch tensors from the loaded graph
def get_tensors(loaded_graph):
    uid = loaded_graph.get_tensor_by_name("uid:0")
    movie_id = loaded_graph.get_tensor_by_name("movie_id:0")
    movie_categories = loaded_graph.get_tensor_by_name("movie_categories:0")
    movie_titles = loaded_graph.get_tensor_by_name("movie_titles:0")
    targets = loaded_graph.get_tensor_by_name("targets:0")
    dropout_keep_prob = loaded_graph.get_tensor_by_name("dropout_keep_prob:0")
    lr = loaded_graph.get_tensor_by_name("LearningRate:0")
    # The two prediction schemes use different names to fetch the inference tensor
    # inference = loaded_graph.get_tensor_by_name("inference/inference/BiasAdd:0")
    inference = loaded_graph.get_tensor_by_name("inference/ExpandDims:0")  # formerly MatMul:0; updated because the inference code changed (thanks to @清歌 for pointing this out)
    movie_combine_layer_flat = loaded_graph.get_tensor_by_name("movie_fc/Reshape:0")
    user_combine_layer_flat = loaded_graph.get_tensor_by_name("user_fc/Reshape:0")
    return uid, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, inference, movie_combine_layer_flat, user_combine_layer_flat
# Predict the rating a given user would give a given movie
def rating_movie(user_id_val, movie_id_val):
    loaded_graph = tf.Graph()
    with tf.Session(graph=loaded_graph) as sess:
        # Load saved model
        loader = tf.train.import_meta_graph(load_dir + '.meta')
        loader.restore(sess, load_dir)

        # Get Tensors from loaded model
        uid, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, inference, _, __ = get_tensors(loaded_graph)  # loaded_graph

        categories = np.zeros([1, 18])
        categories[0] = movies.values[movieid2idx[movie_id_val]][2]

        titles = np.zeros([1, sentences_size])
        titles[0] = movies.values[movieid2idx[movie_id_val]][1]

        feed = {
            uid: np.reshape(users.values[user_id_val - 1][0], [1, 1]),
            movie_id: np.reshape(movies.values[movieid2idx[movie_id_val]][0], [1, 1]),
            movie_categories: categories,  # x.take(6,1)
            movie_titles: titles,  # x.take(5,1)
            dropout_keep_prob: 1}

        # Get Prediction
        inference_val = sess.run([inference], feed)

        return inference_val


rating_movie(23, 1)
# Generate the Movie feature matrix
loaded_graph = tf.Graph()
movie_matrics = []
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_dir + '.meta')
    loader.restore(sess, load_dir)

    # Get Tensors from loaded model
    uid, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, _, movie_combine_layer_flat, __ = get_tensors(loaded_graph)  # loaded_graph

    for item in movies.values:
        categories = np.zeros([1, 18])
        categories[0] = item.take(2)

        titles = np.zeros([1, sentences_size])
        titles[0] = item.take(1)

        feed = {
            movie_id: np.reshape(item.take(0), [1, 1]),
            movie_categories: categories,  # x.take(3,1)
            movie_titles: titles,  # x.take(2,1)
            dropout_keep_prob: 1}

        movie_combine_layer_flat_val = sess.run([movie_combine_layer_flat], feed)
        movie_matrics.append(movie_combine_layer_flat_val)

pickle.dump((np.array(movie_matrics).reshape(-1, 200)), open('movie_matrics.p', 'wb'))
movie_matrics = pickle.load(open('movie_matrics.p', mode='rb'))
# Generate the User feature matrix
loaded_graph = tf.Graph()
users_matrics = []
with tf.Session(graph=loaded_graph) as sess:
    # Load saved model
    loader = tf.train.import_meta_graph(load_dir + '.meta')
    loader.restore(sess, load_dir)

    # Get Tensors from loaded model
    uid, movie_id, movie_categories, movie_titles, targets, lr, dropout_keep_prob, _, __, user_combine_layer_flat = get_tensors(loaded_graph)  # loaded_graph

    for item in users.values:
        feed = {
            uid: np.reshape(item.take(0), [1, 1]),
            dropout_keep_prob: 1}

        user_combine_layer_flat_val = sess.run([user_combine_layer_flat], feed)
        users_matrics.append(user_combine_layer_flat_val)

pickle.dump((np.array(users_matrics).reshape(-1, 200)), open('./wordsbag/dataprocess/data/week/mydata/users_matrics.p', 'wb'))
users_matrics = pickle.load(open('./wordsbag/dataprocess/data/week/mydata/users_matrics.p', mode='rb'))
# Recommend programs of the same type as a given one
def recommend_same_type_movie(movie_id_val, top_k=20):
    loaded_graph = tf.Graph()
    with tf.Session(graph=loaded_graph) as sess:
        # Load saved model
        loader = tf.train.import_meta_graph(load_dir + '.meta')
        loader.restore(sess, load_dir)

        # Normalize the movie feature matrix so the similarity is a cosine similarity
        norm_movie_matrics = tf.sqrt(tf.reduce_sum(tf.square(movie_matrics), 1, keep_dims=True))
        normalized_movie_matrics = movie_matrics / norm_movie_matrics

        # Recommend movies of the same type
        probs_embeddings = (movie_matrics[movieid2idx[movie_id_val]]).reshape([1, 200])
        probs_similarity = tf.matmul(probs_embeddings, tf.transpose(normalized_movie_matrics))
        sim = (probs_similarity.eval())
        # results = (-sim[0]).argsort()[0:top_k]
        # print(results)

        print("The movie you watched is: {}".format(movies_orig[movieid2idx[movie_id_val]]))
        print("Here are the recommendations for you:")
        p = np.squeeze(sim)
        p[np.argsort(p)[:-top_k]] = 0  # keep only the top_k most similar programs
        p = p / np.sum(p)
        results = set()
        while len(results) != 10:  # draw 10 distinct recommendations from the top_k candidates
            c = np.random.choice(501, 1, p=p)[0]  # 501 is the number of programs in this dataset; change it to match your own data
            results.add(c)
        for val in results:
            print(val)
            print(movies_orig[val])

        return results


recommend_same_type_movie(20, 10)

Calling recommend_same_type_movie(the ID of a program the user has watched, the number of candidates top_k) outputs programs whose tags are similar to the program the user watched.

recommend_same_type_movie(20, 10)

The output is as follows:

The movie you watched is: [20 '欢乐颂' '剧情 / 爱情']
Here are the recommendations for you:
257
[401 '生活启示录' '剧情 / 爱情']
193
[275 '朋友圈' '剧情 / 爱情']
262
[409 '平凡岁月' '剧情 / 爱情']
426
[770 '公主夜游记' '剧情 / 爱情']
460
[867 '观音山' '剧情 / 爱情']
46
[75 '人间至味是清欢' '剧情 / 爱情']
303
[498 '上海滩' '剧情 / 爱情']
16
[20 '欢乐颂' '剧情 / 爱情']
308
[505 '逆光飞翔' '剧情 / 爱情']
86
[134 '向幸福出发' '剧情 / 爱情']

Research on recommendation algorithms based on deep learning still has a long, long way to go...

 
