import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress most of TensorFlow's startup log noise

# x: [60k, 28, 28]
# y: [60k]
(x, y), _ = datasets.mnist.load_data()  # downloads the dataset automatically
# x: [0~255] => [0~1.]
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.  # convert the numpy array to a tensor and scale to [0, 1]
y = tf.convert_to_tensor(y, dtype=tf.int32)

print(x.shape, y.shape, x.dtype, y.dtype)  # check that shapes and dtypes match expectations
print(tf.reduce_min(x), tf.reduce_max(x))  # print the min and max of x and y
print(tf.reduce_min(y), tf.reduce_max(y))

# Output:
# (60000, 28, 28) (60000,) <dtype: 'float32'> <dtype: 'int32'>
# tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(1.0, shape=(), dtype=float32)
# tf.Tensor(0, shape=(), dtype=int32) tf.Tensor(9, shape=(), dtype=int32)


train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)  # build a Dataset object batched into groups of 128
train_iter = iter(train_db)  # create an iterator over the dataset
sample = next(train_iter)  # next() pulls one batch from the iterator at a time
print('batch:', sample[0].shape, sample[1].shape)  # print the shapes of the first and second elements of the batch
# Output: batch: (128, 28, 28) (128,)
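# A variant sketch (my addition, not in the original post): the same input pipeline
# with shuffling before batching, which is common practice for training; it is not
# used by the rest of this script.
_train_db_shuffled = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10000).batch(128)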

# Network shape: [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))  # wrap the weights in tf.Variable so they can be tracked and trained by automatic differentiation
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))  # truncated normal distribution
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
# tf.random.truncated_normal(shape, mean, stddev, dtype, seed, name)
# shape: shape of the generated random tensor
# mean: mean of the normal distribution, 0 by default
# stddev: standard deviation of the normal distribution
# dtype: dtype of the generated values
# seed: an integer; once it is set, the same random values are generated every run
# name: name of the operation
# tf.random.truncated_normal([2, 2], mean=0, stddev=0.2, dtype=tf.float32, seed=1, name='v')
# With mean=0 and stddev=0.2, samples never differ from the mean by more than two standard deviations, i.e. they lie in [-0.4, 0.4]
# tf.random.truncated_normal([2, 2], mean=0, stddev=0.1, dtype=tf.float32, seed=1, name='v')
# With stddev=0.1 the range is [-0.2, 0.2]
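# A minimal runnable check of the range described above (my addition, not in the
# original post): truncated-normal samples never fall more than two standard
# deviations from the mean, so with stddev=0.1 every value lies in [-0.2, 0.2].
_demo = tf.random.truncated_normal([1000], mean=0.0, stddev=0.1, seed=1)
assert float(tf.reduce_max(tf.abs(_demo))) <= 0.2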

lr = 1e-3  # learning rate (gradient step size)

for epoch in range(10):  # iterate over the dataset for 10 epochs (epoch 0 through 9)
    for step, (x, y) in enumerate(train_db):  # for every batch
        # x: [128, 28, 28]
        # y: [128]

        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28*28])  # flatten each image, i.e. reshape x from [b, w, h] to [b, w*h]
        # -1 lets TensorFlow infer the batch dimension b automatically
        with tf.GradientTape() as tape:  # records operations for automatic differentiation; only tf.Variable tensors are tracked by default
            # x: [b, 28*28]
            # h1 = x@w1 + b1
            # x[b,784] @ w1[784,256] + b1 => h1[b,256]; h1 @ w2[256,128] + b2 => h2[b,128]; h2 @ w3[128,10] + b3 => out[b,10]
            h1 = x@w1 + tf.broadcast_to(b1, [x.shape[0], 256])  # [b,784] @ [784,256] + [b,256] = [b,256]
            # activation function, introduces non-linearity
            h1 = tf.nn.relu(h1)
            # [b, 256] => [b, 128]
            h2 = h1@w2 + b2
            h2 = tf.nn.relu(h2)  # ==2== hidden layer 1 to hidden layer 2: [b,256] @ [256,128] + [128] = [b,128]
            # [b, 128] => [b, 10]
            out = h2@w3 + b3  # ==3== hidden layer 2 to output layer: [b,128] @ [128,10] + [10] = [b,10]

            # compute loss: mse = mean(sum((y-out)^2))
            # out: [b, 10], so convert y: [b] => y_onehot: [b, 10]
            # one-hot encode the labels (effectively a dimension increase); depth is the number of classes
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_onehot = tf.one_hot(y, depth=10)

            # mse = mean(sum((y-out)^2))
            # [b, 10]: the prediction out has shape [b,10]; the one-hot encoded label y also has shape [b,10]
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print(grads)
        # w1 = w1 - lr * w1_grad
        # Pitfall: writing w1 = w1 - lr * grads[0] rebinds w1 to a plain Tensor
        # (no longer a tf.Variable), so it would no longer be tracked for differentiation

        # in-place update: assign_sub(value) performs ref = ref - value
        # i.e. variable.assign_sub(value) computes variable = variable - value
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])

        # print the loss every 100 batches; float() converts the loss tensor into a plain Python number
        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))

# An epoch means the entire dataset has passed through the network once
# A batch is one of the slices an epoch (all the data) is split into; the size of each slice is the batch size
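
To make the epoch/batch relationship above concrete, here is a quick worked check (my own addition, not from the original post):

import math
steps_per_epoch = math.ceil(60000 / 128)
print(steps_per_epoch)  # 469 batches per epoch, i.e. steps 0 through 468

This is why each epoch in the log below only prints the loss at steps 0, 100, 200, 300 and 400.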

The code explanations come from my own understanding combined with material gathered online.
The code can be run as-is; the output is:
(60000, 28, 28) (60000,) <dtype: 'float32'> <dtype: 'int32'>
tf.Tensor(0.0, shape=(), dtype=float32) tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(0, shape=(), dtype=int32) tf.Tensor(9, shape=(), dtype=int32)
batch: (128, 28, 28) (128,)
0 0 loss: 0.33306556940078735
0 100 loss: 0.20801925659179688
0 200 loss: 0.18559986352920532
0 300 loss: 0.16979117691516876
0 400 loss: 0.1634766310453415
1 0 loss: 0.16542527079582214
1 100 loss: 0.1560421586036682
1 200 loss: 0.15167365968227386
1 300 loss: 0.14091989398002625
1 400 loss: 0.13649582862854004
2 0 loss: 0.1405315101146698
2 100 loss: 0.136332705616951
2 200 loss: 0.13225948810577393
2 300 loss: 0.12394670397043228
2 400 loss: 0.1201285719871521
3 0 loss: 0.12452125549316406
3 100 loss: 0.12340305745601654
3 200 loss: 0.11915738880634308
3 300 loss: 0.11255736649036407
3 400 loss: 0.109268918633461
4 0 loss: 0.11321870237588882
4 100 loss: 0.1141565814614296
4 200 loss: 0.1097048670053482
4 300 loss: 0.10436363518238068
4 400 loss: 0.10156302154064178
5 0 loss: 0.10487793385982513
5 100 loss: 0.10716714709997177
5 200 loss: 0.10255954414606094
5 300 loss: 0.098089799284935
5 400 loss: 0.09579628705978394
6 0 loss: 0.09836559742689133
6 100 loss: 0.10161657631397247
6 200 loss: 0.09695516526699066
6 300 loss: 0.0931091457605362
6 400 loss: 0.09125898778438568
7 0 loss: 0.09311607480049133
7 100 loss: 0.0970434620976448
7 200 loss: 0.0923660472035408
7 300 loss: 0.08903666585683823
7 400 loss: 0.08764063566923141
8 0 loss: 0.08880780637264252
8 100 loss: 0.09320847690105438
8 200 loss: 0.08853678405284882
8 300 loss: 0.0856456384062767
8 400 loss: 0.084683857858181
9 0 loss: 0.08519560843706131
9 100 loss: 0.08989918977022171
9 200 loss: 0.08525765687227249
9 300 loss: 0.08276523649692535
9 400 loss: 0.0821666494011879
