The code in this section is a reproduction of the paper "A Neural Algorithm of Artistic Style". Plenty of explanations of the underlying principles can be found online, so they are not repeated here; only the code is shown.
tensorflow 2.6.0
keras 2.6.0
Note: the keras and tensorflow versions must match, otherwise calling tf.keras will raise errors.
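A quick way to confirm the installed versions before running the script (a minimal sketch; the expected values are simply the versions listed above):

import tensorflow as tf
import keras

print(tf.__version__)     # expected: 2.6.0
print(keras.__version__)  # expected: 2.6.0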
--vgg16-image-style-transfer (project root)
|--content (content image directory)
|--style (style image directory)
|--output (output directory)
|--main.py
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from PIL import Image


def load_img(img_path, max_dim=800):
    """Load an image.

    :param img_path: path to the image file
    :param max_dim: target size of the longest edge
    :return: a 4-D NHWC tensor with values in [0, 1]
    """
    # Read the raw file contents from the given path
    img = tf.io.read_file(img_path)
    # Decode into a 3-channel image (shape: HWC, values in [0, 1])
    img = tf.image.decode_image(img, channels=3, dtype=tf.float32)
    # Take the H and W parts of the shape
    shape = tf.cast(tf.shape(img)[:-1], tf.float32)
    long_dim = tf.reduce_max(shape)
    # Scaling factor so the longest edge becomes max_dim
    scale = max_dim / long_dim
    new_shape = tf.cast(shape * scale, tf.int32)
    # Resize the image
    img = tf.image.resize(img, new_shape)
    # HWC -> NHWC
    img = img[tf.newaxis, :]
    return img


def img_show(image, title=None):
    """Display an image."""
    if len(image.shape) > 3:
        image = tf.squeeze(image)
    plt.imshow(image)
    plt.axis('off')
    if title:
        plt.title(title)
    plt.show()


def vgg_layers(layer_names):
    """Build a custom model on top of VGG16."""
    # Load the convolutional part of VGG16
    vgg = tf.keras.applications.vgg16.VGG16(include_top=False, weights='imagenet')
    # The VGG part never takes part in training
    vgg.trainable = False
    # Grab the requested layers
    outputs = [vgg.get_layer(name).output for name in layer_names]
    # New model: same input as VGG16, outputs are the requested layers
    model = tf.keras.Model([vgg.input], outputs)
    return model


def gram_matrix(input_tensor):
    """Compute the Gram matrix of a feature map."""
    # Einstein summation: bijc and bijd both index the four dimensions of input_tensor.
    # For input_tensor of shape (1, 512, 512, 3): b=1, i=512, j=512, c=3, d=3.
    # The bcd result measures the correlation between pairs of feature maps.
    res = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
    # Shape of the input feature map
    shape = tf.shape(input_tensor)
    # Height * width gives the number of spatial locations
    num_locations = tf.cast(shape[1] * shape[2], tf.float32)
    # Normalize every entry of res by the number of locations
    return res / num_locations


class StyeleContentModel(tf.keras.models.Model):
    """Extract the style and content feature matrices.

    The returned result has the form:
    {
        'style': {
            'block1_conv1': tf.Tensor,
            'block2_conv1': tf.Tensor,
            'block3_conv1': tf.Tensor,
            'block4_conv1': tf.Tensor,
            'block5_conv1': tf.Tensor
        },
        'content': {
            'block5_conv2': tf.Tensor
        }
    }
    """

    def __init__(self, style_layers, content_layers):
        super(StyeleContentModel, self).__init__()
        # Model that outputs the style and content layers (1 input, 6 outputs)
        self.vgg = vgg_layers(style_layers + content_layers)
        self.style_layers = style_layers
        self.content_layers = content_layers
        self.num_style_layers = len(style_layers)

    def call(self, inputs):
        # Preprocess the image: subtract the channel means, RGB -> BGR
        preprocessed_input = tf.keras.applications.vgg16.preprocess_input(inputs * 255)
        # Run the image through VGG to get the style-layer and content-layer outputs
        outputs = self.vgg(preprocessed_input)
        # Split into style outputs and content outputs
        style_outputs, content_outputs = (outputs[:self.num_style_layers],
                                          outputs[self.num_style_layers:])
        # Turn each style output into its Gram matrix
        style_outputs = [gram_matrix(i) for i in style_outputs]
        # Store the style matrices in a dict keyed by layer name
        style_dict = {style_name: value
                      for style_name, value in zip(self.style_layers, style_outputs)}
        # Store the content outputs in a dict keyed by layer name
        content_dict = {content_name: value
                        for content_name, value in zip(self.content_layers, content_outputs)}
        return {'content': content_dict, 'style': style_dict}


def style_content_loss(outputs, style_weight, content_weight, num_content_layers,
                       num_style_layers, style_targets, content_targets):
    """Combined style and content loss."""
    # Style features of the model output
    style_outputs = outputs['style']
    # Content features of the model output
    content_outputs = outputs['content']
    # Style loss: difference between the input image's and the style image's style features
    style_loss = tf.add_n([tf.reduce_mean((style_outputs[name] - style_targets[name]) ** 2)
                           for name in style_outputs.keys()])
    style_loss *= style_weight / num_style_layers
    # Content loss: difference between the input image's and the content image's content features
    content_loss = tf.add_n([tf.reduce_mean((content_outputs[name] - content_targets[name]) ** 2)
                             for name in content_outputs.keys()])
    content_loss *= content_weight / num_content_layers
    loss = style_loss + content_loss
    return loss


def total_variation_loss(image):
    """Total variation regularization, used to denoise the image."""
    x_deltas = image[:, :, 1:, :] - image[:, :, :-1, :]
    y_deltas = image[:, 1:, :, :] - image[:, :-1, :, :]
    return tf.reduce_mean(x_deltas ** 2) + tf.reduce_mean(y_deltas ** 2)


def clip_0_1(image):
    return tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)


@tf.function()
def train_step(image, total_varuation_weight, style_weight, content_weight, num_content_layers,
               num_style_layers, style_targets, content_targets, opt):
    with tf.GradientTape() as tape:
        outputs = extrator(image)
        loss = style_content_loss(outputs, style_weight, content_weight, num_content_layers,
                                  num_style_layers, style_targets, content_targets)
        loss += total_varuation_weight * total_variation_loss(image)
    grad = tape.gradient(loss, image)
    opt.apply_gradients([(grad, image)])
    image.assign(clip_0_1(image))


if __name__ == '__main__':
    # 1. Initial parameters
    # Longest edge of the images
    max_dim = 256
    # Path of the content image
    content_path = 'content/content.jpg'
    # Path of the style image
    style_path = 'style/vangogh.jpg'
    # Style weight
    style_weight = 10
    # Content weight
    content_weight = 1
    # Total variation weight
    total_varuation_weight = 1e5
    # Number of training iterations
    epochs = 101

    # 2. Load and show the images
    content_img = load_img(content_path, max_dim)
    style_img = load_img(style_path, max_dim)
    img_show(content_img, 'Content Image')
    img_show(style_img, 'Style Image')

    # 3. Choose the convolutional layers
    # Layers used for the style loss
    style_layers = [
        'block1_conv1',
        'block2_conv1',
        'block3_conv1',
        'block4_conv1',
        'block5_conv1'
    ]
    # Layer used for the content loss
    content_layers = ['block5_conv2']
    num_style_layers = len(style_layers)
    num_content_layers = len(content_layers)

    # 4. Create the style/content feature extractor
    extrator = StyeleContentModel(style_layers, content_layers)
    # Gram-matrix targets extracted from the style image
    style_targets = extrator(style_img)['style']
    # Content-feature targets extracted from the content image
    content_targets = extrator(content_img)['content']
    # Initialize the image to be optimized (start directly from the content image)
    image = tf.Variable(content_img)
    opt = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

    # 5. Training
    os.makedirs('output', exist_ok=True)
    for n in range(epochs):
        train_step(image, total_varuation_weight, style_weight, content_weight,
                   num_content_layers, num_style_layers, style_targets, content_targets, opt)
        print(n)
        if n % 10 == 0:
            img_show(image.read_value(), "Train step: {}".format(n))
            s_image = tf.squeeze(image)
            s_image = Image.fromarray(np.uint8(s_image.numpy() * 255))
            s_image.save('output/' + 'steps_' + str(n) + '.jpg')
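As a quick sanity check of what gram_matrix produces, the sketch below runs the same einsum on a made-up feature map (the 1x64x64x128 shape is purely illustrative and does not come from the script):

import tensorflow as tf

# Hypothetical NHWC feature map: batch=1, 64x64 spatial grid, 128 channels.
feat = tf.random.normal([1, 64, 64, 128])

# Same computation as gram_matrix() in main.py: channel-by-channel correlations,
# normalized by the number of spatial locations (64 * 64).
gram = tf.linalg.einsum('bijc,bijd->bcd', feat, feat) / tf.cast(64 * 64, tf.float32)
print(gram.shape)  # (1, 128, 128): one correlation value per pair of channels

For each style layer, style_content_loss compares a Gram matrix of this shape against the corresponding Gram matrix computed from the style image.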