VAE-BO思路整理_vae模型训练

作者：正经夜光杯 | 2024-08-05 15:17:58

踩

vae模型训练

一、VAE模型训练

1.1 训练集

1.1.1 生成自定大小的矩阵（二进制字符串对应的矩阵）（扩充后）

1.1.2 根据矩阵生成对应像素的训练图片

1.2 自定义与加载训练集

# Collect the full path of every entry found in the training directory.
train_images.extend(
    os.path.join(train_dir, entry) for entry in os.listdir(train_dir)
)

# Image preprocessing pipeline applied to every training image
img_transform = transforms.Compose([
    transforms.ToTensor(),
    # Normalize computes (x - 0.5) / 0.5, which rescales data in [0, 1]
    # to the range [-1, 1]
    transforms.Normalize(mean=0.5, std=0.5)
])
# Custom training dataset
class MyTrainset(Dataset):
    """Dataset serving the image files listed in ``train_images``.

    Every item is opened with PIL, converted to mode ``'1'``, resized to
    ``(IMG_W, IMG_H)`` and, when a transform is configured, passed through it.
    """

    # Class-level default; each instance rebinds ``self.imgs`` in __init__.
    imgs = []

    def __init__(self, transform=img_transform, target_transform=None):
        """Remember the image path list and the optional transforms.

        Args:
            transform: Callable applied to each loaded image; ``None``
                returns the resized PIL image unchanged.
            target_transform: Stored on the instance; not used by this class.
        """
        self.target_transform = target_transform
        self.transform = transform
        self.imgs = train_images

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.imgs)

    def __getitem__(self, index):
        """Load the image at ``index`` and return it after preprocessing."""
        path = self.imgs[index]
        picture = Image.open(path).convert('1').resize((IMG_W, IMG_H))
        if self.transform is None:
            return picture
        return self.transform(picture)
trainSet = MyTrainset()  # instantiate the custom dataset
dataloader = DataLoader(dataset=trainSet, batch_size=batch_size, shuffle=True) # batch and shuffle the dataset for training
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

1.2 VAE网络代码编写

1.2.1 定义网络层结构

class VAE(nn.Module):
    """Fully connected variational autoencoder for flattened images.

    The encoder maps an ``input_dim`` vector through one hidden layer to the
    mean and log-variance of a ``latent_dim`` Gaussian; the decoder maps a
    latent sample back to ``input_dim`` values squashed by ``tanh``.

    Args:
        input_dim: Length of each flattened input vector (default 1296).
        hidden_dim: Width of the single hidden layer (default 512).
        latent_dim: Size of the latent code. When ``None`` the module-level
            ``z_dim`` setting is used, matching the original behaviour.
    """

    def __init__(self, input_dim=1296, hidden_dim=512, latent_dim=None):
        super().__init__()
        if latent_dim is None:
            # Fall back to the module-level setting so VAE() keeps working.
            latent_dim = z_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc21 = nn.Linear(hidden_dim, latent_dim)  # mean head
        self.fc22 = nn.Linear(hidden_dim, latent_dim)  # log-variance head
        self.fc3 = nn.Linear(latent_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, input_dim)

    def encode(self, x):
        """Return ``(mu, logvar)`` for a batch of flattened inputs."""
        h1 = F.leaky_relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def decode(self, z):
        """Map latent codes to reconstructions with values in [-1, 1]."""
        dh1 = F.leaky_relu(self.fc3(z))
        # torch.tanh replaces the deprecated F.tanh alias.
        return torch.tanh(self.fc4(dh1))

    def reparametrize(self, mu, logvar):
        """Sample ``z = mu + eps * std`` with ``eps`` drawn from N(0, 1).

        The noise is created with ``randn_like`` so it always lives on the
        same device (and has the same dtype) as ``logvar``; choosing the
        device from ``torch.cuda.is_available()`` broke CPU models on
        machines that happen to have a GPU.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for a batch ``x``."""
        mu, logvar = self.encode(x)
        z = self.reparametrize(mu, logvar)
        return self.decode(z), mu, logvar
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

1.2.2 定义损失函数

def loss_function(recon_x, x, mu, logvar):
    """Return the reconstruction loss plus the KL term for one batch.

    Args:
        recon_x: Decoder output for the batch.
        x: Original inputs the reconstruction is compared against.
        mu: Latent means produced by the encoder.
        logvar: Latent log-variances produced by the encoder.

    Returns:
        ``reconstruction_function(recon_x, x)`` added to
        ``-0.5 * sum(1 + logvar - mu^2 - exp(logvar))``.
    """
    # Module-level criterion defined elsewhere; the original note labels it MSE.
    recon_loss = reconstruction_function(recon_x, x)
    # Element-wise 1 + logvar - mu^2 - exp(logvar), built out of place.
    kld_terms = (mu.pow(2) + logvar.exp()).neg() + 1 + logvar
    kld = kld_terms.sum() * -0.5
    return recon_loss + kld
1
2
3
4
5

1.2.3 定义优化器

# Adam optimizer over all model parameters, using the configured learning rate
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# form_results() is defined elsewhere; it supplies the four output locations
# used by the training loop (presumably creating the directories — confirm)
saved_model_path, log_path, image_ori_path, image_recon_path = form_results()
1
2

1.2.4 核心训练过程

# Number of full batches per epoch; loop-invariant, so it is computed once.
all_batchs = int(len(dataloader.dataset) / batch_size)

for epoch in range(n_epochs):
    model.train()
    train_loss = 0
    for batch_idx, data in enumerate(dataloader):
        # Flatten every image in the batch to a single vector.
        img = data.view(data.size(0), -1)
        if torch.cuda.is_available():
            img = img.cuda()
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(img)
        loss = loss_function(recon_batch, img, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch,
                batch_idx * len(img),
                len(dataloader.dataset), 100. * batch_idx / len(dataloader),
                loss.item() / (len(img) * IMG_H * IMG_W)))
        if batch_idx + 1 == all_batchs:
            # On the last full batch, save up to 16 originals and their
            # reconstructions so progress can be inspected per epoch.
            ori_data = to_img(img[:16].data)
            save_image(ori_data, image_ori_path + f'ori_image_{epoch}_{batch_idx}.png')
            recon_data = to_img(recon_batch[:16].data)
            save_image(recon_data, image_recon_path +
                       f'recon_image_{epoch}_{batch_idx}.png')

    # Epoch loss normalized per pixel over the whole dataset.
    epoch_loss = train_loss / (len(dataloader.dataset) * IMG_H * IMG_W)
    print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, epoch_loss))
    # Append "<epoch>    <loss>" to the loss log; the with-block closes the
    # file, so no explicit close() is needed.
    with open(log_path + 'loss.data', 'a') as file:
        file.write(f"{epoch}    {epoch_loss}\n")

torch.save(model.state_dict(), saved_model_path + 'vae.pth')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

1.3 模型测试

1.3.1 模型测试精度对比

# Change this path to select which trained model is tested
path_model = results_path + '/2022_11_11_15_44_54_(10_0.001_32_500_36_36)_Adversarial_Autoencoder/Saved_models/vae.pth'
# Instantiate the network architecture
model = VAE()
# Load the weights. map_location='cpu' lets a checkpoint saved during a GPU
# run be loaded on a CPU-only machine; the freshly built model is on the CPU.
model.load_state_dict(torch.load(path_model, map_location='cpu'), strict=True)
1
2
3
4
5
6

测试结果
在这里插入图片描述
（第一张图为刚开始训练时的单通道图像，第二张为训练结束解码后的图像，第三张为原图）
及误差分析对比图

（从上到下分别为不同输出维度和训练批大小对应模型的图像，即 z10_b32、z8_b32、z6_b32、z10_b64、z8_b64、z6_b64，可见精度最高的是输出维度为10、批大小为32的模型）

1.3.2 随机张量生成

1.随机生成一定数量的张量

random_sample = torch.randn(1, 10) # two size arguments give a 1x10 tensor; four arguments would mean [batch, channel, height, width]
1

2.输入模型进行解码，得到解码后的张量值
3.将张量截断（clamp）到[0,1]区间：小于0的值取0，大于1的值取1

# NOTE(review): decode() ends in tanh, so raw outputs lie in [-1, 1]; clamp
# sets every negative value to 0 instead of rescaling — confirm this is intended.
tensor_decode = model.decode(random_sample).clamp(0, 1) # clipped to [0, 1]; torch.Size([1, 1296])
1

4.矩阵压缩，将原先扩大的矩阵进行还原

def transferMatrix(matrix_demo, broad):
    """Shrink an enlarged 0/1 matrix back to one bit per ``broad`` x ``broad`` cell.

    The matrix is split into square cells of side ``broad``. Each cell
    becomes 1 when the mean of its entries is at least 0.5 and 0 otherwise.

    Args:
        matrix_demo: 2-D array-like exposing ``.shape`` and ``[row, col]``
            indexing (for example a NumPy array).
        broad: Side length of each cell, i.e. the enlargement factor.

    Returns:
        A flat list of 0/1 ints in row-major cell order, with
        ``(rows // broad) * (cols // broad)`` entries.
    """
    height = int(matrix_demo.shape[0] / broad)
    width = int(matrix_demo.shape[1] / broad)
    cell_area = broad * broad
    new_list = []
    for cell_row in range(height):
        for cell_col in range(width):
            count = 0
            # The cell spans ``broad`` rows and columns; the previous
            # hard-coded 6 was only correct when broad == 6.
            for row in range(cell_row * broad, (cell_row + 1) * broad):
                for col in range(cell_col * broad, (cell_col + 1) * broad):
                    count += matrix_demo[row, col]
            new_list.append(0 if count / cell_area < 0.5 else 1)
    return new_list
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

5.张量转换为二进制字符串（遍历二维矩阵）

# Excerpt from inside a loop that is not shown here; ``i`` is that loop's index.
list_tensor_decode = tensor_decode.tolist()
path_init = log_path + 'test_{}.npy'.format(i)
# np.asarray replaces np.mat, which NumPy 2.0 removed; a plain ndarray offers
# the same .shape and [row, col] indexing that transferMatrix relies on.
matrix = np.asarray(list_tensor_decode).reshape(36, 36)
trans_matrix = transferMatrix(matrix, 6)  # step 4: compress the enlarged matrix back to its bit list
np.save(path_init, trans_matrix)
print(np.load(path_init))
1
2
3
4
5
6
7

1.3.3 张量解码测试

# 1. Build the latent tensor. Two alternatives are shown; the second
#    assignment overrides the first, so keep only the one you need.
# 1.1.1 Random generation
random_sample = torch.randn(1, 10)  # torch.Size([1, 10])
# 1.1.2 Convert an existing array to a tensor. float32 is requested
#       explicitly because a NumPy float array would otherwise become
#       float64 and not match the model's float32 weights.
random_sample = torch.tensor(arr, dtype=torch.float32)
1
2
3
4
5

保存在同一目录下

二、贝叶斯优化

2.1 基本代码编写

2.1.1 定义需要优化的目标函数

def black_box_function(A, B, C, D, E, F, G, H, I, J):
    """Synthetic ten-parameter objective for the optimizer to maximize.

    The formula is user-defined and only serves to test how well Bayesian
    optimization converges to an extremum; treat its internals as unknown
    to the optimizer.
    """
    growth = np.exp(A + log(B))
    poly = C ** 2 - D ** 3
    damp_ef = np.exp(E + F)
    cube_g = G ** 3
    damp_h = np.exp(H)
    tail = math.sqrt(I) * J
    # Combine strictly left to right, like a single chained expression.
    return growth * poly / damp_ef * cube_g / damp_h * tail
1
2
3
4
5
6
7
8
9

2.1.2 定义优化函数（优化器）

def opt_function():
    """Run Bayesian optimization on ``black_box_function`` and log the best point.

    The optimizer searches the box given by ``pbounds`` for 100 iterations
    with the expected-improvement acquisition function, then appends the
    best result to ``./Test_logs.json`` as one JSON object per line.

    Returns:
        ``optimizer.max``, the best target value and its parameters.
    """
    import json  # local import: only needed for the log record

    # Search bounds for each input parameter
    pbounds = {'A': (-3, 3), 'B': (0.1, 3), 'C': (-3, 3), 'D': (-3, 3), 'E': (-3, 3), 'F': (-3, 3), 'G': (-3, 3),
               'H': (-3, 3), 'I': (1, 3), 'J': (-3, 3)}

    # Target value returned by the black-box function
    def black_box(A, B, C, D, E, F, G, H, I, J):
        return black_box_function(
            A=A, B=B, C=C, D=D, E=E, F=F, G=G, H=H, I=I, J=J
        )

    # Optimizer that maximizes the target value
    optimizer = BayesianOptimization(
        f=black_box,
        pbounds=pbounds,
        verbose=2,  # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
        random_state=1  # random seed
    )
    optimizer.maximize(
        init_points=0,  # number of initial points requested
        n_iter=100,  # number of optimization iterations
        acq="ei"  # acquisition function: EI (Expected Improvement)
    )
    best = optimizer.max
    # Save the log as real JSON, one record per line. Writing str(dict)
    # produced a Python repr that JSON parsers reject, and appended records
    # ran together. default=float converts numeric scalars json cannot
    # serialize on its own.
    with open('./Test_logs.json', 'a', encoding='utf-8', errors='replace') as f:
        f.write(json.dumps(best, default=float) + "\n")
    return best
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

2.2 黑盒函数

2.2.1 思路

具体思路：黑盒函数返回的目标值是贝叶斯优化的目标，即热导率大小。先判断MD计算是否完毕，完毕后运行thermal脚本计算热导率；再判断热导率文件是否生成、文件内是否有数字，确认后才返回该值。（设置随机种子，让选取的参数保持一致，预防断电后MD失败）

1.贝叶斯优化出来的参数转为数组、数组转为张量 √
2.返回的值大小为读取的热导率大小，文件命名方式为输入的参数 √
3.追踪每次推荐的参数数组（debug后发现是在x_probe中，x_probe格式为字典形式）√
4.将VAE_test部分函数复制到VAE_model中并存入bayesian_optimization.py同级目录
5.在bayesian_optimization.py同级目录中的__init__.py中添加需要引入的函数，以便调用 √

2.2.2 PPE链结构分子重建

1.car文件生成
demo已完成，后续工作将保存的文件用return 地址的形式返回 √
2.car文件内原子排序 √
3.根据car文件生成mdf文件 √
1.ctrl+R 批量替换
4.将car文件变为正确格式 √
5.data文件生成 √
msi2lmp.exe 1111111111111111 -class 2 -frc ./pcff.frc -i
6.复制in文件 √
7.编写脚本生成单体长度为36的结构 √
8.lammps计算热导率 √
mpiexec -np 4 lmp_mpi -in TC.in
9.black_function函数修改返回值 √
10.实现全自动

1.首次运行随机产生张量
2.调用VAE模型进行解码
3.调用脚本将转码结果转换为字符串
4.调用脚本生成对应的data文件
5.运行MD后，调用脚本计算热导率
6.黑盒函数返回参数列表对应文件位置中的热导率值（批量解码测试集数据，确定pbounds设置范围）
7.贝叶斯网络推荐参数
重复2~7的过程

实验结果

在这里插入图片描述

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/正经夜光杯/article/detail/932943