Authors: 云洋 (Inner Mongolia Autonomous Region Big Data Center), 张宏理 (欧尾猫科技), 周原野 (Baidu), 张艳博 (Baidu)
AI for Science Co-Creation Program
Domain | Topic | Open | Notes
---|---|---|---
CFD | Data-driven deep learning model for pollutant dispersion | Y | The dataset is provided by an external partner, and a joint implementation is possible (TBD). Dataset details: 120 animated GIFs covering 3 wind speeds × 5 release source locations × 8 wind directions. Using a CV or time-series model (architecture unrestricted), extract features from the data to obtain a pollutant dispersion model that can predict how the pollutant spreads.
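As a quick check on the dataset description, the three factors do enumerate 120 GIFs. The labels below are placeholders for illustration, not the dataset's actual naming scheme:

from itertools import product

wind_speeds = ['v1', 'v2', 'v3']                           # 3 wind speeds (placeholder labels)
sources = ['s1', 's2', 's3', 's4', 's5']                   # 5 release source locations
directions = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']  # 8 wind directions
print(len(list(product(wind_speeds, sources, directions))))  # 3 * 5 * 8 = 120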
This tutorial is written against PaddlePaddle 2.4.0. If your environment runs a different version, please follow the official installation guide to install PaddlePaddle 2.4.0 first.
import paddle
import numpy as np
import paddle.vision.transforms as T
import paddle.nn as nn
import paddle.nn.functional as F
import re
import cv2
from PIL import Image
import matplotlib.pyplot as plt
print(paddle.__version__)
# If the output shows a pink error message, run this cell again
2.4.0
%cd /home/aistudio/
/home/aistudio
# !test -d PaddleScience ||unzip /home/aistudio/PaddleScience.zip
# git clone https://github.com/PaddlePaddle/PaddleScience.git
%cd /home/aistudio/PaddleScience
%env PYTHONPATH=/home/aistudio/PaddleScience
# %export PYTHONPATH=$PWD:$PYTHONPATH
!echo $PYTHONPATH
/home/aistudio/PaddleScience
env: PYTHONPATH=/home/aistudio/PaddleScience
/home/aistudio/PaddleScience
Data-Driven Deep Learning Model for Pollutant Dispersion
!pip config set global.index-url https://pypi.mirrors.ustc.edu.cn/simple
# Uninstall scipy, then reinstall it via requirements.txt
!pip uninstall scipy -y
!pip install -r requirements.txt
import paddlescience as psci
# If the output shows a pink error message, run this cell again
This case uses the dataset for the data-driven pollutant dispersion deep learning model, provided by 欧帕提亚.
Original data: (link)
Modified data: (link)
# Unzip the mounted dataset
# %cd /home/aistudio
# !test -d pollutiondata ||unzip -o /home/aistudio/data/data198102/data1.zip -d pollutiondata
# Inspect the directory layout
# !tree -d pollutiondata
Split each GIF into PNG frames; each GIF yields 181 PNGs, so the whole process takes a while.
(The new version of the dataset has already been preprocessed.)
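Since the new dataset ships pre-split, the extraction code is not needed here, but for reference a minimal sketch using Pillow's ImageSequence looks roughly like this (paths are illustrative):

import os
from PIL import ImageSequence

def split_gif_to_png(gif_path, out_dir):
    # Save every frame of the GIF as frame_1.png, frame_2.png, ...
    os.makedirs(out_dir, exist_ok=True)
    gif = Image.open(gif_path)
    for i, frame in enumerate(ImageSequence.Iterator(gif), start=1):
        frame.convert('RGB').save(os.path.join(out_dir, 'frame_{}.png'.format(i)))

# split_gif_to_png('/home/aistudio/pollutiondata/example.gif', '/home/aistudio/demo')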
Display a few of the extracted PNG frames
# img1 = Image.open('/home/aistudio/demo/frame_1.png')
# img2 = Image.open('/home/aistudio/demo/frame_50.png')
# img3 = Image.open('/home/aistudio/demo/frame_100.png')
# img4 = Image.open('/home/aistudio/demo/frame_140.png')
# img5 = Image.open('/home/aistudio/demo/frame_180.png')
# imgSize = img1.size
# print(imgSize)
# plt.imshow(img1)
# plt.show()
# plt.imshow(img2)
# plt.show()
# plt.imshow(img3)
# plt.show()
# plt.imshow(img4)
# plt.show()
# plt.imshow(img5)
# plt.show()
Using the residuals, extract the difference between PNG frames at consecutive time steps, generate a GIF from the differences, and extract the corresponding time-series data.
# img1 = Image.open('/home/aistudio/demo/frame_159.png')
# img2 = Image.open('/home/aistudio/demo/frame_160.png')
# # Convert both images to tensors (the original converted only img1, which would fail)
# img1 = T.ToTensor()(img1)
# img2 = T.ToTensor()(img2)
# # Compute the MSE between the two frames
# mse = paddle.mean(paddle.square(img1 - img2))
# # Compute the difference image
# diff = paddle.abs(img1 - img2)
# # Convert the difference image to a NumPy array and display it
# diff = diff.numpy().transpose(1, 2, 0)
# plt.imshow(diff)
# plt.show()
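To run the same differencing over the whole sequence and actually produce the difference GIF described above, one possible sketch (frame count and paths follow the demo folder; imageio is also used later in this notebook):

import imageio

def make_diff_gif(frame_dir, n_frames, out_path):
    # Absolute difference of consecutive frames, saved as an animated GIF
    diffs = []
    for i in range(1, n_frames):
        a = np.asarray(Image.open('{}/frame_{}.png'.format(frame_dir, i)).convert('RGB'), dtype=np.int16)
        b = np.asarray(Image.open('{}/frame_{}.png'.format(frame_dir, i + 1)).convert('RGB'), dtype=np.int16)
        diffs.append(np.abs(b - a).astype(np.uint8))
    imageio.mimsave(out_path, diffs, duration=0.1)

# make_diff_gif('/home/aistudio/demo', 181, '/home/aistudio/diff.gif')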
# Pollutant frames as single-channel grayscale
class PollutionDataset(paddle.io.Dataset):
    def __init__(self, image_files, transform=None):
        self.image_files = image_files
        self.transform = transform

    def __getitem__(self, idx):
        img = Image.open(self.image_files[idx]).convert('L')
        if self.transform:
            img = self.transform(img)
        return img

    def __len__(self):
        return len(self.image_files)


def prepare_dataloader(dataset, batch_size=8, shuffle=True, num_workers=0):
    data_loader = paddle.io.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers
    )
    return data_loader


# RGB dataset
class CustomDataset(paddle.io.Dataset):
    def __init__(self, image_files, transform=None):
        self.image_files = image_files
        self.transform = transform

    def __getitem__(self, idx):
        img = Image.open(self.image_files[idx]).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img

    def __len__(self):
        return len(self.image_files)
image_files = []
train_data = []
test_data = []

# Load the frame list: frames 1-147 from the demo folder
for num in range(1, 148):
    img = '/home/aistudio/demo/frame_{}.png'.format(num)
    if num < 100:
        train_data.append(img)
    else:
        test_data.append(img)
    image_files.append(img)

dataset = CustomDataset(image_files, transform=None)
# Training set: 99 frames (1-99)
train_dataset = CustomDataset(train_data, transform=None)
# Test set: 48 frames (100-147)
test_dataset = CustomDataset(test_data, transform=None)

plt.imshow(dataset[0])
plt.show()
plt.imshow(dataset[145])
plt.show()
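Note that prepare_dataloader is defined above but unused in this demo, which indexes the dataset frame by frame. If you switch to mini-batch training, it would be wired up roughly like this (batch size is illustrative; a ToTensor transform is needed so frames can be batched):

batched_train = CustomDataset(train_data, transform=T.ToTensor())
train_loader = prepare_dataloader(batched_train, batch_size=8, shuffle=True)
for batch in train_loader:
    # Paddle may wrap a single-field sample in a list
    imgs = batch[0] if isinstance(batch, (list, tuple)) else batch
    print(imgs.shape)  # expect [8, 3, H, W]
    break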
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.utils import weight_norm


# Basic convolutional layer: conv (+ optional weight norm) -> activation -> batch norm
def create_layer(in_channels, out_channels, kernel_size, wn=True, bn=True,
                 activation=nn.ReLU, convolution=nn.Conv2D):
    assert kernel_size % 2 == 1
    layer = []
    conv = convolution(in_channels, out_channels, kernel_size, padding=kernel_size // 2)
    if wn:
        conv = weight_norm(conv)
    layer.append(conv)
    if activation is not None:
        layer.append(activation())
    if bn:
        layer.append(nn.BatchNorm2D(out_channels))
    return nn.Sequential(*layer)


# A single encoder block
def create_encoder_block(in_channels, out_channels, kernel_size, wn=True, bn=True,
                         activation=nn.ReLU, layers=2):
    encoder = []
    for i in range(layers):
        _in = out_channels
        _out = out_channels
        if i == 0:
            _in = in_channels
        encoder.append(create_layer(_in, _out, kernel_size, wn, bn, activation, nn.Conv2D))
    return nn.Sequential(*encoder)


# A single decoder block
def create_decoder_block(in_channels, out_channels, kernel_size, wn=True, bn=True,
                         activation=nn.ReLU, layers=2, final_layer=False):
    decoder = []
    for i in range(layers):
        _in = in_channels
        _out = in_channels
        _bn = bn
        _activation = activation
        if i == 0:
            _in = in_channels * 2  # channels doubled by the skip-connection concat
        if i == layers - 1:
            _out = out_channels
            if final_layer:
                _bn = False
                _activation = None
        decoder.append(create_layer(_in, _out, kernel_size, wn, _bn, _activation, nn.Conv2DTranspose))
    return nn.Sequential(*decoder)


# Full encoder
def create_encoder(in_channels, filters, kernel_size, wn=True, bn=True, activation=nn.ReLU, layers=2):
    encoder = []
    for i in range(len(filters)):
        if i == 0:
            encoder_layer = create_encoder_block(in_channels, filters[i], kernel_size, wn, bn, activation, layers)
        else:
            encoder_layer = create_encoder_block(filters[i - 1], filters[i], kernel_size, wn, bn, activation, layers)
        encoder = encoder + [encoder_layer]
    return nn.Sequential(*encoder)


# Full decoder
def create_decoder(out_channels, filters, kernel_size, wn=True, bn=True, activation=nn.ReLU, layers=2):
    decoder = []
    for i in range(len(filters)):
        if i == 0:
            decoder_layer = create_decoder_block(filters[i], out_channels, kernel_size, wn, bn, activation, layers, final_layer=True)
        else:
            decoder_layer = create_decoder_block(filters[i], filters[i - 1], kernel_size, wn, bn, activation, layers, final_layer=False)
        decoder = [decoder_layer] + decoder
    return nn.Sequential(*decoder)


# The DeepCFD-style network (UNetEx)
class UNetEx(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size=3, filters=[16, 32, 64], layers=3,
                 weight_norm=True, batch_norm=True, activation=nn.ReLU, final_activation=None):
        super().__init__()
        assert len(filters) > 0
        self.final_activation = final_activation
        self.encoder = create_encoder(in_channels, filters, kernel_size, weight_norm, batch_norm,
                                      activation, layers)
        decoders = []
        # One independent decoder per output channel
        for i in range(out_channels):
            decoders.append(create_decoder(1, filters, kernel_size, weight_norm, batch_norm,
                                           activation, layers))
        self.decoders = nn.Sequential(*decoders)

    def encode(self, x):
        tensors = []
        indices = []
        sizes = []
        for encoder in self.encoder:
            x = encoder(x)
            sizes.append(x.shape)
            tensors.append(x)
            x, ind = F.max_pool2d(x, 2, 2, return_mask=True)
            indices.append(ind)
        return x, tensors, indices, sizes

    def decode(self, _x, _tensors, _indices, _sizes):
        y = []
        for _decoder in self.decoders:
            x = _x
            tensors = _tensors[:]
            indices = _indices[:]
            sizes = _sizes[:]
            for decoder in _decoder:
                tensor = tensors.pop()
                size = sizes.pop()
                ind = indices.pop()
                # Unpooling, i.e. upsampling back to the encoder's size
                x = F.max_unpool2d(x, ind, 2, 2, output_size=size)
                x = paddle.concat([tensor, x], axis=1)
                x = decoder(x)
            y.append(x)
        return paddle.concat(y, axis=1)

    def forward(self, x):
        x, tensors, indices, sizes = self.encode(x)
        x = self.decode(x, tensors, indices, sizes)
        if self.final_activation is not None:
            x = self.final_activation(x)
        return x
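A quick smoke test of UNetEx on a random input. The pooling indices and output_size let the unpooling restore odd spatial sizes, so arbitrary frame sizes work; 64×64 here is just a convenient example, and the output keeps the input's spatial shape:

net = UNetEx(3, 3, 3)
dummy = paddle.randn([1, 3, 64, 64])  # arbitrary example size
print(net(dummy).shape)               # expect [1, 3, 64, 64]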
# Save model weights
def save_model(model, path):
paddle.save(model.state_dict(), path)
# Load model weights (the device argument is accepted but unused)
def load_model(model, path, device):
state_dict = paddle.load(path)
model.set_state_dict(state_dict)
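A minimal round-trip sketch of the two helpers. The scratch path below is illustrative, and load_model ignores its device argument:

model_a = UNetEx(3, 3, 3)
save_model(model_a, '/home/aistudio/tmp_check.pdparams')  # illustrative scratch path
model_b = UNetEx(3, 3, 3)
load_model(model_b, '/home/aistudio/tmp_check.pdparams', device=None)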
# Number of training epochs; mind GPU memory (500 epochs take about 3 hours)
epochs = 150
# Learning rate
lr = 1e-3
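The closing notes of this article suggest the fixed learning rate was a poor fit. If you want to experiment, Paddle's built-in schedulers can be passed to the optimizer in place of the float. A cosine-annealing sketch (the optimizer itself is only constructed later, once the model exists):

# Optional alternative: a cosine-annealing schedule instead of a fixed lr
scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=lr, T_max=epochs)
# optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters())
# ...and call scheduler.step() once per epoch inside train()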
The cell below demonstrates training on the demo data only; for a better model, run full training on the data under pollutiondata.
def train(model, train_dataset, criterion, optimizer, device, num_epochs):
    loss_history = []
    for epoch in range(num_epochs):
        epoch_loss = 0  # reset each epoch (the original never reset it)
        optimizer.clear_grad()
        for batch_id in range(len(train_dataset) - 1):
            # Consecutive frames form an (input, target) pair
            inputs = train_dataset[batch_id]
            outputs_true = train_dataset[batch_id + 1]
            inputs = T.ToTensor()(inputs)
            inputs = paddle.unsqueeze(inputs, 0)
            outputs_true = T.ToTensor()(outputs_true)
            outputs_true = paddle.unsqueeze(outputs_true, 0)
            # Optionally filter out the background, threshold = 0.5
            # theta = 0.5
            # inputs = nn.functional.relu(inputs - theta)
            # outputs_true = nn.functional.relu(outputs_true - theta)
            outputs = model(inputs)
            loss = criterion(outputs, outputs_true)
            if batch_id % 10 == 0:
                print('epoch:', epoch, 'batch_id:', batch_id, 'loss:', loss.numpy())
            loss.backward()
            epoch_loss += loss.item()
        # Gradients were accumulated over every frame pair; one update per epoch
        optimizer.step()
        epoch_loss /= len(train_dataset)
        loss_history.append(epoch_loss)
        print("Epoch [{}/{}], Loss: {:.8f}".format(epoch + 1, num_epochs, loss.numpy()[0]))
        # Checkpoint every 10 epochs
        if epoch % 10 == 0:
            save_model(model, '/home/aistudio/pollution_model.pdparams')
    print("Training complete.")
    return loss_history
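Note a design choice above: gradients accumulate over every frame pair and the optimizer steps once per epoch, which is effectively full-batch training. For comparison, a conventional per-pair update would look like this sketch:

def train_step(model, criterion, optimizer, inputs, targets):
    # One gradient update per frame pair, instead of one per epoch
    optimizer.clear_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()
    return float(loss)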
device = paddle.set_device('gpu' if paddle.is_compiled_with_cuda() else 'cpu')

# Optionally resume from a saved checkpoint
# load_model(model, 'pollution_model.pdparams', device)

# Alternative architectures could be swapped in here
# model = PollutionModel()
# model = CNNLSTMModel()
# model = FourcastNet()
model = UNetEx(3, 3, 3)

img_array = np.array(train_dataset[0])
# Image size and channel count
shape = img_array.shape
# Model summary
paddle.summary(model, (1, shape[2], shape[1], shape[0]))

criterion = paddle.nn.MSELoss()
# criterion = paddle.nn.CrossEntropyLoss()
optimizer = paddle.optimizer.Adam(learning_rate=lr, parameters=model.parameters())

loss_history = train(model, train_dataset, criterion, optimizer, device, epochs)
# Plot the loss curve
plt.plot(loss_history)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss')
plt.show()
def test():
    # Collected predictions
    results = []
    # Starting frame of the test sequence
    inputs = test_dataset[0]
    inputs = T.ToTensor()(inputs)
    inputs = paddle.unsqueeze(inputs, 0)
    # Supervised mode: feed ground-truth frames instead of the model's own output
    flag_supervise = True

    device = paddle.set_device('gpu' if paddle.is_compiled_with_cuda() else 'cpu')
    # model = CNNLSTMModel()
    model = UNetEx(3, 3, 3)
    load_model(model, '/home/aistudio/pollution_model.pdparams', device)

    for num in range(1, 10):
        # Predict the next frame
        outputs = model(inputs)
        outputs_np = outputs.numpy()
        outputs_np = np.squeeze(outputs_np, axis=0)       # drop the batch dimension
        outputs_np = np.transpose(outputs_np, (1, 2, 0))  # move channels last
        outputs_np = (255 * np.clip(outputs_np, 0, 1)).astype('uint8')
        # outputs_np = outputs_np.transpose([1, 2, 0])
        # outputs_np_uint8 = (outputs_np * 255).astype(np.uint8)
        results.append(outputs_np)

        if flag_supervise == False:
            # Autoregressive: the prediction becomes the next input
            inputs = outputs
        else:
            # Supervised: the next input is the real frame
            inputs = test_dataset[num + 1]
            inputs = T.ToTensor()(inputs)
            inputs = paddle.unsqueeze(inputs, 0)

    # create_gif(results, '/home/aistudio/generated_pollution.gif')
    return results

results = test()
import imageio
imageio.mimsave('/home/aistudio/generated_pollution.gif', results, duration=0.1)
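Beyond visually inspecting the GIF, a rough quantitative check is to score each prediction against a ground-truth frame. The indexing below assumes results[k] approximates test_dataset[k + 1], which matches the supervised branch of test() up to a possible off-by-one on later steps:

# Per-frame MSE between predictions and the real test frames (indexing assumed)
for k, pred in enumerate(results):
    gt = np.asarray(test_dataset[k + 1], dtype=np.float32) / 255.0
    mse = np.mean((pred.astype(np.float32) / 255.0 - gt) ** 2)
    print('frame {}: MSE = {:.6f}'.format(k + 1, mse))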
scaled_outputs = results[0]
scaled_outputs = np.squeeze(scaled_outputs)
# Convert the NumPy array to a PIL image
pil_image = Image.fromarray(scaled_outputs, mode='RGB')
new_width = 333
new_height = 332
# Resize the image to the new size (Image.ANTIALIAS is named Image.LANCZOS in newer Pillow versions)
resized_image = pil_image.resize((new_width, new_height), Image.ANTIALIAS)
# Display the resized image
resized_image.show()
Through this pollutant dispersion case, I learned a great deal about CFD, pollutant dispersion, and aerodynamics, and gained some understanding of computer science.
May PaddleScience keep growing, with more interdisciplinary fields folded into Paddle's scientific-computing ecosystem.
Improvements and optimizations:
- Train on the full dataset so the model can be optimized more precisely; the network itself also needs continued refinement.
- The current learning rate is too low, so the predictions show essentially no visible change.
Finally, thanks to 周原野 of Baidu for his help, and to the data provider 欧帕提亚 for their strong support.