当前位置: article > 正文

阿里开源黑白图片上色算法DDColor的部署与测试并将模型转onnx后用c++推理

作者：小小林熬夜学编程 | 2024-04-24 08:11:46

踩

文章目录

阿里开源黑白图片上色算法DDColor的部署与测试并将模型转onnx后用c++推理

简介

DDColor是阿里开源的一种基于深度学习的图像上色算法，它利用卷积神经网络（CNN）对黑白图像进行上色处理。该模型由一个编码器和一个解码器组成：编码器负责提取图像的特征，解码器则根据这些特征生成颜色。DDColor模型能够处理多种类型的图像，并生成自然且逼真的颜色效果，在图像编辑、电影后期制作以及历史照片修复等领域有广泛的应用。

环境部署

下载源码

git clone https://github.com/piddnad/DDColor.git
1

安装环境

conda create -n ddcolor python=3.9
conda activate ddcolor
pip install -r requirements.txt
python3 setup.py develop
pip install modelscope
pip install onnx
pip install onnxruntime
1
2
3
4
5
6
7

下载模型

可以在 ModelScope 的 damo/cv_ddcolor_image-colorization 模型页面手动下载，
或者运行下面的脚本下载：

from modelscope.hub.snapshot_download import snapshot_download

# Download the DDColor model snapshot into a local ./modelscope cache directory.
model_dir = snapshot_download(
    'damo/cv_ddcolor_image-colorization',
    cache_dir='./modelscope',
)
print('model assets saved to %s' % model_dir)
# The model is downloaded to modelscope/damo/cv_ddcolor_image-colorization/pytorch_model.pt
1
2
3
4

测试一下

import argparse
import cv2
import numpy as np
import os
from tqdm import tqdm
import torch
from basicsr.archs.ddcolor_arch import DDColor
import torch.nn.functional as F

class ImageColorizationPipeline(object):
    """Wraps a DDColor network and colorizes BGR images with it."""

    def __init__(self, model_path, input_size=256, model_size='large'):
        """Build the DDColor network and load its weights.

        Args:
            model_path: checkpoint file; its 'params' entry is loaded as the
                state dict.
            input_size: side length of the square image fed to the network.
            model_size: 'tiny' selects the 'convnext-t' encoder, any other
                value selects 'convnext-l'.
        """
        self.input_size = input_size
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.encoder_name = 'convnext-t' if model_size == 'tiny' else 'convnext-l'

        # The decoder type is fixed here, so the single-decoder branch below
        # is only reached if this attribute is changed in code.
        self.decoder_type = "MultiScaleColorDecoder"

        # Arguments common to both decoder variants.
        shared_kwargs = dict(
            encoder_name=self.encoder_name,
            input_size=[self.input_size, self.input_size],
            num_output_channels=2,
            last_norm='Spectral',
            do_normalize=False,
        )
        if self.decoder_type != 'MultiScaleColorDecoder':
            network = DDColor(
                decoder_name='SingleColorDecoder',
                num_queries=256,
                **shared_kwargs,
            )
        else:
            network = DDColor(
                decoder_name='MultiScaleColorDecoder',
                num_queries=100,
                num_scales=3,
                dec_layers=9,
                **shared_kwargs,
            )
        self.model = network.to(self.device)

        # Weights are deserialized on the CPU; strict=False tolerates
        # missing/unexpected keys in the checkpoint.
        checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
        self.model.load_state_dict(checkpoint['params'], strict=False)
        self.model.eval()

    @torch.no_grad()
    def process(self, img):
        """Colorize one image.

        Args:
            img: 3-channel BGR image array; values are divided by 255, so
                presumably 8-bit data as returned by cv2.imread.

        Returns:
            uint8 BGR array with the same height and width as ``img``.
        """
        self.height, self.width = img.shape[:2]

        bgr_float = (img / 255.0).astype(np.float32)
        # Lightness channel at the original resolution, kept for the output.
        orig_l = cv2.cvtColor(bgr_float, cv2.COLOR_BGR2Lab)[:, :, :1]  # (h, w, 1)

        # Network input: resize -> Lab -> keep L only (a = b = 0) -> RGB.
        side = self.input_size
        small = cv2.resize(bgr_float, (side, side))
        small_l = cv2.cvtColor(small, cv2.COLOR_BGR2Lab)[:, :, :1]
        zero_ab = np.zeros_like(small_l)
        gray_lab = np.concatenate((small_l, zero_ab, zero_ab), axis=-1)
        gray_rgb = cv2.cvtColor(gray_lab, cv2.COLOR_LAB2RGB)

        # HWC -> CHW, then add the batch dimension.
        chw = gray_rgb.transpose((2, 0, 1))
        batch = torch.from_numpy(chw).float().unsqueeze(0).to(self.device)

        # Predicted ab channels (two channels, per num_output_channels=2).
        pred_ab = self.model(batch).cpu()

        # Bring ab back to the original size and recombine with the original L.
        upsampled = F.interpolate(pred_ab, size=(self.height, self.width))
        ab_hwc = upsampled[0].float().numpy().transpose(1, 2, 0)
        lab = np.concatenate((orig_l, ab_hwc), axis=-1)
        bgr = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

        return (bgr * 255.0).round().astype(np.uint8)


def main():
    """Colorize one image from the command line with a DDColor checkpoint.

    The image paths default to the values that used to be hard-coded
    ("./down.jpg" -> "./downout.jpg"), so existing invocations keep working.

    Raises:
        FileNotFoundError: the source image could not be read.
        IOError: the colorized image could not be written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='pretrain/net_g_200000.pth')
    parser.add_argument('--input_size', type=int, default=512, help='input size for model')
    parser.add_argument('--model_size', type=str, default='large', help='ddcolor model size')
    # Named so that they do not collide with abbreviations of the options above.
    parser.add_argument('--source', type=str, default='./down.jpg', help='image to colorize')
    parser.add_argument('--output', type=str, default='./downout.jpg', help='where to save the result')
    args = parser.parse_args()

    # cv2.imread signals failure by returning None instead of raising; check
    # before building the (expensive) model so the error is clear and early.
    img = cv2.imread(args.source)
    if img is None:
        raise FileNotFoundError('cannot read image: %s' % args.source)

    colorizer = ImageColorizationPipeline(
        model_path=args.model_path,
        input_size=args.input_size,
        model_size=args.model_size,
    )

    image_out = colorizer.process(img)
    # cv2.imwrite reports failure through its boolean return value.
    if not cv2.imwrite(args.output, image_out):
        raise IOError('cannot write image: %s' % args.output)


if __name__ == '__main__':
    main()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

python test.py  --model_path=./modelscope/damo/cv_ddcolor_image-colorization/pytorch_model.pt
1

看看效果

在这里插入图片描述

在这里插入图片描述
效果看起来非常的nice！

模型转onnx

import argparse
import cv2
import numpy as np
import os
from tqdm import tqdm
import torch
from basicsr.archs.ddcolor_arch import DDColor
import torch.nn.functional as F

class ImageColorizationPipeline(object):
    """DDColor wrapper that can colorize images and export the network to ONNX."""

    def __init__(self, model_path, input_size=256, model_size='large'):
        """Build the DDColor network and load its weights.

        Args:
            model_path: checkpoint file; its 'params' entry is loaded as the
                state dict.
            input_size: side length of the square image fed to the network.
            model_size: 'tiny' selects the 'convnext-t' encoder, any other
                value selects 'convnext-l'.
        """
        self.input_size = input_size
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

        if model_size == 'tiny':
            self.encoder_name = 'convnext-t'
        else:
            self.encoder_name = 'convnext-l'

        # Fixed here, so the single-decoder branch below is only reached if
        # this attribute is changed in code.
        self.decoder_type = "MultiScaleColorDecoder"

        if self.decoder_type == 'MultiScaleColorDecoder':
            self.model = DDColor(
                encoder_name=self.encoder_name,
                decoder_name='MultiScaleColorDecoder',
                input_size=[self.input_size, self.input_size],
                num_output_channels=2,
                last_norm='Spectral',
                do_normalize=False,
                num_queries=100,
                num_scales=3,
                dec_layers=9,
            ).to(self.device)
        else:
            self.model = DDColor(
                encoder_name=self.encoder_name,
                decoder_name='SingleColorDecoder',
                input_size=[self.input_size, self.input_size],
                num_output_channels=2,
                last_norm='Spectral',
                do_normalize=False,
                num_queries=256,
            ).to(self.device)

        print(model_path)

        # Weights are deserialized on the CPU; strict=False tolerates
        # missing/unexpected keys in the checkpoint.
        self.model.load_state_dict(
            torch.load(model_path, map_location=torch.device('cpu'))['params'],
            strict=False)
        self.model.eval()

    def _gray_rgb_tensor(self, img):
        """Turn a float32 BGR image into the network's grayscale-RGB input.

        The image is resized to ``input_size`` x ``input_size``, reduced to
        its Lab lightness channel (a = b = 0) and converted back to RGB.

        Returns:
            Float tensor of shape (1, 3, input_size, input_size) on
            ``self.device``.
        """
        img = cv2.resize(img, (self.input_size, self.input_size))
        img_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1]
        zeros = np.zeros_like(img_l)
        img_gray_lab = np.concatenate((img_l, zeros, zeros), axis=-1)
        img_gray_rgb = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB)

        # HWC -> CHW, then add the batch dimension.
        return torch.from_numpy(img_gray_rgb.transpose((2, 0, 1))).float().unsqueeze(0).to(self.device)

    @torch.no_grad()
    def process(self, img):
        """Colorize one image.

        Args:
            img: 3-channel BGR image array; values are divided by 255, so
                presumably 8-bit data as returned by cv2.imread.

        Returns:
            uint8 BGR array with the same height and width as ``img``.
        """
        self.height, self.width = img.shape[:2]

        img = (img / 255.0).astype(np.float32)
        # Lightness channel at the original resolution, kept for the output.
        orig_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1]  # (h, w, 1)

        tensor_gray_rgb = self._gray_rgb_tensor(img)
        # Predicted ab channels (two channels, per num_output_channels=2).
        output_ab = self.model(tensor_gray_rgb).cpu()

        # resize ab -> concat original l -> bgr
        output_ab_resize = F.interpolate(output_ab, size=(self.height, self.width))[0].float().numpy().transpose(1, 2, 0)
        output_lab = np.concatenate((orig_l, output_ab_resize), axis=-1)
        output_bgr = cv2.cvtColor(output_lab, cv2.COLOR_LAB2BGR)

        output_img = (output_bgr * 255.0).round().astype(np.uint8)

        return output_img

    @torch.no_grad()
    def export_onnx(self, img, onnx_save_path="color.onnx"):
        """Export the network to ONNX, using ``img`` as the tracing input.

        The export itself runs on the CPU. Afterwards the model is moved back
        to ``self.device`` so that ``process`` keeps working on CUDA machines
        (``nn.Module.to`` moves the module in place).

        Args:
            img: 3-channel BGR image array used to build the example input.
            onnx_save_path: destination file; defaults to "color.onnx".

        Returns:
            None.
        """
        self.height, self.width = img.shape[:2]

        img = (img / 255.0).astype(np.float32)
        tensor_gray_rgb = self._gray_rgb_tensor(img).to('cpu')

        mymodel = self.model.to('cpu')
        try:
            torch.onnx.export(mymodel,  # model to export
                              tensor_gray_rgb,  # example model input
                              onnx_save_path,  # output file path
                              export_params=True,  # store the trained weights in the file
                              opset_version=12,  # ONNX opset version to target
                              do_constant_folding=True,  # apply constant-folding optimization
                              input_names=['input'],  # name of the input tensor
                              output_names=['output'],  # name of the output tensor
                              # Only the batch axis is dynamic; height and
                              # width stay fixed at the example input's size.
                              dynamic_axes={'input': {0: 'batch_size'},
                                            'output': {0: 'batch_size'}})
        finally:
            # Undo the in-place move so the pipeline stays usable.
            self.model.to(self.device)
        return

    def expirt_onnx(self, img, onnx_save_path="color.onnx"):
        """Backward-compatible alias (original misspelling) of ``export_onnx``."""
        return self.export_onnx(img, onnx_save_path)



def main():
    """Export a DDColor checkpoint to ONNX from the command line.

    An example image is needed because the pipeline builds the tracing input
    from it; the path defaults to the previously hard-coded "./down.jpg".

    Raises:
        FileNotFoundError: the example image could not be read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_path', type=str, default='pretrain/net_g_200000.pth')
    parser.add_argument('--input_size', type=int, default=512, help='input size for model')
    parser.add_argument('--model_size', type=str, default='large', help='ddcolor model size')
    # Named so that it does not collide with abbreviations of the options above.
    parser.add_argument('--source', type=str, default='./down.jpg', help='example image used for the export')
    args = parser.parse_args()

    # cv2.imread signals failure by returning None instead of raising; check
    # before building the (expensive) model so the error is clear and early.
    img = cv2.imread(args.source)
    if img is None:
        raise FileNotFoundError('cannot read image: %s' % args.source)

    colorizer = ImageColorizationPipeline(
        model_path=args.model_path,
        input_size=args.input_size,
        model_size=args.model_size,
    )

    # The method name is misspelled in the pipeline class; it returns nothing
    # and writes the ONNX file as a side effect.
    colorizer.expirt_onnx(img)


if __name__ == '__main__':
    main()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136

python model2onnx.py  --model_path=./modelscope/damo/cv_ddcolor_image-colorization/pytorch_model.pt
1

测试一下生成的onnx模型

import onnxruntime
import cv2
import numpy as np

def colorize_image(input_image_path, output_image_path, model_path, input_size=512):
    """Colorize an image with an exported DDColor ONNX model.

    Args:
        input_image_path: image to colorize (read as 3-channel BGR).
        output_image_path: where the colorized image is written.
        model_path: path of the ONNX model.
        input_size: side length of the square network input. It has to match
            the size the model was exported with; 512 is the value this
            script used before it became a parameter.

    Raises:
        FileNotFoundError: the input image could not be read.
        IOError: the output image could not be written.
    """
    input_image = cv2.imread(input_image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if input_image is None:
        raise FileNotFoundError('cannot read image: %s' % input_image_path)
    height, width = input_image.shape[:2]

    img = (input_image / 255.0).astype(np.float32)
    # Lightness channel at the original resolution, kept for the output.
    orig_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1]  # (h, w, 1)

    # Network input: resize -> Lab -> keep L only (a = b = 0) -> RGB.
    img = cv2.resize(img, (input_size, input_size))
    img_l = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)[:, :, :1]
    zeros = np.zeros_like(img_l)
    img_gray_lab = np.concatenate((img_l, zeros, zeros), axis=-1)
    input_blob = cv2.cvtColor(img_gray_lab, cv2.COLOR_LAB2RGB)

    # Change data layout from HWC to CHW and add the batch dimension.
    input_blob = np.expand_dims(np.transpose(input_blob, (2, 0, 1)), axis=0)

    # Initialize ONNX Runtime and run inference. The input name is read from
    # the model instead of being hard-coded.
    session = onnxruntime.InferenceSession(model_path)
    input_name = session.get_inputs()[0].name
    output_blob = session.run(None, {input_name: input_blob})[0]

    # Drop the batch dimension, then change layout from CHW to HWC (ab channels).
    output_ab = output_blob[0].transpose((1, 2, 0))

    # Resize to match the input image size and recombine with the original L.
    output_ab = cv2.resize(output_ab, (width, height))
    output_lab = np.concatenate((orig_l, output_ab), axis=-1)

    # Convert LAB to BGR.
    output_bgr = cv2.cvtColor(output_lab, cv2.COLOR_LAB2BGR)

    # Convert to 8-bit explicitly (clip, scale, round) rather than handing a
    # float array to imwrite and relying on its implicit conversion.
    output_img = (np.clip(output_bgr, 0.0, 1.0) * 255.0).round().astype(np.uint8)

    # Save the colorized image; cv2.imwrite reports failure via its return value.
    if not cv2.imwrite(output_image_path, output_img):
        raise IOError('cannot write image: %s' % output_image_path)


# Input image, colorized output and ONNX model used for this check
input_image_path, output_image_path, model_path = 'down.jpg', 'downout2.jpg', 'color.onnx'

# Run the ONNX colorization end to end
colorize_image(
    input_image_path=input_image_path,
    output_image_path=output_image_path,
    model_path=model_path,
)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

python testonnx.py
1

看看效果

在这里插入图片描述
嗯，模型没有问题，下面开始用 C++ 推理。

C++ 推理

未完待续

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/小小林熬夜学编程/article/detail/478405