赞
踩
一、 数据准备,生成lmdb文件
方案一:直接通过cmd执行以下命令:
convert_imageset.exe --resize_height=240 --resize_width=240 --shuffle --backend="lmdb" G:\ G:\caffe_python\label\label_train.txt G:\caffe_python\label\train_lmdb
convert_imageset.exe 由caffe编译时得到,路径一般为:\Build\x64\Release。resize_height & resize_width对原始图片resize到统一大小。G:\为训练图片的路径。label_train.txt为训练图片的标注数据,格式:filename label。最后生成的lmdb文件为G:\caffe_python\label\train_lmdb。同理可生成验证集lmdb文件。
train_lmdb中数据保存格式
方案二:通过Python脚本生成lmdb文件
- # -*- coding: utf-8 -*-
-
- import sys
- reload(sys)
- sys.setdefaultencoding('utf-8')
- sys.path.insert(0, 'C:/Anaconda3/envs/py27/Lib/site-packages/pycaffe')
-
- import caffe
- import lmdb
- import random
- import cv2
- import numpy as np
- from caffe.proto import caffe_pb2
- from sklearn.model_selection import train_test_split
- from Bconfig import config
-
def get_dataset(label_dir):
    """Read an annotation file and return its samples in random order.

    Each non-empty line of the file is expected to be "filename label".

    Args:
        label_dir: path to the annotation text file.

    Returns:
        list of dicts with keys 'filename' (str) and 'label' (int),
        shuffled randomly.
    """
    with open(label_dir, 'r') as f:
        annotations = f.readlines()
    random.shuffle(annotations)
    dataset = []
    for annotation in annotations:
        # split() with no argument tolerates repeated spaces/tabs between
        # the filename and the label; blank lines are skipped entirely
        # (the original split(' ') raised IndexError on them).
        parts = annotation.split()
        if not parts:
            continue
        dataset.append({'filename': parts[0], 'label': int(parts[1])})
    return dataset
-
def _write_lmdb(dataset, lmdb_path, map_size, batch_size, patch_size):
    """Encode every sample of `dataset` into an LMDB database.

    Images are read via np.fromfile + cv2.imdecode (which copes with
    non-ASCII paths on Windows), resized to patch_size x patch_size and
    stored as caffe Datum records under zero-padded 8-digit keys so the
    keys sort in insertion order.

    Args:
        dataset: list of {'filename': str, 'label': int} samples.
        lmdb_path: directory of the LMDB database to create.
        map_size: maximum database size in bytes.
        batch_size: commit the write transaction every this many records.
        patch_size: target side length for the resized square images.

    Returns:
        The number of samples written.
    """
    env = lmdb.open(lmdb_path, map_size=map_size)
    txn = env.begin(write=True)  # write transaction handle
    count = 0
    for idx, example in enumerate(dataset):
        filename = example['filename']
        label = example['label']

        image = cv2.imdecode(np.fromfile(filename, dtype=np.uint8), -1)
        resized = cv2.resize(image, (patch_size, patch_size))
        # Every LMDB record is a key/value pair: fixed-width key, Datum value.
        datum = caffe.io.array_to_datum(resized, label)
        key = '{:0>8d}'.format(idx)
        txn.put(key.encode(), datum.SerializeToString())
        print(key + ':' + filename)

        count = idx + 1
        # Commit periodically so the in-memory write buffer stays small.
        if count % batch_size == 0:
            txn.commit()
            txn = env.begin(write=True)
            print(count)

    # Flush the trailing partial batch, if any.
    if count % batch_size != 0:
        txn.commit()
        print('last batch')
        print(count)

    env.close()  # release resources when done
    return count


if __name__ == '__main__':
    train_image_path = config.train_image_path
    train_list_path = config.train_list_path
    batch_size = config.BATCH_SIZE
    patchSize = config.PatchSize
    lmdb_file = config.trainlmdb_file  # directory for the training LMDB

    dataset = get_dataset(train_list_path)
    # Hold out 10% of the samples as a validation set (fixed seed => reproducible split).
    trainDataset, testDataset = train_test_split(dataset, test_size=0.1, random_state=1)

    # One shared writer for both databases (the original duplicated ~35 lines).
    n_train = _write_lmdb(trainDataset, lmdb_file, int(1e12), batch_size, patchSize)
    n_test = _write_lmdb(testDataset, config.vallmdb_file, int(1e10), batch_size, patchSize)

    # Record the split sizes; `with` guarantees the record file is closed.
    with open(config.lmdb_record, 'w') as fw:
        fw.write('trainDataset size: %d, testDataset size: %d' % (n_train, n_test))
二、计算训练数据均值
compute_image_mean --backend="lmdb" train_lmdb mean.binaryproto
compute_image_mean.exe 由caffe编译时得到,只需计算训练集的均值。
也可以用Python脚本把上一步得到的 mean.binaryproto 均值文件转换为 numpy 的 .npy 格式,便于在Python中直接加载使用:
import sys
sys.path.insert(0, 'C:/Anaconda3/envs/py27/Lib/site-packages/pycaffe')
import caffe
import numpy as np
from Bconfig import config

# Convert a caffe binaryproto mean file into a numpy .npy mean file.
blob = caffe.proto.caffe_pb2.BlobProto()
# `with` guarantees the file handle is closed (the original leaked it).
with open(config.mean_binary, 'rb') as f:
    blob.ParseFromString(f.read())
# blobproto_to_array yields an array with a leading batch axis; drop it
# so npy_mean has the same C*H*W layout as the network input.
arr = np.array(caffe.io.blobproto_to_array(blob))
npy_mean = arr[0]
np.save(config.mean_npy, npy_mean)
以上生成的是逐像素均值,均值文件大小与图像维度保持一致,图片维度为M*N*C,则均值文件维度为M*N*C。
caffe还可以使用逐通道减均值,每个通道的均值为一个数,直接作为参数在prototxt文件中指定即可,如下图
- # Data layer using per-channel mean subtraction: each mean_value entry is
- # the mean of one input channel (BGR order, matching caffe/OpenCV data).
- layer {
- name: "InputData"
- type: "Data"
- top: "data"
- top: "label"
- transform_param {
- mirror: false
- crop_size: 240
- # one mean_value per channel, subtracted from every pixel of that channel
- mean_value: 78.3
- mean_value: 76.7
- mean_value: 73.2
- }
- data_param {
- ...
- }
- }
关于通道均值的计算,在compute_image_mean.cpp中,保存的是像素均值,同时最后会输出channel mean:
- // For each channel, accumulate the per-pixel mean over all dim = H*W
- // positions of that channel, then divide by dim when logging to get the
- // scalar channel mean (sum_blob holds the per-pixel mean image).
- for (int c = 0; c < channels; ++c) {
- for (int i = 0; i < dim; ++i) {
- mean_values[c] += sum_blob.data(dim * c + i);
- }
- LOG(INFO) << "mean_value channel [" << c << "]: " << mean_values[c] / dim;
- }
三、搭建深度学习网络结构
1.卷积层:
import caffe  # needed for caffe.NetSpec() (missing in the original snippet)
from caffe import layers as L
from caffe import params as P  # needed for P.Data.LMDB (missing in the original snippet)

n = caffe.NetSpec()
# Data layer reading the LMDB created earlier; ntop=2 exposes data + label.
# transform_param applies the mean file and a 240x240 crop on the fly.
n.data, n.label = L.Data(source=lmdb, name='InputData', backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=240, mean_file=mean_file, mirror=False))
# 3x3 convolution, 32 output channels; pad=1 with stride=1 preserves H and W.
n.conv1 = L.Convolution(n.data, kernel_size=3, stride=1, pad=1, num_output=32,
                        weight_filler=dict(type='xavier'),
                        bias_term=True,
                        bias_filler=dict(type='constant'),
                        name='conv')
2.激励层ReLU:
n.relu = L.ReLU(n.conv, in_place=True, name='conv_relu')  # NOTE(review): presumably should take the conv output defined earlier (n.conv1) — confirm
in_place字段为True,表示其top和bottom是一样的情况。
3.池化层:
- # Max pooling over 4x4 windows with stride 3.
- n.pool_max = L.Pooling(n.relu1, pool=P.Pooling.MAX, kernel_size=4, stride=3, pad=0,
- name='pool_max')
- # Average pooling over a fixed 12x12 window.
- n.pool_ave = L.Pooling(n.conv_out, pool=P.Pooling.AVE, kernel_size=12, stride=1, pad=0,
- name='pool_ave')
- # Global average pooling: one value per channel regardless of input size.
- n.global_ave = L.Pooling(n.dense3, pool=P.Pooling.AVE, global_pooling=True,
- name='pool_global_ave')
caffe中卷积层输出的feature map尺寸计算方式为:
out_size = (in_size + 2*pad - kernel_size) / stride +1
池化层输出的feature map尺寸计算方式为:
out_size = ceil [ (in_size + 2*pad - kernel_size) / stride ]+1
其计算方式与tensorflow略有不同,caffe中卷积和池化层的输出尺寸计算方式也不一样。详细可查阅caffe源码“caffe/layers/conv_layer.cpp” 和“caffe/layers/pooling_layer.cpp”
4.BatchNorm层:
# BatchNorm followed by a Scale layer: caffe's BatchNorm only normalizes,
# so the learnable per-channel gamma/beta come from Scale(bias_term=True).
n.bachnorm = L.BatchNorm(n.pool1, include=dict(phase=caffe.TRAIN), in_place=True,
                         batch_norm_param=dict(moving_average_fraction=0.9), name='bn')
# Fix: the original passed n.bachnorm1, which is never defined; the Scale
# layer must consume the BatchNorm output created just above.
n.scale_bn = L.Scale(n.bachnorm, scale_param=dict(bias_term=True), in_place=True, name='bn_scale')
5.全连接层(fully_connected):
- # Fully-connected layer mapping the previous blob to class_num logits.
- n.innerP = L.InnerProduct(n.scale_bn, num_output=class_num,
- weight_filler=dict(type='xavier'),
- bias_filler=dict(type='constant',value=0),
- name='inner_product')
6.Softmax层:
n.loss = L.SoftmaxWithLoss(n.innerP, n.label)  # softmax + cross-entropy loss in a single layer
7.Accuracy层:
n.acc = L.Accuracy(n.innerP, n.label, accuracy_param=dict(top_k=1))  # top-1 classification accuracy
将网络结构写入到prototxt文件,即caffe格式的网络结构搭建文件。将生成好的.prototxt文件拷入到http://ethereon.github.io/netscope/#/editor,即可查看网络结构图。
数据输入网络时,L.Data输入参数即为之前生成的lmdb格式数据,若要对图片采取一些额外的在线数据增强,或针对已生成的lmdb数据改变增强方式,可在data_layer.cpp中修改源码(位于caffe目录下.\src\caffe\layers),针对每一个batch读入的数据进行在线数据增强等处理。具体修改方式可见https://blog.csdn.net/qq295456059/article/details/53494612。
四、Solver文件
利用Solver文件配置相关训练及测试参数。
from caffe.proto import caffe_pb2
from Bconfig import config

solver_file = config.solver_file            # where the generated solver prototxt goes

sp = caffe_pb2.SolverParameter()

# Networks: the train/val prototxt files produced in the previous step.
sp.train_net = config.train_proto
sp.test_net.append(config.val_proto)

# Evaluation schedule.
sp.test_interval = 1405                     # run the test net every 1405 iterations
sp.test_iter.append(157)                    # batches evaluated per test pass

# Optimization hyper-parameters.
sp.max_iter = 210750                        # total training iterations
sp.base_lr = 0.001                          # initial learning rate
sp.momentum = 0.9                           # SGD momentum coefficient
sp.weight_decay = 5e-4                      # L2 regularization strength
sp.lr_policy = 'step'                       # multiply lr by `gamma` every `stepsize` iters
sp.stepsize = 70250
sp.gamma = 0.1
sp.type = "SGD"                             # optimizer type

# Logging and checkpointing.
sp.display = 1405                           # log every 1405 iterations
sp.snapshot = 1405                          # checkpoint every 1405 iterations
sp.snapshot_prefix = './model/BeltClassify' # checkpoint filename prefix
sp.solver_mode = caffe_pb2.SolverParameter.GPU

# Serialize the parameters as a text-format prototxt.
with open(solver_file, 'w') as f:
    f.write(str(sp))
五、训练模型
import caffe

# Select GPU 0 and build the solver from the prototxt written earlier.
caffe.set_device(0)
caffe.set_mode_gpu()
solver = caffe.SGDSolver('G:/Belt_CaffeP/prototxt/solver.prototxt')

test_iter = 157        # test batches per evaluation pass
test_interval = 1405   # training iterations per "epoch"
epoch_num = 150

for i in range(epoch_num):
    # --- training: single-step updates so per-iteration metrics can be read ---
    for j in range(test_interval):
        solver.step(1)  # one forward/backward pass + parameter update
        loss_train = solver.net.blobs['loss'].data
        acc_train = solver.net.blobs['acc'].data
        print('epoch %d %d/%d: loss_train: %.4f, accuracy_train: %.4f' %(i, j, test_interval, loss_train, acc_train))

    # --- evaluation on the test net ---
    for test_i in range(test_iter):
        solver.test_nets[0].forward()  # test net
        loss_test = solver.test_nets[0].blobs['loss'].data
        acc_test = solver.test_nets[0].blobs['acc'].data
        print('epoch %d %d/%d: loss: %.4f, accuracy: %.4f' %(i, test_i, test_iter, loss_test, acc_test))

# Fix: the original ended with `f.close()` although no file `f` was ever
# opened in this script, which raised NameError after training; removed.
以上可得到每一轮迭代时训练集和测试集的loss及accuracy。
六、测试图片
方法一:直接调用opencv 读入的图像
- # Load the trained net in TEST phase (deploy prototxt + caffemodel weights).
- net = caffe.Net(prototxt, model, caffe.TEST)
- # np.fromfile + imdecode copes with non-ASCII paths; -1 keeps channels as stored (BGR).
- img_bgr = cv2.imdecode(np.fromfile('XXX.jpg', dtype=np.uint8), -1)
- image = img_bgr - mean  # subtract the mean
- input_img = image.transpose((2,0,1))  # H*W*C -> C*H*W, caffe's blob layout
- net.blobs['data'].data[...] = input_img
- output = net.forward()
- prob = output['prob'][0]  # 'prob' is the output of the last layer in the prototxt
- label_pre = prob.argsort()[-1]  # index of the highest-probability class
-
方法二:caffe加载图像
# Load the trained net in TEST phase (deploy prototxt + caffemodel weights).
net = caffe.Net(prototxt, model, caffe.TEST)
image = caffe.io.load_image('XXX.jpg')  # RGB, float values in [0, 1]

# Preprocessor sized from the net's input blob shape.
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))  # H*W*C --> C*H*W
transformer.set_raw_scale('data', 255)      # rescale [0,1] -> [0,255]
transformer.set_mean('data', np.array([mean1, mean2, mean3]))  # per-channel mean
transformer.set_channel_swap('data', (2,1,0))  # RGB -> BGR for caffe

# Fix: the original passed an undefined name `img`; the loaded image
# variable is `image`.
net.blobs['data'].data[...] = transformer.preprocess('data', image)
output = net.forward()
prob = output['prob'][0]
label_pre = np.argsort(-prob)[0]  # class with the highest probability
caffe.io.load_image加载的图片为RGB格式,0~1(float),而caffe中图像为BGR格式,图像存储范围[0, 255],因此需转换维度空间,以及取值缩放到0~255。transformer中不考虑设置的图像变换顺序,transformer.preprocess 函数中写明了(1)set_transpose (2)channel_swap (3)raw_scale (4)减mean。
而opencv 读取的图像即为BGR格式,范围为0~255,无需做变换。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。