赞
踩
一:介绍:
经典ResNet网络由何恺明团队于2015年提出,论文名为《Deep Residual Learning for Image Recognition》。
ResNet网络所要解决的问题是深度神经网络的“退化”问题,即随着神经网络搭建得越深,拟合效果反而越差,并且这个问题不是由过拟合引起的。
ResNet也称为残差网络,网络由残差块构建:
残差块由多个级联的卷积层和一个shortcut connections组成,将二者的输出值累加后,通过ReLU激活层得到残差块的输出。多个残差块可以串联起来,从而实现更深的网络。
残差块有两种设计方式
左图针对较浅的网络,如ResNet-18/34;右图针对较深的网络,又称为“bottleneck” building block,如ResNet-50/101/152,使用此方式的目的是为了降低参数数目。
论文给出了五种不同层数的resnet
ResNet-18/34对应的每个残差块的卷积kernel大小依次是3×3、3×3,ResNet-50/101/152对应的每个残差块的卷积kernel大小依次是1×1、3×3、1×1。
论文中给出了层数为34的ResNet网络结构
二:实现垃圾分类
1.准备数据集:
2.加载数据集:
class garbage_datasets(Dataset):
    """In-memory image dataset for four-way garbage classification.

    At construction time every file under ``filepath + <class folder>`` is
    opened, converted to RGB, resized to 224x224 and passed through the
    module-level ``transform``; ``__getitem__`` is then a plain list lookup.

    Args:
        filepath: Dataset root *including* the trailing separator (for
            example ``'data/train/'``). Class folder names are appended to
            it by string concatenation.
    """

    # The integer label of a class is its position in this tuple.
    CLASS_NAMES = (
        'Hazardous waste',
        'Kitchen waste',
        'Other garbage',
        'Recyclable garbage',
    )

    def __init__(self, filepath):
        self.images = []
        self.labels = []
        self.transform = transform
        for label, class_name in enumerate(self.CLASS_NAMES):
            class_dir = filepath + class_name
            for filename in tqdm(os.listdir(class_dir)):
                # The context manager closes the file handle once decoded.
                with Image.open(class_dir + '/' + filename) as raw:
                    # Grayscale / RGBA / palette files would otherwise not
                    # match the 3-channel Normalize in ``transform``.
                    image = raw.convert('RGB').resize((224, 224))
                self.images.append(self.transform(image))
                self.labels.append(label)
        self.labels = torch.LongTensor(self.labels)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        return self.images[index], self.labels[index]

    def __len__(self):
        """Return the number of loaded images."""
        # The list length is enough; no need to build an array from every
        # stored image just to read its first dimension.
        return len(self.images)
-
-
# Training split: batches are drawn with shuffle=True.
train_data = garbage_datasets('data/train/')
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

# Validation split: default (unshuffled) order.
val_data = garbage_datasets('data/val/')
val_loader = DataLoader(dataset=val_data, batch_size=batch_size)

3.构建网络:
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    The first 1x1 convolution carries the stride, and the last one expands
    the channel count to ``planes * extention``.

    Args:
        inplanes: Number of input channels.
        planes: Channel width of the two inner convolutions.
        stride: Stride of the first 1x1 convolution (defaults to 1).
        downsample: Optional module applied to the input so that the
            shortcut matches the branch output; ``None`` keeps the identity
            shortcut.

    NOTE: ReLU has no parameters, so ``state_dict`` keys are unchanged, but
    weights trained with an extra activation before the addition will give
    different outputs with this forward pass.
    """

    # Channel expansion factor of the last 1x1 convolution.
    extention = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        out_planes = planes * self.extention

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.relu = nn.ReLU()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        residual = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # No activation here: the branch must stay signed until it has been
        # added to the shortcut, otherwise it could only ever add
        # non-negative corrections.
        out = self.bn3(self.conv3(out))

        return self.relu(out + residual)
-
-
class ResNet(nn.Module):
    """ResNet backbone: stem, four residual stages, global pooling, classifier.

    Args:
        block: Residual block class. It must expose an ``extention`` class
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        layers: Number of blocks in each of the four stages.
        num_class: Number of output classes of the final linear layer.
    """

    def __init__(self, block, layers, num_class):
        super(ResNet, self).__init__()

        # Running input-channel count, updated by make_layer per stage.
        self.inplane = 64
        self.block = block
        self.layers = layers

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stage1 = self.make_layer(self.block, 64, layers[0], stride=1)
        self.stage2 = self.make_layer(self.block, 128, layers[1], stride=2)
        self.stage3 = self.make_layer(self.block, 256, layers[2], stride=2)
        self.stage4 = self.make_layer(self.block, 512, layers[3], stride=2)

        # Adaptive pooling always yields a 1x1 map, so the classifier no
        # longer requires the final feature map to be exactly 7x7
        # (i.e. a 224x224 input); for 7x7 maps the result is unchanged.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.extention, num_class)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_class)``."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.stage1, self.stage2, self.stage3, self.stage4):
            out = stage(out)

        out = torch.flatten(self.avgpool(out), 1)
        return self.fc(out)

    def make_layer(self, block, plane, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.inplane``.

        Only the first block uses ``stride`` and, when the stride or channel
        count changes, a 1x1-convolution shortcut; the remaining blocks use
        stride 1 and the identity shortcut.
        """
        out_channels = plane * block.extention

        downsample = None
        if stride != 1 or self.inplane != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplane, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        block_list = [block(self.inplane, plane, stride=stride, downsample=downsample)]
        self.inplane = out_channels
        block_list.extend(
            block(self.inplane, plane, stride=1) for _ in range(1, block_num)
        )

        return nn.Sequential(*block_list)
-
-
-
-
# Bottleneck blocks in a 3-4-6-3 stage layout with four output classes.
model = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_class=4)

4.训练模型:
def train(epoch):
    """Run one training epoch over ``train_loader`` and save the weights.

    Relies on the module-level ``model``, ``train_loader``, ``device``,
    ``optimizer`` and ``criterion``. Prints the mean per-batch loss and then
    writes the model ``state_dict`` to ``./model1.pth``.

    Args:
        epoch: Zero-based epoch index (printed as ``epoch + 1``).
    """
    model.train()
    print("epoch:", epoch + 1)

    running_loss = 0.0
    num_batches = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Average over the number of batches actually seen. The last enumerate
    # index is one less than that count and is zero for a single batch.
    if num_batches:
        print('train loss: %.3f' % (running_loss / num_batches))
    torch.save(model.state_dict(), './model1.pth')

5.验证模型:
def val():
    """Evaluate the module-level ``model`` on ``val_loader``.

    Prints the accuracy as an integer percentage and returns it as a
    fraction (correct predictions divided by samples seen).
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            scores = model(images)
            # torch.max over dim 1 returns (values, indices); the indices
            # are the predicted class ids.
            predicted = torch.max(scores.data, dim=1)[1]
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100*hits/seen))
    return hits/seen
6.测试模型:
def test(imgpath, weights_path="model.pth"):
    """Classify one image with the trained network and display the result.

    The image is resized to 224x224, run through the module-level
    ``transform`` and a freshly built ``ResNet(Bottleneck, [3, 4, 6, 3], 4)``
    loaded from ``weights_path``. The predicted class name is printed and
    drawn above the original image with matplotlib.

    Args:
        imgpath: Path of the image file to classify.
        weights_path: Checkpoint (``state_dict``) to load. The default is
            kept as ``"model.pth"``; pass the file the training run actually
            wrote if it differs.
    """
    class_names = (
        "Hazardous waste",
        "Kitchen waste",
        "Other garbage",
        "Recyclable garbage",
    )
    font = {
        'color': 'red',
        'size': 20,
        'family': 'Times New Roman',
        'style': 'italic',
    }

    # Use the GPU when present, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    o_img = Image.open(imgpath)
    # Force three channels so that the 3-channel Normalize always applies.
    o_img1 = o_img.convert('RGB').resize((224, 224))

    img = transform(o_img1).unsqueeze(0).to(device)
    print(img.shape)

    model = ResNet(Bottleneck, [3, 4, 6, 3], 4)
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model = model.to(device)
    # Inference mode: BatchNorm must use its running statistics, not the
    # statistics of this single image.
    model.eval()

    with torch.no_grad():
        output = model(img)
    _, predict = torch.max(output, dim=1)

    label = class_names[predict.item()]
    print(label)
    plt.imshow(o_img)
    plt.text(0, -6.0, label, fontdict=font)
    plt.show()

源代码:
- import torch.nn as nn
- import torch
- import numpy as np
- from torch.utils.data import DataLoader,Dataset
- from torchvision import transforms
- import torchvision
- import torch.nn.functional as F
- import torch.optim as optim
- import os
- from tqdm import tqdm
- from PIL import Image
- import matplotlib.pyplot as plt
-
# Mini-batch size shared by the training and validation loaders.
batch_size = 8

# PIL image -> float tensor, then per-channel normalisation
# (these are the widely used ImageNet mean / std values).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

torch.cuda.empty_cache()
-
class garbage_datasets(Dataset):
    """In-memory image dataset for four-way garbage classification.

    At construction time every file under ``filepath + <class folder>`` is
    opened, converted to RGB, resized to 224x224 and passed through the
    module-level ``transform``; ``__getitem__`` is then a plain list lookup.

    Args:
        filepath: Dataset root *including* the trailing separator (for
            example ``'data/train/'``). Class folder names are appended to
            it by string concatenation.
    """

    # The integer label of a class is its position in this tuple.
    CLASS_NAMES = (
        'Hazardous waste',
        'Kitchen waste',
        'Other garbage',
        'Recyclable garbage',
    )

    def __init__(self, filepath):
        self.images = []
        self.labels = []
        self.transform = transform
        for label, class_name in enumerate(self.CLASS_NAMES):
            class_dir = filepath + class_name
            for filename in tqdm(os.listdir(class_dir)):
                # The context manager closes the file handle once decoded.
                with Image.open(class_dir + '/' + filename) as raw:
                    # Grayscale / RGBA / palette files would otherwise not
                    # match the 3-channel Normalize in ``transform``.
                    image = raw.convert('RGB').resize((224, 224))
                self.images.append(self.transform(image))
                self.labels.append(label)
        self.labels = torch.LongTensor(self.labels)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        return self.images[index], self.labels[index]

    def __len__(self):
        """Return the number of loaded images."""
        # The list length is enough; no need to build an array from every
        # stored image just to read its first dimension.
        return len(self.images)
-
-
# Training split: batches are drawn with shuffle=True.
train_data = garbage_datasets('data/train/')
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

# Validation split: default (unshuffled) order.
val_data = garbage_datasets('data/val/')
val_loader = DataLoader(dataset=val_data, batch_size=batch_size)
-
-
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    The first 1x1 convolution carries the stride, and the last one expands
    the channel count to ``planes * extention``.

    Args:
        inplanes: Number of input channels.
        planes: Channel width of the two inner convolutions.
        stride: Stride of the first 1x1 convolution (defaults to 1).
        downsample: Optional module applied to the input so that the
            shortcut matches the branch output; ``None`` keeps the identity
            shortcut.

    NOTE: ReLU has no parameters, so ``state_dict`` keys are unchanged, but
    weights trained with an extra activation before the addition will give
    different outputs with this forward pass.
    """

    # Channel expansion factor of the last 1x1 convolution.
    extention = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        out_planes = planes * self.extention

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.relu = nn.ReLU()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        residual = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # No activation here: the branch must stay signed until it has been
        # added to the shortcut, otherwise it could only ever add
        # non-negative corrections.
        out = self.bn3(self.conv3(out))

        return self.relu(out + residual)
-
-
class ResNet(nn.Module):
    """ResNet backbone: stem, four residual stages, global pooling, classifier.

    Args:
        block: Residual block class. It must expose an ``extention`` class
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        layers: Number of blocks in each of the four stages.
        num_class: Number of output classes of the final linear layer.
    """

    def __init__(self, block, layers, num_class):
        super(ResNet, self).__init__()

        # Running input-channel count, updated by make_layer per stage.
        self.inplane = 64
        self.block = block
        self.layers = layers

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stage1 = self.make_layer(self.block, 64, layers[0], stride=1)
        self.stage2 = self.make_layer(self.block, 128, layers[1], stride=2)
        self.stage3 = self.make_layer(self.block, 256, layers[2], stride=2)
        self.stage4 = self.make_layer(self.block, 512, layers[3], stride=2)

        # Adaptive pooling always yields a 1x1 map, so the classifier no
        # longer requires the final feature map to be exactly 7x7
        # (i.e. a 224x224 input); for 7x7 maps the result is unchanged.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.extention, num_class)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_class)``."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.stage1, self.stage2, self.stage3, self.stage4):
            out = stage(out)

        out = torch.flatten(self.avgpool(out), 1)
        return self.fc(out)

    def make_layer(self, block, plane, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.inplane``.

        Only the first block uses ``stride`` and, when the stride or channel
        count changes, a 1x1-convolution shortcut; the remaining blocks use
        stride 1 and the identity shortcut.
        """
        out_channels = plane * block.extention

        downsample = None
        if stride != 1 or self.inplane != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplane, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        block_list = [block(self.inplane, plane, stride=stride, downsample=downsample)]
        self.inplane = out_channels
        block_list.extend(
            block(self.inplane, plane, stride=1) for _ in range(1, block_num)
        )

        return nn.Sequential(*block_list)
-
-
-
-
# Bottleneck blocks in a 3-4-6-3 stage layout with four output classes.
model = ResNet(Bottleneck, [3, 4, 6, 3], 4)

# is_available must be *called*: the bare function object is always truthy,
# which would select 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model.to(device)

# Resume from the checkpoint written by train() only when one exists, so a
# first run can start from freshly initialised weights instead of crashing.
if os.path.exists("model1.pth"):
    model.load_state_dict(torch.load("model1.pth", map_location=device))

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
-
-
def train(epoch):
    """Run one training epoch over ``train_loader`` and save the weights.

    Relies on the module-level ``model``, ``train_loader``, ``device``,
    ``optimizer`` and ``criterion``. Prints the mean per-batch loss and then
    writes the model ``state_dict`` to ``./model1.pth``.

    Args:
        epoch: Zero-based epoch index (printed as ``epoch + 1``).
    """
    model.train()
    print("epoch:", epoch + 1)

    running_loss = 0.0
    num_batches = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Average over the number of batches actually seen. The last enumerate
    # index is one less than that count and is zero for a single batch.
    if num_batches:
        print('train loss: %.3f' % (running_loss / num_batches))
    torch.save(model.state_dict(), './model1.pth')
-
def val():
    """Evaluate the module-level ``model`` on ``val_loader``.

    Prints the accuracy as an integer percentage and returns it as a
    fraction (correct predictions divided by samples seen).
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            scores = model(images)
            # torch.max over dim 1 returns (values, indices); the indices
            # are the predicted class ids.
            predicted = torch.max(scores.data, dim=1)[1]
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100*hits/seen))
    return hits/seen
-
if __name__ == '__main__':
    # Train for five epochs, validating after each one, then plot the
    # validation accuracy against the 1-based epoch number.
    acc_list, epoch_list = [], []

    for epoch in range(5):
        train(epoch)
        acc_list.append(val())
        epoch_list.append(epoch + 1)

    plt.plot(epoch_list, acc_list)
    plt.xlabel("Epoch")
    plt.ylabel("ACC")
    plt.show()
-
-

测试源码:
- from torchvision import transforms
- from PIL import Image
- import matplotlib.pyplot as plt
- import torch
- import torch.nn as nn
-
-
# PIL image -> float tensor, then per-channel normalisation
# (these are the widely used ImageNet mean / std values).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
-
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    The first 1x1 convolution carries the stride, and the last one expands
    the channel count to ``planes * extention``.

    Args:
        inplanes: Number of input channels.
        planes: Channel width of the two inner convolutions.
        stride: Stride of the first 1x1 convolution (defaults to 1).
        downsample: Optional module applied to the input so that the
            shortcut matches the branch output; ``None`` keeps the identity
            shortcut.

    NOTE: ReLU has no parameters, so ``state_dict`` keys are unchanged, but
    weights trained with an extra activation before the addition will give
    different outputs with this forward pass.
    """

    # Channel expansion factor of the last 1x1 convolution.
    extention = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        out_planes = planes * self.extention

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.relu = nn.ReLU()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(branch(x) + shortcut(x))``."""
        residual = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # No activation here: the branch must stay signed until it has been
        # added to the shortcut, otherwise it could only ever add
        # non-negative corrections.
        out = self.bn3(self.conv3(out))

        return self.relu(out + residual)
-
-
class ResNet(nn.Module):
    """ResNet backbone: stem, four residual stages, global pooling, classifier.

    Args:
        block: Residual block class. It must expose an ``extention`` class
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        layers: Number of blocks in each of the four stages.
        num_class: Number of output classes of the final linear layer.
    """

    def __init__(self, block, layers, num_class):
        super(ResNet, self).__init__()

        # Running input-channel count, updated by make_layer per stage.
        self.inplane = 64
        self.block = block
        self.layers = layers

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stage1 = self.make_layer(self.block, 64, layers[0], stride=1)
        self.stage2 = self.make_layer(self.block, 128, layers[1], stride=2)
        self.stage3 = self.make_layer(self.block, 256, layers[2], stride=2)
        self.stage4 = self.make_layer(self.block, 512, layers[3], stride=2)

        # Adaptive pooling always yields a 1x1 map, so the classifier no
        # longer requires the final feature map to be exactly 7x7
        # (i.e. a 224x224 input); for 7x7 maps the result is unchanged.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.extention, num_class)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_class)``."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.stage1, self.stage2, self.stage3, self.stage4):
            out = stage(out)

        out = torch.flatten(self.avgpool(out), 1)
        return self.fc(out)

    def make_layer(self, block, plane, block_num, stride=1):
        """Build one stage of ``block_num`` blocks and update ``self.inplane``.

        Only the first block uses ``stride`` and, when the stride or channel
        count changes, a 1x1-convolution shortcut; the remaining blocks use
        stride 1 and the identity shortcut.
        """
        out_channels = plane * block.extention

        downsample = None
        if stride != 1 or self.inplane != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplane, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        block_list = [block(self.inplane, plane, stride=stride, downsample=downsample)]
        self.inplane = out_channels
        block_list.extend(
            block(self.inplane, plane, stride=1) for _ in range(1, block_num)
        )

        return nn.Sequential(*block_list)
-
def test(imgpath, weights_path="model.pth"):
    """Classify one image with the trained network and display the result.

    The image is resized to 224x224, run through the module-level
    ``transform`` and a freshly built ``ResNet(Bottleneck, [3, 4, 6, 3], 4)``
    loaded from ``weights_path``. The predicted class name is printed and
    drawn above the original image with matplotlib.

    Args:
        imgpath: Path of the image file to classify.
        weights_path: Checkpoint (``state_dict``) to load. The default is
            kept as ``"model.pth"``; pass the file the training run actually
            wrote if it differs.
    """
    class_names = (
        "Hazardous waste",
        "Kitchen waste",
        "Other garbage",
        "Recyclable garbage",
    )
    font = {
        'color': 'red',
        'size': 20,
        'family': 'Times New Roman',
        'style': 'italic',
    }

    # Use the GPU when present, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    o_img = Image.open(imgpath)
    # Force three channels so that the 3-channel Normalize always applies.
    o_img1 = o_img.convert('RGB').resize((224, 224))

    img = transform(o_img1).unsqueeze(0).to(device)
    print(img.shape)

    model = ResNet(Bottleneck, [3, 4, 6, 3], 4)
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model = model.to(device)
    # Inference mode: BatchNorm must use its running statistics, not the
    # statistics of this single image.
    model.eval()

    with torch.no_grad():
        output = model(img)
    _, predict = torch.max(output, dim=1)

    label = class_names[predict.item()]
    print(label)
    plt.imshow(o_img)
    plt.text(0, -6.0, label, fontdict=font)
    plt.show()
-
-
if __name__ == "__main__":
    # Classify a single sample image from the test split.
    sample_image = 'data/test/Hazardous waste/2.jpg'
    test(sample_image)

最终验证集的准确率可达到70%
附几张测试成功的图片
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。