resnet50网络实现垃圾分类_resnet50垃圾分类

作者：盐析白兔 | 2024-04-06 15:28:30

踩

resnet50垃圾分类

一：介绍：

经典的ResNet网络由何恺明团队于2015年提出，论文名为《Deep Residual Learning for Image Recognition》。

ResNet网络所要解决的问题是深度神经网络的“退化”问题，即随着神经网络搭建得越深，拟合效果反而越差，并且这个问题不是由过拟合引起的。

ResNet也称为残差网络，网络由残差块构建：

残差块由多个级联的卷积层和一个shortcut connections组成，将二者的输出值累加后，通过ReLU激活层得到残差块的输出。多个残差块可以串联起来，从而实现更深的网络。

残差块有两种设计方式

左图针对较浅的网络，如ResNet-18/34；右图针对较深的网络，又称为“bottleneck” building block，如ResNet-50/101/152，使用这种方式的目的是降低参数数目。

论文给出了五种不同层数的resnet

ResNet-18/34对应的每个残差块的卷积kernel大小依次是3*3、3*3，ResNet-50/101/152对应的每个残差块的卷积kernel大小依次是1*1、3*3、1*1。

论文中给出了层数为34的ResNet网络结构

二：实现垃圾分类

1.准备数据集：

2.加载数据集：


class garbage_datasets(Dataset):
    """In-memory image dataset for the four garbage categories.

    Every file found under ``filepath/<class name>/`` is opened, resized to
    224x224, passed through the module-level ``transform`` and stored in
    ``self.images``; the matching integer label (the position of the class
    name in ``CLASS_NAMES``) is stored in ``self.labels``.
    """

    # Sub-directory names; the position of a name is its integer class label.
    CLASS_NAMES = ('Hazardous waste', 'Kitchen waste', 'Other garbage',
                   'Recyclable garbage')

    def __init__(self, filepath):
        """Load and preprocess every image below ``filepath``.

        Args:
            filepath: Directory that contains one sub-directory per class,
                e.g. ``'data/train/'`` (a trailing slash is optional).
        """
        self.images = []
        self.labels = []
        self.transform = transform
        for label, class_name in enumerate(self.CLASS_NAMES):
            class_dir = os.path.join(filepath, class_name)
            for filename in tqdm(os.listdir(class_dir)):
                # The context manager closes the file once convert()/resize()
                # have produced an independent copy of the pixels.
                with Image.open(os.path.join(class_dir, filename)) as image:
                    # Force three channels so grayscale/RGBA files do not
                    # break a 3-channel normalization in ``transform``.
                    image = image.convert('RGB').resize((224, 224))
                self.images.append(self.transform(image))
                self.labels.append(label)
        self.labels = torch.LongTensor(self.labels)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        return self.images[index], self.labels[index]

    def __len__(self):
        """Return the number of loaded samples."""
        # len() of the list is O(1); there is no need to materialize all
        # images as one NumPy array just to read its first dimension.
        return len(self.images)
 
 
# Training split; shuffling is requested for its loader.
train_data = garbage_datasets('data/train/')
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

# Validation split; no shuffling is requested.
val_data = garbage_datasets('data/val/')
val_loader = DataLoader(dataset=val_data, batch_size=batch_size)

3.构建网络：


class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The block outputs ``planes * extention`` channels.
    """

    # Ratio between the block's output channels and ``planes``.
    extention = 4

    def __init__(self, inplanes, planes, stride, downsample=None):
        """Create the three convolution / batch-norm pairs.

        Args:
            inplanes: Number of input channels.
            planes: Number of channels of the two inner convolutions.
            stride: Stride of the first 1x1 convolution.
            downsample: Optional module applied to the input to produce the
                shortcut; ``None`` uses the input unchanged.
        """
        super(Bottleneck, self).__init__()
        out_planes = planes * self.extention

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.relu = nn.ReLU()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv stages, add the shortcut and apply ReLU."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # NOTE(review): this ReLU runs *before* the shortcut is added, so the
        # main branch is non-negative; reference ResNet implementations
        # activate only after the addition. Kept as is so that existing
        # checkpoints keep producing the same outputs -- confirm intent.
        out = self.relu(self.bn3(self.conv3(out)))

        return self.relu(out + shortcut)
 
 
class ResNet(nn.Module):
    """ResNet backbone (stem, four residual stages) with a linear classifier."""

    def __init__(self, block, layers, num_class):
        """Build the network.

        Args:
            block: Residual block class; it must expose an ``extention``
                attribute and accept ``(inplanes, planes, stride=...,
                downsample=...)``.
            layers: Number of blocks in each of the four stages.
            num_class: Number of output classes of the final linear layer.
        """
        super(ResNet, self).__init__()

        # Channel count fed to the next block; updated by make_layer().
        self.inplane = 64
        self.block = block
        self.layers = layers

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stage1 = self.make_layer(self.block, 64, layers[0], stride=1)
        self.stage2 = self.make_layer(self.block, 128, layers[1], stride=2)
        self.stage3 = self.make_layer(self.block, 256, layers[2], stride=2)
        self.stage4 = self.make_layer(self.block, 512, layers[3], stride=2)

        # Adaptive pooling always yields a 1x1 map, so inputs other than
        # 224x224 work too; for 224x224 inputs (7x7 final map) it is the same
        # as a 7x7 average pool. It has no parameters, so checkpoints still load.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.extention, num_class)

    def forward(self, x):
        """Return the class scores, one row per input image."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        out = self.stage1(out)
        out = self.stage2(out)
        out = self.stage3(out)
        out = self.stage4(out)

        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return self.fc(out)

    def make_layer(self, block, plane, block_num, stride=1):
        """Stack ``block_num`` blocks into one stage.

        Only the first block receives ``stride`` and, when the shortcut shape
        would not match the block output, a 1x1-conv + batch-norm projection.

        Args:
            block: Residual block class.
            plane: Inner channel count passed to every block of the stage.
            block_num: Number of blocks in the stage.
            stride: Stride of the first block.

        Returns:
            An ``nn.Sequential`` holding the blocks of the stage.
        """
        out_planes = plane * block.extention

        downsample = None
        if stride != 1 or self.inplane != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplane, out_planes, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        block_list = [block(self.inplane, plane, stride=stride, downsample=downsample)]
        self.inplane = out_planes

        # The remaining blocks keep the resolution and use identity shortcuts.
        for _ in range(1, block_num):
            block_list.append(block(self.inplane, plane, stride=1))

        return nn.Sequential(*block_list)
 
 
 
 
# Bottleneck blocks with a [3, 4, 6, 3] stage layout and four output classes.
model = ResNet(block=Bottleneck, layers=[3, 4, 6, 3], num_class=4)

4.训练模型：


def train(epoch):
    """Train the global ``model`` for one pass over ``train_loader``.

    Prints the mean batch loss and then saves the model's ``state_dict`` to
    ``./model1.pth``.

    Args:
        epoch: Zero-based epoch index, used only for the progress message.
    """
    model.train()
    print("epoch:", epoch + 1)
    running_loss = 0.0
    num_batches = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Average over the number of batches actually processed. Dividing by the
    # last batch index would under-count by one and fail for 0 or 1 batches.
    mean_loss = running_loss / num_batches if num_batches else float('nan')
    print('train loss: %.3f' % mean_loss)
    torch.save(model.state_dict(), './model1.pth')

5.验证模型：


def val():
    """Evaluate the global ``model`` on ``val_loader``.

    Prints the accuracy as a whole-number percentage and returns it as a
    fraction (correct predictions divided by the number of samples).
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            scores = model(images)
            # Index of the highest score per row is the predicted class.
            predicted = torch.max(scores.data, dim=1)[1]
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * hits / seen))
    return hits / seen

6.测试模型：


def test(imgpath, weights_path="model.pth"):
    """Classify one image and display it with the predicted label.

    Args:
        imgpath: Path of the image file to classify.
        weights_path: Path of the ``state_dict`` checkpoint to load.
    """
    # Index in this tuple == class label produced by the network.
    class_names = ("Hazardous waste", "Kitchen waste", "Other garbage",
                   "Recyclable garbage")
    font = {
        'color': 'red',
        'size': 20,
        'family': 'Times New Roman',
        'style': 'italic',
    }

    # Fall back to the CPU so the demo also runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    o_img = Image.open(imgpath)
    # Force three channels for the network input; grayscale/RGBA files would
    # otherwise fail in a 3-channel normalization.
    o_img1 = o_img.convert('RGB').resize((224, 224))

    img = transform(o_img1).unsqueeze(0).to(device)
    print(img.shape)

    model = ResNet(Bottleneck, [3, 4, 6, 3], 4)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model = model.to(device)
    # Inference mode: batch norm must use its running statistics rather than
    # the statistics of this single-image batch.
    model.eval()

    with torch.no_grad():
        output = model(img)
    _, predict = torch.max(output, dim=1)
    label = class_names[int(predict)]

    print(label)
    plt.imshow(o_img)
    plt.text(0, -6.0, label, fontdict=font)
    plt.show()

源代码：


import torch.nn as nn
import torch
import numpy as np
from torch.utils.data import DataLoader,Dataset
from torchvision import transforms
import torchvision
import torch.nn.functional as F
import torch.optim as optim
import os
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt
 
# Number of samples per mini-batch.
batch_size = 8

# Preprocessing: convert to a tensor, then normalize each channel.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

torch.cuda.empty_cache()
 
class garbage_datasets(Dataset):
    """In-memory image dataset for the four garbage categories.

    Every file found under ``filepath/<class name>/`` is opened, resized to
    224x224, passed through the module-level ``transform`` and stored in
    ``self.images``; the matching integer label (the position of the class
    name in ``CLASS_NAMES``) is stored in ``self.labels``.
    """

    # Sub-directory names; the position of a name is its integer class label.
    CLASS_NAMES = ('Hazardous waste', 'Kitchen waste', 'Other garbage',
                   'Recyclable garbage')

    def __init__(self, filepath):
        """Load and preprocess every image below ``filepath``.

        Args:
            filepath: Directory that contains one sub-directory per class,
                e.g. ``'data/train/'`` (a trailing slash is optional).
        """
        self.images = []
        self.labels = []
        self.transform = transform
        for label, class_name in enumerate(self.CLASS_NAMES):
            class_dir = os.path.join(filepath, class_name)
            for filename in tqdm(os.listdir(class_dir)):
                # The context manager closes the file once convert()/resize()
                # have produced an independent copy of the pixels.
                with Image.open(os.path.join(class_dir, filename)) as image:
                    # Force three channels so grayscale/RGBA files do not
                    # break the 3-channel normalization in ``transform``.
                    image = image.convert('RGB').resize((224, 224))
                self.images.append(self.transform(image))
                self.labels.append(label)
        self.labels = torch.LongTensor(self.labels)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        return self.images[index], self.labels[index]

    def __len__(self):
        """Return the number of loaded samples."""
        # len() of the list is O(1); there is no need to materialize all
        # images as one NumPy array just to read its first dimension.
        return len(self.images)
 
 
# Training split; shuffling is requested for its loader.
train_data = garbage_datasets('data/train/')
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

# Validation split; no shuffling is requested.
val_data = garbage_datasets('data/val/')
val_loader = DataLoader(dataset=val_data, batch_size=batch_size)
 
 
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The block outputs ``planes * extention`` channels.
    """

    # Ratio between the block's output channels and ``planes``.
    extention = 4

    def __init__(self, inplanes, planes, stride, downsample=None):
        """Create the three convolution / batch-norm pairs.

        Args:
            inplanes: Number of input channels.
            planes: Number of channels of the two inner convolutions.
            stride: Stride of the first 1x1 convolution.
            downsample: Optional module applied to the input to produce the
                shortcut; ``None`` uses the input unchanged.
        """
        super(Bottleneck, self).__init__()
        out_planes = planes * self.extention

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.relu = nn.ReLU()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv stages, add the shortcut and apply ReLU."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # NOTE(review): this ReLU runs *before* the shortcut is added, so the
        # main branch is non-negative; reference ResNet implementations
        # activate only after the addition. Kept as is so that existing
        # checkpoints keep producing the same outputs -- confirm intent.
        out = self.relu(self.bn3(self.conv3(out)))

        return self.relu(out + shortcut)
 
 
class ResNet(nn.Module):
    """ResNet backbone (stem, four residual stages) with a linear classifier."""

    def __init__(self, block, layers, num_class):
        """Build the network.

        Args:
            block: Residual block class; it must expose an ``extention``
                attribute and accept ``(inplanes, planes, stride=...,
                downsample=...)``.
            layers: Number of blocks in each of the four stages.
            num_class: Number of output classes of the final linear layer.
        """
        super(ResNet, self).__init__()

        # Channel count fed to the next block; updated by make_layer().
        self.inplane = 64
        self.block = block
        self.layers = layers

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stage1 = self.make_layer(self.block, 64, layers[0], stride=1)
        self.stage2 = self.make_layer(self.block, 128, layers[1], stride=2)
        self.stage3 = self.make_layer(self.block, 256, layers[2], stride=2)
        self.stage4 = self.make_layer(self.block, 512, layers[3], stride=2)

        # Adaptive pooling always yields a 1x1 map, so inputs other than
        # 224x224 work too; for 224x224 inputs (7x7 final map) it is the same
        # as a 7x7 average pool. It has no parameters, so checkpoints still load.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.extention, num_class)

    def forward(self, x):
        """Return the class scores, one row per input image."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        out = self.stage1(out)
        out = self.stage2(out)
        out = self.stage3(out)
        out = self.stage4(out)

        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return self.fc(out)

    def make_layer(self, block, plane, block_num, stride=1):
        """Stack ``block_num`` blocks into one stage.

        Only the first block receives ``stride`` and, when the shortcut shape
        would not match the block output, a 1x1-conv + batch-norm projection.

        Args:
            block: Residual block class.
            plane: Inner channel count passed to every block of the stage.
            block_num: Number of blocks in the stage.
            stride: Stride of the first block.

        Returns:
            An ``nn.Sequential`` holding the blocks of the stage.
        """
        out_planes = plane * block.extention

        downsample = None
        if stride != 1 or self.inplane != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplane, out_planes, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        block_list = [block(self.inplane, plane, stride=stride, downsample=downsample)]
        self.inplane = out_planes

        # The remaining blocks keep the resolution and use identity shortcuts.
        for _ in range(1, block_num):
            block_list.append(block(self.inplane, plane, stride=1))

        return nn.Sequential(*block_list)
 
 
 
 
model = ResNet(Bottleneck, [3, 4, 6, 3], 4)

# is_available must be *called*: the bare function object is always truthy,
# which would select 'cuda' even on a machine without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model.to(device)

# Resume from the checkpoint that train() writes, when one exists; on the
# very first run there is no file yet and training starts from scratch.
# map_location lets a checkpoint saved on a GPU load on a CPU-only host.
if os.path.exists("model1.pth"):
    model.load_state_dict(torch.load("model1.pth", map_location=device))

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
 
 
def train(epoch):
    """Train the global ``model`` for one pass over ``train_loader``.

    Prints the mean batch loss and then saves the model's ``state_dict`` to
    ``./model1.pth``.

    Args:
        epoch: Zero-based epoch index, used only for the progress message.
    """
    model.train()
    print("epoch:", epoch + 1)
    running_loss = 0.0
    num_batches = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Average over the number of batches actually processed. Dividing by the
    # last batch index would under-count by one and fail for 0 or 1 batches.
    mean_loss = running_loss / num_batches if num_batches else float('nan')
    print('train loss: %.3f' % mean_loss)
    torch.save(model.state_dict(), './model1.pth')
 
def val():
    """Evaluate the global ``model`` on ``val_loader``.

    Prints the accuracy as a whole-number percentage and returns it as a
    fraction (correct predictions divided by the number of samples).
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            scores = model(images)
            # Index of the highest score per row is the predicted class.
            predicted = torch.max(scores.data, dim=1)[1]
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * hits / seen))
    return hits / seen
 
if __name__ == '__main__':
    # Train for five epochs, validating after each one.
    acc_list = []
    epoch_list = []

    for epoch in range(5):
        train(epoch)
        acc = val()
        epoch_list.append(epoch + 1)
        acc_list.append(acc)

    # Plot validation accuracy against the 1-based epoch number.
    plt.plot(epoch_list, acc_list)
    plt.xlabel("Epoch")
    plt.ylabel("ACC")
    plt.show()

测试源码：


from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
 
 
# Preprocessing: convert to a tensor, then normalize each channel.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
 
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The block outputs ``planes * extention`` channels.
    """

    # Ratio between the block's output channels and ``planes``.
    extention = 4

    def __init__(self, inplanes, planes, stride, downsample=None):
        """Create the three convolution / batch-norm pairs.

        Args:
            inplanes: Number of input channels.
            planes: Number of channels of the two inner convolutions.
            stride: Stride of the first 1x1 convolution.
            downsample: Optional module applied to the input to produce the
                shortcut; ``None`` uses the input unchanged.
        """
        super(Bottleneck, self).__init__()
        out_planes = planes * self.extention

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.relu = nn.ReLU()

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv stages, add the shortcut and apply ReLU."""
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # NOTE(review): this ReLU runs *before* the shortcut is added, so the
        # main branch is non-negative; reference ResNet implementations
        # activate only after the addition. Kept as is so that existing
        # checkpoints keep producing the same outputs -- confirm intent.
        out = self.relu(self.bn3(self.conv3(out)))

        return self.relu(out + shortcut)
 
 
class ResNet(nn.Module):
    """ResNet backbone (stem, four residual stages) with a linear classifier."""

    def __init__(self, block, layers, num_class):
        """Build the network.

        Args:
            block: Residual block class; it must expose an ``extention``
                attribute and accept ``(inplanes, planes, stride=...,
                downsample=...)``.
            layers: Number of blocks in each of the four stages.
            num_class: Number of output classes of the final linear layer.
        """
        super(ResNet, self).__init__()

        # Channel count fed to the next block; updated by make_layer().
        self.inplane = 64
        self.block = block
        self.layers = layers

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.conv1 = nn.Conv2d(3, self.inplane, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplane)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stage1 = self.make_layer(self.block, 64, layers[0], stride=1)
        self.stage2 = self.make_layer(self.block, 128, layers[1], stride=2)
        self.stage3 = self.make_layer(self.block, 256, layers[2], stride=2)
        self.stage4 = self.make_layer(self.block, 512, layers[3], stride=2)

        # Adaptive pooling always yields a 1x1 map, so inputs other than
        # 224x224 work too; for 224x224 inputs (7x7 final map) it is the same
        # as a 7x7 average pool. It has no parameters, so checkpoints still load.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.extention, num_class)

    def forward(self, x):
        """Return the class scores, one row per input image."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        out = self.stage1(out)
        out = self.stage2(out)
        out = self.stage3(out)
        out = self.stage4(out)

        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        return self.fc(out)

    def make_layer(self, block, plane, block_num, stride=1):
        """Stack ``block_num`` blocks into one stage.

        Only the first block receives ``stride`` and, when the shortcut shape
        would not match the block output, a 1x1-conv + batch-norm projection.

        Args:
            block: Residual block class.
            plane: Inner channel count passed to every block of the stage.
            block_num: Number of blocks in the stage.
            stride: Stride of the first block.

        Returns:
            An ``nn.Sequential`` holding the blocks of the stage.
        """
        out_planes = plane * block.extention

        downsample = None
        if stride != 1 or self.inplane != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplane, out_planes, stride=stride, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        block_list = [block(self.inplane, plane, stride=stride, downsample=downsample)]
        self.inplane = out_planes

        # The remaining blocks keep the resolution and use identity shortcuts.
        for _ in range(1, block_num):
            block_list.append(block(self.inplane, plane, stride=1))

        return nn.Sequential(*block_list)
 
def test(imgpath, weights_path="model.pth"):
    """Classify one image and display it with the predicted label.

    Args:
        imgpath: Path of the image file to classify.
        weights_path: Path of the ``state_dict`` checkpoint to load.
    """
    # Index in this tuple == class label produced by the network.
    class_names = ("Hazardous waste", "Kitchen waste", "Other garbage",
                   "Recyclable garbage")
    font = {
        'color': 'red',
        'size': 20,
        'family': 'Times New Roman',
        'style': 'italic',
    }

    # Fall back to the CPU so the demo also runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    o_img = Image.open(imgpath)
    # Force three channels for the network input; grayscale/RGBA files would
    # otherwise fail in the 3-channel normalization of ``transform``.
    o_img1 = o_img.convert('RGB').resize((224, 224))

    img = transform(o_img1).unsqueeze(0).to(device)
    print(img.shape)

    model = ResNet(Bottleneck, [3, 4, 6, 3], 4)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model = model.to(device)
    # Inference mode: batch norm must use its running statistics rather than
    # the statistics of this single-image batch.
    model.eval()

    with torch.no_grad():
        output = model(img)
    _, predict = torch.max(output, dim=1)
    label = class_names[int(predict)]

    print(label)
    plt.imshow(o_img)
    plt.text(0, -6.0, label, fontdict=font)
    plt.show()
 
 
if __name__ == "__main__":
    # Classify one sample image from the test split.
    sample_image = 'data/test/Hazardous waste/2.jpg'
    test(sample_image)

最终验证集的准确率可达到70%

附几张测试成功的图片

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/盐析白兔/article/detail/372773