resnet with pytorch

Question 1

I was studying resnet and wanted to program it with pytorch.

I searched for some examples (github, google), but it was hard to understand the code completely.

So I programmed resnet myself, and it works.

But, I want to check just in case if I did something wrong.

Can I get some code reviews, and if I had some mistakes, can someone correct me? T

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
train_transform = transforms.Compose(
 [transforms.RandomCrop(32, padding=4),
 transforms.RandomHorizontalFlip(),
 transforms.ToTensor(),
 transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])
test_transform = transforms.Compose(
 [transforms.ToTensor(),
 transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])
train = torchvision.datasets.CIFAR100(root='D:/User_DATA/Desktop/파이토치 연습/CIFAR-100',
 train=True, transform=train_transform,
 download=True)
test = torchvision.datasets.CIFAR100(root='D:/User_DATA/Desktop/파이토치 연습/CIFAR-100',
 train=False, transform=test_transform,
 download=True)
train_loader = torch.utils.data.DataLoader(dataset=train, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=100, shuffle=True)
cuda = torch.device('cuda')
class ResNet(nn.Module):
 def __init__(self):
 super(ResNet, self).__init__()
 self.conv1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=1, stride=2, padding=0)
 self.conv2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=1, stride=2, padding=0)
 self.conv3 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=2, padding=0)
 self.batch1 = nn.BatchNorm2d(128)
 self.batch2 = nn.BatchNorm2d(256)
 self.batch3 = nn.BatchNorm2d(512)
 # 32 32 3
 self.conv1_layer = nn.Sequential(
 nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(64),
 nn.ReLU()
 )
 # 32 32 64
 self.conv2_layer = nn.Sequential(
 nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(64),
 nn.ReLU(),
 nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(64)
 )
 # 32 32 64
 self.conv3_1_layer = nn.Sequential(
 nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1),
 nn.BatchNorm2d(128),
 nn.ReLU(),
 nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(128)
 )
 # 16 16 128
 self.conv3_2_layer = nn.Sequential(
 nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(128),
 nn.ReLU(),
 nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(128)
 )
 # 16 16 128
 self.conv4_1_layer = nn.Sequential(
 nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
 nn.BatchNorm2d(256),
 nn.ReLU(),
 nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(256)
 )
 # 8 8 256
 self.conv4_2_layer = nn.Sequential(
 nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(256),
 nn.ReLU(),
 nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(256)
 )
 # 8 8 256
 self.conv5_1_layer = nn.Sequential(
 nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1),
 nn.BatchNorm2d(512),
 nn.ReLU(),
 nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(512)
 )
 # 4 4 512
 self.conv5_2_layer = nn.Sequential(
 nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(512),
 nn.ReLU(),
 nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
 nn.BatchNorm2d(512)
 )
 self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
 # 2 2 512
 self.fc_layer = nn.Sequential(
 nn.Linear(2 * 2 * 512, 100)
 )
 def forward(self, x):
 x = self.conv1_layer(x)
 shortcut = x
 x = self.conv2_layer(x)
 x = nn.ReLU()(x + shortcut)
 shortcut = x
 x = self.conv2_layer(x)
 x = nn.ReLU()(x + shortcut)
 shortcut = self.conv1(x)
 shortcut = self.batch1(shortcut)
 shortcut = nn.ReLU()(shortcut)
 x = self.conv3_1_layer(x)
 x = nn.ReLU()(x + shortcut)
 shortcut = x
 x = self.conv3_2_layer(x)
 x = nn.ReLU()(x + shortcut)
 shortcut = self.conv2(x)
 shortcut = self.batch2(shortcut)
 shortcut = nn.ReLU()(shortcut)
 x = self.conv4_1_layer(x)
 x = nn.ReLU()(x + shortcut)
 shortcut = x
 x = self.conv4_2_layer(x)
 x = nn.ReLU()(x + shortcut)
 shortcut = self.conv3(x)
 shortcut = self.batch3(shortcut)
 shortcut = nn.ReLU()(shortcut)
 x = self.conv5_1_layer(x)
 x = nn.ReLU()(x + shortcut)
 shortcut = x
 x = self.conv5_2_layer(x)
 x = nn.ReLU()(x + shortcut)
 x = self.maxpool(x)
 x = x.view(x.size(0), -1)
 x = self.fc_layer(x)
 return x
model = ResNet()
model = model.cuda()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=5e-4, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5000, gamma=0.5) # 10 epochs
cost = 0
iterations = []
train_losses = []
test_losses = []
train_acc = []
test_acc = []
for epoch in range(100):
 model.train()
 correct = 0
 for X, Y in train_loader:
 X = X.to(cuda)
 Y = Y.to(cuda)
 optimizer.zero_grad()
 hypo = model(X)
 cost = loss(hypo, Y)
 cost.backward()
 optimizer.step()
 scheduler.step()
 prediction = hypo.data.max(1)[1]
 correct += prediction.eq(Y.data).sum()
 model.eval()
 correct2 = 0
 for data, target in test_loader:
 data = data.to(cuda)
 target = target.to(cuda)
 output = model(data)
 cost2 = loss(output, target)
 prediction = output.data.max(1)[1]
 correct2 += prediction.eq(target.data).sum()
 print("Epoch : {:>4} / cost : {:>.9}".format(epoch + 1, cost))
 iterations.append(epoch)
 train_losses.append(cost.tolist())
 test_losses.append(cost2.tolist())
 train_acc.append((100*correct/len(train_loader.dataset)).tolist())
 test_acc.append((100*correct2/len(test_loader.dataset)).tolist())
# del train_loader
# torch.cuda.empty_cache()
model.eval()
correct = 0
for data, target in test_loader:
 data = data.to(cuda)
 target = target.to(cuda)
 output = model(data)
 prediction = output.data.max(1)[1]
 correct += prediction.eq(target.data).sum()
print('Test set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
plt.subplot(121)
plt.plot(range(1, len(iterations)+1), train_losses, 'b--')
plt.plot(range(1, len(iterations)+1), test_losses, 'r--')
plt.subplot(122)
plt.plot(range(1, len(iterations)+1), train_acc, 'b-')
plt.plot(range(1, len(iterations)+1), test_acc, 'r-')
plt.title('loss and accuracy')
plt.show()

Question 2

import torchvision.transforms as transforms

To make your code easier to read, use from imports:

from torchvision.transforms import *
from torchvision.datasets import CIFAR100

Then you can remove all long and useless class prefixes littering your code.

x = nn.ReLU()(x + shortcut)

Don't instantiate classes in the forward method. Either use

import torch.nn.functional as F
def forward(...):
 x = F.relu(x + shortcut)

or instantiate the layer in the constructor:

def __init__(...):
 self.relu = ReLU(inplace = True)
def forward(...):
 x = self.relu(x + shortcut)

Another thing, a convolutional layer should always (afaik) be unbiased if it is followed by a batch norm layer, so add bias = False to their constructors.

x = self.maxpool(x)

This may work, but it is you probably want to use AdaptiveAvgPool2d(1) instead. For CIFAR100 a 2048x100 fully-connected classifier layer is overkill. Other than that I think your ResNet18 implementation is mostly correct.

score 2 · Answer 1 · 2025-02-18 11:33:20Z

import torchvision.transforms as transforms

To make your code easier to read, use from imports:

from torchvision.transforms import *
from torchvision.datasets import CIFAR100

Then you can remove all long and useless class prefixes littering your code.

x = nn.ReLU()(x + shortcut)

Don't instantiate classes in the forward method. Either use

import torch.nn.functional as F
def forward(...):
 x = F.relu(x + shortcut)

or instantiate the layer in the constructor:

def __init__(...):
 self.relu = ReLU(inplace = True)
def forward(...):
 x = self.relu(x + shortcut)

Another thing, a convolutional layer should always (afaik) be unbiased if it is followed by a batch norm layer, so add bias = False to their constructors.

x = self.maxpool(x)

This may work, but it is you probably want to use AdaptiveAvgPool2d(1) instead. For CIFAR100 a 2048x100 fully-connected classifier layer is overkill. Other than that I think your ResNet18 implementation is mostly correct.

Stack Exchange Network

resnet with pytorch

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

resnet with pytorch

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions