I was studying ResNet and wanted to implement it in PyTorch.

I searched for examples (GitHub, Google), but it was hard to understand the code completely.

So I implemented ResNet myself, and it works.

But I want to check, just in case, whether I did something wrong.

Can I get a code review, and if I made any mistakes, can someone correct me?

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

train_transform = transforms.Compose(
    [transforms.RandomCrop(32, padding=4),
     transforms.RandomHorizontalFlip(),
     transforms.ToTensor(),
     transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])
test_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])

train = torchvision.datasets.CIFAR100(root='D:/User_DATA/Desktop/파이토치 연습/CIFAR-100',
                                      train=True, transform=train_transform,
                                      download=True)
test = torchvision.datasets.CIFAR100(root='D:/User_DATA/Desktop/파이토치 연습/CIFAR-100',
                                     train=False, transform=test_transform,
                                     download=True)

train_loader = torch.utils.data.DataLoader(dataset=train, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=100, shuffle=True)

cuda = torch.device('cuda')
class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        # 1x1 projection convolutions (with batch norms) for the downsampling shortcuts
        self.conv1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=1, stride=2, padding=0)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=1, stride=2, padding=0)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=2, padding=0)
        self.batch1 = nn.BatchNorm2d(128)
        self.batch2 = nn.BatchNorm2d(256)
        self.batch3 = nn.BatchNorm2d(512)
        # input: 32 x 32 x 3
        self.conv1_layer = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        # 32 x 32 x 64
        self.conv2_layer = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64)
        )
        # 32 x 32 x 64
        self.conv3_1_layer = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128)
        )
        # 16 x 16 x 128
        self.conv3_2_layer = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128)
        )
        # 16 x 16 x 128
        self.conv4_1_layer = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256)
        )
        # 8 x 8 x 256
        self.conv4_2_layer = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256)
        )
        # 8 x 8 x 256
        self.conv5_1_layer = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512)
        )
        # 4 x 4 x 512
        self.conv5_2_layer = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512)
        )
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        # 2 x 2 x 512
        self.fc_layer = nn.Sequential(
            nn.Linear(2 * 2 * 512, 100)
        )
    def forward(self, x):
        x = self.conv1_layer(x)
        # stage 1: two blocks at 64 channels with identity shortcuts
        # (note: the same conv2_layer weights are applied twice)
        shortcut = x
        x = self.conv2_layer(x)
        x = nn.ReLU()(x + shortcut)
        shortcut = x
        x = self.conv2_layer(x)
        x = nn.ReLU()(x + shortcut)
        # stage 2: projection shortcut to 128 channels, stride 2
        shortcut = self.conv1(x)
        shortcut = self.batch1(shortcut)
        shortcut = nn.ReLU()(shortcut)
        x = self.conv3_1_layer(x)
        x = nn.ReLU()(x + shortcut)
        shortcut = x
        x = self.conv3_2_layer(x)
        x = nn.ReLU()(x + shortcut)
        # stage 3: projection shortcut to 256 channels, stride 2
        shortcut = self.conv2(x)
        shortcut = self.batch2(shortcut)
        shortcut = nn.ReLU()(shortcut)
        x = self.conv4_1_layer(x)
        x = nn.ReLU()(x + shortcut)
        shortcut = x
        x = self.conv4_2_layer(x)
        x = nn.ReLU()(x + shortcut)
        # stage 4: projection shortcut to 512 channels, stride 2
        shortcut = self.conv3(x)
        shortcut = self.batch3(shortcut)
        shortcut = nn.ReLU()(shortcut)
        x = self.conv5_1_layer(x)
        x = nn.ReLU()(x + shortcut)
        shortcut = x
        x = self.conv5_2_layer(x)
        x = nn.ReLU()(x + shortcut)
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, 2 * 2 * 512)
        x = self.fc_layer(x)
        return x
model = ResNet()
model = model.cuda()

loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=5e-4, momentum=0.9)
# halve the LR every 5000 iterations = 10 epochs (500 iterations/epoch at batch size 100)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5000, gamma=0.5)

cost = 0
iterations = []
train_losses = []
test_losses = []
train_acc = []
test_acc = []

for epoch in range(100):
    model.train()
    correct = 0
    for X, Y in train_loader:
        X = X.to(cuda)
        Y = Y.to(cuda)
        optimizer.zero_grad()
        hypo = model(X)
        cost = loss(hypo, Y)
        cost.backward()
        optimizer.step()
        scheduler.step()
        prediction = hypo.data.max(1)[1]
        correct += prediction.eq(Y.data).sum()

    model.eval()
    correct2 = 0
    for data, target in test_loader:
        data = data.to(cuda)
        target = target.to(cuda)
        output = model(data)
        cost2 = loss(output, target)
        prediction = output.data.max(1)[1]
        correct2 += prediction.eq(target.data).sum()

    print("Epoch : {:>4} / cost : {:>.9}".format(epoch + 1, cost))
    iterations.append(epoch)
    train_losses.append(cost.tolist())
    test_losses.append(cost2.tolist())
    train_acc.append((100 * correct / len(train_loader.dataset)).tolist())
    test_acc.append((100 * correct2 / len(test_loader.dataset)).tolist())

# del train_loader
# torch.cuda.empty_cache()

model.eval()
correct = 0
for data, target in test_loader:
    data = data.to(cuda)
    target = target.to(cuda)
    output = model(data)
    prediction = output.data.max(1)[1]
    correct += prediction.eq(target.data).sum()

print('Test set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))

plt.subplot(121)
plt.plot(range(1, len(iterations) + 1), train_losses, 'b--')
plt.plot(range(1, len(iterations) + 1), test_losses, 'r--')
plt.subplot(122)
plt.plot(range(1, len(iterations) + 1), train_acc, 'b-')
plt.plot(range(1, len(iterations) + 1), test_acc, 'r-')
plt.title('loss and accuracy')
plt.show()

1 Answer

import torchvision.transforms as transforms

To make your code easier to read, use from imports:

from torchvision.transforms import *
from torchvision.datasets import CIFAR100

Then you can remove all long and useless class prefixes littering your code.
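
For instance, the transform setup from the question then reads (a quick sketch; I've listed the names explicitly rather than using *, but the effect is the same):

from torchvision.transforms import Compose, RandomCrop, RandomHorizontalFlip, ToTensor, Normalize
from torchvision.datasets import CIFAR100

train_transform = Compose([
    RandomCrop(32, padding=4),
    RandomHorizontalFlip(),
    ToTensor(),
    Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])

train = CIFAR100(root='D:/User_DATA/Desktop/파이토치 연습/CIFAR-100',
                 train=True, transform=train_transform, download=True)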

x = nn.ReLU()(x + shortcut)

Don't instantiate classes in the forward method; that constructs a new ReLU object on every forward pass. Either use the functional API:

import torch.nn.functional as F

def forward(...):
    x = F.relu(x + shortcut)

or instantiate the layer in the constructor:

def __init__(...):
    self.relu = ReLU(inplace=True)

def forward(...):
    x = self.relu(x + shortcut)
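
One note on the design choice (my observation, not part of the original answer): the module form makes the activation visible in print(model) and addressable by forward hooks, and inplace=True saves a little memory by overwriting its input tensor in place.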

Another thing: a convolutional layer should always (as far as I know) be unbiased if it is followed by a batch norm layer, since the batch norm's learned shift makes the convolution's bias redundant. So add bias=False to those constructors.
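
As a minimal sketch of that change, here is the first block from the question with the bias dropped (the rewrite is mine):

self.conv1_layer = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False),
    nn.BatchNorm2d(64),
    nn.ReLU()
)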

x = self.maxpool(x)

This may work, but you probably want to use AdaptiveAvgPool2d(1) instead, which is what the original ResNet uses (global average pooling). For CIFAR-100, a 2048x100 fully-connected classifier layer is overkill. Other than that, I think your ResNet18 implementation is mostly correct.
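
For illustration, a sketch of that swap against the question's code (the attribute names mirror the question; the snippet is mine, not the answerer's):

# in __init__(): global average pooling instead of 2x2 max pooling
self.avgpool = nn.AdaptiveAvgPool2d(1)  # 4 x 4 x 512 -> 1 x 1 x 512
self.fc_layer = nn.Linear(512, 100)     # 512-wide classifier input instead of 2048

# in forward():
x = self.avgpool(x)
x = x.view(x.size(0), -1)  # flatten to (batch, 512)
x = self.fc_layer(x)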
