Train PyTorch NN
In this post, we will use PyTorch to classify the MNIST dataset and check how accurate the model is on the test set.
1. Import libraries
In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
2. Defining the Model Architecture
- In this section, we will create a Convolutional Neural Network (CNN) model.
- Our model will consist of convolutional layers, dropout layers, and fully connected layers, all implemented using PyTorch.
In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)  # log-probabilities over the 10 digit classes (ref: https://discuss.pytorch.org/t/dimension-for-logsoftmax/49042)
        return output
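Why does fc1 take 9216 input features? A 28x28 MNIST image becomes 26x26 after conv1 (3x3, stride 1, no padding), 24x24 after conv2, and 12x12 after the 2x2 max pool, with 64 channels, so 64 * 12 * 12 = 9216. A quick sanity check (a sketch, not part of the training script) that pushes a dummy batch through the feature extractor defined above:

x = torch.randn(1, 1, 28, 28)  # dummy MNIST-sized input
net = Net()
feats = F.max_pool2d(F.relu(net.conv2(F.relu(net.conv1(x)))), 2)
print(feats.shape)                    # torch.Size([1, 64, 12, 12])
print(torch.flatten(feats, 1).shape)  # torch.Size([1, 9216])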
Q. What is super(Net, self).__init__()?
- super(Net, self).__init__() calls the initialization method of the parent class (here, nn.Module) through Python's inheritance mechanism.
- What happens if we omit super(Net, self).__init__()?
- Omitting it raises an error like "cannot assign module before Module.__init__() call" as soon as a submodule such as self.conv1 is assigned, because nn.Module's internal bookkeeping has not been set up yet. Hence, we must invoke it before registering any layers in order to use the features provided by nn.Module.
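A minimal sketch of what goes wrong without the call (the BadNet class below is only for illustration, and the exact error message may vary by PyTorch version):

class BadNet(nn.Module):
    def __init__(self):
        # super().__init__() intentionally omitted
        self.conv1 = nn.Conv2d(1, 32, 3, 1)  # fails: nn.Module's internals are not initialized yet

try:
    BadNet()
except AttributeError as e:
    print(e)  # e.g. "cannot assign module before Module.__init__() call"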
In [3]:
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # zero out the gradients accumulated in the previous iteration
        output = model(data)
        loss = F.nll_loss(output, target)  # negative log-likelihood loss (pairs with log_softmax)
        loss.backward()  # compute gradients of the loss with respect to the model's parameters via backpropagation
        optimizer.step()  # update the parameters using the computed gradients and the chosen optimizer (SGD, Adam, Adadelta, ...)
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break
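A side note on optimizer.zero_grad(): gradients in PyTorch accumulate across backward() calls, so without zeroing them each batch would add its gradients on top of the previous ones. A tiny sketch, independent of the MNIST pipeline:

w = torch.ones(1, requires_grad=True)
(w * 2).sum().backward()
print(w.grad)   # tensor([2.])
(w * 2).sum().backward()
print(w.grad)   # tensor([4.]) -- accumulated, which is why we call zero_grad() every iteration
w.grad.zero_()
(w * 2).sum().backward()
print(w.grad)   # tensor([2.])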
In [4]:
def test(model, device, test_loader):
    model.eval()  # dropout keeps all units and batch norm uses running statistics instead of batch statistics
    test_loss = 0
    correct = 0
    with torch.no_grad():  # operations inside this block do not track gradients
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            ## reduction: specifies how to aggregate the per-sample losses in a batch ('sum' here, so we can average over the whole dataset below)
            ## item(): converts the one-element tensor to a standard Python number
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            # eq(): element-wise equality check, returns a Boolean tensor
            # view_as(): reshapes one tensor to the shape of another tensor
    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
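To make the accuracy bookkeeping concrete, here is a toy example (made-up logits, not from the model) of how argmax, view_as, and eq count correct predictions:

logits = torch.tensor([[0.1, 2.0, 0.3],
                       [1.5, 0.2, 0.1]])
target = torch.tensor([1, 2])
pred = logits.argmax(dim=1, keepdim=True)           # tensor([[1], [0]])
print(pred.eq(target.view_as(pred)).sum().item())   # 1 -> one of the two predictions is correct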
6. Run
In [7]:
# Training settings (originally wrapped in a main() function; inlined here for the notebook)
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=1, metavar='N',
                    help='number of epochs to train (default: 1)')
parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                    help='learning rate (default: 1.0)')
parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                    help='learning rate step gamma (default: 0.7)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--no-mps', action='store_true', default=False,
                    help='disables macOS GPU training')
parser.add_argument('--dry-run', action='store_true', default=False,
                    help='quickly check a single pass')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--save-model', action='store_true', default=False,
                    help='for saving the current model')
args, unknown = parser.parse_known_args()  # parse_known_args ignores the extra arguments Jupyter passes to the kernel
In [8]:
# Select device
use_cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

torch.manual_seed(args.seed)

if use_cuda:
    device = torch.device("cuda")
elif use_mps:
    device = torch.device("mps")
else:
    device = torch.device("cpu")

train_kwargs = {'batch_size': args.batch_size}
test_kwargs = {'batch_size': args.test_batch_size}
if use_cuda:
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True,
                   'shuffle': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)
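For reference, pin_memory=True keeps fetched batches in page-locked host memory, which speeds up host-to-GPU copies, and num_workers controls how many subprocesses load data. With pinned memory the copy can also be made asynchronous; a hypothetical variant of the transfer used in train()/test() (shown as comments, since data/target do not exist yet at this point) would be:

# data = data.to(device, non_blocking=True)
# target = target.to(device, non_blocking=True)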
In [9]:
# Load dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST training-set mean and std
])
dataset1 = datasets.MNIST('../data', train=True, download=True,
                          transform=transform)
dataset2 = datasets.MNIST('../data', train=False,
                          transform=transform)

## dataloaders
train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)  # decay the learning rate by gamma after every epoch

import time
training_start_time = time.time()

for epoch in range(1, args.epochs + 1):
    train(args, model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    scheduler.step()

print('Training finished, took {:.2f}s'.format(time.time() - training_start_time))

if args.save_model:
    torch.save(model.state_dict(), "mnist_cnn.pt")
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz
Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz
Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw

Train Epoch: 1 [0/60000 (0%)]      Loss: 2.300024
Train Epoch: 1 [640/60000 (1%)]    Loss: 1.141236
Train Epoch: 1 [1280/60000 (2%)]   Loss: 0.711558
...
Train Epoch: 1 [58880/60000 (98%)] Loss: 0.001254
Train Epoch: 1 [59520/60000 (99%)] Loss: 0.013900

Test set: Average loss: 0.0484, Accuracy: 9838/10000 (98%)

Training finished, took 17.14s
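The constants (0.1307,) and (0.3081,) passed to transforms.Normalize above are the mean and standard deviation of the MNIST training images scaled to [0, 1]. A quick sketch to verify them (assumes the dataset has already been downloaded to ../data):

raw = datasets.MNIST('../data', train=True, download=True,
                     transform=transforms.ToTensor())
imgs = torch.stack([img for img, _ in raw])     # shape: [60000, 1, 28, 28]
print(imgs.mean().item(), imgs.std().item())    # ~0.1307, ~0.3081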
In [13]:
# visualize a result
import matplotlib.pyplot as plt

test_samples = enumerate(test_loader)
for i in range(5):  # advance the iterator to the 5th test batch
    b_i, (sample_data, sample_targets) = next(test_samples)

plt.imshow(sample_data[0][0], cmap='gray', interpolation='none')
print(f"model {model(sample_data.to(device)).data.max(1)[1][0]}")
print(f"ground {sample_targets[0]}")
model 9
ground 9
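If the run was started with --save-model, the saved weights can be restored later; a minimal sketch (assumes mnist_cnn.pt exists in the working directory):

model = Net().to(device)
model.load_state_dict(torch.load("mnist_cnn.pt", map_location=device))
model.eval()  # switch to evaluation mode before inference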