Objectives
- Define several Neural Networks, a Criterion function, and an Optimizer.
- Train a Neural Network using Batch Normalization and compare it to one without Batch Normalization.

Table of Contents
- Neural Network Module and Training Function
- Make Some Data
- Define Neural Network, Criterion function, Optimizer and Train the Model
- Train Neural Network using Batch Normalization and no Batch Normalization
- Analyze Results

In this lab, you will build a Neural Network that uses Batch Normalization and compare it to a Neural Network that does not. You will use the MNIST dataset to test your networks.
# These are the libraries that will be used for this lab.
# Use the following line of code to install the torchvision library:
# !mamba install -y torchvision

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import torch.nn.functional as F
import matplotlib.pylab as plt
import numpy as np

torch.manual_seed(0)
<torch._C.Generator at 0x7fe38026e6b0>
Neural Network Module and Training Function
Define the neural network module or class
Neural Network Module with two hidden layers using Batch Normalization
# Define the Neural Network Model using Batch Normalization
class NetBatchNorm(nn.Module):

    # Constructor
    def __init__(self, in_size, n_hidden1, n_hidden2, out_size):
        super(NetBatchNorm, self).__init__()
        self.linear1 = nn.Linear(in_size, n_hidden1)
        self.linear2 = nn.Linear(n_hidden1, n_hidden2)
        self.linear3 = nn.Linear(n_hidden2, out_size)
        self.bn1 = nn.BatchNorm1d(n_hidden1)
        self.bn2 = nn.BatchNorm1d(n_hidden2)

    # Prediction
    def forward(self, x):
        x = self.bn1(torch.sigmoid(self.linear1(x)))
        x = self.bn2(torch.sigmoid(self.linear2(x)))
        x = self.linear3(x)
        return x

    # Activations, to analyze results
    def activation(self, x):
        out = []
        z1 = self.bn1(self.linear1(x))
        out.append(z1.detach().numpy().reshape(-1))
        a1 = torch.sigmoid(z1)
        out.append(a1.detach().numpy().reshape(-1))
        z2 = self.bn2(self.linear2(a1))
        out.append(z2.detach().numpy().reshape(-1))
        a2 = torch.sigmoid(z2)
        out.append(a2.detach().numpy().reshape(-1))
        return out
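Before moving on, it can help to see what nn.BatchNorm1d does on its own. The short sketch below is not part of the original lab: it feeds a small shifted-and-scaled batch through a fresh BatchNorm1d layer and checks that, in training mode, every feature column comes out with roughly zero mean and unit variance (the learnable scale and shift start at 1 and 0, so at first the output is just the normalized input).

# Minimal sketch (not from the lab): the effect of nn.BatchNorm1d in training mode
bn = nn.BatchNorm1d(3)            # one mean/variance estimate per feature
x = torch.randn(8, 3) * 5 + 10    # a batch of 8 samples with 3 features, far from zero mean
y = bn(x)
print(y.mean(dim=0))              # close to 0 for every feature
print(y.std(dim=0))               # close to 1 for every feature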
Neural Network Module with two hidden layers without Batch Normalization
# Class Net for the Neural Network Model without Batch Normalization
class Net(nn.Module):

    # Constructor
    def __init__(self, in_size, n_hidden1, n_hidden2, out_size):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(in_size, n_hidden1)
        self.linear2 = nn.Linear(n_hidden1, n_hidden2)
        self.linear3 = nn.Linear(n_hidden2, out_size)

    # Prediction
    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        x = self.linear3(x)
        return x

    # Activations, to analyze results
    def activation(self, x):
        out = []
        z1 = self.linear1(x)
        out.append(z1.detach().numpy().reshape(-1))
        a1 = torch.sigmoid(z1)
        out.append(a1.detach().numpy().reshape(-1))
        z2 = self.linear2(a1)
        out.append(z2.detach().numpy().reshape(-1))
        a2 = torch.sigmoid(z2)
        out.append(a2.detach().numpy().reshape(-1))
        return out
Define a function to train the model. The function returns a Python dictionary that stores the training loss and the accuracy on the validation data.
# Define the function to train the model
def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}
    for epoch in range(epochs):
        # Training pass: BatchNorm layers normalize with batch statistics
        model.train()
        for i, (x, y) in enumerate(train_loader):
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
            useful_stuff['training_loss'].append(loss.item())
        # Validation pass: BatchNorm layers normalize with running statistics
        model.eval()
        correct = 0
        for x, y in validation_loader:
            yhat = model(x.view(-1, 28 * 28))
            _, label = torch.max(yhat, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_loader.dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff
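Note that the training function calls model.train() before the training loop and model.eval() before the validation loop. This matters for the BatchNorm model: in training mode the BatchNorm layers normalize with the current batch's statistics and update their running estimates, while in eval mode they use those running estimates instead. A small sketch of the difference (illustrative names, not part of the lab):

# Sketch: BatchNorm normalizes with batch statistics in train mode,
# and with accumulated running statistics in eval mode
bn_demo = nn.BatchNorm1d(2)
batch = torch.randn(16, 2) + 3.0

bn_demo.train()
out_train = bn_demo(batch)     # uses this batch's mean/variance, updates running stats
bn_demo.eval()
out_eval = bn_demo(batch)      # uses the running mean/variance instead

print(bn_demo.running_mean)                          # has moved toward 3.0
print(out_train.mean(dim=0), out_eval.mean(dim=0))   # ~0 in train mode, not in eval mode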
Make Some Data
Load the training dataset by setting the parameter train to True, and convert the images to tensors by passing a transforms.ToTensor() object in the transform argument.
# Load the training dataset
train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
Load the validation dataset by setting the parameter train to False, and convert the images to tensors by passing a transforms.ToTensor() object in the transform argument.
# Load the validation dataset
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
Create the training-data loader and the validation-data loader objects.
# Create the data loaders for the training and validation datasets
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000, shuffle=True)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=5000, shuffle=False)
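If you want to sanity-check the loaders before training, the snippet below (not in the original lab) pulls one batch and prints its shapes; the view(-1, 28*28) call is the same flattening the training function applies.

# Inspect one batch from the training loader
x, y = next(iter(train_loader))
print(x.shape)                    # torch.Size([2000, 1, 28, 28]): 2000 images, 1 channel, 28x28
print(y.shape)                    # torch.Size([2000]): one integer label (0-9) per image
print(x.view(-1, 28 * 28).shape)  # torch.Size([2000, 784]): flattened for the linear layers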
Define Neural Network, Criterion function, Optimizer and Train the Model
Create the criterion function
# Create the criterion function
criterion = nn.CrossEntropyLoss()
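nn.CrossEntropyLoss applies LogSoftmax internally, which is why both models return the raw output of linear3 without a final softmax. A tiny sketch (not from the lab) of how the criterion is called:

# CrossEntropyLoss expects raw scores (logits) and integer class labels
z = torch.tensor([[2.0, 0.5, -1.0]])   # logits for one sample over 3 classes
y = torch.tensor([0])                  # the true class index
print(criterion(z, y))                 # small loss, since class 0 has the largest logit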
Set the variables for the Neural Network shape; hidden_dim is used as the number of neurons in both hidden layers.
# Set the parameters
input_dim = 28 * 28
hidden_dim = 100
output_dim = 10
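As a quick sanity check of these dimensions, the sketch below (not part of the lab) runs a random batch through a throwaway NetBatchNorm instance; note that BatchNorm1d needs a batch of at least two samples in training mode, since it computes a variance over the batch.

# Sanity check: a dummy forward pass with the chosen dimensions
dummy = torch.randn(4, input_dim)                                       # batch of 4 fake images
net_check = NetBatchNorm(input_dim, hidden_dim, hidden_dim, output_dim)
print(net_check(dummy).shape)                                           # torch.Size([4, 10])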
Train Neural Network using Batch Normalization and no Batch Normalization
Train the Neural Network using Batch Normalization:
# Create the model with Batch Normalization and the optimizer, then train the model
model_norm = NetBatchNorm(input_dim, hidden_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model_norm.parameters(), lr=0.1)
training_results_Norm = train(model_norm, criterion, train_loader, validation_loader, optimizer, epochs=5)
Train the Neural Network without Batch Normalization:
# Create the model without Batch Normalization and the optimizer, then train the model
model = Net(input_dim, hidden_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=5)
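Before plotting, you can print the last recorded validation accuracy of each model. This snippet is not part of the original lab, and the exact numbers depend on the run:

# Compare the final validation accuracies (values vary from run to run)
print('With Batch Normalization:   ', training_results_Norm['validation_accuracy'][-1])
print('Without Batch Normalization:', training_results['validation_accuracy'][-1])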
Analyze Results
Compare the histograms of the second hidden layer's pre-activation values for the first validation sample, for both models.
# Compare the histograms of the second-layer values for one validation sample
model.eval()
model_norm.eval()
out = model.activation(validation_dataset[0][0].reshape(-1, 28 * 28))
plt.hist(out[2], label='model with no batch normalization')
out_norm = model_norm.activation(validation_dataset[0][0].reshape(-1, 28 * 28))
plt.hist(out_norm[2], label='model with normalization')
plt.xlabel("activation")
plt.legend()
plt.show()
We see that the values from the model with Batch Normalization are centred around zero and have a smaller variance.
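You can back this observation with numbers. The sketch below, which is not part of the original lab, prints the mean and standard deviation of the same values the histogram shows:

# Quantify the comparison shown in the histogram (exact values depend on training)
print('No Batch Normalization: mean = %.3f, std = %.3f' % (out[2].mean(), out[2].std()))
print('Batch Normalization:    mean = %.3f, std = %.3f' % (out_norm[2].mean(), out_norm[2].std()))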
Compare the training loss for each iteration
# Plot the diagram to show the loss
plt.plot(training_results['training_loss'], label='No Batch Normalization')
plt.plot(training_results_Norm['training_loss'], label='Batch Normalization')
plt.ylabel('Cost')
plt.xlabel('iterations')
plt.legend()
plt.show()
Compare the validation accuracy for each epoch
# Plot the diagram to show the accuracy
plt.plot(training_results['validation_accuracy'], label='No Batch Normalization')
plt.plot(training_results_Norm['validation_accuracy'], label='Batch Normalization')
plt.ylabel('validation accuracy')
plt.xlabel('epochs')
plt.legend()
plt.show()