# Import the libraries we need to use in this lab
# Use the following line to install the torchvision library
# !mamba install -y torchvision
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import matplotlib.pylab as plt
import numpy as np
# Seed PyTorch's random number generator so repeated runs are reproducible.
torch.manual_seed(0)
Test Uniform, Default and Xavier Uniform Initialization on MNIST dataset with tanh activation
Objective for this Notebook
- Define Several Neural Networks, Criterion function, Optimizer
- Test Uniform, Default and Xavier Initialization
Table of Contents
In this lab, you will test PyTorch Default Initialization, Xavier Initialization and Uniform Initialization on the MNIST dataset.
- Neural Network Module and Training Function
- Make Some Data
- Define Several Neural Networks, Criterion function, Optimizer
- Test Uniform, Default and Xavier Initialization
- Analyze Results
Estimated Time Needed: 25 min
Preparation
We’ll need the following libraries:
Neural Network Module and Training Function
Define the neural network module or class with Xavier Initialization
# Define the neural network with Xavier initialization
class Net_Xavier(nn.Module):
    """Fully connected tanh network whose weights use Xavier uniform init.

    ``Layers`` is a sequence of layer widths, e.g. ``[784, 100, 10]``; one
    ``nn.Linear`` is created for every consecutive pair of widths. Only the
    weights are re-initialized; biases keep the ``nn.Linear`` default.
    """

    # Constructor
    def __init__(self, Layers):
        super().__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            linear = nn.Linear(input_size, output_size)
            torch.nn.init.xavier_uniform_(linear.weight)
            self.hidden.append(linear)

    # Prediction
    def forward(self, x):
        """Apply every layer in order; tanh follows all layers but the last."""
        last = len(self.hidden) - 1
        for index, linear_transform in enumerate(self.hidden):
            x = linear_transform(x)
            # The final layer returns raw scores, so no activation there.
            if index < last:
                x = torch.tanh(x)
        return x
Define the neural network module with Uniform Initialization:
# Define the neural network with Uniform initialization
class Net_Uniform(nn.Module):
    """Fully connected tanh network whose weights are drawn from U(0, 1).

    ``Layers`` is a sequence of layer widths, e.g. ``[784, 100, 10]``; one
    ``nn.Linear`` is created for every consecutive pair of widths. Only the
    weights are re-initialized; biases keep the ``nn.Linear`` default.
    """

    # Constructor
    def __init__(self, Layers):
        super().__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            linear = nn.Linear(input_size, output_size)
            # nn.init works in-place without touching `.data` directly.
            nn.init.uniform_(linear.weight, a=0.0, b=1.0)
            self.hidden.append(linear)

    # Prediction
    def forward(self, x):
        """Apply every layer in order; tanh follows all layers but the last."""
        last = len(self.hidden) - 1
        for index, linear_transform in enumerate(self.hidden):
            x = linear_transform(x)
            # The final layer returns raw scores, so no activation there.
            if index < last:
                x = torch.tanh(x)
        return x
Define the neural network module with PyTorch Default Initialization
# Define the neural network with Default initialization
class Net(nn.Module):
    """Fully connected tanh network using PyTorch's default initialization.

    ``Layers`` is a sequence of layer widths, e.g. ``[784, 100, 10]``; one
    ``nn.Linear`` is created for every consecutive pair of widths and left
    with whatever parameters ``nn.Linear`` assigns on construction.
    """

    # Constructor
    def __init__(self, Layers):
        super().__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            self.hidden.append(nn.Linear(input_size, output_size))

    # Prediction
    def forward(self, x):
        """Apply every layer in order; tanh follows all layers but the last."""
        last = len(self.hidden) - 1
        for index, linear_transform in enumerate(self.hidden):
            x = linear_transform(x)
            # The final layer returns raw scores, so no activation there.
            if index < last:
                x = torch.tanh(x)
        return x
Define a function to train the model. The function returns a Python dictionary that stores the training loss of every iteration and the accuracy on the validation data after every epoch.
# function to Train the model
def train(model, criterion, train_loader, validation_loader, optimizer, epochs = 100):
    """Train ``model`` and record its loss and validation accuracy.

    Every batch is reshaped to ``(-1, 28 * 28)`` before it is fed to the
    model.

    Args:
        model: callable module mapping a ``(batch, 784)`` tensor to class scores.
        criterion: loss function called as ``criterion(scores, targets)``.
        train_loader: iterable of ``(x, y)`` training batches.
        validation_loader: iterable of ``(x, y)`` validation batches.
        optimizer: optimizer that updates the parameters of ``model``.
        epochs: number of passes over ``train_loader``.

    Returns:
        dict with two lists: ``'training_loss'`` holds one float per training
        batch, ``'validation_accuracy'`` holds one percentage per epoch.
    """
    loss_accuracy = {'training_loss': [], 'validation_accuracy': []}

    for _ in range(epochs):
        for x, y in train_loader:
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
            loss_accuracy['training_loss'].append(loss.item())

        correct = 0
        total = 0
        # No gradients are needed for evaluation, so skip building the graph.
        with torch.no_grad():
            for x, y in validation_loader:
                yhat = model(x.view(-1, 28 * 28))
                _, label = torch.max(yhat, 1)
                correct += (label == y).sum().item()
                total += y.size(0)
        # Count the samples actually seen rather than reading a global
        # dataset; an empty validation loader yields 0.0 instead of crashing.
        accuracy = 100 * (correct / total) if total else 0.0
        loss_accuracy['validation_accuracy'].append(accuracy)

    return loss_accuracy
Make Some Data
Load the training dataset by setting the parameter train to True, and convert it to a tensor by passing a transform object in the argument transform
# Create the train dataset (downloaded to ./data if it is not already there)
train_dataset = dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
Load the testing dataset by setting the parameter train to False, and convert it to a tensor by passing a transform object in the argument transform
# Create the validation dataset (the MNIST test split, train=False)
validation_dataset = dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
Create the training-data loader and the validation-data loader object
# Create Dataloader for both train dataset and validation dataset
# Only the training data is shuffled; validation order is kept fixed.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=2000, shuffle=True)
validation_loader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=5000, shuffle=False)
Define Neural Network, Criterion function, Optimizer and Train the Model
Create the criterion function
# Define criterion function
criterion = nn.CrossEntropyLoss()
Create the model with five hidden layers whose widths alternate between 100 and 10
# Set the parameters
input_dim = 28 * 28  # batches are flattened to 28 * 28 features in train()
output_dim = 10
# Widths of every layer, input first and output last.
layers = [input_dim, 100, 10, 100, 10, 100, output_dim]
epochs = 15
Test PyTorch Default Initialization, Xavier Initialization, Uniform Initialization
Train the network using PyTorch Default Initialization
# Train the model with default initialization
model = Net(layers)
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
training_results = train(model, criterion, train_loader, validation_loader, optimizer, epochs=epochs)
Train the network using Xavier Initialization function
# Train the model with Xavier initialization
model_Xavier = Net_Xavier(layers)
optimizer = torch.optim.SGD(model_Xavier.parameters(), lr=learning_rate)
training_results_Xavier = train(model_Xavier, criterion, train_loader, validation_loader, optimizer, epochs=epochs)
Train the network using Uniform Initialization
# Train the model with Uniform initialization
model_Uniform = Net_Uniform(layers)
optimizer = torch.optim.SGD(model_Uniform.parameters(), lr=learning_rate)
training_results_Uniform = train(model_Uniform, criterion, train_loader, validation_loader, optimizer, epochs=epochs)
Analyze Results
Compare the training loss for each initialization
# Plot the loss recorded at every training iteration, one curve per initialization
plt.plot(training_results_Xavier['training_loss'], label='Xavier')
plt.plot(training_results['training_loss'], label='Default')
plt.plot(training_results_Uniform['training_loss'], label='Uniform')
plt.ylabel('loss')
plt.xlabel('iteration ')
plt.title('training loss iterations')
plt.legend()
# Render this figure now so a later plot does not draw on the same axes.
plt.show()
Compare the validation accuracy for each model
# Plot the validation accuracy recorded after every epoch, one curve per initialization
plt.plot(training_results_Xavier['validation_accuracy'], label='Xavier')
plt.plot(training_results['validation_accuracy'], label='Default')
plt.plot(training_results_Uniform['validation_accuracy'], label='Uniform')
plt.ylabel('validation accuracy')
plt.xlabel('epochs')
plt.legend()
plt.show()