# Import the libraries for this lab
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from matplotlib.colors import ListedColormap
from torch.utils.data import Dataset, DataLoader
# Seed both random number generators so the synthetic data set and the
# network weight initialisation are reproducible from run to run.
torch.manual_seed(1)
np.random.seed(1)
Neural Networks with Momentum
Objective for this Notebook
- Train different neural network models with different values for the momentum parameter.
- Compare Results of Different Momentum Terms.
Table of Contents
In this lab, you will see how different values for the momentum parameters affect the convergence rate of a neural network.
- Neural Network Module and Function for Training
- Train Different Neural Network Models with Different Values for the Momentum Parameter
- Compare Results of Different Momentum Terms
Estimated Time Needed: 25 min
Preparation
We’ll need the following libraries:
Functions used to plot:
# Define a function to plot the decision regions
def plot_decision_regions_3class(model, data_set):
    """Plot the decision regions of ``model`` together with the samples.

    A grid with step 0.02 is laid over the bounding box of ``data_set.x``
    (padded by 0.1 on every side). Every grid point is classified by the
    model and the predicted class is drawn as the background colour; the
    samples of classes 0, 1 and 2 are drawn on top.

    Args:
        model: callable mapping an ``(n, 2)`` float tensor to ``(n, C)``
            class scores.
        data_set: object exposing ``x`` (features, two columns are used)
            and ``y`` (integer labels) as CPU tensors.
    """
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#00AAFF'])
    X = data_set.x.numpy()
    y = data_set.y.numpy()
    h = .02  # grid step
    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # One row per grid point: (x coordinate, y coordinate).
    grid = torch.from_numpy(np.c_[xx.ravel(), yy.ravel()]).float()
    with torch.no_grad():  # plotting only; no gradients are needed
        _, yhat = torch.max(model(grid), 1)
    yhat = yhat.numpy().reshape(xx.shape)

    plt.pcolormesh(xx, yy, yhat, cmap=cmap_light)
    for label, fmt in enumerate(('ro', 'go', 'o')):
        mask = y == label
        plt.plot(X[mask, 0], X[mask, 1], fmt, label='y=' + str(label))
    plt.title("decision region")
    plt.legend()
Create the dataset class
# Create the dataset class
class Data(Dataset):
    """Synthetic spiral data set with ``K`` classes of ``N`` 2-D points each.

    modified from: http://cs231n.github.io/neural-networks-case-study/

    Attributes are set in the constructor:
        x: ``(N * K, 2)`` float tensor of point coordinates.
        y: ``(N * K,)`` long tensor of class labels ``0 .. K - 1``.
        len: number of samples, ``N * K``.
    """

    # Constructor
    def __init__(self, K=3, N=500):
        """Generate the spiral arms.

        Args:
            K: number of classes (spiral arms).
            N: number of points per class.
        """
        D = 2  # dimensionality of each point
        X = np.zeros((N * K, D))  # data matrix (each row = single example)
        # int64 labels: the tensor below is a LongTensor anyway, and a
        # uint8 buffer would silently wrap for K > 255.
        y = np.zeros(N * K, dtype='int64')  # class labels
        for j in range(K):
            ix = slice(N * j, N * (j + 1))  # rows belonging to class j
            r = np.linspace(0.0, 1, N)  # radius
            t = np.linspace(j * 4, (j + 1) * 4, N) + np.random.randn(N) * 0.2  # theta
            X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
            y[ix] = j

        self.y = torch.from_numpy(y).type(torch.LongTensor)
        self.x = torch.from_numpy(X).type(torch.FloatTensor)
        self.len = y.shape[0]

    # Getter
    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    # Get Length
    def __len__(self):
        """Return the number of samples."""
        return self.len

    # Plot the diagram
    def plot_data(self):
        """Scatter-plot the samples of classes 0, 1 and 2 with a legend."""
        for label, fmt in enumerate(('o', 'ro', 'go')):
            mask = self.y[:] == label
            plt.plot(self.x[mask, 0].numpy(), self.x[mask, 1].numpy(), fmt,
                     label="y=" + str(label))
        plt.legend()
Neural Network Module and Function for Training
Create Neural Network Module using ModuleList()
# Create the neural network module
class Net(nn.Module):
    """Fully connected network built from a list of layer sizes.

    ``Layers = [in, h1, ..., out]`` yields one ``nn.Linear`` per consecutive
    pair of sizes. ReLU is applied after every layer except the last, so
    the network returns the raw output of the final linear layer.
    """

    # Constructor
    def __init__(self, Layers):
        """Create the linear layers.

        Args:
            Layers: sequence of layer widths, input size first and output
                size last.
        """
        super().__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            self.hidden.append(nn.Linear(input_size, output_size))

    # Prediction
    def forward(self, activation):
        """Propagate ``activation`` through all layers and return the result."""
        last = len(self.hidden) - 1
        for index, linear_transform in enumerate(self.hidden):
            activation = linear_transform(activation)
            if index < last:
                # Non-linearity on hidden layers only; the output layer
                # is left linear.
                activation = F.relu(activation)
        return activation
Create the function for training the model.
# Define the function for training the model
def train(data_set, model, criterion, train_loader, optimizer, epochs=100):
    """Train ``model`` and plot loss and accuracy against the epoch number.

    Args:
        data_set: data set passed to ``accuracy`` after every epoch.
        model: network to optimise.
        criterion: loss function called as ``criterion(yhat, y)``.
        train_loader: iterable of ``(x, y)`` mini-batches.
        optimizer: optimiser holding the parameters of ``model``.
        epochs: number of passes over ``train_loader``.

    Returns:
        dict with keys ``"Loss"`` and ``"Accuracy"``, each a list with one
        entry per epoch.
    """
    LOSS = []
    ACC = []
    for _ in range(epochs):
        for x, y in train_loader:
            optimizer.zero_grad()
            yhat = model(x)
            loss = criterion(yhat, y)
            loss.backward()
            optimizer.step()
        # Recorded once per epoch: the loss of the last mini-batch and the
        # accuracy on the whole data set.
        LOSS.append(loss.item())
        ACC.append(accuracy(model, data_set))

    results = {"Loss": LOSS, "Accuracy": ACC}

    # Loss on the left axis ...
    fig, ax1 = plt.subplots()
    color = 'tab:red'
    ax1.plot(LOSS, color=color)
    ax1.set_xlabel('epoch', color=color)
    ax1.set_ylabel('total loss', color=color)
    ax1.tick_params(axis='y', color=color)

    # ... accuracy on a second y-axis sharing the same x-axis.
    ax2 = ax1.twinx()
    color = 'tab:blue'
    ax2.set_ylabel('accuracy', color=color)  # we already handled the x-label with ax1
    ax2.plot(ACC, color=color)
    ax2.tick_params(axis='y', color=color)
    fig.tight_layout()  # otherwise the right y-label is slightly clipped

    plt.show()
    return results
Define a function used to calculate accuracy.
# Define a function for calculating accuracy
def accuracy(model, data_set):
    """Return the fraction of samples in ``data_set`` that ``model`` classifies correctly.

    Args:
        model: callable mapping ``data_set.x`` to per-class scores with the
            classes along dimension 1.
        data_set: object exposing ``x`` (features) and ``y`` (labels).

    Returns:
        Mean of the element-wise comparison between the arg-max prediction
        and ``data_set.y``.
    """
    with torch.no_grad():  # evaluation only; do not build a graph
        _, yhat = torch.max(model(data_set.x), 1)
    return (yhat == data_set.y).cpu().numpy().mean()
Train Different Neural Network Models with Different Values for the Momentum Parameter
Create a dataset object using Data
# Create the dataset and plot it
data_set = Data()
data_set.plot_data()
# Make sure the labels are a flat 1-D tensor.
data_set.y = data_set.y.view(-1)
Dictionary to contain different cost and accuracy values for each epoch for different values of the momentum parameter.
# Initialize a dictionary to contain the cost and accuracy.
# Each entry is a placeholder that is replaced by the dictionary returned
# from train(); the keys match the ones train() returns.
Results = {
    "momentum 0": {"Loss": 0, "Accuracy": 0},
    "momentum 0.1": {"Loss": 0, "Accuracy": 0},
}
Create a network to classify three classes with 1 hidden layer with 50 neurons and a momentum value of zero.
# Train a model with 1 hidden layer and 50 neurons (no momentum)
Layers = [2, 50, 3]
model = Net(Layers)
learning_rate = 0.10
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
train_loader = DataLoader(dataset=data_set, batch_size=20)
criterion = nn.CrossEntropyLoss()
Results["momentum 0"] = train(data_set, model, criterion, train_loader, optimizer, epochs=100)
plot_decision_regions_3class(model, data_set)
Create a network to classify three classes with 1 hidden layer with 50 neurons and a momentum value of 0.1.
# Train a model with 1 hidden layer and 50 neurons with 0.1 momentum
Layers = [2, 50, 3]
model = Net(Layers)
learning_rate = 0.10
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.1)
train_loader = DataLoader(dataset=data_set, batch_size=20)
criterion = nn.CrossEntropyLoss()
Results["momentum 0.1"] = train(data_set, model, criterion, train_loader, optimizer, epochs=100)
plot_decision_regions_3class(model, data_set)
Create a network to classify three classes with 1 hidden layer with 50 neurons and a momentum value of 0.2.
# Train a model with 1 hidden layer and 50 neurons with 0.2 momentum
Layers = [2, 50, 3]
model = Net(Layers)
learning_rate = 0.10
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.2)
train_loader = DataLoader(dataset=data_set, batch_size=20)
criterion = nn.CrossEntropyLoss()
Results["momentum 0.2"] = train(data_set, model, criterion, train_loader, optimizer, epochs=100)
plot_decision_regions_3class(model, data_set)
Create a network to classify three classes with 1 hidden layer with 50 neurons and a momentum value of 0.4.
# Train a model with 1 hidden layer and 50 neurons with 0.4 momentum
Layers = [2, 50, 3]
model = Net(Layers)
learning_rate = 0.10
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.4)
train_loader = DataLoader(dataset=data_set, batch_size=20)
criterion = nn.CrossEntropyLoss()
Results["momentum 0.4"] = train(data_set, model, criterion, train_loader, optimizer, epochs=100)
plot_decision_regions_3class(model, data_set)
Create a network to classify three classes with 1 hidden layer with 50 neurons and a momentum value of 0.5.
# Train a model with 1 hidden layer and 50 neurons with 0.5 momentum
Layers = [2, 50, 3]
model = Net(Layers)
learning_rate = 0.10
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.5)
train_loader = DataLoader(dataset=data_set, batch_size=20)
criterion = nn.CrossEntropyLoss()
Results["momentum 0.5"] = train(data_set, model, criterion, train_loader, optimizer, epochs=100)
plot_decision_regions_3class(model, data_set)
Compare Results of Different Momentum Terms
The plot below compares the results for the different momentum terms. In general, the cost decreases faster as the momentum term increases, but larger momentum terms lead to larger oscillations. Although the cost decreases faster with larger momentum terms, a momentum term of 0.2 seems to reach the smallest value of the cost.
# Plot the Loss result for each term
for key, value in Results.items():
    # One curve per momentum setting, labelled with its dictionary key.
    plt.plot(value['Loss'], label=key)
plt.legend()
plt.xlabel('epoch')
plt.ylabel('Total Loss or Cost')
The accuracy seems to be proportional to the momentum term.
# Plot the Accuracy result for each term
for key, value in Results.items():
    # One curve per momentum setting, labelled with its dictionary key.
    plt.plot(value['Accuracy'], label=key)
plt.legend()
plt.xlabel('epoch')
plt.ylabel('Accuracy')