# Import the libraries we need for this lab
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from dataidea_science.plots import plot_error_surfaces
Linear Regression 1D: Training Two Parameter Mini-Batch Gradient Descent
Objective
- How to use Mini-Batch Gradient Descent to train a model.
Table of Contents
In this Lab, you will practice training a model by using Mini-Batch Gradient Descent.
- Make Some Data
- Create the Model and Cost Function (Total Loss)
- Train the Model: Batch Gradient Descent
- Train the Model: Stochastic Gradient Descent with Dataset DataLoader
- Train the Model: Mini-Batch Gradient Descent: Batch Size Equals 5
- Train the Model: Mini-Batch Gradient Descent: Batch Size Equals 10
Estimated Time Needed: 30 min
Preparation
We’ll need the following libraries:
The class plot_error_surfaces is just to help you visualize the data space and the parameter space during training and has nothing to do with PyTorch.
Make Some Data
Import PyTorch and set random seed:
# Import PyTorch library
import torch
torch.manual_seed(1)
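Setting the seed makes the random noise below reproducible. As a quick illustration (not part of the original lab), re-seeding with the same value reproduces the same random draws:
# Illustrative check: the same seed yields the same random numbers
torch.manual_seed(1)
first = torch.randn(3)
torch.manual_seed(1)
second = torch.randn(3)
print(torch.equal(first, second))  # True
torch.manual_seed(1)  # re-seed so the data generated below is unaffected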
Generate values from -3 to 3 that create a line with a slope of 1 and a bias of -1. This is the line that you need to estimate. Add some noise to the data:
# Generate the data with noise and the line
X = torch.arange(-3, 3, 0.1).view(-1, 1)
f = 1 * X - 1
Y = f + 0.1 * torch.randn(X.size())
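As a quick illustrative check (not part of the original lab), you can confirm the shapes: torch.arange(-3, 3, 0.1) produces 60 values, and view(-1, 1) turns each tensor into a column:
# Illustrative shape check: 60 samples, one feature each
print(X.shape, f.shape, Y.shape)  # each is torch.Size([60, 1])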
Plot the results:
# Plot the line and the data
plt.plot(X.numpy(), Y.numpy(), 'o', label = 'y', c = 'g')
plt.plot(X.numpy(), f.numpy(), label = 'f', c = 'b')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
Create the Model and Cost Function (Total Loss)
Define the forward function:
# Define the prediction function
def forward(x):
return w * x + b
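Note that forward uses the global parameters w and b, which each training section below re-initializes. For intuition only, here is how it behaves with hypothetical placeholder values w = 2 and b = 1:
# Illustrative only: with placeholder w = 2 and b = 1, forward(x) returns 2x + 1
w = torch.tensor(2.0)
b = torch.tensor(1.0)
print(forward(torch.tensor([[1.0], [2.0]])))  # tensor([[3.], [5.]])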
Define the cost or criterion function:
# Define the cost function
def criterion(yhat, y):
return torch.mean((yhat - y) ** 2)
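This criterion is the mean squared error. As an illustrative sanity check (not part of the original lab), it agrees with PyTorch's built-in nn.MSELoss:
# Illustrative check: criterion matches torch.nn.MSELoss
import torch.nn as nn
yhat_demo = torch.tensor([1.0, 2.0, 3.0])
y_demo = torch.tensor([1.5, 2.0, 2.0])
print(criterion(yhat_demo, y_demo))     # tensor(0.4167)
print(nn.MSELoss()(yhat_demo, y_demo))  # same value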
Create a plot_error_surfaces object to visualize the data space and the parameter space during training:
# Create a plot_error_surfaces object.
get_surface = plot_error_surfaces(15, 13, X, Y, 30)
Train the Model: Batch Gradient Descent (BGD)
Define the train_model_BGD function:
# Define the function for training the model
w = torch.tensor(-15.0, requires_grad = True)
b = torch.tensor(-10.0, requires_grad = True)
lr = 0.1
LOSS_BGD = []

def train_model_BGD(epochs):
    for epoch in range(epochs):
        Yhat = forward(X)
        loss = criterion(Yhat, Y)
        LOSS_BGD.append(loss)
        get_surface.set_para_loss(w.data.tolist(), b.data.tolist(), loss.tolist())
        get_surface.plot_ps()
        loss.backward()
        w.data = w.data - lr * w.grad.data
        b.data = b.data - lr * b.grad.data
        w.grad.data.zero_()
        b.grad.data.zero_()
Run 10 epochs of batch gradient descent. (Known plotting bug: the data space is one iteration ahead of the parameter space.)
# Run train_model_BGD with 10 iterations
train_model_BGD(10)
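For reference, the same manual update can be expressed with PyTorch's built-in optimizer. The following is a minimal sketch, not part of the original lab; it uses separate illustrative parameters w2 and b2 so it does not disturb the training runs below:
# Sketch: the manual update expressed via torch.optim.SGD (w2 and b2 are illustrative)
w2 = torch.tensor(-15.0, requires_grad = True)
b2 = torch.tensor(-10.0, requires_grad = True)
optimizer = torch.optim.SGD([w2, b2], lr = 0.1)

for epoch in range(10):
    loss = criterion(w2 * X + b2, Y)  # forward pass with the sketch parameters
    optimizer.zero_grad()             # same role as w.grad.data.zero_()
    loss.backward()                   # compute gradients
    optimizer.step()                  # apply parameter = parameter - lr * gradient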
Stochastic Gradient Descent (SGD) with Dataset DataLoader
Create a plot_error_surfaces object to visualize the data space and the parameter space during training:
# Create a plot_error_surfaces object.
get_surface = plot_error_surfaces(15, 13, X, Y, 30, go = False)
Import the Dataset and DataLoader classes:
# Import libraries
from torch.utils.data import Dataset, DataLoader
Create a Data class:
# Create class Data
class Data(Dataset):
# Constructor
def __init__(self):
self.x = torch.arange(-3, 3, 0.1).view(-1, 1)
self.y = 1 * self.x - 1
self.len = self.x.shape[0]
# Getter
def __getitem__(self, index):
return self.x[index], self.y[index]
# Get length
def __len__(self):
return self.len
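Because Data implements __getitem__ and __len__, a Data object supports indexing and len() directly. A quick illustrative check (not part of the original lab):
# Illustrative check: the dataset supports len() and indexing
dataset_demo = Data()
print(len(dataset_demo))  # 60 samples
x0, y0 = dataset_demo[0]
print(x0, y0)             # first sample and its target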
Create a dataset object and a dataloader object:
# Create Data object and DataLoader object
dataset = Data()
trainloader = DataLoader(dataset = dataset, batch_size = 1)
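With batch_size = 1, each iteration of the loader yields one sample, so a single epoch performs 60 parameter updates. An illustrative peek at the loader (not part of the original lab):
# Illustrative peek: each batch holds a single (x, y) pair
x_batch, y_batch = next(iter(trainloader))
print(x_batch.shape, y_batch.shape)  # torch.Size([1, 1]) torch.Size([1, 1])
print(len(trainloader))              # 60 batches per epoch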
Define the train_model_SGD function for training the model:
# Define train_model_SGD function
w = torch.tensor(-15.0, requires_grad = True)
b = torch.tensor(-10.0, requires_grad = True)
LOSS_SGD = []
lr = 0.1

def train_model_SGD(epochs):
    for epoch in range(epochs):
        Yhat = forward(X)
        get_surface.set_para_loss(w.data.tolist(), b.data.tolist(), criterion(Yhat, Y).tolist())
        get_surface.plot_ps()
        LOSS_SGD.append(criterion(forward(X), Y).tolist())
        for x, y in trainloader:
            yhat = forward(x)
            loss = criterion(yhat, y)
            get_surface.set_para_loss(w.data.tolist(), b.data.tolist(), loss.tolist())
            loss.backward()
            w.data = w.data - lr * w.grad.data
            b.data = b.data - lr * b.grad.data
            w.grad.data.zero_()
            b.grad.data.zero_()
        get_surface.plot_ps()
Run 10 epochs of stochastic gradient descent. (Same known plotting bug: the data space is one iteration ahead of the parameter space.)
# Run train_model_SGD with 10 iterations
train_model_SGD(10)
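Because SGD updates on one sample at a time, its loss curve will be noisier than batch gradient descent's. Optionally, inspect the learned parameters after training (exact values depend on the run):
# Optional check: inspect the learned slope and bias
print(w.data, b.data)  # should be approaching the true values 1 and -1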
Mini-Batch Gradient Descent: Batch Size Equals 5
Create a plot_error_surfaces object to visualize the data space and the parameter space during training:
# Create a plot_error_surfaces object.
get_surface = plot_error_surfaces(15, 13, X, Y, 30, go = False)
Create a Data object and a DataLoader object where the batch size equals 5:
# Create DataLoader object and Data object
dataset = Data()
trainloader = DataLoader(dataset = dataset, batch_size = 5)
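With 60 samples and batch_size = 5, each epoch now performs 12 updates, each averaging the gradient over 5 samples. An illustrative check (not part of the original lab):
# Illustrative check: 60 samples with batch size 5 gives 12 batches per epoch
print(len(trainloader))  # 12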
Define the train_model_Mini5 function to train the model:
# Define train_model_Mini5 function
w = torch.tensor(-15.0, requires_grad = True)
b = torch.tensor(-10.0, requires_grad = True)
LOSS_MINI5 = []
lr = 0.1

def train_model_Mini5(epochs):
    for epoch in range(epochs):
        Yhat = forward(X)
        get_surface.set_para_loss(w.data.tolist(), b.data.tolist(), criterion(Yhat, Y).tolist())
        get_surface.plot_ps()
        LOSS_MINI5.append(criterion(forward(X), Y).tolist())
        for x, y in trainloader:
            yhat = forward(x)
            loss = criterion(yhat, y)
            get_surface.set_para_loss(w.data.tolist(), b.data.tolist(), loss.tolist())
            loss.backward()
            w.data = w.data - lr * w.grad.data
            b.data = b.data - lr * b.grad.data
            w.grad.data.zero_()
            b.grad.data.zero_()
Run 10 epochs of mini-batch gradient descent. (Same known plotting bug: the data space is one iteration ahead of the parameter space.)
# Run train_model_Mini5 with 10 iterations.
train_model_Mini5(10)
Mini-Batch Gradient Descent: Batch Size Equals 10
Create a plot_error_surfaces object to visualize the data space and the parameter space during training:
# Create a plot_error_surfaces object.
get_surface = plot_error_surfaces(15, 13, X, Y, 30, go = False)
Create a Data object and a DataLoader object where the batch size equals 10:
# Create DataLoader object
dataset = Data()
trainloader = DataLoader(dataset = dataset, batch_size = 10)
Define the train_model_Mini10 function for training the model:
# Define train_model_Mini10 function
w = torch.tensor(-15.0, requires_grad = True)
b = torch.tensor(-10.0, requires_grad = True)
LOSS_MINI10 = []
lr = 0.1

def train_model_Mini10(epochs):
    for epoch in range(epochs):
        Yhat = forward(X)
        get_surface.set_para_loss(w.data.tolist(), b.data.tolist(), criterion(Yhat, Y).tolist())
        get_surface.plot_ps()
        LOSS_MINI10.append(criterion(forward(X), Y).tolist())
        for x, y in trainloader:
            yhat = forward(x)
            loss = criterion(yhat, y)
            get_surface.set_para_loss(w.data.tolist(), b.data.tolist(), loss.tolist())
            loss.backward()
            w.data = w.data - lr * w.grad.data
            b.data = b.data - lr * b.grad.data
            w.grad.data.zero_()
            b.grad.data.zero_()
Run 10 epochs of mini-batch gradient descent. (Same known plotting bug: the data space is one iteration ahead of the parameter space.)
# Run train_model_Mini10 with 10 iterations.
train_model_Mini10(10)
Plot the loss for each epoch:
# Convert the stored BGD losses from tensors to Python floats
LOSS_BGD_ = [loss.item() for loss in LOSS_BGD]

# Plot out the LOSS for each method
plt.plot(LOSS_BGD_, label = "Batch Gradient Descent")
plt.plot(LOSS_SGD, label = "Stochastic Gradient Descent")
plt.plot(LOSS_MINI5, label = "Mini-Batch Gradient Descent, Batch size: 5")
plt.plot(LOSS_MINI10, label = "Mini-Batch Gradient Descent, Batch size: 10")
plt.legend()
plt.show()
Practice
Perform mini-batch gradient descent with a batch size of 20. Store the total loss for each epoch in the list LOSS_MINI20.
# Practice: Perform mini-batch gradient descent with a batch size of 20.
dataset = Data()
trainloader = DataLoader(dataset = dataset, batch_size = 20)
w = torch.tensor(-15.0, requires_grad = True)
b = torch.tensor(-10.0, requires_grad = True)

LOSS_MINI20 = []
lr = 0.1

def my_train_model(epochs):
    for epoch in range(epochs):
        Yhat = forward(X)
        get_surface.set_para_loss(w.data.tolist(), b.data.tolist(), criterion(Yhat, Y).tolist())
        get_surface.plot_ps()
        LOSS_MINI20.append(criterion(forward(X), Y).tolist())
        for x, y in trainloader:
            yhat = forward(x)
            loss = criterion(yhat, y)
            get_surface.set_para_loss(w.data.tolist(), b.data.tolist(), loss.tolist())
            loss.backward()
            w.data = w.data - lr * w.grad.data
            b.data = b.data - lr * b.grad.data
            w.grad.data.zero_()
            b.grad.data.zero_()
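To try it out, run the function for 10 epochs like the earlier trainers (one possible way to complete the practice):
# Run the practice trainer for 10 epochs
my_train_model(10)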
Double-click here for the solution.
Plot a graph that shows the LOSS results for all the methods.
# Practice: Plot a graph to show all the LOSS functions
# Type your code here
Double-click here for the solution.
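For reference, here is one possible solution sketch, assuming all five loss lists were populated by the runs above:
# Possible solution sketch: plot every stored loss curve on the same axes
plt.plot(LOSS_BGD_, label = "Batch Gradient Descent")
plt.plot(LOSS_SGD, label = "Stochastic Gradient Descent")
plt.plot(LOSS_MINI5, label = "Mini-Batch Gradient Descent, Batch size: 5")
plt.plot(LOSS_MINI10, label = "Mini-Batch Gradient Descent, Batch size: 10")
plt.plot(LOSS_MINI20, label = "Mini-Batch Gradient Descent, Batch size: 20")
plt.xlabel('epoch')
plt.ylabel('Total loss (cost)')
plt.legend()
plt.show()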