# Import the libraries we need for the lab
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import Dataset, DataLoader
torch.manual_seed(0)
Using Dropout in Regression
Objective for this Notebook
- Create the Model and Cost Function the PyTorch way.
- Learn Batch Gradient Descent.
Table of Contents
- Make Some Data
- Create the Model, Optimizer, and Total Loss Function (Cost)
- Train the Model via Batch Gradient Descent
In this lab, you will see how adding dropout to your model will decrease overfitting.
Estimated Time Needed: 20 min
Preparation
We’ll need the libraries imported at the top of this notebook.
Make Some Data
Create a polynomial dataset class:
# Create Data object
class Data(Dataset):

    # Constructor
    def __init__(self, N_SAMPLES=40, noise_std=1, train=True):
        self.x = torch.linspace(-1, 1, N_SAMPLES).view(-1, 1)
        self.f = self.x ** 2
        self.len = N_SAMPLES
        if train != True:
            # Use a different seed for the validation noise, then restore the training seed
            torch.manual_seed(1)
            self.y = self.f + noise_std * torch.randn(self.f.size())
            self.y = self.y.view(-1, 1)
            torch.manual_seed(0)
        else:
            self.y = self.f + noise_std * torch.randn(self.f.size())
            self.y = self.y.view(-1, 1)

    # Getter
    def __getitem__(self, index):
        return self.x[index], self.y[index]

    # Get Length
    def __len__(self):
        return self.len

    # Plot the data
    def plot(self):
        plt.figure(figsize=(6.1, 10))
        plt.scatter(self.x.numpy(), self.y.numpy(), label="Samples")
        plt.plot(self.x.numpy(), self.f.numpy(), label="True Function", color='orange')
        plt.xlabel("x")
        plt.ylabel("y")
        plt.xlim((-1, 1))
        plt.ylim((-2, 2.5))
        plt.legend(loc="best")
        plt.show()
Create a dataset object:
# Create the dataset object and plot the dataset
data_set = Data()
data_set.plot()
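Because Data subclasses Dataset, it works with len(), indexing, and (if you choose to batch the data) DataLoader. Here is a quick, optional sanity check; the printed values depend on the random noise:
# Optional sanity check of the Dataset interface
print(len(data_set))   # number of samples (40 by default)
x0, y0 = data_set[0]   # first (x, y) pair via __getitem__
print(x0, y0)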
Get some validation data:
# Create validation dataset object
validation_set = Data(train=False)
Create the Model, Optimizer, and Total Loss Function (Cost)
Create a custom module with three layers. in_size is the size of the input features, n_hidden is the size of the hidden layers, out_size is the size of the output, and p is the dropout probability. The default is 0, which means no dropout.
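Before building the module, it may help to see what nn.Dropout does on its own. The sketch below (the input tensor is just an illustration) shows that in training mode each element is zeroed with probability p and the survivors are scaled by 1/(1-p), while in evaluation mode the layer is a pass-through:
# Minimal sketch of nn.Dropout behavior (illustrative input)
drop = nn.Dropout(p=0.5)
drop.train()     # training mode: dropout is active
v = torch.ones(1, 8)
print(drop(v))   # about half the entries are 0; the rest are scaled to 2.0
drop.eval()      # evaluation mode: dropout does nothing
print(drop(v))   # all ones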
# Create the class for model
class Net(nn.Module):

    # Constructor
    def __init__(self, in_size, n_hidden, out_size, p=0):
        super(Net, self).__init__()
        self.drop = nn.Dropout(p=p)
        self.linear1 = nn.Linear(in_size, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, out_size)

    def forward(self, x):
        x = F.relu(self.drop(self.linear1(x)))
        x = F.relu(self.drop(self.linear2(x)))
        x = self.linear3(x)
        return x
Create two model objects: model has no dropout, and model_drop has a dropout probability of 0.5:
# Create the model objects
model = Net(1, 300, 1)
model_drop = Net(1, 300, 1, p=0.5)
Train the Model via Batch Gradient Descent
Set the model using dropout to training mode; this is the default mode, but it’s good practice.
# Set the model to train mode
model_drop.train()
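You can verify the mode through the module's training attribute. In training mode, repeated forward passes give different outputs because a different set of units is dropped each time; x_sample below is just an illustrative input:
# In training mode, predictions through the dropout model are stochastic
print(model_drop.training)  # True
x_sample = torch.tensor([[0.5]])  # illustrative input
print(model_drop(x_sample), model_drop(x_sample))  # two different outputs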
Train the model by using the Adam optimizer (see the unit on other optimizers). Use the mean squared error loss:
# Set the optimizer and criterion function
optimizer_ofit = torch.optim.Adam(model.parameters(), lr=0.01)
optimizer_drop = torch.optim.Adam(model_drop.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()
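Any optimizer from torch.optim could be swapped in here, for example plain SGD. This is only a sketch with an illustrative learning rate, and it is not used in the rest of the lab:
# Illustrative alternative: plain SGD instead of Adam (not used below)
optimizer_sgd = torch.optim.SGD(model.parameters(), lr=0.1)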
Initialize a dictionary that stores the training and validation loss for each model:
# Initialize the dict to contain the loss results
LOSS = {}
LOSS['training data no dropout'] = []
LOSS['validation data no dropout'] = []
LOSS['training data dropout'] = []
LOSS['validation data dropout'] = []
Run 500 iterations of batch gradient descent:
# Train the model
epochs = 500

def train_model(epochs):
    for epoch in range(epochs):
        # Forward pass on the training data for both models
        yhat = model(data_set.x)
        yhat_drop = model_drop(data_set.x)
        loss = criterion(yhat, data_set.y)
        loss_drop = criterion(yhat_drop, data_set.y)

        # Store the loss for both the training and validation data for both models
        LOSS['training data no dropout'].append(loss.item())
        LOSS['validation data no dropout'].append(criterion(model(validation_set.x), validation_set.y).item())
        LOSS['training data dropout'].append(loss_drop.item())
        # Switch to evaluation mode so dropout is disabled for the validation loss
        model_drop.eval()
        LOSS['validation data dropout'].append(criterion(model_drop(validation_set.x), validation_set.y).item())
        model_drop.train()

        optimizer_ofit.zero_grad()
        optimizer_drop.zero_grad()
        loss.backward()
        loss_drop.backward()
        optimizer_ofit.step()
        optimizer_drop.step()

train_model(epochs)
Set the model with dropout to evaluation mode:
# Set the model with dropout to evaluation mode
model_drop.eval()
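In evaluation mode the dropout layer is disabled, so predictions become deterministic. A quick check (x_sample is an illustrative input):
# In eval mode, repeated predictions are identical
x_sample = torch.tensor([[0.5]])  # illustrative input
print(torch.equal(model_drop(x_sample), model_drop(x_sample)))  # True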
Make a prediction by using both models:
# Make the prediction
yhat = model(data_set.x)
yhat_drop = model_drop(data_set.x)
Plot predictions of both models. Compare them to the training points and the true function:
# Plot the predictions for both models
plt.figure(figsize=(6.1, 10))
plt.scatter(data_set.x.numpy(), data_set.y.numpy(), label="Samples")
plt.plot(data_set.x.numpy(), data_set.f.numpy(), label="True function", color='orange')
plt.plot(data_set.x.numpy(), yhat.detach().numpy(), label='no dropout', c='r')
plt.plot(data_set.x.numpy(), yhat_drop.detach().numpy(), label="dropout", c='g')
plt.xlabel("x")
plt.ylabel("y")
plt.xlim((-1, 1))
plt.ylim((-2, 2.5))
plt.legend(loc="best")
plt.show()
You can see that the model using dropout does a better job of tracking the function that generated the data.
Plot the loss for the training and validation data of both models. The log of the loss is plotted to make the difference more apparent:
# Plot the loss
plt.figure(figsize=(6.1, 10))
for key, value in LOSS.items():
    plt.plot(np.log(np.array(value)), label=key)
plt.legend()
plt.xlabel("iterations")
plt.ylabel("Log of cost or total loss")
You can see that the model without dropout achieves a lower loss on the training data but a higher loss on the validation data, which suggests overfitting. The model using dropout does slightly worse on the training data but performs better on the validation data.
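To quantify the gap, you can also print the final recorded loss for each curve; this is a small sketch that reads from the LOSS dictionary built during training:
# Print the final training and validation loss for both models
for key, value in LOSS.items():
    print(f"{key}: {value[-1]:.4f}")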