DataLoaders
author: Juma Shafara date: "2024-09-04" title: DataLoaders Practice keywords: [Training Two Parameter, Mini-Batch Gradient Descent, Training Two Parameter Mini-Batch Gradient Descent] description: In this lab, you will wrap a tabular dataset in a custom PyTorch Dataset and iterate over it in shuffled mini-batches by using a DataLoader.¶
In [1]:
Copied!
import pandas as pd
import dataidea_science as ds
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import pandas as pd import dataidea_science as ds import torch from torch.utils.data import Dataset from torch.utils.data import DataLoader
In [2]:
Copied!
# Load the 'boston' dataset through the dataidea_science helper. The BostonDataset
# class defined later reads this module-level name and calls .drop('MEDV', axis=1),
# .MEDV, .values and .shape on it, so it is presumably a pandas DataFrame with a
# 'MEDV' column -- confirm against the dataidea_science documentation.
boston_ = ds.loadDataset('boston')
boston_ = ds.loadDataset('boston')
Custom Dataset¶
In [3]:
Copied!
class BostonDataset(Dataset):
    """Map-style dataset exposing a tabular frame as (features, target) tensor pairs.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Table holding the feature columns and the target column. When omitted,
        the notebook-level ``boston_`` frame is used, so ``BostonDataset()``
        keeps working exactly as before.
    target : str, default 'MEDV'
        Name of the column returned as the label; every other column is
        treated as a feature.

    Attributes
    ----------
    data : the source frame.
    x : float32 tensor of features, one row per sample.
    y : float32 tensor of targets, one entry per sample.
    samples : number of rows in ``data``.
    """

    def __init__(self, data=None, target='MEDV'):
        # Fall back to the notebook global only when no frame is supplied, so
        # the class can be reused (and tested) without hidden kernel state.
        self.data = boston_ if data is None else data
        # Everything except the target column becomes the feature matrix.
        self.x = torch.tensor(self.data.drop(target, axis=1).values, dtype=torch.float32)
        self.y = torch.tensor(self.data[target].values, dtype=torch.float32)
        self.samples = self.data.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples, which enables ``len(dataset)``."""
        return self.samples
class BostonDataset(Dataset): def __init__(self): # define our dataset self.data = boston_ self.x = torch.tensor(self.data.drop('MEDV', axis=1).values, dtype=torch.float32) self.y = torch.tensor(self.data.MEDV.values, dtype=torch.float32) self.samples = self.data.shape[0] def __getitem__(self, index): # access samples return self.x[index], self.y[index] def __len__(self): # len(dataset) return self.samples
In [4]:
Copied!
# Build the dataset, then inspect a single sample and the overall size.
boston_dataset = BostonDataset()

# Indexing goes through __getitem__ and yields a (features, outcome) pair.
row_1 = boston_dataset[1]
features_1, outcome_1 = row_1
print('Row 1 Features:', features_1)
print('Row 1 Outcome:', outcome_1)

# len() goes through __len__.
length_ = len(boston_dataset)
print('Total Samples: ', length_)
boston_dataset = BostonDataset() row_1 = boston_dataset[1] print('Row 1 Features:', row_1[0]) print('Row 1 Outcome:', row_1[1]) length_ = len(boston_dataset) print('Total Samples: ', length_)
Row 1 Features: tensor([2.7310e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01, 6.4210e+00, 7.8900e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02, 1.7800e+01, 3.9690e+02, 9.1400e+00]) Row 1 Outcome: tensor(21.6000) Total Samples: 506
DataLoaders¶
In [10]:
Copied!
# Wrap the dataset in a DataLoader that yields shuffled mini-batches of 3 samples.
# A seeded generator drives the shuffle, so re-running this cell on a fresh
# kernel ("Restart & Run All") produces the same batch order every time.
# NOTE(review): num_workers=2 loads batches in worker subprocesses. On platforms
# whose multiprocessing start method is 'spawn', a dataset class defined in a
# notebook cell may not be importable by the workers -- use num_workers=0 there.
boston_dataloader = DataLoader(
    dataset=boston_dataset,
    batch_size=3,
    shuffle=True,
    num_workers=2,
    generator=torch.Generator().manual_seed(42),
)
boston_dataloader = DataLoader(dataset=boston_dataset, batch_size=3, shuffle=True, num_workers=2)
In [12]:
Copied!
# Peek at the first mini-batch only, rather than printing a whole epoch.
for batch_no, batch in enumerate(boston_dataloader):
    x, y = batch  # each batch is a (features, labels) pair
    print(
        f'Batch: {batch_no}:',
        f'Data: {x}',
        f'Labels: {y}',
        sep='\n',
    )
    # Stop right after the first batch has been shown.
    if batch_no == 0:
        break
for batch_no, (x, y) in enumerate(boston_dataloader): print(f'Batch: {batch_no}:') print(f'Data: {x}') print(f'Labels: {y}') if batch_no == 0: break
Batch: 0: Data: tensor([[2.9819e-01, 0.0000e+00, 6.2000e+00, 0.0000e+00, 5.0400e-01, 7.6860e+00, 1.7000e+01, 3.3751e+00, 8.0000e+00, 3.0700e+02, 1.7400e+01, 3.7751e+02, 3.9200e+00], [6.8012e+00, 0.0000e+00, 1.8100e+01, 0.0000e+00, 7.1300e-01, 6.0810e+00, 8.4400e+01, 2.7175e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.9690e+02, 1.4700e+01], [1.5874e+01, 0.0000e+00, 1.8100e+01, 0.0000e+00, 6.7100e-01, 6.5450e+00, 9.9100e+01, 1.5192e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.9690e+02, 2.1080e+01]]) Labels: tensor([46.7000, 20.0000, 10.9000])