Transforms
author: Juma Shafara
date: "2024-09-04"
title: Transform Practice
keywords: [Training Two Parameter, Mini-Batch Gradient Descent, Training Two Parameter Mini-Batch Gradient Descent]
description: In this lab, you will build a custom PyTorch Dataset, batch it with a DataLoader, and apply a transform that converts each sample to tensors.
Transforms¶
In [1]:
Copied!
import dataidea_science as ds
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import dataidea_science as ds import torch from torch.utils.data import Dataset from torch.utils.data import DataLoader
In [2]:
Copied!
# Load the Boston housing table once; the cells below read its 'MEDV' column
# as the outcome and treat every other column as a feature.
boston_ = ds.loadDataset('boston')
Custom Dataset¶
In [3]:
Copied!
class BostonDataset(Dataset):
    """Dataset over the Boston housing table held in the global ``boston_``.

    Every column except ``'MEDV'`` is used as a feature and ``'MEDV'`` is the
    outcome. Indexing returns a ``(features, outcome)`` pair, passed through
    ``transform`` first when one was supplied.
    """

    def __init__(self, transform=None):
        """Split ``boston_`` into feature and outcome arrays.

        Args:
            transform: Optional callable that receives the
                ``(features, outcome)`` tuple built in ``__getitem__`` and
                returns the object to hand back instead. ``None`` (the
                default) returns the raw tuple.
        """
        # The table is read from the module-level ``boston_`` variable.
        self.data = boston_
        self.x = self.data.drop('MEDV', axis=1).values
        self.y = self.data.MEDV.values
        self.samples = self.data.shape[0]
        self.transform = transform

    def __getitem__(self, index):
        """Return the sample at ``index``, transformed if a transform is set."""
        sample = (self.x[index], self.y[index])
        # Compare against None instead of relying on truthiness, so that a
        # callable object that happens to evaluate as falsy is still applied.
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        """Return the number of rows, which is what ``len(dataset)`` reports."""
        return self.samples
In [4]:
Copied!
# Build the dataset without a transform, so indexing returns the raw
# (features, outcome) pair.
boston_dataset = BostonDataset()

# Indexing goes through BostonDataset.__getitem__.
row_1 = boston_dataset[1]
print('Row 1 Features:', row_1[0])
print('Row 1 Outcome:', row_1[1])

# len() goes through BostonDataset.__len__.
length_ = len(boston_dataset)
print('Total Samples: ', length_)
Row 1 Features: [2.7310e-02 0.0000e+00 7.0700e+00 0.0000e+00 4.6900e-01 6.4210e+00 7.8900e+01 4.9671e+00 2.0000e+00 2.4200e+02 1.7800e+01 3.9690e+02 9.1400e+00] Row 1 Outcome: 21.6 Total Samples: 506
DataLoader¶
In [5]:
Copied!
# Wrap the dataset in a DataLoader that yields shuffled mini-batches.
boston_dataloader = DataLoader(
    dataset=boston_dataset,
    batch_size=3,    # three samples per batch
    shuffle=True,    # draw the samples in a new random order on each pass
    num_workers=2,   # load batches in two worker subprocesses
)
In [6]:
Copied!
# Preview the loader's output: each iteration yields one (data, labels) batch.
for batch_no, (x, y) in enumerate(boston_dataloader):
    print(f'Batch: {batch_no}:')
    print(f'Data: {x}')
    print(f'Labels: {y}')
    # One batch is enough for inspection, so stop after the first.
    if batch_no == 0:
        break
Batch: 0: Data: tensor([[9.7617e-01, 0.0000e+00, 2.1890e+01, 0.0000e+00, 6.2400e-01, 5.7570e+00, 9.8400e+01, 2.3460e+00, 4.0000e+00, 4.3700e+02, 2.1200e+01, 2.6276e+02, 1.7310e+01], [2.9090e-01, 0.0000e+00, 2.1890e+01, 0.0000e+00, 6.2400e-01, 6.1740e+00, 9.3600e+01, 1.6119e+00, 4.0000e+00, 4.3700e+02, 2.1200e+01, 3.8808e+02, 2.4160e+01], [5.5007e-01, 2.0000e+01, 3.9700e+00, 0.0000e+00, 6.4700e-01, 7.2060e+00, 9.1600e+01, 1.9301e+00, 5.0000e+00, 2.6400e+02, 1.3000e+01, 3.8789e+02, 8.1000e+00]], dtype=torch.float64) Labels: tensor([15.6000, 14.0000, 36.5000], dtype=torch.float64)
Transformer¶
In [8]:
Copied!
class TensorTransformer:
    """Callable transform that converts a ``(features, outcome)`` pair to tensors."""

    def __init__(self, dtype=torch.float32):
        """Store the dtype used for both output tensors.

        Args:
            dtype: ``torch.dtype`` given to ``torch.tensor`` for the features
                and the outcome. Defaults to ``torch.float32``.
        """
        self.dtype = dtype

    def __call__(self, sample):
        """Convert one sample to a pair of tensors.

        Args:
            sample: Indexable object whose item 0 holds the features and
                item 1 holds the outcome.

        Returns:
            A ``(x_tensor, y_tensor)`` tuple, both built with ``self.dtype``.
        """
        x_tensor = torch.tensor(data=sample[0], dtype=self.dtype)
        y_tensor = torch.tensor(data=sample[1], dtype=self.dtype)
        return x_tensor, y_tensor
In [9]:
Copied!
# Rebuild the dataset with the tensor transform attached, so that indexing
# returns tensors instead of the raw values.
boston_dataset = BostonDataset(transform=TensorTransformer())

row_1 = boston_dataset[1]
print('Row 1 Features:', row_1[0])
print('Row 1 Outcome:', row_1[1])
Row 1 Features: tensor([2.7310e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01, 6.4210e+00, 7.8900e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02, 1.7800e+01, 3.9690e+02, 9.1400e+00]) Row 1 Outcome: tensor(21.6000)
In [ ]:
Copied!