DataLoaders

author: Juma Shafara date: "2024-09-04" title: DataLoaders Practice keywords: [Training Two Parameter, Mini-Batch Gradient Descent, Training Two Parameter Mini-Batch Gradient Descent] description: In this lab, you will review how to make a prediction in several different ways by using PyTorch.

Photo by DATAIDEA

import pandas as pd
import dataidea_science as ds
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Load the 'boston' dataset through dataidea_science. The code below treats it
# as a table with a 'MEDV' column (see BostonDataset.__init__).
boston_ = ds.loadDataset('boston')

Custom Dataset

class BostonDataset(Dataset):
    """Map-style dataset exposing a table as (features, target) tensors.

    Args:
        data: Table to wrap. It must support ``drop(column, axis=1)``,
            attribute-free column lookup (``data[column]``), ``.values`` and
            ``.shape`` -- e.g. a pandas DataFrame. When ``None`` (the
            default) the module-level ``boston_`` table is used, so
            ``BostonDataset()`` behaves exactly as before.
        target: Name of the outcome column. All remaining columns are used
            as features. Defaults to ``'MEDV'``.

    Attributes are kept from the original implementation:
    ``data`` (the wrapped table), ``x`` (float32 feature tensor),
    ``y`` (float32 target tensor) and ``samples`` (number of rows).
    """

    def __init__(self, data=None, target='MEDV'):
        super().__init__()

        # Fall back to the module-level table only when the caller did not
        # supply one; the lookup happens at call time, not at definition.
        if data is None:
            data = boston_

        self.data = data
        # Features: every column except the target, one row per sample.
        self.x = torch.tensor(data.drop(target, axis=1).values,
                              dtype=torch.float32)
        # Target: a single value per sample.
        self.y = torch.tensor(data[target].values, dtype=torch.float32)
        self.samples = data.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self) -> int:
        """Return the number of samples (rows of the wrapped table)."""
        return self.samples

# Build the dataset with no arguments (it wraps the module-level `boston_` table).
boston_dataset = BostonDataset()

# Indexing goes through BostonDataset.__getitem__, which returns an
# (x, y) pair; index 1 is the second sample because indexing is zero-based.
row_1 = boston_dataset[1]
print('Row 1 Features:', row_1[0])
print('Row 1 Outcome:', row_1[1])

# len() goes through BostonDataset.__len__, i.e. the stored sample count.
length_ = len(boston_dataset)
print('Total Samples: ', length_)

Row 1 Features: tensor([2.7310e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01, 6.4210e+00,
        7.8900e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02, 1.7800e+01, 3.9690e+02,
        9.1400e+00])
Row 1 Outcome: tensor(21.6000)
Total Samples:  506

DataLoaders

# Wrap the dataset in a DataLoader configured for mini-batches of 3 samples,
# shuffled order, and 2 worker processes for loading.
# NOTE(review): with num_workers > 0, platforms that start worker processes
# via "spawn" (e.g. Windows) may need this code under an
# `if __name__ == '__main__':` guard -- confirm for your environment.
boston_dataloader = DataLoader(dataset=boston_dataset,
                               batch_size=3,
                               shuffle=True,
                               num_workers=2)

# Each iteration yields one (x, y) batch; enumerate supplies the batch number.
for batch_no, (x, y) in enumerate(boston_dataloader):
    print(f'Batch: {batch_no}:')
    print(f'Data: {x}')
    print(f'Labels: {y}')

    # Only the first batch is shown: batch_no starts at 0, so stop right away.
    if batch_no == 0:
        break

Batch: 0:
Data: tensor([[2.9819e-01, 0.0000e+00, 6.2000e+00, 0.0000e+00, 5.0400e-01, 7.6860e+00,
         1.7000e+01, 3.3751e+00, 8.0000e+00, 3.0700e+02, 1.7400e+01, 3.7751e+02,
         3.9200e+00],
        [6.8012e+00, 0.0000e+00, 1.8100e+01, 0.0000e+00, 7.1300e-01, 6.0810e+00,
         8.4400e+01, 2.7175e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.9690e+02,
         1.4700e+01],
        [1.5874e+01, 0.0000e+00, 1.8100e+01, 0.0000e+00, 6.7100e-01, 6.5450e+00,
         9.9100e+01, 1.5192e+00, 2.4000e+01, 6.6600e+02, 2.0200e+01, 3.9690e+02,
         2.1080e+01]])
Labels: tensor([46.7000, 20.0000, 10.9000])