The code below is inspired by the workings of the wonderful book Dive into Deep Learning; its authors created their own d2l package that wraps many of the complex implementations needed for training a model. This allows trying many different approaches to test assumptions and theories.
Show Code
# Paste artifact repaired: the three import statements were fused onto one line.
import torch
from torch import nn
from d2l import torch as d2l
c:\Users\user\Documents\GitHub\simpe-AI\d2l_env\lib\site-packages\torch\cuda\__init__.py:52: UserWarning: CUDA initialization: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx (Triggered internally at ..\c10\cuda\CUDAFunctions.cpp:100.)
return torch._C._cuda_getDeviceCount() > 0
# Paste artifact repaired: these statements were fused onto a single line.
# Build the trainer and data, then train a BatchNorm LeNet on FashionMNIST.
trainer = d2l.Trainer(max_epochs=10, num_gpus=1)
data = d2l.FashionMNIST(batch_size=128)
data.num_workers = 0  # single-process data loading
model2 = BNLeNet(lr=0.1)
# Pass a full batch instead of a single sample
batch = next(iter(data.get_dataloader(True)))[0]  # shape: [128, 1, 28, 28]
model2.apply_init([batch], init_cnn)
trainer.fit(model2, data)
Notice the huge difference in accuracy and loss achieved by introducing BatchNorm layers, compared to the first model, which was trained without batch normalization.
class Trainer(d2l.HyperParameters):
    """The base class for training models with data.

    Consolidated from the pasted d2l definitions: the original class body
    defined ``__init__``, ``prepare_batch``, ``prepare_model``, and
    ``fit_epoch`` twice. In a Python class body only the *last* definition
    of a name survives, so the earlier versions (including the dead
    ``assert num_gpus == 0``) were shadowed dead code and are removed here.
    The effective behavior of the class is unchanged.

    Defined in :numref:`subsec_oo-design-models`"""

    def __init__(self, max_epochs, num_gpus=0, gradient_clip_val=0):
        """Defined in :numref:`sec_use_gpu`

        Args:
            max_epochs: number of passes over the training data.
            num_gpus: requested GPU count; capped by what is available.
            gradient_clip_val: if > 0, rescale gradients so their global
                L2 norm is at most this value before each optimizer step.
        """
        self.save_hyperparameters()
        # Use at most the number of GPUs actually present on this machine.
        self.gpus = [d2l.gpu(i) for i in range(min(num_gpus, d2l.num_gpus()))]

    def prepare_data(self, data):
        """Cache the data loaders and their lengths for progress tracking."""
        self.train_dataloader = data.train_dataloader()
        self.val_dataloader = data.val_dataloader()
        self.num_train_batches = len(self.train_dataloader)
        self.num_val_batches = (len(self.val_dataloader)
                                if self.val_dataloader is not None else 0)

    def prepare_model(self, model):
        """Attach the model to this trainer and move it to the first GPU, if any.

        Defined in :numref:`sec_use_gpu`"""
        model.trainer = self
        model.board.xlim = [0, self.max_epochs]
        if self.gpus:
            model.to(self.gpus[0])
        self.model = model

    def prepare_batch(self, batch):
        """Move every element of the batch to the first GPU, if any.

        Defined in :numref:`sec_use_gpu`"""
        if self.gpus:
            batch = [d2l.to(a, self.gpus[0]) for a in batch]
        return batch

    def fit(self, model, data):
        """Run the full training loop for ``max_epochs`` epochs."""
        self.prepare_data(data)
        self.prepare_model(model)
        self.optim = model.configure_optimizers()
        self.epoch = 0
        self.train_batch_idx = 0
        self.val_batch_idx = 0
        for self.epoch in range(self.max_epochs):
            self.fit_epoch()

    def fit_epoch(self):
        """One pass over the training data, then (if present) the validation data.

        Defined in :numref:`sec_linear_scratch`"""
        self.model.train()
        for batch in self.train_dataloader:
            loss = self.model.training_step(self.prepare_batch(batch))
            self.optim.zero_grad()
            # no_grad() stops backward()/step() from recording a new autograd
            # graph; the gradient computation itself is unaffected.
            with torch.no_grad():
                loss.backward()
                if self.gradient_clip_val > 0:  # To be discussed later
                    self.clip_gradients(self.gradient_clip_val, self.model)
                self.optim.step()
            self.train_batch_idx += 1
        if self.val_dataloader is None:
            return
        self.model.eval()
        for batch in self.val_dataloader:
            with torch.no_grad():
                self.model.validation_step(self.prepare_batch(batch))
            self.val_batch_idx += 1

    def clip_gradients(self, grad_clip_val, model):
        """Rescale gradients in place so their global L2 norm is at most
        ``grad_clip_val``.

        Defined in :numref:`sec_rnn-scratch`"""
        params = [p for p in model.parameters() if p.requires_grad]
        norm = torch.sqrt(sum(torch.sum((p.grad ** 2)) for p in params))
        if norm > grad_clip_val:
            for param in params:
                param.grad[:] *= grad_clip_val / norm
Comments