This notebook shows how to use PyTorch Lightning to wrap the model, train it, monitor training, validate, and visualize results.
#example
from breakhis_gradcam.data import initialize_datasets
from breakhis_gradcam.resnet import resnet18, resnet34, resnet50, resnet101, resnet152
from breakhis_gradcam.utils import get_param_lr_maps, mixup_data, setup_optimizer_and_scheduler
import os
import torch
from torch import nn
from torchvision import transforms

import pytorch_lightning as pl
from pytorch_lightning.logging.tensorboard import TensorBoardLogger

resnet_model_mapping = {
    'resnet18': resnet18,
    'resnet34': resnet34,
    'resnet50': resnet50,
    'resnet101': resnet101,
    'resnet152': resnet152
}

%load_ext tensorboard

We'll define the Lightning module below, incorporating several of the techniques seen in the previous modules, including:

  • The transforms defined as a method, so they can be tweaked via the initialization arguments of the Lightning module
#example
class LightningResNet(pl.LightningModule):
    def __init__(
        self,
        resnet_type='resnet18',
        train_ratio=0.8,
        label='tumor_class',
        criterion=['tumor_type', 'magnification'],
        resize_shape=224,
        mixup=True,
        mixup_alpha=0.4,
        tta=False,
        tta_mixing=0.6,
        batch_size=32,
        base_lr=1e-3,
        finetune_body_factor=[1e-5, 1e-2],
        num_epochs=20,
    ):
        super(LightningResNet, self).__init__()
        self.mixup, self.mixup_alpha = mixup, mixup_alpha
        self.tta, self.tta_mixing = tta, tta_mixing
        train_transform, val_transform = self.get_transforms(resize_shape, tta=tta)
        ds_mapping = initialize_datasets(
            '/share/nikola/export/dt372/BreaKHis_v1/',
            split={'train': train_ratio, 'val': 1 - train_ratio},
            label=label, criterion=criterion,
            split_transforms={'train': train_transform, 'val': val_transform}
        )
        tr_ds, val_ds = ds_mapping['train'], ds_mapping['val']
        self.tr_dl = torch.utils.data.DataLoader(tr_ds, batch_size=batch_size, shuffle=True)
        self.val_dl = torch.utils.data.DataLoader(val_ds, batch_size=batch_size)
        assert resnet_type in resnet_model_mapping, "Please specify a valid ResNet architecture."
        self.model = resnet_model_mapping[resnet_type](
            pretrained=True, num_classes=2, create_log_and_save_dirs=False
        )
        self.base_lr = base_lr
        self.num_epochs = num_epochs
        self.param_lr_maps = get_param_lr_maps(self.model, base_lr, finetune_body_factor)
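        # With mixup, keep per-sample losses (reduction='none') so the training
        # step can recombine them with the mixing weight lam; validation uses
        # the usual mean reduction.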
        self.criterion = {
            'train': nn.CrossEntropyLoss(reduction='none' if mixup else 'mean'),
            'val': nn.CrossEntropyLoss()
        }
        
    @staticmethod
    def get_tta_transforms(resize_shape, normalize_transform, n=5):
        # Maps one PIL image to an (n + 1, C, H, W) tensor: n random augmentations
        # followed by the plain resized image, each normalized afterwards.
        tta = transforms.Compose([
            transforms.RandomRotation(15),
            transforms.RandomResizedCrop((resize_shape, resize_shape)),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
            transforms.ToTensor()
        ])
        original_transform = transforms.Compose([
            transforms.Resize((resize_shape, resize_shape)),
            transforms.ToTensor()
        ])
        return transforms.Compose([
            transforms.Lambda(
                lambda image: torch.stack(
                    [tta(image) for _ in range(n)] + [original_transform(image)]
                )
            ),
            transforms.Lambda(
                lambda images: torch.stack([
                    normalize_transform(image) for image in images
                ])
            ),
        ])

    def get_transforms(self, resize_shape, tta=False, tta_n=5):
        random_resized_crop = transforms.RandomResizedCrop((resize_shape, resize_shape))
        random_horizontal_flip = transforms.RandomHorizontalFlip()
        resize = transforms.Resize((resize_shape, resize_shape))
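        # Standard ImageNet normalization statistics, matching the pretrained backbone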
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        )
        train_transforms = transforms.Compose([
            random_resized_crop, random_horizontal_flip, transforms.ToTensor(), normalize
        ])
        val_transforms = (
            self.get_tta_transforms(resize_shape, normalize, n=tta_n) if tta
            else transforms.Compose([resize, transforms.ToTensor(), normalize])
        )
        return train_transforms, val_transforms
    
    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        mixed_x, y_a, y_b, lam, mixup_criterion, mixup_acc = mixup_data(
            x, y, self.criterion['train'], alpha=self.mixup_alpha if self.mixup else 0.0
        )
        output = self.forward(mixed_x)
        prediction = torch.argmax(output, -1)
        loss = mixup_criterion(output)
        return {
            'loss': loss,
            'batch_size': len(y),
            'correct': mixup_acc(prediction),
            'log': {
                'train_loss': loss.item()
            }
        }

    def validation_step(self, batch, batch_idx):
        x, y = batch
        if self.tta:
            bs, n_aug, c, h, w = x.size()
            output = self.forward(x.view(-1, c, h, w)).view(bs, n_aug, -1)
            output = (
                ((1 - self.tta_mixing) * output[:, -1, :]) + (self.tta_mixing * output[:, :-1, :].mean(1))
            )
        else:
            output = self.forward(x)
        prediction = torch.argmax(output, -1)
        loss = self.criterion['val'](output, y)
        return {
            'loss': loss.item(),
            'batch_size': len(y),
            'correct': (prediction == y).sum().item(),
            'log': {
                'val_loss': loss.item()
            }
        }
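    # Shape note for the TTA branch above: with TTA enabled, the validation
    # transform yields an (n_aug, C, H, W) stack per image (augmented views first,
    # plain resized image last), so a batch arrives as (bs, n_aug, C, H, W). The
    # final logits are a convex combination:
    #   (1 - tta_mixing) * original_logits + tta_mixing * mean(augmented_logits)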

        
    def validation_end(self, outputs):
        # Weight each batch's mean loss by its size to get a proper sample-level average
        total_loss = sum([out['loss'] * out['batch_size'] for out in outputs])
        total_correct = sum([out['correct'] for out in outputs])
        total = sum([out['batch_size'] for out in outputs])
        return {
            'val_loss': total_loss / total,
            'val_acc': total_correct / total,
            'log': {
                'val_loss': total_loss / total,
                'val_acc': total_correct / total
            }
        }

    def configure_optimizers(self):
        optimizer, scheduler = setup_optimizer_and_scheduler(
            self.param_lr_maps, self.base_lr, self.num_epochs, len(self.tr_dl)
        )
        return [optimizer], [scheduler]

    @pl.data_loader
    def train_dataloader(self):
        return self.tr_dl
        
    @pl.data_loader
    def val_dataloader(self):
        return self.val_dl
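
The training step relies on mixup: each batch is blended with a shuffled copy of itself using a weight drawn from a Beta(α, α) distribution, and the loss is the corresponding convex combination of the losses against both label sets. The helper below is a minimal sketch of what a routine with mixup_data's call signature might compute; it is an illustration only, and the actual implementation lives in breakhis_gradcam.utils.

#example
import numpy as np

def mixup_data_sketch(x, y, criterion, alpha=0.4):
    # lam ~ Beta(alpha, alpha); alpha=0.0 degenerates to lam=1 (no mixing)
    lam = float(np.random.beta(alpha, alpha)) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]

    def mixup_criterion(output):
        # Convex combination of the per-sample losses against both label sets
        # (assumes criterion was built with reduction='none', as above)
        return (lam * criterion(output, y_a) + (1 - lam) * criterion(output, y_b)).mean()

    def mixup_acc(prediction):
        # Credit a prediction toward each label set in proportion to its weight
        return (lam * (prediction == y_a).sum().item()
                + (1 - lam) * (prediction == y_b).sum().item())

    return mixed_x, y_a, y_b, lam, mixup_criterion, mixup_acc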
model = LightningResNet(resnet_type='resnet34')
Setting up optimizer to fine-tune body with LR in range [0.00000001, 0.00001000] and head with LR 0.00100
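
For reference, the log line above reflects the discriminative learning rates produced by get_param_lr_maps: the pretrained body is fine-tuned with learning rates spanning base_lr * finetune_body_factor (here 1e-3 × [1e-5, 1e-2] = [1e-8, 1e-5]), while the freshly initialized head trains at the full base_lr of 1e-3.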

Lightning will resume from a checkpoint (usually keyed on the Slurm run ID of the notebook/process you're running in). To avoid this, you simply need to do something like the following, which sets the version number to the next highest number so that a checkpointed model state isn't reused.

from pytorch_lightning.logging import TensorBoardLogger

use_new_model_version = True

logger = TensorBoardLogger(
    save_dir=os.getcwd(),
    version=None,
    name='lightning_logs'
)
if use_new_model_version:
    logger._version = logger._get_next_version()

trainer = pl.Trainer(logger=logger, gpus=1)
INFO:root:gpu available: True, used: True
INFO:root:VISIBLE GPUS: 0
print("Logging under lightning_logs directory, under version ID %s" % trainer.logger.version)
Logging under lightning_logs directory, under version ID 270075

We can take advantage of the fact that Lightning writes TensorBoard logs automatically for us, and view the loss curves in the notebook itself!

For some context, I work on a GPU cluster, and I SSH in with port forwarding as follows (this is the body of a shell function, hence the 'local' declarations):

local port=${1-8080};
local tensorboard_port=${2-8081};
# Forward the Jupyter and TensorBoard ports from the cluster ('graphite') to localhost
ssh -NfL :${port}:127.0.0.1:${port} graphite;
ssh -NfL :${tensorboard_port}:127.0.0.1:${tensorboard_port} graphite;
# Attach to (or create) a tmux session on the cluster
ssh -t graphite 'tmux -CC new-session -A -s dev';
%tensorboard --logdir lightning_logs/ --host 127.0.0.1 --port 8081
trainer.fit(model)
INFO:root:set slurm handle signals
INFO:root:
                     Name               Type Params
0                   model             ResNet   21 M
1             model.conv1             Conv2d    9 K
2               model.bn1        BatchNorm2d  128  
3              model.relu               ReLU    0  
4           model.maxpool          MaxPool2d    0  
..                    ...                ...    ...
111   model.layer4.2.relu               ReLU    0  
112  model.layer4.2.conv2             Conv2d    2 M
113    model.layer4.2.bn2        BatchNorm2d    1 K
114         model.avgpool  AdaptiveAvgPool2d    0  
115          model.out_fc             Linear    1 K

[116 rows x 3 columns]
                                                                         
[Per-batch progress bars trimmed; the final line of each epoch is shown.]

Epoch 1: 100%|██████████| 248/248 [07:29<00:00,  1.61s/batch, batch_idx=197, gpu=0, loss=0.389, v_num=270075]
/home/dt372/anaconda2/envs/breakhis_gradcam/lib/python3.7/site-packages/pytorch_lightning/trainer/training_io.py:342: UserWarning: Did not find hyperparameters at model.hparams. Saving checkpoint without hyperparameters
  "Did not find hyperparameters at model.hparams. Saving checkpoint without"
Epoch 2: 100%|██████████| 248/248 [07:28<00:00,  1.62s/batch, batch_idx=197, gpu=0, loss=0.361, v_num=270075]
Epoch 3: 100%|██████████| 248/248 [07:28<00:00,  1.62s/batch, batch_idx=197, gpu=0, loss=0.371, v_num=270075]
Epoch 4: 100%|██████████| 248/248 [07:28<00:00,  1.62s/batch, batch_idx=197, gpu=0, loss=0.339, v_num=270075]
Epoch 5: 100%|██████████| 248/248 [07:28<00:00,  1.61s/batch, batch_idx=197, gpu=0, loss=0.336, v_num=270075]
Epoch 6: 100%|██████████| 248/248 [07:28<00:00,  1.62s/batch, batch_idx=197, gpu=0, loss=0.317, v_num=270075]
Epoch 7: 100%|██████████| 248/248 [07:29<00:00,  1.62s/batch, batch_idx=197, gpu=0, loss=0.344, v_num=270075]
INFO:root:Epoch 00007: early stopping
Epoch 7: 100%|██████████| 248/248 [07:29<00:00,  1.81s/batch, batch_idx=197, gpu=0, loss=0.344, v_num=270075]
1

Now, using this trained model, we can just as easily visualize class activation maps using the vis module.

#example
from breakhis_gradcam.data import BreaKHisDataset
from breakhis_gradcam.vis import show_heatmap_and_original

resize = transforms.Resize((224, 224))
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)
inference_transform = transforms.Compose([resize, transforms.ToTensor(), normalize])
inference_ds = BreaKHisDataset.initalize(
    '/share/nikola/export/dt372/BreaKHis_v1/', label='tumor_class',
    criterion=['tumor_type', 'magnification'],
    split={'all': 1.0},
    split_transforms={'all': inference_transform}
)['all'].dataset
#example
show_heatmap_and_original(model.model, inference_ds[1], inference_transform, show_activation_grid=False)
Model would have predicted benign (0.82078 vs. 0.82078)
Showing activation heatmap for the given label: benign
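
To also inspect where in the network the activations come from, you can pass show_activation_grid=True; for example, looping over a few (hypothetical) dataset indices:

#example
for idx in [1, 5, 10]:
    show_heatmap_and_original(
        model.model, inference_ds[idx], inference_transform, show_activation_grid=True
    )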