Image Classification

In this notebook we explore standard image classification on MNIST with two convolutional Neural ODE variants:

* Depth-invariant neural ODE
* Galerkin neural ODE (GalNODE)

In the following notebooks we’ll explore augmentation strategies that can be easily applied to the models below via the flexible torchdyn API. Here, we use simple 0-augmentation (as in the ANODE model).
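
For reference, here is a minimal sketch of what 0-augmentation does, using the same Augmenter module employed by the models below: it concatenates extra zero-valued channels to the input, so the ODE evolves a higher-dimensional state. The shapes in the comments are illustrative.

import torch
from torchdyn.models import Augmenter

x = torch.randn(128, 1, 28, 28)   # a dummy MNIST batch
aug = Augmenter(augment_dims=10)  # append 10 zero channels (ANODE-style)
print(aug(x).shape)               # expected: torch.Size([128, 11, 28, 28])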

[1]:
import sys ; sys.path.append('../')
from torchdyn.models import *; from torchdyn import *
[2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.metrics.functional import accuracy
[3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
[4]:
batch_size=128
size=28
path_to_data='../data/mnist_data'

all_transforms = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
])

train_data = datasets.MNIST(path_to_data, train=True, download=True,
                            transform=all_transforms)
test_data = datasets.MNIST(path_to_data, train=False,
                           transform=all_transforms)

trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
testloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

The Learner is then defined as:

[5]:
class Learner(pl.LightningModule):
    def __init__(self, model:nn.Module):
        super().__init__()
        self.lr = 1e-3
        self.model = model
        self.iters = 0.

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        self.iters += 1.
        x, y = batch
        x, y = x.to(device), y.to(device)
        y_hat = self.model(x)
        loss = nn.CrossEntropyLoss()(y_hat, y)
        epoch_progress = self.iters / self.loader_len
        acc = accuracy(y_hat, y)
        nfe = self.model[1].nfe ; self.model[1].nfe = 0  # NFE used by the Neural ODE since the last step
        tqdm_dict = {'train_loss': loss, 'accuracy': acc, 'NFE': nfe}
        logs = {'train_loss': loss, 'epoch': epoch_progress}
        return {'loss': loss, 'progress_bar': tqdm_dict, 'log': logs}

    def test_step(self, batch, batch_nb):
        x, y = batch
        x, y = x.to(device), y.to(device)
        y_hat = self(x)
        acc = accuracy(y_hat, y)
        return {'test_loss': nn.CrossEntropyLoss()(y_hat, y), 'test_accuracy': acc}

    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        avg_acc = torch.stack([x['test_accuracy'] for x in outputs]).mean()
        logs = {'test_loss': avg_loss}
        return {'avg_test_loss': avg_loss, 'avg_test_accuracy': avg_acc,
                'log': logs, 'progress_bar': logs}

    def configure_optimizers(self):
        opt = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=5e-5)
        sched = {'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(opt),
                 'monitor': 'loss',
                 'interval': 'step',
                 'frequency': 10  }
        return [opt], [sched]

    def train_dataloader(self):
        self.loader_len = len(trainloader)
        return trainloader

    def test_dataloader(self):
        self.test_loader_len = len(testloader)
        return testloader

Depth-Invariant Conv Neural ODE

[6]:
func = nn.Sequential(nn.Conv2d(11, 11, 3, padding=1),
                     nn.Tanh(),
                     ).to(device)

neuralDE = NeuralDE(func,
                   solver='rk4',
                   sensitivity='autograd',
                   s_span=torch.linspace(0, 1, 10)).to(device)

model = nn.Sequential(Augmenter(augment_dims=10),
                      neuralDE,
                      nn.Conv2d(11, 1, 3, padding=1),
                      nn.Flatten(),
                      nn.Linear(28*28, 10)).to(device)

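To sanity-check the tensor flow before training, a quick (illustrative) shape check can be run on a dummy batch:

with torch.no_grad():
    dummy = torch.randn(4, 1, 28, 28, device=device)
    print(model(dummy).shape)  # expected: torch.Size([4, 10]) -- one logit per class
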
[7]:
learn = Learner(model)
trainer = pl.Trainer(max_epochs=3,
                     gpus=1,
                     progress_bar_refresh_rate=1,
                     )

trainer.fit(learn)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 9 K

[7]:
1

3 epochs are not enough. Feel free to keep training and to use all kinds of scheduling and optimization tricks :)
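
For example, one simple option (sketch; any pl.Trainer configuration works) is to keep fitting the same Learner with a larger epoch budget:

trainer = pl.Trainer(max_epochs=20,
                     gpus=1,
                     progress_bar_refresh_rate=1)
trainer.fit(learn)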

Galerkin Data-Controlled Conv Neural ODE (IL-Augmentation)

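A note on the channel bookkeeping in the vector field below: the learned augmentation lifts the input from 1 to 10 channels, each DataControl() concatenates the 10-channel initial condition to the current state, and DepthCat(1) appends the depth variable s, which the Galerkin convolutions use to evaluate their Fourier expansion along depth. This is why the first GalConv2d takes 10+10 input channels and the second takes 22 (the 12 channels produced by the first layer plus the 10 data-control channels).
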
[12]:
func = nn.Sequential(DataControl(),
                     DepthCat(1),
                     GalConv2d(10+10, 12, 3, padding=1, expfunc=FourierExpansion, n_harmonics=5),
                     nn.Softplus(),
                     DataControl(),
                     DepthCat(1),
                     GalConv2d(22, 10, 3, padding=1, expfunc=FourierExpansion, n_harmonics=5),
                     nn.Tanh()
                     )

neuralDE = NeuralDE(func,
                   solver='dopri5',
                   sensitivity='adjoint',
                   s_span=torch.linspace(0, 1, 2)).to(device)

model = nn.Sequential(Augmenter(augment_idx=1, augment_func=nn.Conv2d(1, 9, 3, padding=1)),
                      neuralDE,
                      nn.Conv2d(10, 1, 3, padding=1),
                      nn.Flatten(),
                      nn.Linear(28*28, 10)).to(device)

[13]:
learn = Learner(model)
trainer = pl.Trainer(max_epochs=3,
                     gpus=1,
                     progress_bar_refresh_rate=1,
                     )

trainer.fit(learn)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 49 K

[13]:
1

As before, 3 epochs are not enough. Feel free to keep training and to experiment with schedulers and other optimization tricks :)
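
The Learner above also defines test_step and test_dataloader, so the trained model can be evaluated on the MNIST test split with the standard Lightning call (sketch):

trainer.test(learn)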