Demo on CIFAR10 dataset

CNN example from official PyTorch¶

dataset: CIFAR10
02-05-2019 (Tue)

Imports and Path setup¶

In [5]:

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision 
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os,sys
import pdb

In [6]:

PROJ_ROOT = Path(os.getcwd()).parent.parent
DATA_DIR = PROJ_ROOT/'data/raw'
SRC_DIR = PROJ_ROOT/'scripts'
paths2add = [PROJ_ROOT, SRC_DIR]

print("Project root: ", PROJ_ROOT)
print("Data dir: ", DATA_DIR)
print("SRC dir: ", SRC_DIR)

Project root:  /home/hayley/Playground/Spacenet_Preprocess
Data dir:  /home/hayley/Playground/Spacenet_Preprocess/data/raw
SRC dir:  /home/hayley/Playground/Spacenet_Preprocess/scripts

Add project root and src directory to the path¶

In [7]:

for p in paths2add:
    p = str(p)
    if p not in sys.path:
        sys.path.insert(0, p)
        print("Prepened to path: ", p)

Prepened to path:  /home/hayley/Playground/Spacenet_Preprocess
Prepened to path:  /home/hayley/Playground/Spacenet_Preprocess/scripts

Datasets and Dataloaders¶

torchvision.transforms take in PIL Image instances and perform specific image processings.

In [8]:

transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,0.5,0.5),
                              (0.5,0.5,0.5))
        ])
trainset = torchvision.datasets.CIFAR10(root=str(DATA_DIR), 
                                        train=True, download=True,
                                        transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root=str(DATA_DIR),
                                       train=False, download=True,
                                       transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                         shuffle=False, num_workers=2)
CLASSES = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified

Visualization helpers¶

In [9]:

def to_npimg(t):
    """
    Returns a new view of tensor t (nc, h, w) as a numpy array of (h,w,nc)
    Note: this is an operation which assumes the input is image data with (h,w,nc).
        - if t is 3-dim, we assume t's dimensions represent (n_channels, h, w) and 
        *not* (num_grayscale_images, h, w)
        - if t has 4 dims, we assume t's dimensions correspond to (batch_size, n_channels, h, w)
    These assumptions matter because tensor and numpy images use different conventions in 
    interpreting each dimension: 
        - tensor: (nc, h, w) or (n_batchs, nc, h, w)
        - numpy_imgarray : (h,w,nc) or (n_batches, h, w, nc)
    n_channels (`nc`) is equivalent to `num_planes` in PyTorch documentation.
    """
    #todo: handle 1dim and 2dim 
    if t.dim() not in [3, 4]:
        raise TypeError("input must have dim of 3 or 4: {}".
                        format(t.dim()))
                        
    if t.dim() == 3:
        return t.permute(1,2,0).numpy()
    else:
        return np.array(list(map(to_npimg, t)))
def test_to_npimg_3dim():
    print("testing 3dim tensor: ")
    t = torch.ones(3,5,5)
    t[0:2,:,:] = 0
    npimg = to_npimg(t)
    print(npimg.shape)
    for i in range(3):
        print(npimg[:,:,i].sum())
def test_to_npimg_4dim():
    print("Testing 4dim tensor: ")
    t = torch.ones(2,3,5,5)
    t[:,0:2,:,:] = 0
    npimg = to_npimg(t)
    print(npimg.shape)
    for n in range(2):
        print("{}th tensor...".format(n))
        for i in range(3):
            print(npimg[n,:,:,i].sum())

In [ ]:

# test_to_npimg_3dim()
test_to_npimg_4dim()

In [11]:

def batch_y2str(batch_y, delim=", "):
    batch_str = [CLASSES[l.item()] for l in batch_y]
    return delim.join(list(map(str, batch_str)))
        
def show_tensor(img_t, figsize=(10,10), title=""):
    """
    Show an 3dim image tensor (nc, h, w)
    """
    # unnormalize
    img_t = img_t/2 + 0.5
    img_np = to_npimg(img_t)
    f,ax = plt.subplots(figsize=figsize)
    ax.imshow(img_np)
    ax.set_title(title)
    f.show()
    return f, ax
def show_batch(dataloader):
    """
    Show the first mini-batch opf images from the dataloader
    Args:
    - dataloader (iterable)
    
    """
    batch_X, batch_y = iter(dataloader).next()
    grid = torchvision.utils.make_grid(batch_X)
    title = batch_y2str(batch_y)
    show_tensor(grid, figsize=(20,20), title=title)

In [12]:

def test_show_tensor():
    for i in range(3):
        batch_X, batch_y = iter(trainloader).next()
        grid = torchvision.utils.make_grid(batch_X)
        title = batch_y2str(batch_y)
        show_tensor(grid, figsize=(20,20), title=title)
test_show_tensor()

/home/hayley/miniconda3/envs/fastai/lib/python3.6/site-packages/matplotlib/figure.py:459: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure
  "matplotlib is currently using a non-GUI backend, "

In [14]:

def test_show_batch():
    show_batch(trainloader)
test_show_batch()

In [223]:

def show_samples(n, dataloader, figsize=(20,20)):
    """
    Show n batch of X data from the dataloader.
    Assumes:
    - batch_x is a mini-batch of 3dim image tensor,
    - batch_y is a mini-batch of class labels
    
    """
    it = iter(dataloader)
    assert( n <= len(dataloader) )
    for i in range(n):
        batch_x, batch_y = it.next()
        labels_str = batch_y2str(batch_y)
        grid = torchvision.utils.make_grid(batch_x)
        show_tensor(grid, 
                    figsize=figsize, 
                    title=labels_str)
        
def show_train_samples(n=10):
    nprint("Train sample batches")
    show_samples(n, trainloader)
def show_test_samples(n=10):
    nprint("Test sample batches")
    show_samples(n, testloader)

In [ ]:

show_train_samples(n=3)

In [ ]:

show_test_samples(n=3)

In [49]:

# Define a CNN layer
def get_convolved_size(x, f, pad, stride):
    """
    Given an input of (x,x) image, return the output spatial dimension
    after convolving with a filter of size f and pad and stride as specified
    Args:
    - x (int): input size
    - f (int): size of the convolution filter
    - pad (int): padsize (which is added at four sides)
    - stride (int): number of steps to slide the convolution filter spatially
    
    Returns:
    - (int): output image's size
    """
    return np.floor((x + 2*pad - f)/stride) + 1

def test_get_convolved_size():
    x = 10
    print(get_convolved_size(x, 3,1,1))

def test_get_convolved_size_2():
    x = 11
    print(get_convolved_size(x, 3,1,1))
test_get_convolved_size()
test_get_convolved_size_2()

10.0
11.0

In [53]:

def get_pooled_size(x, f, stride):
    """
    Given an input of (x,x) image, return the output spatial dimension
    after max-pooling with a filter of size f and stride as specified
    
    Args:
    - x (int): input size
    - f (int): max-pool window size
    - stride (int): number of steps to slide the pooling window spatially
    
    Returns:
    - (int): output image's size after max-pooling
    """
    return np.floor((x-f)/stride) + 1

def test_get_pooled_size_1():
    x = 10
    f = 2
    stride = 2
    print("x: {}, f:{}, s: {}".
          format(x, f, stride))
    print("after: ", get_pooled_size(x,f,stride))

test_get_pooled_size_1()

x: 10, f:2, s: 2
after:  5.0

In [83]:

def Conv3x3(in_channels, out_channels, stride=1):
    """
    Use it like this:
    conv3x3 = Conv3x3(3, 3) # Creates three filters each of which operates on a 3-channel image
    batch_x, batch_y = iter(trainloader).next()
    convolved_x = conv3x3(batch_x)
    """
    return nn.Conv2d(in_channels, out_channels, 
                         kernel_size=3, 
                         stride=stride, 
                         padding=1,
                         bias=False) # don't need bias terms if it's followed by BN layer

In [80]:

class MyCNN(nn.Module):
    def __init__(self, in_shape, out_channels, n_filters, f_sizes, p_sizes, n_hs):
        """
        Assumes an input tensor of 4 dim (batch_size, nc, h, w)
        Architecture:
        - conv2d - maxpool2x2 - relu - fc
        """
        super(MyCNN, self).__init__()
        _, self.in_channels, self.in_h, self.in_w = in_shape
        self.n_filters = n_filters # a list of number of filters for each conv layer
        self.f_sizes = f_sizes # a list of kernel sizes
        self.p_sizes = p_sizes # a list of paddings for convolution layers
        self.n_hs = n_hs# a list of hidden units for fc layers
        self.out_channels = out_channels
        
        # layers self.conv3x3 = nn.Conv2d(in_channels, n_filters, f, )
        self.h, self.w = get_convolved_size(in_h, f_sizes[0], 
        self.pool = nn.MaxPool2d(2,2)
#         self.conv2 = nn.Conv2d(n_filters[0], n_filters[2], f_sizes[1])
        
        # todo: compute spatial size (ie. h,w) after conv1-pool-conv2-pool
        self.last_h, self.last_w = self._compute_spatial_dim(self.n_filters, f_sizes, p_sizes)
        self.fc1 = nn.Linear(n_filters[1]*self.last_h*self.last_w, n_hs[0])
        self.fc2 = nn.Linear(n_hs[0], n_hs[1])
        self.fc3 = nn.Linear(n_hs[1], out_channels) # 10 for number of labels
        
    def _compute_spatial_dim(self):
        """Compute the size of the hidden volumn after the convolution and pooling 
        Assume n_filters,f_sizes, and p_sizes have same length
        """
        assert( len(n_filters) == len(f_sizes) == len(p_sizes))
        
        return 
    def forward(self,x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, x.numel()) # better way to flattened x?
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
myCNN = MyCNN()

  File "<ipython-input-80-173b28d2fa1d>", line 22
    self.last_h, self.last_w = self._compute_spatial_dim(self.n_filters, f_sizes, p_sizes)
       ^
SyntaxError: invalid syntax

In [145]:

class HayleyNet(nn.Module):
    def __init__(self, in_channels, n_filters, out_channels):
        """
        Assumes input image of size (32, 32) and the MaxPooling of 2x2 window 
        with stride 2.  As a result of max-pool, the input will be halved in (h,w).
        
        Args:
        - in_channels: number of channels in the mini-batch of images
        - n_filters: number of filters in the convolution layer
        - out_channels: number of target classes
        """
        super(HayleyNet, self).__init__() # in Python3: super().__init__()
        self.in_channels = in_channels
        self.n_filters = n_filters
        self.out_channels = out_channels
        
        self.conv = Conv3x3(in_channels,  n_filters)
        self.pool = nn.MaxPool2d(2,2)
        self.relu = nn.ReLU(inplace=True)
        
        # Assume input size is 32x32 and the MaxPool downsamples by half
        # Therefore, 16 is the input width and height now.
        self.fc1 = nn.Linear(n_filters * 16 * 16, out_channels, bias=True)
    
    def forward(self, x):
        x = self.pool(self.conv(x))
        x = self.relu(x)
        
        # flatten x for the following fully-connected layer
        x = x.view(x.size(0), -1)
#         print("x size: ", x.size())
#         pdb.set_trace()
        x = self.fc1(x)
        return x

In [151]:

# Device config
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("device: ", device)

device:  cuda:0

In [156]:

# Hyperparams
n_epochs = 10
n_classes = 10
batch_size = 70
lr = 0.001

in_channels = 3 # CIFAR10 images are in RGB domain
n_filters = 10 # hyperparam
out_channels = 10 # CIFAR10 has 10 target labels

# Model
model = HayleyNet(in_channels, n_filters, out_channels)
# device = 'pu' # todo: just for now..
model.to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss() #takes in a batch of score vectors for each class as the first argument,
# and a batch of correct label (int)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [157]:

model

Out[157]:

HayleyNet(
  (conv): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU(inplace)
  (fc1): Linear(in_features=2560, out_features=10, bias=True)
)

In [158]:

def check_device(model):
    from collections import Counter
    counter = Counter()
    for param in model.parameters():
        counter[str(param.device)] += 1
    return counter
def test_check_device():
    nprint(model)
    print(check_device(model))
test_check_device()

================================================================================
HayleyNet(
  (conv): Conv2d(3, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU(inplace)
  (fc1): Linear(in_features=2560, out_features=10, bias=True)
)
Counter({'cuda:0': 3})

In [ ]:

# train
n_iters = len(trainloader)
for epoch in range(n_epochs):
    for i, (batch_x, batch_y) in enumerate(trainloader):
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
       
        # forward pass
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)

        # Backprop and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 1000 ==0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, n_epochs, i, n_iters+1, loss.item()))
        
    # todo: evaluation at the end of each epoch
    # todo: save the model
    # todo: logging to tensorboard
    torch.save(model.state_dict(), './model_{}.pth'.format(epoch))
    
    
# test
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for i, (test_x, test_y) in enumerate(testloader):
        test_x, test_y = test_x.to(device), test_y.to(device)
        outputs = model(test_x)
        _, preds = torch.max(outputs.data, 1 ) #todo: why .data?
        
        correct += (preds == test_y).sum().item()
        total += len(preds) #or, preds.size(0)
    print("Test accuracy on {} images: {}".
          format(total, correct/total))

02-10-2019 (Sun)¶

todo:

[ ] add progress bar
[x] debug input size error
[ ] make the road dataset into a separate script
[ ] define the road train,val,test dataset and create dataloaders
[ ] use hayleynet?

Play with nn.MaxPool2d()¶

In [72]:

def demo_maxPool2x2(dataloader):
    pool = nn.MaxPool2d(2,stride=2)
    X_batch, y_batch =  iter(dataloader).next()
    print(X_batch.size(), y_batch.size())
    X_pooled = pool(X_batch)

    grid = torchvision.utils.make_grid(X_batch)
    grid2 = torchvision.utils.make_grid(X_pooled)

    show_tensor(grid)
    show_tensor(grid2)
demo_maxPool2x2(trainloader)

torch.Size([4, 3, 32, 32]) torch.Size([4])

/home/hayley/miniconda3/envs/fastai/lib/python3.6/site-packages/matplotlib/figure.py:459: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure
  "matplotlib is currently using a non-GUI backend, "

In [73]:

demo_maxPool2x2(testloader)

torch.Size([4, 3, 32, 32]) torch.Size([4])

posted at 00:00 · ML

Small Simplicity

Understanding Intelligence from Computational Perspective

Dec 05, 2019

Demo on CIFAR10 dataset

CNN example from official PyTorch¶

Imports and Path setup¶

Add project root and src directory to the path¶

Datasets and Dataloaders¶

Visualization helpers¶

02-10-2019 (Sun)¶

Play with nn.MaxPool2d()¶