PyTorch Lightning 1.1: research: CIFAR10 (DenseNet)
Author: ClassCat Co., Ltd. Sales Information
Date: 02/24/2021 (1.1.x)
* This page reports the results of an experiment carried out with reference to the following resources:
* Feel free to link to this page, but we would appreciate a note to sales-info@classcat.com.
Results
100 epochs: ReduceLROnPlateau
- DenseNet: {'test_acc': 0.8942999839782715, 'test_loss': 0.39830952882766724}; Wall time: 2h 19min 57s ('Tesla M60' x 2)
Code
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    def __init__(self, in_planes, growth_rate):
        super(Bottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(4*growth_rate)
        self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        out = self.conv1(F.relu(self.bn1(x)))
        out = self.conv2(F.relu(self.bn2(out)))
        # Dense connectivity: concatenate the new feature maps with the input.
        out = torch.cat([out, x], 1)
        return out


class Transition(nn.Module):
    def __init__(self, in_planes, out_planes):
        super(Transition, self).__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        # 1x1 conv compresses the channels, then average pooling halves the spatial size.
        out = self.conv(F.relu(self.bn(x)))
        out = F.avg_pool2d(out, 2)
        return out


class DenseNet(nn.Module):
    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet, self).__init__()
        self.growth_rate = growth_rate

        num_planes = 2*growth_rate
        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)

        self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
        num_planes += nblocks[0]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans1 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
        num_planes += nblocks[1]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
        num_planes += nblocks[2]*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.trans3 = Transition(num_planes, out_planes)
        num_planes = out_planes

        self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
        num_planes += nblocks[3]*growth_rate

        self.bn = nn.BatchNorm2d(num_planes)
        self.linear = nn.Linear(num_planes, num_classes)

    def _make_dense_layers(self, block, in_planes, nblock):
        layers = []
        for i in range(nblock):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)
        out = F.avg_pool2d(F.relu(self.bn(out)), 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def DenseNet121():
    return DenseNet(Bottleneck, [6, 12, 24, 16], growth_rate=32)

def DenseNet169():
    return DenseNet(Bottleneck, [6, 12, 32, 32], growth_rate=32)

def DenseNet201():
    return DenseNet(Bottleneck, [6, 12, 48, 32], growth_rate=32)

def DenseNet161():
    return DenseNet(Bottleneck, [6, 12, 36, 24], growth_rate=48)

def densenet_cifar():
    return DenseNet(Bottleneck, [6, 12, 24, 16], growth_rate=12)
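The channel bookkeeping follows DenseNet-BC: each Bottleneck concatenates growth_rate new channels onto its input, and each Transition halves both the channel count (reduction=0.5) and the spatial size. As a sanity check, the short sketch below (assuming only the densenet_cifar hyperparameters above) recomputes the channel counts that appear in the printed module shapes further down:

# Recompute densenet_cifar() channel counts by hand:
# growth_rate=12, reduction=0.5, nblocks=[6, 12, 24, 16].
growth_rate, reduction = 12, 0.5
num_planes = 2 * growth_rate                      # 24 channels after conv1
for i, nblock in enumerate([6, 12, 24, 16], 1):
    num_planes += nblock * growth_rate            # each Bottleneck adds growth_rate channels
    if i < 4:                                     # dense1..dense3 are followed by a Transition
        num_planes = int(num_planes * reduction)  # 1x1 conv halves the channels
    print(f'after dense{i} (+transition): {num_planes}')
# -> 48, 96, 192, 384; 384 matches the final BatchNorm2d and Linear(384, 10).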
net = densenet_cifar()
print(net)

x = torch.randn(1, 3, 32, 32)
y = net(x)
print(y)
DenseNet( (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (dense1): Sequential( (0): Bottleneck( (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (1): Bottleneck( (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (2): Bottleneck( (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (3): Bottleneck( (bn1): BatchNorm2d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(60, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (4): Bottleneck( (bn1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(72, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (5): Bottleneck( (bn1): BatchNorm2d(84, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(84, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) ) (trans1): Transition( (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) ) (dense2): Sequential( (0): Bottleneck( (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (1): Bottleneck( (bn1): BatchNorm2d(60, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(60, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (2): Bottleneck( (bn1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(72, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (3): Bottleneck( (bn1): BatchNorm2d(84, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True) (conv1): Conv2d(84, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (4): Bottleneck( (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (5): Bottleneck( (bn1): BatchNorm2d(108, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(108, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (6): Bottleneck( (bn1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(120, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (7): Bottleneck( (bn1): BatchNorm2d(132, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(132, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (8): Bottleneck( (bn1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(144, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (9): Bottleneck( (bn1): BatchNorm2d(156, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(156, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (10): Bottleneck( (bn1): BatchNorm2d(168, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(168, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (11): Bottleneck( (bn1): BatchNorm2d(180, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(180, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) ) (trans2): Transition( (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) ) (dense3): Sequential( (0): Bottleneck( (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (1): Bottleneck( (bn1): BatchNorm2d(108, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(108, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (2): Bottleneck( (bn1): BatchNorm2d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(120, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (3): Bottleneck( (bn1): BatchNorm2d(132, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(132, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (4): Bottleneck( (bn1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(144, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (5): Bottleneck( (bn1): BatchNorm2d(156, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(156, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (6): Bottleneck( (bn1): BatchNorm2d(168, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(168, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (7): Bottleneck( (bn1): BatchNorm2d(180, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(180, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (8): Bottleneck( (bn1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(192, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (9): Bottleneck( (bn1): BatchNorm2d(204, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(204, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (10): Bottleneck( (bn1): BatchNorm2d(216, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(216, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (11): Bottleneck( (bn1): BatchNorm2d(228, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(228, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (12): Bottleneck( (bn1): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(240, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (13): Bottleneck( (bn1): BatchNorm2d(252, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(252, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (14): Bottleneck( (bn1): BatchNorm2d(264, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(264, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (15): Bottleneck( (bn1): BatchNorm2d(276, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(276, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (16): Bottleneck( (bn1): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(288, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (17): Bottleneck( (bn1): BatchNorm2d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(300, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (18): Bottleneck( (bn1): BatchNorm2d(312, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(312, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (19): Bottleneck( (bn1): BatchNorm2d(324, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(324, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (20): Bottleneck( (bn1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(336, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (21): Bottleneck( (bn1): BatchNorm2d(348, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(348, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (22): Bottleneck( (bn1): BatchNorm2d(360, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(360, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (23): Bottleneck( (bn1): BatchNorm2d(372, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(372, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) ) (trans3): Transition( (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv): Conv2d(384, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) ) (dense4): Sequential( (0): Bottleneck( (bn1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(192, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (1): Bottleneck( (bn1): BatchNorm2d(204, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(204, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (2): Bottleneck( (bn1): BatchNorm2d(216, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(216, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (3): Bottleneck( (bn1): BatchNorm2d(228, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(228, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (4): Bottleneck( (bn1): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(240, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (5): Bottleneck( (bn1): BatchNorm2d(252, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(252, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (6): Bottleneck( (bn1): BatchNorm2d(264, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(264, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (7): Bottleneck( (bn1): BatchNorm2d(276, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(276, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (8): Bottleneck( (bn1): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(288, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (9): Bottleneck( (bn1): BatchNorm2d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(300, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (10): Bottleneck( (bn1): BatchNorm2d(312, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(312, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (11): Bottleneck( (bn1): BatchNorm2d(324, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(324, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (12): Bottleneck( (bn1): BatchNorm2d(336, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(336, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (13): Bottleneck( (bn1): BatchNorm2d(348, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(348, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (14): Bottleneck( (bn1): BatchNorm2d(360, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(360, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) (15): Bottleneck( (bn1): BatchNorm2d(372, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv1): Conv2d(372, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) ) ) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (linear): Linear(in_features=384, out_features=10, bias=True) ) tensor([[-0.0524, -0.1796, -0.1964, -0.1849, 0.0016, -0.1339, 0.1824, 0.1621, 0.1819, -0.4861]], grad_fn=<AddmmBackward>)
from torchsummary import summary
summary(densenet_cifar().to('cuda'), (3, 32, 32))
---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 24, 32, 32] 648 BatchNorm2d-2 [-1, 24, 32, 32] 48 Conv2d-3 [-1, 48, 32, 32] 1,152 BatchNorm2d-4 [-1, 48, 32, 32] 96 Conv2d-5 [-1, 12, 32, 32] 5,184 Bottleneck-6 [-1, 36, 32, 32] 0 BatchNorm2d-7 [-1, 36, 32, 32] 72 Conv2d-8 [-1, 48, 32, 32] 1,728 BatchNorm2d-9 [-1, 48, 32, 32] 96 Conv2d-10 [-1, 12, 32, 32] 5,184 Bottleneck-11 [-1, 48, 32, 32] 0 BatchNorm2d-12 [-1, 48, 32, 32] 96 Conv2d-13 [-1, 48, 32, 32] 2,304 BatchNorm2d-14 [-1, 48, 32, 32] 96 Conv2d-15 [-1, 12, 32, 32] 5,184 Bottleneck-16 [-1, 60, 32, 32] 0 BatchNorm2d-17 [-1, 60, 32, 32] 120 Conv2d-18 [-1, 48, 32, 32] 2,880 BatchNorm2d-19 [-1, 48, 32, 32] 96 Conv2d-20 [-1, 12, 32, 32] 5,184 Bottleneck-21 [-1, 72, 32, 32] 0 BatchNorm2d-22 [-1, 72, 32, 32] 144 Conv2d-23 [-1, 48, 32, 32] 3,456 BatchNorm2d-24 [-1, 48, 32, 32] 96 Conv2d-25 [-1, 12, 32, 32] 5,184 Bottleneck-26 [-1, 84, 32, 32] 0 BatchNorm2d-27 [-1, 84, 32, 32] 168 Conv2d-28 [-1, 48, 32, 32] 4,032 BatchNorm2d-29 [-1, 48, 32, 32] 96 Conv2d-30 [-1, 12, 32, 32] 5,184 Bottleneck-31 [-1, 96, 32, 32] 0 BatchNorm2d-32 [-1, 96, 32, 32] 192 Conv2d-33 [-1, 48, 32, 32] 4,608 Transition-34 [-1, 48, 16, 16] 0 BatchNorm2d-35 [-1, 48, 16, 16] 96 Conv2d-36 [-1, 48, 16, 16] 2,304 BatchNorm2d-37 [-1, 48, 16, 16] 96 Conv2d-38 [-1, 12, 16, 16] 5,184 Bottleneck-39 [-1, 60, 16, 16] 0 BatchNorm2d-40 [-1, 60, 16, 16] 120 Conv2d-41 [-1, 48, 16, 16] 2,880 BatchNorm2d-42 [-1, 48, 16, 16] 96 Conv2d-43 [-1, 12, 16, 16] 5,184 Bottleneck-44 [-1, 72, 16, 16] 0 BatchNorm2d-45 [-1, 72, 16, 16] 144 Conv2d-46 [-1, 48, 16, 16] 3,456 BatchNorm2d-47 [-1, 48, 16, 16] 96 Conv2d-48 [-1, 12, 16, 16] 5,184 Bottleneck-49 [-1, 84, 16, 16] 0 BatchNorm2d-50 [-1, 84, 16, 16] 168 Conv2d-51 [-1, 48, 16, 16] 4,032 BatchNorm2d-52 [-1, 48, 16, 16] 96 Conv2d-53 [-1, 12, 16, 16] 5,184 Bottleneck-54 [-1, 96, 16, 16] 0 BatchNorm2d-55 [-1, 96, 16, 16] 192 Conv2d-56 [-1, 48, 16, 16] 4,608 BatchNorm2d-57 [-1, 48, 16, 16] 96 Conv2d-58 [-1, 12, 16, 16] 5,184 Bottleneck-59 [-1, 108, 16, 16] 0 BatchNorm2d-60 [-1, 108, 16, 16] 216 Conv2d-61 [-1, 48, 16, 16] 5,184 BatchNorm2d-62 [-1, 48, 16, 16] 96 Conv2d-63 [-1, 12, 16, 16] 5,184 Bottleneck-64 [-1, 120, 16, 16] 0 BatchNorm2d-65 [-1, 120, 16, 16] 240 Conv2d-66 [-1, 48, 16, 16] 5,760 BatchNorm2d-67 [-1, 48, 16, 16] 96 Conv2d-68 [-1, 12, 16, 16] 5,184 Bottleneck-69 [-1, 132, 16, 16] 0 BatchNorm2d-70 [-1, 132, 16, 16] 264 Conv2d-71 [-1, 48, 16, 16] 6,336 BatchNorm2d-72 [-1, 48, 16, 16] 96 Conv2d-73 [-1, 12, 16, 16] 5,184 Bottleneck-74 [-1, 144, 16, 16] 0 BatchNorm2d-75 [-1, 144, 16, 16] 288 Conv2d-76 [-1, 48, 16, 16] 6,912 BatchNorm2d-77 [-1, 48, 16, 16] 96 Conv2d-78 [-1, 12, 16, 16] 5,184 Bottleneck-79 [-1, 156, 16, 16] 0 BatchNorm2d-80 [-1, 156, 16, 16] 312 Conv2d-81 [-1, 48, 16, 16] 7,488 BatchNorm2d-82 [-1, 48, 16, 16] 96 Conv2d-83 [-1, 12, 16, 16] 5,184 Bottleneck-84 [-1, 168, 16, 16] 0 BatchNorm2d-85 [-1, 168, 16, 16] 336 Conv2d-86 [-1, 48, 16, 16] 8,064 BatchNorm2d-87 [-1, 48, 16, 16] 96 Conv2d-88 [-1, 12, 16, 16] 5,184 Bottleneck-89 [-1, 180, 16, 16] 0 BatchNorm2d-90 [-1, 180, 16, 16] 360 Conv2d-91 [-1, 48, 16, 16] 8,640 BatchNorm2d-92 [-1, 48, 16, 16] 96 Conv2d-93 [-1, 12, 16, 16] 5,184 Bottleneck-94 [-1, 192, 16, 16] 0 BatchNorm2d-95 [-1, 192, 16, 16] 384 Conv2d-96 [-1, 96, 16, 16] 18,432 Transition-97 [-1, 96, 8, 8] 0 BatchNorm2d-98 [-1, 96, 8, 8] 192 Conv2d-99 [-1, 48, 8, 8] 4,608 BatchNorm2d-100 [-1, 48, 8, 
8] 96 Conv2d-101 [-1, 12, 8, 8] 5,184 Bottleneck-102 [-1, 108, 8, 8] 0 BatchNorm2d-103 [-1, 108, 8, 8] 216 Conv2d-104 [-1, 48, 8, 8] 5,184 BatchNorm2d-105 [-1, 48, 8, 8] 96 Conv2d-106 [-1, 12, 8, 8] 5,184 Bottleneck-107 [-1, 120, 8, 8] 0 BatchNorm2d-108 [-1, 120, 8, 8] 240 Conv2d-109 [-1, 48, 8, 8] 5,760 BatchNorm2d-110 [-1, 48, 8, 8] 96 Conv2d-111 [-1, 12, 8, 8] 5,184 Bottleneck-112 [-1, 132, 8, 8] 0 BatchNorm2d-113 [-1, 132, 8, 8] 264 Conv2d-114 [-1, 48, 8, 8] 6,336 BatchNorm2d-115 [-1, 48, 8, 8] 96 Conv2d-116 [-1, 12, 8, 8] 5,184 Bottleneck-117 [-1, 144, 8, 8] 0 BatchNorm2d-118 [-1, 144, 8, 8] 288 Conv2d-119 [-1, 48, 8, 8] 6,912 BatchNorm2d-120 [-1, 48, 8, 8] 96 Conv2d-121 [-1, 12, 8, 8] 5,184 Bottleneck-122 [-1, 156, 8, 8] 0 BatchNorm2d-123 [-1, 156, 8, 8] 312 Conv2d-124 [-1, 48, 8, 8] 7,488 BatchNorm2d-125 [-1, 48, 8, 8] 96 Conv2d-126 [-1, 12, 8, 8] 5,184 Bottleneck-127 [-1, 168, 8, 8] 0 BatchNorm2d-128 [-1, 168, 8, 8] 336 Conv2d-129 [-1, 48, 8, 8] 8,064 BatchNorm2d-130 [-1, 48, 8, 8] 96 Conv2d-131 [-1, 12, 8, 8] 5,184 Bottleneck-132 [-1, 180, 8, 8] 0 BatchNorm2d-133 [-1, 180, 8, 8] 360 Conv2d-134 [-1, 48, 8, 8] 8,640 BatchNorm2d-135 [-1, 48, 8, 8] 96 Conv2d-136 [-1, 12, 8, 8] 5,184 Bottleneck-137 [-1, 192, 8, 8] 0 BatchNorm2d-138 [-1, 192, 8, 8] 384 Conv2d-139 [-1, 48, 8, 8] 9,216 BatchNorm2d-140 [-1, 48, 8, 8] 96 Conv2d-141 [-1, 12, 8, 8] 5,184 Bottleneck-142 [-1, 204, 8, 8] 0 BatchNorm2d-143 [-1, 204, 8, 8] 408 Conv2d-144 [-1, 48, 8, 8] 9,792 BatchNorm2d-145 [-1, 48, 8, 8] 96 Conv2d-146 [-1, 12, 8, 8] 5,184 Bottleneck-147 [-1, 216, 8, 8] 0 BatchNorm2d-148 [-1, 216, 8, 8] 432 Conv2d-149 [-1, 48, 8, 8] 10,368 BatchNorm2d-150 [-1, 48, 8, 8] 96 Conv2d-151 [-1, 12, 8, 8] 5,184 Bottleneck-152 [-1, 228, 8, 8] 0 BatchNorm2d-153 [-1, 228, 8, 8] 456 Conv2d-154 [-1, 48, 8, 8] 10,944 BatchNorm2d-155 [-1, 48, 8, 8] 96 Conv2d-156 [-1, 12, 8, 8] 5,184 Bottleneck-157 [-1, 240, 8, 8] 0 BatchNorm2d-158 [-1, 240, 8, 8] 480 Conv2d-159 [-1, 48, 8, 8] 11,520 BatchNorm2d-160 [-1, 48, 8, 8] 96 Conv2d-161 [-1, 12, 8, 8] 5,184 Bottleneck-162 [-1, 252, 8, 8] 0 BatchNorm2d-163 [-1, 252, 8, 8] 504 Conv2d-164 [-1, 48, 8, 8] 12,096 BatchNorm2d-165 [-1, 48, 8, 8] 96 Conv2d-166 [-1, 12, 8, 8] 5,184 Bottleneck-167 [-1, 264, 8, 8] 0 BatchNorm2d-168 [-1, 264, 8, 8] 528 Conv2d-169 [-1, 48, 8, 8] 12,672 BatchNorm2d-170 [-1, 48, 8, 8] 96 Conv2d-171 [-1, 12, 8, 8] 5,184 Bottleneck-172 [-1, 276, 8, 8] 0 BatchNorm2d-173 [-1, 276, 8, 8] 552 Conv2d-174 [-1, 48, 8, 8] 13,248 BatchNorm2d-175 [-1, 48, 8, 8] 96 Conv2d-176 [-1, 12, 8, 8] 5,184 Bottleneck-177 [-1, 288, 8, 8] 0 BatchNorm2d-178 [-1, 288, 8, 8] 576 Conv2d-179 [-1, 48, 8, 8] 13,824 BatchNorm2d-180 [-1, 48, 8, 8] 96 Conv2d-181 [-1, 12, 8, 8] 5,184 Bottleneck-182 [-1, 300, 8, 8] 0 BatchNorm2d-183 [-1, 300, 8, 8] 600 Conv2d-184 [-1, 48, 8, 8] 14,400 BatchNorm2d-185 [-1, 48, 8, 8] 96 Conv2d-186 [-1, 12, 8, 8] 5,184 Bottleneck-187 [-1, 312, 8, 8] 0 BatchNorm2d-188 [-1, 312, 8, 8] 624 Conv2d-189 [-1, 48, 8, 8] 14,976 BatchNorm2d-190 [-1, 48, 8, 8] 96 Conv2d-191 [-1, 12, 8, 8] 5,184 Bottleneck-192 [-1, 324, 8, 8] 0 BatchNorm2d-193 [-1, 324, 8, 8] 648 Conv2d-194 [-1, 48, 8, 8] 15,552 BatchNorm2d-195 [-1, 48, 8, 8] 96 Conv2d-196 [-1, 12, 8, 8] 5,184 Bottleneck-197 [-1, 336, 8, 8] 0 BatchNorm2d-198 [-1, 336, 8, 8] 672 Conv2d-199 [-1, 48, 8, 8] 16,128 BatchNorm2d-200 [-1, 48, 8, 8] 96 Conv2d-201 [-1, 12, 8, 8] 5,184 Bottleneck-202 [-1, 348, 8, 8] 0 BatchNorm2d-203 [-1, 348, 8, 8] 696 Conv2d-204 [-1, 48, 8, 8] 16,704 BatchNorm2d-205 [-1, 48, 8, 8] 96 Conv2d-206 [-1, 12, 8, 8] 
5,184 Bottleneck-207 [-1, 360, 8, 8] 0 BatchNorm2d-208 [-1, 360, 8, 8] 720 Conv2d-209 [-1, 48, 8, 8] 17,280 BatchNorm2d-210 [-1, 48, 8, 8] 96 Conv2d-211 [-1, 12, 8, 8] 5,184 Bottleneck-212 [-1, 372, 8, 8] 0 BatchNorm2d-213 [-1, 372, 8, 8] 744 Conv2d-214 [-1, 48, 8, 8] 17,856 BatchNorm2d-215 [-1, 48, 8, 8] 96 Conv2d-216 [-1, 12, 8, 8] 5,184 Bottleneck-217 [-1, 384, 8, 8] 0 BatchNorm2d-218 [-1, 384, 8, 8] 768 Conv2d-219 [-1, 192, 8, 8] 73,728 Transition-220 [-1, 192, 4, 4] 0 BatchNorm2d-221 [-1, 192, 4, 4] 384 Conv2d-222 [-1, 48, 4, 4] 9,216 BatchNorm2d-223 [-1, 48, 4, 4] 96 Conv2d-224 [-1, 12, 4, 4] 5,184 Bottleneck-225 [-1, 204, 4, 4] 0 BatchNorm2d-226 [-1, 204, 4, 4] 408 Conv2d-227 [-1, 48, 4, 4] 9,792 BatchNorm2d-228 [-1, 48, 4, 4] 96 Conv2d-229 [-1, 12, 4, 4] 5,184 Bottleneck-230 [-1, 216, 4, 4] 0 BatchNorm2d-231 [-1, 216, 4, 4] 432 Conv2d-232 [-1, 48, 4, 4] 10,368 BatchNorm2d-233 [-1, 48, 4, 4] 96 Conv2d-234 [-1, 12, 4, 4] 5,184 Bottleneck-235 [-1, 228, 4, 4] 0 BatchNorm2d-236 [-1, 228, 4, 4] 456 Conv2d-237 [-1, 48, 4, 4] 10,944 BatchNorm2d-238 [-1, 48, 4, 4] 96 Conv2d-239 [-1, 12, 4, 4] 5,184 Bottleneck-240 [-1, 240, 4, 4] 0 BatchNorm2d-241 [-1, 240, 4, 4] 480 Conv2d-242 [-1, 48, 4, 4] 11,520 BatchNorm2d-243 [-1, 48, 4, 4] 96 Conv2d-244 [-1, 12, 4, 4] 5,184 Bottleneck-245 [-1, 252, 4, 4] 0 BatchNorm2d-246 [-1, 252, 4, 4] 504 Conv2d-247 [-1, 48, 4, 4] 12,096 BatchNorm2d-248 [-1, 48, 4, 4] 96 Conv2d-249 [-1, 12, 4, 4] 5,184 Bottleneck-250 [-1, 264, 4, 4] 0 BatchNorm2d-251 [-1, 264, 4, 4] 528 Conv2d-252 [-1, 48, 4, 4] 12,672 BatchNorm2d-253 [-1, 48, 4, 4] 96 Conv2d-254 [-1, 12, 4, 4] 5,184 Bottleneck-255 [-1, 276, 4, 4] 0 BatchNorm2d-256 [-1, 276, 4, 4] 552 Conv2d-257 [-1, 48, 4, 4] 13,248 BatchNorm2d-258 [-1, 48, 4, 4] 96 Conv2d-259 [-1, 12, 4, 4] 5,184 Bottleneck-260 [-1, 288, 4, 4] 0 BatchNorm2d-261 [-1, 288, 4, 4] 576 Conv2d-262 [-1, 48, 4, 4] 13,824 BatchNorm2d-263 [-1, 48, 4, 4] 96 Conv2d-264 [-1, 12, 4, 4] 5,184 Bottleneck-265 [-1, 300, 4, 4] 0 BatchNorm2d-266 [-1, 300, 4, 4] 600 Conv2d-267 [-1, 48, 4, 4] 14,400 BatchNorm2d-268 [-1, 48, 4, 4] 96 Conv2d-269 [-1, 12, 4, 4] 5,184 Bottleneck-270 [-1, 312, 4, 4] 0 BatchNorm2d-271 [-1, 312, 4, 4] 624 Conv2d-272 [-1, 48, 4, 4] 14,976 BatchNorm2d-273 [-1, 48, 4, 4] 96 Conv2d-274 [-1, 12, 4, 4] 5,184 Bottleneck-275 [-1, 324, 4, 4] 0 BatchNorm2d-276 [-1, 324, 4, 4] 648 Conv2d-277 [-1, 48, 4, 4] 15,552 BatchNorm2d-278 [-1, 48, 4, 4] 96 Conv2d-279 [-1, 12, 4, 4] 5,184 Bottleneck-280 [-1, 336, 4, 4] 0 BatchNorm2d-281 [-1, 336, 4, 4] 672 Conv2d-282 [-1, 48, 4, 4] 16,128 BatchNorm2d-283 [-1, 48, 4, 4] 96 Conv2d-284 [-1, 12, 4, 4] 5,184 Bottleneck-285 [-1, 348, 4, 4] 0 BatchNorm2d-286 [-1, 348, 4, 4] 696 Conv2d-287 [-1, 48, 4, 4] 16,704 BatchNorm2d-288 [-1, 48, 4, 4] 96 Conv2d-289 [-1, 12, 4, 4] 5,184 Bottleneck-290 [-1, 360, 4, 4] 0 BatchNorm2d-291 [-1, 360, 4, 4] 720 Conv2d-292 [-1, 48, 4, 4] 17,280 BatchNorm2d-293 [-1, 48, 4, 4] 96 Conv2d-294 [-1, 12, 4, 4] 5,184 Bottleneck-295 [-1, 372, 4, 4] 0 BatchNorm2d-296 [-1, 372, 4, 4] 744 Conv2d-297 [-1, 48, 4, 4] 17,856 BatchNorm2d-298 [-1, 48, 4, 4] 96 Conv2d-299 [-1, 12, 4, 4] 5,184 Bottleneck-300 [-1, 384, 4, 4] 0 BatchNorm2d-301 [-1, 384, 4, 4] 768 Linear-302 [-1, 10] 3,850 ================================================================ Total params: 1,000,618 Trainable params: 1,000,618 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.01 Forward/backward pass size (MB): 29.44 Params size (MB): 3.82 Estimated Total Size (MB): 33.27 
----------------------------------------------------------------
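The total of 1,000,618 parameters reported by torchsummary can be cross-checked directly from the model itself; a one-line sketch, assuming net = densenet_cifar() from the earlier cell:

# Should print 1,000,618, matching the torchsummary total above.
n_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(f'{n_params:,}')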
ReduceLROnPlateau scheduler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import OneCycleLR, CyclicLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
from torch.optim.swa_utils import AveragedModel, update_bn

import torchvision
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, GPUStatsMonitor, EarlyStopping
from pytorch_lightning.metrics.functional import accuracy
from pl_bolts.datamodules import CIFAR10DataModule
from pl_bolts.transforms.dataset_normalizations import cifar10_normalization
pl.seed_everything(7);
batch_size = 50

train_transforms = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop(32, padding=4),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    cifar10_normalization(),
])

test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    cifar10_normalization(),
])

cifar10_dm = CIFAR10DataModule(
    batch_size=batch_size,
    train_transforms=train_transforms,
    test_transforms=test_transforms,
    val_transforms=test_transforms,
)
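To inspect what the datamodule feeds the model, a batch can be materialized outside the Trainer; a minimal sketch, assuming the standard LightningDataModule API (prepare_data / setup / train_dataloader) of the pl_bolts release from this era:

# Pull one augmented training batch (downloads CIFAR-10 on the first run).
cifar10_dm.prepare_data()
cifar10_dm.setup()
images, labels = next(iter(cifar10_dm.train_dataloader()))
print(images.shape, labels.shape)  # expected: torch.Size([50, 3, 32, 32]) torch.Size([50])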
class LitCifar10(pl.LightningModule):
    def __init__(self, lr=0.05, factor=0.8):
        super().__init__()
        self.save_hyperparameters()
        self.model = densenet_cifar()

    def forward(self, x):
        out = self.model(x)
        return F.log_softmax(out, dim=1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = F.log_softmax(self.model(x), dim=1)
        loss = F.nll_loss(logits, y)
        self.log('train_loss', loss)
        return loss

    def evaluate(self, batch, stage=None):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)
        if stage:
            self.log(f'{stage}_loss', loss, prog_bar=True)
            self.log(f'{stage}_acc', acc, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        self.evaluate(batch, 'val')

    def test_step(self, batch, batch_idx):
        self.evaluate(batch, 'test')

    def configure_optimizers(self):
        if False:  # Adam alternative, disabled in this run
            optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr,
                                         weight_decay=0, eps=1e-3)
        else:
            optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr,
                                        momentum=0.9, weight_decay=5e-4)
        return {
            'optimizer': optimizer,
            'lr_scheduler': ReduceLROnPlateau(
                optimizer, 'max', patience=5, factor=self.hparams.factor,
                verbose=True, threshold=0.0001, threshold_mode='abs',
                cooldown=1, min_lr=1e-5),
            'monitor': 'val_acc'
        }
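Returning a dict from configure_optimizers with 'lr_scheduler' and 'monitor' keys tells the Lightning 1.1 Trainer to step ReduceLROnPlateau against the logged val_acc at the end of each validation epoch; in 'max' mode the learning rate is multiplied by factor once the monitored accuracy has failed to improve by threshold for patience epochs. The standalone sketch below (plain PyTorch, with a hypothetical plateaued metric) shows the same mechanism outside of Lightning:

# Minimal sketch: ReduceLROnPlateau in 'max' mode on a metric that never improves.
import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

opt = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.05)
sched = ReduceLROnPlateau(opt, 'max', patience=5, factor=0.75,
                          threshold=0.0001, threshold_mode='abs', cooldown=1)
for epoch in range(20):
    val_acc = 0.80                 # hypothetical: validation accuracy has plateaued
    sched.step(val_acc)            # after `patience` bad epochs the LR is cut by `factor`
    print(epoch, opt.param_groups[0]['lr'])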
%%time
model = LitCifar10(lr=0.05, factor=0.75)
model.datamodule = cifar10_dm

trainer = pl.Trainer(
    gpus=1,
    max_epochs=100,
    #auto_scale_batch_size=True,
    #auto_lr_find=True,
    progress_bar_refresh_rate=100,
    logger=pl.loggers.TensorBoardLogger('tblogs/', name='densenet'),
    callbacks=[LearningRateMonitor(logging_interval='step')],
)

trainer.fit(model, cifar10_dm)
trainer.test(model, datamodule=cifar10_dm);
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
Files already downloaded and verified

  | Name  | Type     | Params
-----------------------------------
0 | model | DenseNet | 1.0 M
-----------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 M     Total params
4.002     Total estimated model params size (MB)

(...)

Epoch    30: reducing learning rate of group 0 to 3.7500e-02.
Epoch    48: reducing learning rate of group 0 to 2.8125e-02.
Epoch    58: reducing learning rate of group 0 to 2.1094e-02.
Epoch    65: reducing learning rate of group 0 to 1.5820e-02.
Epoch    72: reducing learning rate of group 0 to 1.1865e-02.
Epoch    79: reducing learning rate of group 0 to 8.8989e-03.
Epoch    86: reducing learning rate of group 0 to 6.6742e-03.

(...)

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.8942999839782715, 'test_loss': 0.39830952882766724}
--------------------------------------------------------------------------------
CPU times: user 2h 5min 49s, sys: 9min 55s, total: 2h 15min 45s
Wall time: 2h 19min 57s
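The reductions in the log are exactly the initial lr=0.05 scaled by factor=0.75 at each plateau; a quick arithmetic check:

# The logged learning rates are 0.05 * 0.75**k for k = 1..7.
lr = 0.05
for k in range(1, 8):
    lr *= 0.75
    print(f'{lr:.4e}')
# -> 3.7500e-02, 2.8125e-02, 2.1094e-02, 1.5820e-02, 1.1865e-02, 8.8989e-03, 6.6742e-03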
!nvidia-smi
Tue Feb 23 14:45:35 2021
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   70C    P0    31W /  70W |   1206MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
+-----------------------------------------------------------------------------+
That's all.