PyTorch Lightning 1.1: research : CIFAR10 (ShuffleNet)
作成 : (株)クラスキャット セールスインフォメーション
作成日時 : 02/23/2021 (1.1.x)
* 本ページは、以下のリソースを参考にして遂行した実験結果のレポートです:
* ご自由にリンクを張って頂いてかまいませんが、sales-info@classcat.com までご一報いただけると嬉しいです。
★ 無料セミナー実施中 ★ クラスキャット主催 人工知能 & ビジネス Web セミナー
人工知能とビジネスをテーマにウェビナー (WEB セミナー) を定期的に開催しています。スケジュールは弊社 公式 Web サイト でご確認頂けます。
- お住まいの地域に関係なく Web ブラウザからご参加頂けます。事前登録 が必要ですのでご注意ください。
- Windows PC のブラウザからご参加が可能です。スマートデバイスもご利用可能です。
クラスキャットは人工知能・テレワークに関する各種サービスを提供しております :
人工知能研究開発支援 | 人工知能研修サービス | テレワーク & オンライン授業を支援 |
PoC(概念実証)を失敗させないための支援 (本支援はセミナーに参加しアンケートに回答した方を対象としています。) |
◆ お問合せ : 本件に関するお問い合わせ先は下記までお願いいたします。
株式会社クラスキャット セールス・マーケティング本部 セールス・インフォメーション |
E-Mail:sales-info@classcat.com ; WebSite: https://www.classcat.com/ |
Facebook: https://www.facebook.com/ClassCatJP/ |
research: CIFAR10 (ShuffleNet)
仕様
- Total params: 352,042 (352K)
- Trainable params: 352,042
- Non-trainable params: 0
結果
- ShuffleNetV2
- {'test_acc': 0.8831999897956848, 'test_loss': 0.3897647559642792}
- 100 エポック ; Wall time: 1h 6min 42s
- 'Tesla M60' x 2
- ReduceLROnPlateau
コード
import torch
import torch.nn as nn
import torch.nn.functional as F


# Per-width-multiplier architecture settings for ShuffleNetV2.
# out_channels: channels after (layer1, layer2, layer3, final 1x1 conv);
# num_blocks: number of BasicBlocks following the DownBlock in each stage.
configs = {
    0.5: {'out_channels': (48, 96, 192, 1024), 'num_blocks': (3, 7, 3)},
    1:   {'out_channels': (116, 232, 464, 1024), 'num_blocks': (3, 7, 3)},
    1.5: {'out_channels': (176, 352, 704, 1024), 'num_blocks': (3, 7, 3)},
    2:   {'out_channels': (224, 488, 976, 2048), 'num_blocks': (3, 7, 3)},
}


class ShuffleBlock(nn.Module):
    """Channel shuffle: interleaves channels across `groups` groups so that
    information mixes between the two branches of each unit."""

    def __init__(self, groups=2):
        super(ShuffleBlock, self).__init__()
        self.groups = groups

    def forward(self, x):
        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
        N, C, H, W = x.size()
        g = self.groups
        # reshape (not view) is required: the permuted tensor is non-contiguous.
        return x.view(N, g, C // g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)


class SplitBlock(nn.Module):
    """Splits the channel dimension into two parts at `ratio`."""

    def __init__(self, ratio):
        super(SplitBlock, self).__init__()
        self.ratio = ratio

    def forward(self, x):
        # Returns (first `c` channels, remaining channels).
        c = int(x.size(1) * self.ratio)
        return x[:, :c, :, :], x[:, c:, :, :]


class BasicBlock(nn.Module):
    """ShuffleNetV2 stride-1 unit.

    One half of the channels passes through unchanged; the other half goes
    through 1x1 conv -> 3x3 depthwise conv -> 1x1 conv. The halves are then
    concatenated and channel-shuffled.
    """

    def __init__(self, in_channels, split_ratio=0.5):
        super(BasicBlock, self).__init__()
        self.split = SplitBlock(split_ratio)
        in_channels = int(in_channels * split_ratio)  # width of the transformed branch
        self.conv1 = nn.Conv2d(in_channels, in_channels,
                               kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(in_channels)
        # depthwise 3x3 (groups == channels)
        self.conv2 = nn.Conv2d(in_channels, in_channels,
                               kernel_size=3, stride=1, padding=1,
                               groups=in_channels, bias=False)
        self.bn2 = nn.BatchNorm2d(in_channels)
        self.conv3 = nn.Conv2d(in_channels, in_channels,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(in_channels)
        self.shuffle = ShuffleBlock()

    def forward(self, x):
        x1, x2 = self.split(x)
        out = F.relu(self.bn1(self.conv1(x2)))
        out = self.bn2(self.conv2(out))        # no ReLU after depthwise conv
        out = F.relu(self.bn3(self.conv3(out)))
        out = torch.cat([x1, out], 1)
        out = self.shuffle(out)
        return out


class DownBlock(nn.Module):
    """ShuffleNetV2 stride-2 (downsampling) unit.

    Both branches process the full input at stride 2 and each emits
    out_channels // 2 channels; concatenation doubles the channel count
    while halving the spatial resolution.
    """

    def __init__(self, in_channels, out_channels):
        super(DownBlock, self).__init__()
        mid_channels = out_channels // 2
        # left branch: depthwise 3x3 (stride 2) -> 1x1
        self.conv1 = nn.Conv2d(in_channels, in_channels,
                               kernel_size=3, stride=2, padding=1,
                               groups=in_channels, bias=False)
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv2 = nn.Conv2d(in_channels, mid_channels,
                               kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        # right branch: 1x1 -> depthwise 3x3 (stride 2) -> 1x1
        self.conv3 = nn.Conv2d(in_channels, mid_channels,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(mid_channels)
        self.conv4 = nn.Conv2d(mid_channels, mid_channels,
                               kernel_size=3, stride=2, padding=1,
                               groups=mid_channels, bias=False)
        self.bn4 = nn.BatchNorm2d(mid_channels)
        self.conv5 = nn.Conv2d(mid_channels, mid_channels,
                               kernel_size=1, bias=False)
        self.bn5 = nn.BatchNorm2d(mid_channels)
        self.shuffle = ShuffleBlock()

    def forward(self, x):
        # left
        out1 = self.bn1(self.conv1(x))
        out1 = F.relu(self.bn2(self.conv2(out1)))
        # right
        out2 = F.relu(self.bn3(self.conv3(x)))
        out2 = self.bn4(self.conv4(out2))
        out2 = F.relu(self.bn5(self.conv5(out2)))
        # concat
        out = torch.cat([out1, out2], 1)
        out = self.shuffle(out)
        return out


class ShuffleNetV2(nn.Module):
    """ShuffleNetV2 for 32x32 CIFAR10 inputs.

    Args:
        net_size: width multiplier, one of the keys of `configs`
            (0.5, 1, 1.5, 2).
    """

    def __init__(self, net_size):
        super(ShuffleNetV2, self).__init__()
        out_channels = configs[net_size]['out_channels']
        num_blocks = configs[net_size]['num_blocks']

        # Stem uses stride 1 (not the ImageNet stride-2 + max-pool stem)
        # because CIFAR10 images are only 32x32.
        self.conv1 = nn.Conv2d(3, 24, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_channels = 24
        self.layer1 = self._make_layer(out_channels[0], num_blocks[0])
        self.layer2 = self._make_layer(out_channels[1], num_blocks[1])
        self.layer3 = self._make_layer(out_channels[2], num_blocks[2])
        self.conv2 = nn.Conv2d(out_channels[2], out_channels[3],
                               kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels[3])
        self.linear = nn.Linear(out_channels[3], 10)

    def _make_layer(self, out_channels, num_blocks):
        """One stage: a stride-2 DownBlock followed by `num_blocks` BasicBlocks."""
        layers = [DownBlock(self.in_channels, out_channels)]
        for _ in range(num_blocks):
            layers.append(BasicBlock(out_channels))
        self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        # out = F.max_pool2d(out, 3, stride=2, padding=1)  # ImageNet-only stem pooling
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = F.avg_pool2d(out, 4)        # 4x4 feature map -> 1x1
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
# Quick sanity check: build the 0.5x model and push a random
# CIFAR10-sized batch through it.
net = ShuffleNetV2(net_size=0.5)
print(net)

x = torch.randn(3, 3, 32, 32)  # batch of 3 fake 32x32 RGB images
y = net(x)
print(y.shape)  # expected: torch.Size([3, 10])
ShuffleNetV2( (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (layer1): Sequential( (0): DownBlock( (conv1): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False) (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv4): Conv2d(24, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=24, bias=False) (bn4): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv5): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn5): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (1): BasicBlock( (split): SplitBlock() (conv1): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=24, bias=False) (bn2): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (2): BasicBlock( (split): SplitBlock() (conv1): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=24, bias=False) (bn2): BatchNorm2d(24, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (3): BasicBlock( (split): SplitBlock() (conv1): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=24, bias=False) (bn2): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) ) (layer2): Sequential( (0): DownBlock( (conv1): Conv2d(48, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=48, bias=False) (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv4): Conv2d(48, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=48, bias=False) (bn4): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv5): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn5): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (1): BasicBlock( (split): SplitBlock() (conv1): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 
groups=48, bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (2): BasicBlock( (split): SplitBlock() (conv1): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (3): BasicBlock( (split): SplitBlock() (conv1): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (4): BasicBlock( (split): SplitBlock() (conv1): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) 
(5): BasicBlock( (split): SplitBlock() (conv1): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (6): BasicBlock( (split): SplitBlock() (conv1): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (7): BasicBlock( (split): SplitBlock() (conv1): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False) (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(48, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) ) (layer3): Sequential( (0): DownBlock( (conv1): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False) (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) 
(bn2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv4): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False) (bn4): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv5): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn5): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (1): BasicBlock( (split): SplitBlock() (conv1): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=96, bias=False) (bn2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (2): BasicBlock( (split): SplitBlock() (conv1): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=96, bias=False) (bn2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) (3): BasicBlock( (split): SplitBlock() (conv1): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(96, 96, kernel_size=(3, 3), 
stride=(1, 1), padding=(1, 1), groups=96, bias=False) (bn2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shuffle): ShuffleBlock() ) ) (conv2): Conv2d(192, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn2): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (linear): Linear(in_features=1024, out_features=10, bias=True) ) torch.Size([3, 10])
# Per-layer parameter/shape report (torchsummary runs a forward pass,
# so the model is moved to the first GPU).
from torchsummary import summary

summary(ShuffleNetV2(net_size=0.5).to('cuda:0'), (3, 32, 32))
---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 24, 32, 32] 648 BatchNorm2d-2 [-1, 24, 32, 32] 48 Conv2d-3 [-1, 24, 16, 16] 216 BatchNorm2d-4 [-1, 24, 16, 16] 48 Conv2d-5 [-1, 24, 16, 16] 576 BatchNorm2d-6 [-1, 24, 16, 16] 48 Conv2d-7 [-1, 24, 32, 32] 576 BatchNorm2d-8 [-1, 24, 32, 32] 48 Conv2d-9 [-1, 24, 16, 16] 216 BatchNorm2d-10 [-1, 24, 16, 16] 48 Conv2d-11 [-1, 24, 16, 16] 576 BatchNorm2d-12 [-1, 24, 16, 16] 48 ShuffleBlock-13 [-1, 48, 16, 16] 0 DownBlock-14 [-1, 48, 16, 16] 0 SplitBlock-15 [[-1, 24, 16, 16], [-1, 24, 16, 16]] 0 Conv2d-16 [-1, 24, 16, 16] 576 BatchNorm2d-17 [-1, 24, 16, 16] 48 Conv2d-18 [-1, 24, 16, 16] 216 BatchNorm2d-19 [-1, 24, 16, 16] 48 Conv2d-20 [-1, 24, 16, 16] 576 BatchNorm2d-21 [-1, 24, 16, 16] 48 ShuffleBlock-22 [-1, 48, 16, 16] 0 BasicBlock-23 [-1, 48, 16, 16] 0 SplitBlock-24 [[-1, 24, 16, 16], [-1, 24, 16, 16]] 0 Conv2d-25 [-1, 24, 16, 16] 576 BatchNorm2d-26 [-1, 24, 16, 16] 48 Conv2d-27 [-1, 24, 16, 16] 216 BatchNorm2d-28 [-1, 24, 16, 16] 48 Conv2d-29 [-1, 24, 16, 16] 576 BatchNorm2d-30 [-1, 24, 16, 16] 48 ShuffleBlock-31 [-1, 48, 16, 16] 0 BasicBlock-32 [-1, 48, 16, 16] 0 SplitBlock-33 [[-1, 24, 16, 16], [-1, 24, 16, 16]] 0 Conv2d-34 [-1, 24, 16, 16] 576 BatchNorm2d-35 [-1, 24, 16, 16] 48 Conv2d-36 [-1, 24, 16, 16] 216 BatchNorm2d-37 [-1, 24, 16, 16] 48 Conv2d-38 [-1, 24, 16, 16] 576 BatchNorm2d-39 [-1, 24, 16, 16] 48 ShuffleBlock-40 [-1, 48, 16, 16] 0 BasicBlock-41 [-1, 48, 16, 16] 0 Conv2d-42 [-1, 48, 8, 8] 432 BatchNorm2d-43 [-1, 48, 8, 8] 96 Conv2d-44 [-1, 48, 8, 8] 2,304 BatchNorm2d-45 [-1, 48, 8, 8] 96 Conv2d-46 [-1, 48, 16, 16] 2,304 BatchNorm2d-47 [-1, 48, 16, 16] 96 Conv2d-48 [-1, 48, 8, 8] 432 BatchNorm2d-49 [-1, 48, 8, 8] 96 Conv2d-50 [-1, 48, 8, 8] 2,304 BatchNorm2d-51 [-1, 48, 8, 8] 96 ShuffleBlock-52 [-1, 96, 8, 8] 0 DownBlock-53 [-1, 96, 8, 8] 0 SplitBlock-54 [[-1, 48, 8, 8], [-1, 48, 
8, 8]] 0 Conv2d-55 [-1, 48, 8, 8] 2,304 BatchNorm2d-56 [-1, 48, 8, 8] 96 Conv2d-57 [-1, 48, 8, 8] 432 BatchNorm2d-58 [-1, 48, 8, 8] 96 Conv2d-59 [-1, 48, 8, 8] 2,304 BatchNorm2d-60 [-1, 48, 8, 8] 96 ShuffleBlock-61 [-1, 96, 8, 8] 0 BasicBlock-62 [-1, 96, 8, 8] 0 SplitBlock-63 [[-1, 48, 8, 8], [-1, 48, 8, 8]] 0 Conv2d-64 [-1, 48, 8, 8] 2,304 BatchNorm2d-65 [-1, 48, 8, 8] 96 Conv2d-66 [-1, 48, 8, 8] 432 BatchNorm2d-67 [-1, 48, 8, 8] 96 Conv2d-68 [-1, 48, 8, 8] 2,304 BatchNorm2d-69 [-1, 48, 8, 8] 96 ShuffleBlock-70 [-1, 96, 8, 8] 0 BasicBlock-71 [-1, 96, 8, 8] 0 SplitBlock-72 [[-1, 48, 8, 8], [-1, 48, 8, 8]] 0 Conv2d-73 [-1, 48, 8, 8] 2,304 BatchNorm2d-74 [-1, 48, 8, 8] 96 Conv2d-75 [-1, 48, 8, 8] 432 BatchNorm2d-76 [-1, 48, 8, 8] 96 Conv2d-77 [-1, 48, 8, 8] 2,304 BatchNorm2d-78 [-1, 48, 8, 8] 96 ShuffleBlock-79 [-1, 96, 8, 8] 0 BasicBlock-80 [-1, 96, 8, 8] 0 SplitBlock-81 [[-1, 48, 8, 8], [-1, 48, 8, 8]] 0 Conv2d-82 [-1, 48, 8, 8] 2,304 BatchNorm2d-83 [-1, 48, 8, 8] 96 Conv2d-84 [-1, 48, 8, 8] 432 BatchNorm2d-85 [-1, 48, 8, 8] 96 Conv2d-86 [-1, 48, 8, 8] 2,304 BatchNorm2d-87 [-1, 48, 8, 8] 96 ShuffleBlock-88 [-1, 96, 8, 8] 0 BasicBlock-89 [-1, 96, 8, 8] 0 SplitBlock-90 [[-1, 48, 8, 8], [-1, 48, 8, 8]] 0 Conv2d-91 [-1, 48, 8, 8] 2,304 BatchNorm2d-92 [-1, 48, 8, 8] 96 Conv2d-93 [-1, 48, 8, 8] 432 BatchNorm2d-94 [-1, 48, 8, 8] 96 Conv2d-95 [-1, 48, 8, 8] 2,304 BatchNorm2d-96 [-1, 48, 8, 8] 96 ShuffleBlock-97 [-1, 96, 8, 8] 0 BasicBlock-98 [-1, 96, 8, 8] 0 SplitBlock-99 [[-1, 48, 8, 8], [-1, 48, 8, 8]] 0 Conv2d-100 [-1, 48, 8, 8] 2,304 BatchNorm2d-101 [-1, 48, 8, 8] 96 Conv2d-102 [-1, 48, 8, 8] 432 BatchNorm2d-103 [-1, 48, 8, 8] 96 Conv2d-104 [-1, 48, 8, 8] 2,304 BatchNorm2d-105 [-1, 48, 8, 8] 96 ShuffleBlock-106 [-1, 96, 8, 8] 0 BasicBlock-107 [-1, 96, 8, 8] 0 SplitBlock-108 [[-1, 48, 8, 8], [-1, 48, 8, 8]] 0 Conv2d-109 [-1, 48, 8, 8] 2,304 BatchNorm2d-110 [-1, 48, 8, 8] 96 Conv2d-111 [-1, 48, 8, 8] 432 BatchNorm2d-112 [-1, 48, 8, 8] 96 Conv2d-113 [-1, 48, 8, 8] 2,304 
BatchNorm2d-114 [-1, 48, 8, 8] 96 ShuffleBlock-115 [-1, 96, 8, 8] 0 BasicBlock-116 [-1, 96, 8, 8] 0 Conv2d-117 [-1, 96, 4, 4] 864 BatchNorm2d-118 [-1, 96, 4, 4] 192 Conv2d-119 [-1, 96, 4, 4] 9,216 BatchNorm2d-120 [-1, 96, 4, 4] 192 Conv2d-121 [-1, 96, 8, 8] 9,216 BatchNorm2d-122 [-1, 96, 8, 8] 192 Conv2d-123 [-1, 96, 4, 4] 864 BatchNorm2d-124 [-1, 96, 4, 4] 192 Conv2d-125 [-1, 96, 4, 4] 9,216 BatchNorm2d-126 [-1, 96, 4, 4] 192 ShuffleBlock-127 [-1, 192, 4, 4] 0 DownBlock-128 [-1, 192, 4, 4] 0 SplitBlock-129 [[-1, 96, 4, 4], [-1, 96, 4, 4]] 0 Conv2d-130 [-1, 96, 4, 4] 9,216 BatchNorm2d-131 [-1, 96, 4, 4] 192 Conv2d-132 [-1, 96, 4, 4] 864 BatchNorm2d-133 [-1, 96, 4, 4] 192 Conv2d-134 [-1, 96, 4, 4] 9,216 BatchNorm2d-135 [-1, 96, 4, 4] 192 ShuffleBlock-136 [-1, 192, 4, 4] 0 BasicBlock-137 [-1, 192, 4, 4] 0 SplitBlock-138 [[-1, 96, 4, 4], [-1, 96, 4, 4]] 0 Conv2d-139 [-1, 96, 4, 4] 9,216 BatchNorm2d-140 [-1, 96, 4, 4] 192 Conv2d-141 [-1, 96, 4, 4] 864 BatchNorm2d-142 [-1, 96, 4, 4] 192 Conv2d-143 [-1, 96, 4, 4] 9,216 BatchNorm2d-144 [-1, 96, 4, 4] 192 ShuffleBlock-145 [-1, 192, 4, 4] 0 BasicBlock-146 [-1, 192, 4, 4] 0 SplitBlock-147 [[-1, 96, 4, 4], [-1, 96, 4, 4]] 0 Conv2d-148 [-1, 96, 4, 4] 9,216 BatchNorm2d-149 [-1, 96, 4, 4] 192 Conv2d-150 [-1, 96, 4, 4] 864 BatchNorm2d-151 [-1, 96, 4, 4] 192 Conv2d-152 [-1, 96, 4, 4] 9,216 BatchNorm2d-153 [-1, 96, 4, 4] 192 ShuffleBlock-154 [-1, 192, 4, 4] 0 BasicBlock-155 [-1, 192, 4, 4] 0 Conv2d-156 [-1, 1024, 4, 4] 196,608 BatchNorm2d-157 [-1, 1024, 4, 4] 2,048 Linear-158 [-1, 10] 10,250 ================================================================ Total params: 352,042 Trainable params: 352,042 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.01 Forward/backward pass size (MB): 1416.34 Params size (MB): 1.34 Estimated Total Size (MB): 1417.69 ----------------------------------------------------------------
ReduceLROnPlateau スケジューラ
import torch import torch.nn as nn import torch.nn.functional as F from torch.optim.lr_scheduler import OneCycleLR, CyclicLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau from torch.optim.swa_utils import AveragedModel, update_bn import torchvision import pytorch_lightning as pl from pytorch_lightning.callbacks import LearningRateMonitor, GPUStatsMonitor, EarlyStopping from pytorch_lightning.metrics.functional import accuracy from pl_bolts.datamodules import CIFAR10DataModule from pl_bolts.transforms.dataset_normalizations import cifar10_normalization
# Fix random seeds for reproducibility; the trailing ';' suppresses the
# notebook cell's echoed return value.
pl.seed_everything(7);
batch_size = 100

_tf = torchvision.transforms  # shorthand alias

# Training pipeline: pad-and-crop + horizontal flip augmentation,
# then tensor conversion and CIFAR10 channel normalization.
train_transforms = _tf.Compose([
    _tf.RandomCrop(32, padding=4),
    _tf.RandomHorizontalFlip(),
    _tf.ToTensor(),
    cifar10_normalization(),
])

# Evaluation pipeline: normalization only, no augmentation.
test_transforms = _tf.Compose([
    _tf.ToTensor(),
    cifar10_normalization(),
])

# Validation reuses the deterministic test-time pipeline.
cifar10_dm = CIFAR10DataModule(
    batch_size=batch_size,
    train_transforms=train_transforms,
    test_transforms=test_transforms,
    val_transforms=test_transforms,
)
class LitCifar10(pl.LightningModule):
    """LightningModule wrapping ShuffleNetV2 (0.5x) for CIFAR10.

    Args:
        optim: 'adam' selects Adam; any other value selects SGD with momentum.
        lr: initial learning rate.
        factor: multiplicative LR decay factor for ReduceLROnPlateau.
    """

    def __init__(self, optim, lr=0.05, factor=0.8):
        super().__init__()
        self.save_hyperparameters()  # exposes optim/lr/factor via self.hparams
        self.model = ShuffleNetV2(net_size=0.5)

    def forward(self, x):
        # Return log-probabilities so F.nll_loss can be applied directly.
        out = self.model(x)
        return F.log_softmax(out, dim=1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        # Was F.log_softmax(self.model(x), dim=1) — identical to self(x),
        # routed through forward() to avoid duplicating the softmax.
        logits = self(x)
        loss = F.nll_loss(logits, y)
        self.log('train_loss', loss)
        return loss

    def evaluate(self, batch, stage=None):
        """Shared loss/accuracy computation for validation and test."""
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)
        if stage:
            self.log(f'{stage}_loss', loss, prog_bar=True)
            self.log(f'{stage}_acc', acc, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        self.evaluate(batch, 'val')

    def test_step(self, batch, batch_idx):
        self.evaluate(batch, 'test')

    def configure_optimizers(self):
        optim = self.hparams.optim
        if optim == 'adam':
            optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr,
                                         weight_decay=0, eps=1e-3)
        else:
            optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr,
                                        momentum=0.9, weight_decay=5e-4)
        # 'max' mode: reduce the LR when the monitored val_acc plateaus.
        return {
            'optimizer': optimizer,
            'lr_scheduler': ReduceLROnPlateau(
                optimizer, 'max', patience=5, factor=self.hparams.factor,
                verbose=True, threshold=0.0001, threshold_mode='abs',
                cooldown=1, min_lr=1e-5),
            'monitor': 'val_acc'
        }
%%time
# Train for 100 epochs on 2 GPUs with DataParallel ('dp'), logging LR
# changes to TensorBoard, then evaluate on the test set.
model = LitCifar10(optim='sgd', lr=0.05, factor=0.5)
model.datamodule = cifar10_dm

trainer = pl.Trainer(
    gpus=2,
    #num_nodes=1,
    accelerator='dp',
    max_epochs=100,
    progress_bar_refresh_rate=100,
    logger=pl.loggers.TensorBoardLogger('tblogs/', name='shufflenet2'),
    callbacks=[LearningRateMonitor(logging_interval='step')],
)

trainer.fit(model, cifar10_dm)
# trailing ';' suppresses the echoed test-result list in the notebook
trainer.test(model, datamodule=cifar10_dm);
GPU available: True, used: True TPU available: None, using: 0 TPU cores Files already downloaded and verified Files already downloaded and verified | Name | Type | Params --------------------------------------- 0 | model | ShuffleNetV2 | 352 K --------------------------------------- 352 K Trainable params 0 Non-trainable params 352 K Total params 1.408 Total estimated model params size (MB) (...) Epoch 59: reducing learning rate of group 0 to 2.5000e-02. Epoch 81: reducing learning rate of group 0 to 1.2500e-02. Epoch 91: reducing learning rate of group 0 to 6.2500e-03. (...) -------------------------------------------------------------------------------- DATALOADER:0 TEST RESULTS {'test_acc': 0.8831999897956848, 'test_loss': 0.3897647559642792} -------------------------------------------------------------------------------- CPU times: user 1h 28min 43s, sys: 4min 40s, total: 1h 33min 23s Wall time: 1h 6min 42s [{'test_loss': 0.3897647559642792, 'test_acc': 0.8831999897956848}]
以上