PyTorch Lightning 1.1: research: CIFAR10 (EfficientNet)
Author: ClassCat Co., Ltd. Sales Information
Date: 02/22/2021 (1.1.x)
* This page reports the results of experiments carried out with reference to the following resources:
* Feel free to link to this page, but we would appreciate a note to sales-info@classcat.com.
research: CIFAR10 (EfficientNet)
Results
150 epochs: ReduceLROnPlateau
- {'test_acc': 0.8824999928474426, 'test_loss': 0.3956470191478729} - Wall time: 2h 17min 43s
Code
import torch
import torch.nn as nn
import torch.nn.functional as F


def swish(x):
    return x * x.sigmoid()


def drop_connect(x, drop_ratio):
    keep_ratio = 1.0 - drop_ratio
    mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
    mask.bernoulli_(keep_ratio)
    x.div_(keep_ratio)
    x.mul_(mask)
    return x


class SE(nn.Module):
    '''Squeeze-and-Excitation block with Swish.'''

    def __init__(self, in_channels, se_channels):
        super(SE, self).__init__()
        self.se1 = nn.Conv2d(in_channels, se_channels, kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_channels, in_channels, kernel_size=1, bias=True)

    def forward(self, x):
        out = F.adaptive_avg_pool2d(x, (1, 1))
        out = swish(self.se1(out))
        out = self.se2(out).sigmoid()
        out = x * out
        return out


class Block(nn.Module):
    '''expansion + depthwise + pointwise + squeeze-excitation'''

    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 expand_ratio=1, se_ratio=0., drop_rate=0.):
        super(Block, self).__init__()
        self.stride = stride
        self.drop_rate = drop_rate
        self.expand_ratio = expand_ratio

        # Expansion
        channels = expand_ratio * in_channels
        self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)

        # Depthwise conv
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=kernel_size, stride=stride,
                               padding=(1 if kernel_size == 3 else 2), groups=channels, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

        # SE layers
        se_channels = int(in_channels * se_ratio)
        self.se = SE(channels, se_channels)

        # Output
        self.conv3 = nn.Conv2d(channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        # Skip connection if in and out shapes are the same (MV-V2 style)
        self.has_skip = (stride == 1) and (in_channels == out_channels)

    def forward(self, x):
        out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
        out = swish(self.bn2(self.conv2(out)))
        out = self.se(out)
        out = self.bn3(self.conv3(out))
        if self.has_skip:
            if self.training and self.drop_rate > 0:
                out = drop_connect(out, self.drop_rate)
            out = out + x
        return out


class EfficientNet(nn.Module):
    def __init__(self, cfg, num_classes=10):
        super(EfficientNet, self).__init__()
        self.cfg = cfg
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_channels=32)
        self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)

    def _make_layers(self, in_channels):
        layers = []
        cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks',
                                     'kernel_size', 'stride']]
        b = 0
        blocks = sum(self.cfg['num_blocks'])
        for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
            strides = [stride] + [1] * (num_blocks - 1)
            for stride in strides:
                drop_rate = self.cfg['drop_connect_rate'] * b / blocks
                layers.append(
                    Block(in_channels, out_channels, kernel_size, stride,
                          expansion, se_ratio=0.25, drop_rate=drop_rate))
                in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = swish(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        dropout_rate = self.cfg['dropout_rate']
        if self.training and dropout_rate > 0:
            out = F.dropout(out, p=dropout_rate)
        out = self.linear(out)
        return out


def EfficientNetB0():
    cfg = {
        'num_blocks': [1, 2, 2, 3, 3, 4, 1],
        'expansion': [1, 6, 6, 6, 6, 6, 6],
        'out_channels': [16, 24, 40, 80, 112, 192, 320],
        'kernel_size': [3, 3, 5, 3, 5, 5, 3],
        'stride': [1, 2, 2, 2, 1, 2, 1],
        'dropout_rate': 0.2,
        'drop_connect_rate': 0.2,
    }
    return EfficientNet(cfg)
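The drop_connect helper implements per-sample drop connect: during training each example's residual branch is zeroed with probability drop_ratio and the survivors are rescaled by 1/keep_ratio so the expected value is unchanged (note that, as written, _make_layers never increments b, so every Block here is actually built with drop_rate=0). A minimal sanity check of the helper itself, not part of the original post:

# Sketch (not in the original code): verify that drop_connect preserves the mean
# and drops roughly drop_ratio of the samples.
torch.manual_seed(0)
x = torch.ones(10000, 1, 1, 1)
out = drop_connect(x.clone(), drop_ratio=0.2)
print(out.mean())                  # close to 1.0: expectation is preserved
print((out == 0).float().mean())   # close to 0.2: fraction of dropped samples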
net = EfficientNetB0()
print(net)

x = torch.randn(2, 3, 32, 32)
y = net(x)
print(y.shape)
EfficientNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layers): Sequential(
    (0): Block(
      (conv1): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (se): SE(
        (se1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
        (se2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
      )
      (conv3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (...)
    (15): Block(
      (conv1): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(1152, 1152, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1152, bias=False)
      (bn2): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (se): SE(
        (se1): Conv2d(1152, 48, kernel_size=(1, 1), stride=(1, 1))
        (se2): Conv2d(48, 1152, kernel_size=(1, 1), stride=(1, 1))
      )
      (conv3): Conv2d(1152, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (linear): Linear(in_features=320, out_features=10, bias=True)
)
torch.Size([2, 10])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
device(type='cuda')
from torchsummary import summary

summary(EfficientNetB0().to('cuda'), (3, 32, 32))
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 32, 32, 32]             864
       BatchNorm2d-2           [-1, 32, 32, 32]              64
            Conv2d-3           [-1, 32, 32, 32]             288
       BatchNorm2d-4           [-1, 32, 32, 32]              64
            Conv2d-5              [-1, 8, 1, 1]             264
            Conv2d-6             [-1, 32, 1, 1]             288
                SE-7           [-1, 32, 32, 32]               0
            Conv2d-8           [-1, 16, 32, 32]             512
       BatchNorm2d-9           [-1, 16, 32, 32]              32
          Block-10             [-1, 16, 32, 32]               0
(...)
          Conv2d-151          [-1, 1152, 2, 2]         221,184
     BatchNorm2d-152          [-1, 1152, 2, 2]           2,304
          Conv2d-153          [-1, 1152, 2, 2]          10,368
     BatchNorm2d-154          [-1, 1152, 2, 2]           2,304
          Conv2d-155            [-1, 48, 1, 1]          55,344
          Conv2d-156          [-1, 1152, 1, 1]          56,448
              SE-157          [-1, 1152, 2, 2]               0
          Conv2d-158           [-1, 320, 2, 2]         368,640
     BatchNorm2d-159           [-1, 320, 2, 2]             640
           Block-160           [-1, 320, 2, 2]               0
          Linear-161                  [-1, 10]           3,210
================================================================
Total params: 3,598,598
Trainable params: 3,598,598
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 10.18
Params size (MB): 13.73
Estimated Total Size (MB): 23.92
----------------------------------------------------------------
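As a quick cross-check of the torchsummary totals, the trainable parameter count can also be computed directly from the model. This small sketch is not part of the original post; its output should agree with the 3,598,598 total reported above.

# Count trainable parameters directly in PyTorch.
net = EfficientNetB0()
num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(num_params)   # expected to match the torchsummary total above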
ReduceLROnPlateau scheduler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import OneCycleLR, CyclicLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
from torch.optim.swa_utils import AveragedModel, update_bn

import torchvision

import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, GPUStatsMonitor, EarlyStopping
from pytorch_lightning.metrics.functional import accuracy
from pl_bolts.datamodules import CIFAR10DataModule
from pl_bolts.transforms.dataset_normalizations import cifar10_normalization
pl.seed_everything(7);
batch_size = 50

train_transforms = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop(32, padding=4),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    cifar10_normalization(),
])

test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    cifar10_normalization(),
])

cifar10_dm = CIFAR10DataModule(
    batch_size=batch_size,
    train_transforms=train_transforms,
    test_transforms=test_transforms,
    val_transforms=test_transforms,
)
class LitCifar10(pl.LightningModule):
    def __init__(self, lr=0.05):
        super().__init__()
        self.save_hyperparameters()
        self.model = EfficientNetB0()

    def forward(self, x):
        out = self.model(x)
        return F.log_softmax(out, dim=1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = F.log_softmax(self.model(x), dim=1)
        loss = F.nll_loss(logits, y)
        self.log('train_loss', loss)
        return loss

    def evaluate(self, batch, stage=None):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)

        if stage:
            self.log(f'{stage}_loss', loss, prog_bar=True)
            self.log(f'{stage}_acc', acc, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        self.evaluate(batch, 'val')

    def test_step(self, batch, batch_idx):
        self.evaluate(batch, 'test')

    def configure_optimizers(self):
        if False:
            optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr,
                                         weight_decay=0, eps=1e-3)
        else:
            optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr,
                                        momentum=0.9, weight_decay=5e-4)

        return {
            'optimizer': optimizer,
            'lr_scheduler': ReduceLROnPlateau(optimizer, 'max', patience=4, factor=0.8,
                                              verbose=True, threshold=0.0001,
                                              threshold_mode='abs', cooldown=1, min_lr=1e-5),
            'monitor': 'val_acc'
        }

    def xconfigure_optimizers(self):
        #print("###")
        #print(self.hparams)
        optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr,
                                    momentum=0.9, weight_decay=5e-4)
        steps_per_epoch = 45000 // batch_size
        scheduler_dict = {
            #'scheduler': ExponentialLR(optimizer, gamma=0.1),
            #'interval': 'epoch',
            'scheduler': OneCycleLR(optimizer, max_lr=0.1, pct_start=0.2,
                                    epochs=self.trainer.max_epochs,
                                    steps_per_epoch=steps_per_epoch),
            #'scheduler': CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, step_size_up=steps_per_epoch*2, mode="triangular2"),
            #'scheduler': CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, step_size_up=steps_per_epoch, mode="exp_range", gamma=0.85),
            #'scheduler': CosineAnnealingLR(optimizer, T_max=200),
            'interval': 'step',
        }
        return {'optimizer': optimizer, 'lr_scheduler': scheduler_dict}
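Because configure_optimizers returns the scheduler together with 'monitor': 'val_acc', Lightning calls scheduler.step(val_acc) once per validation epoch, and ReduceLROnPlateau in 'max' mode lowers the learning rate by factor=0.8 once the accuracy has failed to improve for patience=4 epochs. A minimal standalone sketch of that behaviour, using a toy parameter and a hand-made accuracy sequence (not part of the original code):

# Toy optimizer/scheduler just to illustrate the plateau logic used above.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.05)
scheduler = ReduceLROnPlateau(optimizer, 'max', patience=4, factor=0.8,
                              threshold=0.0001, threshold_mode='abs',
                              cooldown=1, min_lr=1e-5)

val_accs = [0.70, 0.75, 0.80] + [0.80] * 8    # accuracy plateaus after the 3rd epoch
for epoch, acc in enumerate(val_accs):
    scheduler.step(acc)                        # what Lightning does via 'monitor': 'val_acc'
    print(epoch, acc, optimizer.param_groups[0]['lr'])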
%%time

model = LitCifar10(lr=0.05)
model.datamodule = cifar10_dm

trainer = pl.Trainer(
    gpus=1,
    max_epochs=150,
    auto_scale_batch_size=True,
    auto_lr_find=True,
    progress_bar_refresh_rate=100,
    logger=pl.loggers.TensorBoardLogger('tblogs/', name='efficientnet'),
    callbacks=[LearningRateMonitor(logging_interval='step')],
)

trainer.fit(model, cifar10_dm)
trainer.test(model, datamodule=cifar10_dm);
GPU available: True, used: True
TPU available: None, using: 0 TPU cores

  | Name  | Type         | Params
---------------------------------------
0 | model | EfficientNet | 3.6 M
---------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.399    Total estimated model params size (MB)

(...)
(...)

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.8824999928474426, 'test_loss': 0.3956470191478729}
--------------------------------------------------------------------------------
CPU times: user 2h 7min 34s, sys: 3min 30s, total: 2h 11min 4s
Wall time: 2h 17min 43s
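After training, Lightning's default checkpointing should have written a checkpoint under the TensorBoardLogger directory. A minimal sketch of restoring it for inference; the path below is a placeholder, since the actual filename depends on the run:

# Placeholder path: the real checkpoint lives under tblogs/efficientnet/version_X/checkpoints/.
ckpt_path = 'tblogs/efficientnet/version_0/checkpoints/epoch=149.ckpt'

model = LitCifar10.load_from_checkpoint(ckpt_path)
model.eval()

with torch.no_grad():
    x = torch.randn(1, 3, 32, 32)     # stand-in for one normalized CIFAR-10 image
    pred = model(x).argmax(dim=1)
print(pred)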
That's all.