PyTorch Lightning 1.1: research : CIFAR10 (EfficientNet)
Author: ClassCat Co., Ltd. Sales Information
Date: 02/22/2021 (1.1.x)
* This page is a report of experimental results carried out with reference to the following resources:
research: CIFAR10 (EfficientNet)
Results
150 epochs: ReduceLROnPlateau
- {'test_acc': 0.8824999928474426, 'test_loss': 0.3956470191478729} – Wall time: 2h 17min 43s
Code
import torch
import torch.nn as nn
import torch.nn.functional as F
def swish(x):
    return x * x.sigmoid()

def drop_connect(x, drop_ratio):
    '''Drop-connect (stochastic depth): randomly zero whole samples in the batch and rescale the survivors.'''
    keep_ratio = 1.0 - drop_ratio
    mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
    mask.bernoulli_(keep_ratio)
    x.div_(keep_ratio)
    x.mul_(mask)
    return x

class SE(nn.Module):
    '''Squeeze-and-Excitation block with Swish.'''
    def __init__(self, in_channels, se_channels):
        super(SE, self).__init__()
        self.se1 = nn.Conv2d(in_channels, se_channels,
                             kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_channels, in_channels,
                             kernel_size=1, bias=True)

    def forward(self, x):
        out = F.adaptive_avg_pool2d(x, (1, 1))
        out = swish(self.se1(out))
        out = self.se2(out).sigmoid()
        out = x * out
        return out
class Block(nn.Module):
    '''expansion + depthwise + pointwise + squeeze-excitation'''
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 expand_ratio=1,
                 se_ratio=0.,
                 drop_rate=0.):
        super(Block, self).__init__()
        self.stride = stride
        self.drop_rate = drop_rate
        self.expand_ratio = expand_ratio

        # Expansion
        channels = expand_ratio * in_channels
        self.conv1 = nn.Conv2d(in_channels,
                               channels,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(channels)

        # Depthwise conv
        self.conv2 = nn.Conv2d(channels,
                               channels,
                               kernel_size=kernel_size,
                               stride=stride,
                               padding=(1 if kernel_size == 3 else 2),
                               groups=channels,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

        # SE layers
        se_channels = int(in_channels * se_ratio)
        self.se = SE(channels, se_channels)

        # Output
        self.conv3 = nn.Conv2d(channels,
                               out_channels,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        # Skip connection if in and out shapes are the same (MB-V2 style)
        self.has_skip = (stride == 1) and (in_channels == out_channels)

    def forward(self, x):
        out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
        out = swish(self.bn2(self.conv2(out)))
        out = self.se(out)
        out = self.bn3(self.conv3(out))
        if self.has_skip:
            if self.training and self.drop_rate > 0:
                out = drop_connect(out, self.drop_rate)
            out = out + x
        return out
class EfficientNet(nn.Module):
    def __init__(self, cfg, num_classes=10):
        super(EfficientNet, self).__init__()
        self.cfg = cfg
        self.conv1 = nn.Conv2d(3,
                               32,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_channels=32)
        self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)

    def _make_layers(self, in_channels):
        layers = []
        cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks',
                                     'kernel_size', 'stride']]
        b = 0
        blocks = sum(self.cfg['num_blocks'])
        for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
            strides = [stride] + [1] * (num_blocks - 1)
            for stride in strides:
                # drop-connect rate increases linearly with block depth
                drop_rate = self.cfg['drop_connect_rate'] * b / blocks
                layers.append(
                    Block(in_channels,
                          out_channels,
                          kernel_size,
                          stride,
                          expansion,
                          se_ratio=0.25,
                          drop_rate=drop_rate))
                in_channels = out_channels
                b += 1
        return nn.Sequential(*layers)

    def forward(self, x):
        out = swish(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        dropout_rate = self.cfg['dropout_rate']
        if self.training and dropout_rate > 0:
            out = F.dropout(out, p=dropout_rate)
        out = self.linear(out)
        return out
def EfficientNetB0():
    cfg = {
        'num_blocks': [1, 2, 2, 3, 3, 4, 1],
        'expansion': [1, 6, 6, 6, 6, 6, 6],
        'out_channels': [16, 24, 40, 80, 112, 192, 320],
        'kernel_size': [3, 3, 5, 3, 5, 5, 3],
        'stride': [1, 2, 2, 2, 1, 2, 1],
        'dropout_rate': 0.2,
        'drop_connect_rate': 0.2,
    }
    return EfficientNet(cfg)
net = EfficientNetB0()
print(net)

x = torch.randn(2, 3, 32, 32)
y = net(x)
print(y.shape)
EfficientNet(
(conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(layers): Sequential(
(0): Block(
(conv1): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): Block(
(conv1): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False)
(bn2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(96, 4, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(4, 96, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(2): Block(
(conv1): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=144, bias=False)
(bn2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(144, 6, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(6, 144, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(3): Block(
(conv1): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(144, 144, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=144, bias=False)
(bn2): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(144, 6, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(6, 144, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(144, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(4): Block(
(conv1): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(240, 240, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=240, bias=False)
(bn2): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(240, 10, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(10, 240, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(240, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(5): Block(
(conv1): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(240, 240, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=240, bias=False)
(bn2): BatchNorm2d(240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(240, 10, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(10, 240, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(6): Block(
(conv1): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
(bn2): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(480, 20, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(20, 480, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(7): Block(
(conv1): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
(bn2): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(480, 20, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(20, 480, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(8): Block(
(conv1): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(480, 480, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=480, bias=False)
(bn2): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(480, 20, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(20, 480, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(480, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(9): Block(
(conv1): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=672, bias=False)
(bn2): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(10): Block(
(conv1): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=672, bias=False)
(bn2): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(11): Block(
(conv1): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(672, 672, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=672, bias=False)
(bn2): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(672, 28, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(28, 672, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(672, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(12): Block(
(conv1): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1152, bias=False)
(bn2): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(1152, 48, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(48, 1152, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(13): Block(
(conv1): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1152, bias=False)
(bn2): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(1152, 48, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(48, 1152, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(14): Block(
(conv1): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1152, bias=False)
(bn2): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(1152, 48, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(48, 1152, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(15): Block(
(conv1): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(1152, 1152, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1152, bias=False)
(bn2): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(se): SE(
(se1): Conv2d(1152, 48, kernel_size=(1, 1), stride=(1, 1))
(se2): Conv2d(48, 1152, kernel_size=(1, 1), stride=(1, 1))
)
(conv3): Conv2d(1152, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(linear): Linear(in_features=320, out_features=10, bias=True)
)
torch.Size([2, 10])
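As a quick cross-check before the torchsummary report below, the trainable-parameter count can also be computed directly in plain PyTorch. This is just a minimal sketch; the figure should agree with the "Total params" line of the summary.

# Count trainable parameters of the model built above.
num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(f'{num_params:,} trainable parameters')  # 3,598,598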
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
device(type='cuda')
from torchsummary import summary
summary(EfficientNetB0().to('cuda'), (3, 32, 32))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 32, 32, 32] 864
BatchNorm2d-2 [-1, 32, 32, 32] 64
Conv2d-3 [-1, 32, 32, 32] 288
BatchNorm2d-4 [-1, 32, 32, 32] 64
Conv2d-5 [-1, 8, 1, 1] 264
Conv2d-6 [-1, 32, 1, 1] 288
SE-7 [-1, 32, 32, 32] 0
Conv2d-8 [-1, 16, 32, 32] 512
BatchNorm2d-9 [-1, 16, 32, 32] 32
Block-10 [-1, 16, 32, 32] 0
Conv2d-11 [-1, 96, 32, 32] 1,536
BatchNorm2d-12 [-1, 96, 32, 32] 192
Conv2d-13 [-1, 96, 16, 16] 864
BatchNorm2d-14 [-1, 96, 16, 16] 192
Conv2d-15 [-1, 4, 1, 1] 388
Conv2d-16 [-1, 96, 1, 1] 480
SE-17 [-1, 96, 16, 16] 0
Conv2d-18 [-1, 24, 16, 16] 2,304
BatchNorm2d-19 [-1, 24, 16, 16] 48
Block-20 [-1, 24, 16, 16] 0
Conv2d-21 [-1, 144, 16, 16] 3,456
BatchNorm2d-22 [-1, 144, 16, 16] 288
Conv2d-23 [-1, 144, 16, 16] 1,296
BatchNorm2d-24 [-1, 144, 16, 16] 288
Conv2d-25 [-1, 6, 1, 1] 870
Conv2d-26 [-1, 144, 1, 1] 1,008
SE-27 [-1, 144, 16, 16] 0
Conv2d-28 [-1, 24, 16, 16] 3,456
BatchNorm2d-29 [-1, 24, 16, 16] 48
Block-30 [-1, 24, 16, 16] 0
Conv2d-31 [-1, 144, 16, 16] 3,456
BatchNorm2d-32 [-1, 144, 16, 16] 288
Conv2d-33 [-1, 144, 8, 8] 3,600
BatchNorm2d-34 [-1, 144, 8, 8] 288
Conv2d-35 [-1, 6, 1, 1] 870
Conv2d-36 [-1, 144, 1, 1] 1,008
SE-37 [-1, 144, 8, 8] 0
Conv2d-38 [-1, 40, 8, 8] 5,760
BatchNorm2d-39 [-1, 40, 8, 8] 80
Block-40 [-1, 40, 8, 8] 0
Conv2d-41 [-1, 240, 8, 8] 9,600
BatchNorm2d-42 [-1, 240, 8, 8] 480
Conv2d-43 [-1, 240, 8, 8] 6,000
BatchNorm2d-44 [-1, 240, 8, 8] 480
Conv2d-45 [-1, 10, 1, 1] 2,410
Conv2d-46 [-1, 240, 1, 1] 2,640
SE-47 [-1, 240, 8, 8] 0
Conv2d-48 [-1, 40, 8, 8] 9,600
BatchNorm2d-49 [-1, 40, 8, 8] 80
Block-50 [-1, 40, 8, 8] 0
Conv2d-51 [-1, 240, 8, 8] 9,600
BatchNorm2d-52 [-1, 240, 8, 8] 480
Conv2d-53 [-1, 240, 4, 4] 2,160
BatchNorm2d-54 [-1, 240, 4, 4] 480
Conv2d-55 [-1, 10, 1, 1] 2,410
Conv2d-56 [-1, 240, 1, 1] 2,640
SE-57 [-1, 240, 4, 4] 0
Conv2d-58 [-1, 80, 4, 4] 19,200
BatchNorm2d-59 [-1, 80, 4, 4] 160
Block-60 [-1, 80, 4, 4] 0
Conv2d-61 [-1, 480, 4, 4] 38,400
BatchNorm2d-62 [-1, 480, 4, 4] 960
Conv2d-63 [-1, 480, 4, 4] 4,320
BatchNorm2d-64 [-1, 480, 4, 4] 960
Conv2d-65 [-1, 20, 1, 1] 9,620
Conv2d-66 [-1, 480, 1, 1] 10,080
SE-67 [-1, 480, 4, 4] 0
Conv2d-68 [-1, 80, 4, 4] 38,400
BatchNorm2d-69 [-1, 80, 4, 4] 160
Block-70 [-1, 80, 4, 4] 0
Conv2d-71 [-1, 480, 4, 4] 38,400
BatchNorm2d-72 [-1, 480, 4, 4] 960
Conv2d-73 [-1, 480, 4, 4] 4,320
BatchNorm2d-74 [-1, 480, 4, 4] 960
Conv2d-75 [-1, 20, 1, 1] 9,620
Conv2d-76 [-1, 480, 1, 1] 10,080
SE-77 [-1, 480, 4, 4] 0
Conv2d-78 [-1, 80, 4, 4] 38,400
BatchNorm2d-79 [-1, 80, 4, 4] 160
Block-80 [-1, 80, 4, 4] 0
Conv2d-81 [-1, 480, 4, 4] 38,400
BatchNorm2d-82 [-1, 480, 4, 4] 960
Conv2d-83 [-1, 480, 4, 4] 12,000
BatchNorm2d-84 [-1, 480, 4, 4] 960
Conv2d-85 [-1, 20, 1, 1] 9,620
Conv2d-86 [-1, 480, 1, 1] 10,080
SE-87 [-1, 480, 4, 4] 0
Conv2d-88 [-1, 112, 4, 4] 53,760
BatchNorm2d-89 [-1, 112, 4, 4] 224
Block-90 [-1, 112, 4, 4] 0
Conv2d-91 [-1, 672, 4, 4] 75,264
BatchNorm2d-92 [-1, 672, 4, 4] 1,344
Conv2d-93 [-1, 672, 4, 4] 16,800
BatchNorm2d-94 [-1, 672, 4, 4] 1,344
Conv2d-95 [-1, 28, 1, 1] 18,844
Conv2d-96 [-1, 672, 1, 1] 19,488
SE-97 [-1, 672, 4, 4] 0
Conv2d-98 [-1, 112, 4, 4] 75,264
BatchNorm2d-99 [-1, 112, 4, 4] 224
Block-100 [-1, 112, 4, 4] 0
Conv2d-101 [-1, 672, 4, 4] 75,264
BatchNorm2d-102 [-1, 672, 4, 4] 1,344
Conv2d-103 [-1, 672, 4, 4] 16,800
BatchNorm2d-104 [-1, 672, 4, 4] 1,344
Conv2d-105 [-1, 28, 1, 1] 18,844
Conv2d-106 [-1, 672, 1, 1] 19,488
SE-107 [-1, 672, 4, 4] 0
Conv2d-108 [-1, 112, 4, 4] 75,264
BatchNorm2d-109 [-1, 112, 4, 4] 224
Block-110 [-1, 112, 4, 4] 0
Conv2d-111 [-1, 672, 4, 4] 75,264
BatchNorm2d-112 [-1, 672, 4, 4] 1,344
Conv2d-113 [-1, 672, 2, 2] 16,800
BatchNorm2d-114 [-1, 672, 2, 2] 1,344
Conv2d-115 [-1, 28, 1, 1] 18,844
Conv2d-116 [-1, 672, 1, 1] 19,488
SE-117 [-1, 672, 2, 2] 0
Conv2d-118 [-1, 192, 2, 2] 129,024
BatchNorm2d-119 [-1, 192, 2, 2] 384
Block-120 [-1, 192, 2, 2] 0
Conv2d-121 [-1, 1152, 2, 2] 221,184
BatchNorm2d-122 [-1, 1152, 2, 2] 2,304
Conv2d-123 [-1, 1152, 2, 2] 28,800
BatchNorm2d-124 [-1, 1152, 2, 2] 2,304
Conv2d-125 [-1, 48, 1, 1] 55,344
Conv2d-126 [-1, 1152, 1, 1] 56,448
SE-127 [-1, 1152, 2, 2] 0
Conv2d-128 [-1, 192, 2, 2] 221,184
BatchNorm2d-129 [-1, 192, 2, 2] 384
Block-130 [-1, 192, 2, 2] 0
Conv2d-131 [-1, 1152, 2, 2] 221,184
BatchNorm2d-132 [-1, 1152, 2, 2] 2,304
Conv2d-133 [-1, 1152, 2, 2] 28,800
BatchNorm2d-134 [-1, 1152, 2, 2] 2,304
Conv2d-135 [-1, 48, 1, 1] 55,344
Conv2d-136 [-1, 1152, 1, 1] 56,448
SE-137 [-1, 1152, 2, 2] 0
Conv2d-138 [-1, 192, 2, 2] 221,184
BatchNorm2d-139 [-1, 192, 2, 2] 384
Block-140 [-1, 192, 2, 2] 0
Conv2d-141 [-1, 1152, 2, 2] 221,184
BatchNorm2d-142 [-1, 1152, 2, 2] 2,304
Conv2d-143 [-1, 1152, 2, 2] 28,800
BatchNorm2d-144 [-1, 1152, 2, 2] 2,304
Conv2d-145 [-1, 48, 1, 1] 55,344
Conv2d-146 [-1, 1152, 1, 1] 56,448
SE-147 [-1, 1152, 2, 2] 0
Conv2d-148 [-1, 192, 2, 2] 221,184
BatchNorm2d-149 [-1, 192, 2, 2] 384
Block-150 [-1, 192, 2, 2] 0
Conv2d-151 [-1, 1152, 2, 2] 221,184
BatchNorm2d-152 [-1, 1152, 2, 2] 2,304
Conv2d-153 [-1, 1152, 2, 2] 10,368
BatchNorm2d-154 [-1, 1152, 2, 2] 2,304
Conv2d-155 [-1, 48, 1, 1] 55,344
Conv2d-156 [-1, 1152, 1, 1] 56,448
SE-157 [-1, 1152, 2, 2] 0
Conv2d-158 [-1, 320, 2, 2] 368,640
BatchNorm2d-159 [-1, 320, 2, 2] 640
Block-160 [-1, 320, 2, 2] 0
Linear-161 [-1, 10] 3,210
================================================================
Total params: 3,598,598
Trainable params: 3,598,598
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 10.18
Params size (MB): 13.73
Estimated Total Size (MB): 23.92
----------------------------------------------------------------
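The size figures follow from the same arithmetic: 3,598,598 float32 parameters at 4 bytes each. torchsummary divides by 1024² for its "Params size (MB)", while the decimal-megabyte figure is roughly what Lightning reports after training below. A small sketch of that calculation:

# Parameter memory: float32 parameters take 4 bytes each.
param_bytes = 3_598_598 * 4
print(param_bytes / 1024**2)  # ≈ 13.73, torchsummary's "Params size (MB)"
print(param_bytes / 1e6)      # ≈ 14.4, roughly Lightning's "Total estimated model params size (MB)"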
ReduceLROnPlateau scheduler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import OneCycleLR, CyclicLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
from torch.optim.swa_utils import AveragedModel, update_bn

import torchvision

import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, GPUStatsMonitor, EarlyStopping
from pytorch_lightning.metrics.functional import accuracy

from pl_bolts.datamodules import CIFAR10DataModule
from pl_bolts.transforms.dataset_normalizations import cifar10_normalization
pl.seed_everything(7);
batch_size = 50
train_transforms = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop(32, padding=4),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    cifar10_normalization(),
])

test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    cifar10_normalization(),
])

cifar10_dm = CIFAR10DataModule(
    batch_size=batch_size,
    train_transforms=train_transforms,
    test_transforms=test_transforms,
    val_transforms=test_transforms,
)
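pl_bolts' CIFAR10DataModule holds back part of the 50,000 training images for validation (5,000 by default), which is why the alternative scheduler configuration further below uses steps_per_epoch = 45000 // batch_size. A quick sanity check of the split sizes, assuming the default val_split:

# Sanity-check the train/val split (assumes pl_bolts' default val_split of 5,000).
cifar10_dm.prepare_data()   # download CIFAR-10 if it is not cached yet
cifar10_dm.setup()          # build the train/val/test datasets
print(len(cifar10_dm.train_dataloader().dataset))  # 45000
print(len(cifar10_dm.val_dataloader().dataset))    # 5000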
class LitCifar10(pl.LightningModule):
    def __init__(self, lr=0.05):
        super().__init__()
        self.save_hyperparameters()
        self.model = EfficientNetB0()

    def forward(self, x):
        out = self.model(x)
        return F.log_softmax(out, dim=1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = F.log_softmax(self.model(x), dim=1)
        loss = F.nll_loss(logits, y)
        self.log('train_loss', loss)
        return loss

    def evaluate(self, batch, stage=None):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)
        if stage:
            self.log(f'{stage}_loss', loss, prog_bar=True)
            self.log(f'{stage}_acc', acc, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        self.evaluate(batch, 'val')

    def test_step(self, batch, batch_idx):
        self.evaluate(batch, 'test')

    def configure_optimizers(self):
        # Disabled Adam alternative:
        #   optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=0, eps=1e-3)
        optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr,
                                    momentum=0.9, weight_decay=5e-4)
        return {
            'optimizer': optimizer,
            'lr_scheduler': ReduceLROnPlateau(optimizer, 'max', patience=4, factor=0.8,
                                              verbose=True, threshold=0.0001,
                                              threshold_mode='abs', cooldown=1, min_lr=1e-5),
            'monitor': 'val_acc'
        }

    # Unused alternative: rename to configure_optimizers to try a per-step scheduler instead.
    def xconfigure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr,
                                    momentum=0.9, weight_decay=5e-4)
        steps_per_epoch = 45000 // batch_size
        scheduler_dict = {
            #'scheduler': ExponentialLR(optimizer, gamma=0.1),
            #'interval': 'epoch',
            'scheduler': OneCycleLR(optimizer, max_lr=0.1, pct_start=0.2,
                                    epochs=self.trainer.max_epochs, steps_per_epoch=steps_per_epoch),
            #'scheduler': CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, step_size_up=steps_per_epoch*2, mode="triangular2"),
            #'scheduler': CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, step_size_up=steps_per_epoch, mode="exp_range", gamma=0.85),
            #'scheduler': CosineAnnealingLR(optimizer, T_max=200),
            'interval': 'step',
        }
        return {'optimizer': optimizer, 'lr_scheduler': scheduler_dict}
%%time
model = LitCifar10(lr=0.05)
model.datamodule = cifar10_dm

trainer = pl.Trainer(
    gpus=1,
    max_epochs=150,
    auto_scale_batch_size=True,
    auto_lr_find=True,
    progress_bar_refresh_rate=100,
    logger=pl.loggers.TensorBoardLogger('tblogs/', name='efficientnet'),
    callbacks=[LearningRateMonitor(logging_interval='step')],
)

trainer.fit(model, cifar10_dm)
trainer.test(model, datamodule=cifar10_dm);
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
| Name | Type | Params
---------------------------------------
0 | model | EfficientNet | 3.6 M
---------------------------------------
3.6 M Trainable params
0 Non-trainable params
3.6 M Total params
14.399 Total estimated model params size (MB)
(...)
(...)
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.8824999928474426, 'test_loss': 0.3956470191478729}
--------------------------------------------------------------------------------
CPU times: user 2h 7min 34s, sys: 3min 30s, total: 2h 11min 4s
Wall time: 2h 17min 43s
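Since the Trainer logs to tblogs/ through TensorBoardLogger and LearningRateMonitor records the learning rate at every step, the loss/accuracy curves and the ReduceLROnPlateau decay can be inspected afterwards with the standard TensorBoard workflow (shown here as notebook magics):

# Inspect the logged metrics and learning-rate schedule.
%load_ext tensorboard
%tensorboard --logdir tblogs/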
That's all.