PyTorch Lightning 1.1: research : CIFAR10 (RegNet)
Author: ClassCat, Inc. Sales Information
Date: 02/23/2021 (1.1.x)
* This page is a report on experiments carried out with reference to the following resources:
* Feel free to link to this page, but we would appreciate a note to sales-info@classcat.com.
research: CIFAR10 (RegNet)
Results

100 epochs: ReduceLROnPlateau

- RegNetX_200MF – {'test_acc': 0.9345999956130981, 'test_loss': 0.23981913924217224} – Wall time: 2h 5min 55s ('Tesla M60' x 2)
Code
import torch
import torch.nn as nn
import torch.nn.functional as F


class SE(nn.Module):
    '''Squeeze-and-Excitation block.'''
    def __init__(self, in_planes, se_planes):
        super(SE, self).__init__()
        self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)

    def forward(self, x):
        out = F.adaptive_avg_pool2d(x, (1, 1))
        out = F.relu(self.se1(out))
        out = self.se2(out).sigmoid()
        out = x * out
        return out


class Block(nn.Module):
    def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
        super(Block, self).__init__()
        # 1x1
        w_b = int(round(w_out * bottleneck_ratio))
        self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(w_b)
        # 3x3
        num_groups = w_b // group_width
        self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3, stride=stride,
                               padding=1, groups=num_groups, bias=False)
        self.bn2 = nn.BatchNorm2d(w_b)
        # se
        self.with_se = se_ratio > 0
        if self.with_se:
            w_se = int(round(w_in * se_ratio))
            self.se = SE(w_b, w_se)
        # 1x1
        self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(w_out)

        self.shortcut = nn.Sequential()
        if stride != 1 or w_in != w_out:
            self.shortcut = nn.Sequential(
                nn.Conv2d(w_in, w_out, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(w_out)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        if self.with_se:
            out = self.se(out)
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class RegNet(nn.Module):
    def __init__(self, cfg, num_classes=10):
        super(RegNet, self).__init__()
        self.cfg = cfg
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(0)
        self.layer2 = self._make_layer(1)
        self.layer3 = self._make_layer(2)
        self.layer4 = self._make_layer(3)
        self.linear = nn.Linear(self.cfg['widths'][-1], num_classes)

    def _make_layer(self, idx):
        depth = self.cfg['depths'][idx]
        width = self.cfg['widths'][idx]
        stride = self.cfg['strides'][idx]
        group_width = self.cfg['group_width']
        bottleneck_ratio = self.cfg['bottleneck_ratio']
        se_ratio = self.cfg['se_ratio']

        layers = []
        for i in range(depth):
            s = stride if i == 0 else 1
            layers.append(Block(self.in_planes, width, s,
                                group_width, bottleneck_ratio, se_ratio))
            self.in_planes = width
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def RegNetX_200MF():
    cfg = {
        'depths': [1, 1, 4, 7],
        'widths': [24, 56, 152, 368],
        'strides': [1, 1, 2, 2],
        'group_width': 8,
        'bottleneck_ratio': 1,
        'se_ratio': 0,
    }
    return RegNet(cfg)


def RegNetX_400MF():
    cfg = {
        'depths': [1, 2, 7, 12],
        'widths': [32, 64, 160, 384],
        'strides': [1, 1, 2, 2],
        'group_width': 16,
        'bottleneck_ratio': 1,
        'se_ratio': 0,
    }
    return RegNet(cfg)


def RegNetY_400MF():
    cfg = {
        'depths': [1, 2, 7, 12],
        'widths': [32, 64, 160, 384],
        'strides': [1, 1, 2, 2],
        'group_width': 16,
        'bottleneck_ratio': 1,
        'se_ratio': 0.25,
    }
    return RegNet(cfg)
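Each stage's 3x3 convolution is grouped, so it may help to see where the groups= values in the model printout below come from. A minimal sketch (not part of the original code; it simply reads the cfg dict defined above):

# Sanity check: num_groups = (width * bottleneck_ratio) // group_width per stage.
cfg = RegNetX_200MF().cfg
for width in cfg['widths']:
    w_b = int(round(width * cfg['bottleneck_ratio']))  # bottleneck width (ratio is 1 here)
    print(width, '->', w_b // cfg['group_width'], 'groups')
# Expected: 24 -> 3, 56 -> 7, 152 -> 19, 368 -> 46,
# matching the groups= values shown in the printed model below.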
net = RegNetX_200MF()
print(net)

x = torch.randn(2, 3, 32, 32)
y = net(x)
print(y.shape)
RegNet( (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (layer1): Sequential( (0): Block( (conv1): Conv2d(64, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=3, bias=False) (bn2): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential( (0): Conv2d(64, 24, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) (layer2): Sequential( (0): Block( (conv1): Conv2d(24, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(56, 56, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=7, bias=False) (bn2): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(56, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential( (0): Conv2d(24, 56, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(56, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) (layer3): Sequential( (0): Block( (conv1): Conv2d(56, 152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(152, 152, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=19, bias=False) (bn2): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(152, 152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential( (0): Conv2d(56, 152, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Block( (conv1): Conv2d(152, 152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(152, 152, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=19, bias=False) (bn2): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(152, 152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential() ) (2): Block( (conv1): Conv2d(152, 152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(152, 152, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=19, bias=False) (bn2): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(152, 152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential() ) (3): Block( (conv1): Conv2d(152, 152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(152, 
eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(152, 152, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=19, bias=False) (bn2): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(152, 152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential() ) ) (layer4): Sequential( (0): Block( (conv1): Conv2d(152, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(368, 368, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=46, bias=False) (bn2): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential( (0): Conv2d(152, 368, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Block( (conv1): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(368, 368, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=46, bias=False) (bn2): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential() ) (2): Block( (conv1): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(368, 368, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=46, bias=False) (bn2): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential() ) (3): Block( (conv1): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(368, 368, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=46, bias=False) (bn2): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential() ) (4): Block( (conv1): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(368, 368, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=46, bias=False) (bn2): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential() ) (5): Block( (conv1): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): 
Conv2d(368, 368, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=46, bias=False) (bn2): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential() ) (6): Block( (conv1): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(368, 368, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=46, bias=False) (bn2): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(368, 368, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (shortcut): Sequential() ) ) (linear): Linear(in_features=368, out_features=10, bias=True) ) torch.Size([2, 10])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
device(type='cuda')
torch.cuda.device_count()
2
torch.cuda.current_device()
0
torch.cuda.device(0)
<torch.cuda.device at 0x7f0501e8d1d0>
torch.cuda.get_device_name(0)
'Tesla M60'
torch.cuda.is_available()
True
torch.device(0)
device(type='cuda', index=0)
torch.device(1)
device(type='cuda', index=1)
from torchsummary import summary
summary(RegNetX_200MF().to('cuda:0'), (3, 32, 32))
---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 64, 32, 32] 1,728 BatchNorm2d-2 [-1, 64, 32, 32] 128 Conv2d-3 [-1, 24, 32, 32] 1,536 BatchNorm2d-4 [-1, 24, 32, 32] 48 Conv2d-5 [-1, 24, 32, 32] 1,728 BatchNorm2d-6 [-1, 24, 32, 32] 48 Conv2d-7 [-1, 24, 32, 32] 576 BatchNorm2d-8 [-1, 24, 32, 32] 48 Conv2d-9 [-1, 24, 32, 32] 1,536 BatchNorm2d-10 [-1, 24, 32, 32] 48 Block-11 [-1, 24, 32, 32] 0 Conv2d-12 [-1, 56, 32, 32] 1,344 BatchNorm2d-13 [-1, 56, 32, 32] 112 Conv2d-14 [-1, 56, 32, 32] 4,032 BatchNorm2d-15 [-1, 56, 32, 32] 112 Conv2d-16 [-1, 56, 32, 32] 3,136 BatchNorm2d-17 [-1, 56, 32, 32] 112 Conv2d-18 [-1, 56, 32, 32] 1,344 BatchNorm2d-19 [-1, 56, 32, 32] 112 Block-20 [-1, 56, 32, 32] 0 Conv2d-21 [-1, 152, 32, 32] 8,512 BatchNorm2d-22 [-1, 152, 32, 32] 304 Conv2d-23 [-1, 152, 16, 16] 10,944 BatchNorm2d-24 [-1, 152, 16, 16] 304 Conv2d-25 [-1, 152, 16, 16] 23,104 BatchNorm2d-26 [-1, 152, 16, 16] 304 Conv2d-27 [-1, 152, 16, 16] 8,512 BatchNorm2d-28 [-1, 152, 16, 16] 304 Block-29 [-1, 152, 16, 16] 0 Conv2d-30 [-1, 152, 16, 16] 23,104 BatchNorm2d-31 [-1, 152, 16, 16] 304 Conv2d-32 [-1, 152, 16, 16] 10,944 BatchNorm2d-33 [-1, 152, 16, 16] 304 Conv2d-34 [-1, 152, 16, 16] 23,104 BatchNorm2d-35 [-1, 152, 16, 16] 304 Block-36 [-1, 152, 16, 16] 0 Conv2d-37 [-1, 152, 16, 16] 23,104 BatchNorm2d-38 [-1, 152, 16, 16] 304 Conv2d-39 [-1, 152, 16, 16] 10,944 BatchNorm2d-40 [-1, 152, 16, 16] 304 Conv2d-41 [-1, 152, 16, 16] 23,104 BatchNorm2d-42 [-1, 152, 16, 16] 304 Block-43 [-1, 152, 16, 16] 0 Conv2d-44 [-1, 152, 16, 16] 23,104 BatchNorm2d-45 [-1, 152, 16, 16] 304 Conv2d-46 [-1, 152, 16, 16] 10,944 BatchNorm2d-47 [-1, 152, 16, 16] 304 Conv2d-48 [-1, 152, 16, 16] 23,104 BatchNorm2d-49 [-1, 152, 16, 16] 304 Block-50 [-1, 152, 16, 16] 0 Conv2d-51 [-1, 368, 16, 16] 55,936 BatchNorm2d-52 [-1, 368, 16, 16] 736 Conv2d-53 [-1, 368, 8, 8] 26,496 BatchNorm2d-54 [-1, 368, 8, 8] 736 Conv2d-55 [-1, 368, 8, 8] 135,424 BatchNorm2d-56 [-1, 368, 8, 8] 736 Conv2d-57 [-1, 368, 8, 8] 55,936 BatchNorm2d-58 [-1, 368, 8, 8] 736 Block-59 [-1, 368, 8, 8] 0 Conv2d-60 [-1, 368, 8, 8] 135,424 BatchNorm2d-61 [-1, 368, 8, 8] 736 Conv2d-62 [-1, 368, 8, 8] 26,496 BatchNorm2d-63 [-1, 368, 8, 8] 736 Conv2d-64 [-1, 368, 8, 8] 135,424 BatchNorm2d-65 [-1, 368, 8, 8] 736 Block-66 [-1, 368, 8, 8] 0 Conv2d-67 [-1, 368, 8, 8] 135,424 BatchNorm2d-68 [-1, 368, 8, 8] 736 Conv2d-69 [-1, 368, 8, 8] 26,496 BatchNorm2d-70 [-1, 368, 8, 8] 736 Conv2d-71 [-1, 368, 8, 8] 135,424 BatchNorm2d-72 [-1, 368, 8, 8] 736 Block-73 [-1, 368, 8, 8] 0 Conv2d-74 [-1, 368, 8, 8] 135,424 BatchNorm2d-75 [-1, 368, 8, 8] 736 Conv2d-76 [-1, 368, 8, 8] 26,496 BatchNorm2d-77 [-1, 368, 8, 8] 736 Conv2d-78 [-1, 368, 8, 8] 135,424 BatchNorm2d-79 [-1, 368, 8, 8] 736 Block-80 [-1, 368, 8, 8] 0 Conv2d-81 [-1, 368, 8, 8] 135,424 BatchNorm2d-82 [-1, 368, 8, 8] 736 Conv2d-83 [-1, 368, 8, 8] 26,496 BatchNorm2d-84 [-1, 368, 8, 8] 736 Conv2d-85 [-1, 368, 8, 8] 135,424 BatchNorm2d-86 [-1, 368, 8, 8] 736 Block-87 [-1, 368, 8, 8] 0 Conv2d-88 [-1, 368, 8, 8] 135,424 BatchNorm2d-89 [-1, 368, 8, 8] 736 Conv2d-90 [-1, 368, 8, 8] 26,496 BatchNorm2d-91 [-1, 368, 8, 8] 736 Conv2d-92 [-1, 368, 8, 8] 135,424 BatchNorm2d-93 [-1, 368, 8, 8] 736 Block-94 [-1, 368, 8, 8] 0 Conv2d-95 [-1, 368, 8, 8] 135,424 BatchNorm2d-96 [-1, 368, 8, 8] 736 Conv2d-97 [-1, 368, 8, 8] 26,496 BatchNorm2d-98 [-1, 368, 8, 8] 736 Conv2d-99 [-1, 368, 8, 8] 135,424 BatchNorm2d-100 [-1, 368, 8, 8] 736 
Block-101 [-1, 368, 8, 8] 0 Linear-102 [-1, 10] 3,690 ================================================================ Total params: 2,321,946 Trainable params: 2,321,946 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.01 Forward/backward pass size (MB): 27.55 Params size (MB): 8.86 Estimated Total Size (MB): 36.42 ----------------------------------------------------------------
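As a cross-check on the torchsummary total (2,321,946 parameters), the same count can be obtained directly from the model's parameters. A minimal sketch, not part of the original article:

net = RegNetX_200MF()
total = sum(p.numel() for p in net.parameters())
trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(total, trainable)  # both values are expected to equal the 2,321,946 reported above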
ReduceLROnPlateau scheduler
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import OneCycleLR, CyclicLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
from torch.optim.swa_utils import AveragedModel, update_bn

import torchvision

import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, GPUStatsMonitor, EarlyStopping
from pytorch_lightning.metrics.functional import accuracy
from pl_bolts.datamodules import CIFAR10DataModule
from pl_bolts.transforms.dataset_normalizations import cifar10_normalization
pl.seed_everything(7);
batch_size = 100

train_transforms = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop(32, padding=4),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    cifar10_normalization(),
])

test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    cifar10_normalization(),
])

cifar10_dm = CIFAR10DataModule(
    batch_size=batch_size,
    train_transforms=train_transforms,
    test_transforms=test_transforms,
    val_transforms=test_transforms,
)
class LitCifar10(pl.LightningModule):
    def __init__(self, optim, lr=0.05, factor=0.8):
        super().__init__()
        self.save_hyperparameters()
        self.model = RegNetX_200MF()

    def forward(self, x):
        out = self.model(x)
        return F.log_softmax(out, dim=1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = F.log_softmax(self.model(x), dim=1)
        loss = F.nll_loss(logits, y)
        self.log('train_loss', loss)
        return loss

    def evaluate(self, batch, stage=None):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)

        if stage:
            self.log(f'{stage}_loss', loss, prog_bar=True)
            self.log(f'{stage}_acc', acc, prog_bar=True)

    def validation_step(self, batch, batch_idx):
        self.evaluate(batch, 'val')

    def test_step(self, batch, batch_idx):
        self.evaluate(batch, 'test')

    def configure_optimizers(self):
        optim = self.hparams.optim
        if optim == 'adam':
            optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr,
                                         weight_decay=0, eps=1e-3)
        else:
            optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.lr,
                                        momentum=0.9, weight_decay=5e-4)
        return {
            'optimizer': optimizer,
            'lr_scheduler': ReduceLROnPlateau(optimizer, 'max', patience=8,
                                              factor=self.hparams.factor, verbose=True,
                                              threshold=0.0001, threshold_mode='abs',
                                              cooldown=1, min_lr=1e-5),
            'monitor': 'val_acc'
        }
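Because configure_optimizers returns 'monitor': 'val_acc', Lightning feeds the logged validation accuracy to ReduceLROnPlateau at the end of each epoch; in 'max' mode the learning rate is multiplied by factor once val_acc fails to improve by more than threshold for patience epochs. A standalone sketch of that behavior, using a dummy optimizer independent of the training loop above:

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

opt = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.05)
sched = ReduceLROnPlateau(opt, 'max', patience=8, factor=0.5,
                          threshold=0.0001, threshold_mode='abs', cooldown=1, min_lr=1e-5)
for val_acc in [0.80] * 12:       # simulate a validation accuracy that has plateaued
    sched.step(val_acc)
print(opt.param_groups[0]['lr'])  # 0.025 -- halved once, as in the "reducing learning rate" logs below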
%%time

model = LitCifar10(optim='sgd', lr=0.05, factor=0.5)
model.datamodule = cifar10_dm

trainer = pl.Trainer(
    gpus=2,
    num_nodes=1,
    accelerator='dp',
    max_epochs=100,
    progress_bar_refresh_rate=100,
    logger=pl.loggers.TensorBoardLogger('tblogs/', name='regnetx_200mf'),
    callbacks=[LearningRateMonitor(logging_interval='step')],
)

trainer.fit(model, cifar10_dm)
trainer.test(model, datamodule=cifar10_dm);
GPU available: True, used: True TPU available: None, using: 0 TPU cores Files already downloaded and verified Files already downloaded and verified | Name | Type | Params --------------------------------- 0 | model | RegNet | 2.3 M --------------------------------- 2.3 M Trainable params 0 Non-trainable params 2.3 M Total params 9.288 Total estimated model params size (MB) (...) Epoch 46: reducing learning rate of group 0 to 2.5000e-02. Epoch 57: reducing learning rate of group 0 to 1.2500e-02. Epoch 69: reducing learning rate of group 0 to 6.2500e-03. Epoch 88: reducing learning rate of group 0 to 3.1250e-03. (...) -------------------------------------------------------------------------------- DATALOADER:0 TEST RESULTS {'test_acc': 0.9345999956130981, 'test_loss': 0.23981913924217224} -------------------------------------------------------------------------------- CPU times: user 2h 9min 29s, sys: 24min 35s, total: 2h 34min 5s Wall time: 2h 5min 55s [{'test_loss': 0.23981913924217224, 'test_acc': 0.9345999956130981}]
That's all.