Commit 16e74c15 by TJL233

add files 6.1

parent 0e84f69a
import time
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision.models as models
from torch.autograd import Variable


class MobileNet(nn.Module):
    def __init__(self, n_class=1000):
        super(MobileNet, self).__init__()
        self.nclass = n_class

        def conv_bn(inp, oup, stride):
            # standard 3x3 convolution + BN + ReLU
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            # depthwise separable convolution: 3x3 depthwise + 1x1 pointwise
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(3, 32, 2),
            conv_dw(32, 64, 1),
            conv_dw(64, 128, 2),
            conv_dw(128, 128, 1),
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
            conv_dw(256, 512, 2),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
            nn.AvgPool2d(7),
        )
        self.fc = nn.Linear(1024, self.nclass)

    def forward(self, x):
        x = self.model(x)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x
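
# Hedged benchmark sketch (not part of the original file): the otherwise unused
# `time` and `cudnn` imports above suggest the file was meant to time the forward
# pass; a minimal version of such a check, assuming a 224x224 input:
if __name__ == "__main__":
    cudnn.benchmark = True  # let cuDNN autotune kernels for the fixed input size
    device = "cuda" if torch.cuda.is_available() else "cpu"
    net = MobileNet().to(device).eval()
    x = torch.randn(1, 3, 224, 224, device=device)  # AvgPool2d(7) assumes 224x224 input
    with torch.no_grad():
        net(x)  # warm-up pass
        if device == "cuda":
            torch.cuda.synchronize()
        start = time.time()
        for _ in range(10):
            net(x)
        if device == "cuda":
            torch.cuda.synchronize()
        print("avg forward time: %.2f ms" % ((time.time() - start) / 10 * 1000))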
from torch import nn
import torch


def _make_divisible(ch, divisor=8, min_ch=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8.
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_ch is None:
        min_ch = divisor
    new_ch = max(min_ch, int(ch + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not reduce the channels by more than 10%.
    if new_ch < 0.9 * ch:
        new_ch += divisor
    return new_ch
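
# A hypothetical worked example (not in the original file): with a width
# multiplier of 0.35, 96 * 0.35 = 33.6 channels; int(33.6 + 4) // 8 * 8 = 32,
# and 32 >= 0.9 * 33.6, so _make_divisible(33.6) returns 32 rather than
# bumping up to the next multiple of 8.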

class ConvBNReLU(nn.Sequential):
    def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU6(inplace=True)
        )


class InvertedResidual(nn.Module):
    def __init__(self, in_channel, out_channel, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        hidden_channel = in_channel * expand_ratio
        self.use_shortcut = stride == 1 and in_channel == out_channel

        layers = []
        if expand_ratio != 1:
            # 1x1 pointwise conv
            layers.append(ConvBNReLU(in_channel, hidden_channel, kernel_size=1))
        layers.extend([
            # 3x3 depthwise conv
            ConvBNReLU(hidden_channel, hidden_channel, stride=stride, groups=hidden_channel),
            # 1x1 pointwise conv (linear, i.e. no activation)
            nn.Conv2d(hidden_channel, out_channel, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channel),
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        if self.use_shortcut:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(self, num_classes=1000, alpha=1.0, round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = _make_divisible(32 * alpha, round_nearest)
        last_channel = _make_divisible(1280 * alpha, round_nearest)

        inverted_residual_setting = [
            # t (expand ratio), c (output channels), n (repeats), s (stride of first block)
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        features = []
        # conv1 layer
        features.append(ConvBNReLU(3, input_channel, stride=2))
        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * alpha, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building the last several layers
        features.append(ConvBNReLU(input_channel, last_channel, 1))
        # combine feature layers
        self.features = nn.Sequential(*features)

        # building classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(last_channel, num_classes)
        )

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
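
# Hedged usage sketch (not part of the original file): instantiate the model
# and check the classifier output shape; `alpha` is the width multiplier.
if __name__ == "__main__":
    model = MobileNetV2(num_classes=1000, alpha=1.0)
    out = model(torch.randn(2, 3, 224, 224))
    print(out.shape)  # torch.Size([2, 1000])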
def count_n_grams(data, n, start_token="<s>", end_token="<e>") -> 'dict':
    # dictionary of n-gram counts
    n_grams = {}
    # iterate over the sentences in the dataset
    for sentence in data:
        # pad the sentence with n start tokens in front and an end token at the back
        sentence = [start_token] * n + sentence + [end_token]
        # convert the sentence to a tuple
        sentence = tuple(sentence)
        # number of n-gram start positions; len(sentence) - n + 1 covers every n
        # and never yields slices shorter than n
        m = len(sentence) - n + 1
        # slide over the sentence and collect every n-gram
        for i in range(m):
            # build the n-gram
            n_gram = sentence[i:i + n]
            # add the n-gram to the dictionary, incrementing its count
            # if it is already present
            if n_gram in n_grams:
                n_grams[n_gram] += 1
            else:
                n_grams[n_gram] = 1
    return n_grams


# probability of a single word given the previous n-gram (add-k smoothing)
def prob_for_single_word(word, previous_n_gram, n_gram_counts, nplus1_gram_counts, vocabulary_size, k=1.0) -> 'float':
    # convert the previous n-gram to a tuple
    previous_n_gram = tuple(previous_n_gram)
    # count of the previous n-gram
    previous_n_gram_count = n_gram_counts[previous_n_gram] if previous_n_gram in n_gram_counts else 0
    # denominator
    denom = previous_n_gram_count + k * vocabulary_size
    # append the current word to the previous n-gram
    nplus1_gram = previous_n_gram + (word,)
    # count of the (n+1)-gram formed by the previous n-gram and the current word
    nplus1_gram_count = nplus1_gram_counts[nplus1_gram] if nplus1_gram in nplus1_gram_counts else 0
    # numerator
    num = nplus1_gram_count + k
    # final fraction
    prob = num / denom
    return prob


# probability distribution over the vocabulary for the next word
def probs(previous_n_gram, n_gram_counts, nplus1_gram_counts, vocabulary, k=1.0) -> 'dict':
    # convert the previous n-gram to a tuple
    previous_n_gram = tuple(previous_n_gram)
    # add the end token and UNK to the vocabulary
    vocabulary = vocabulary + ["<e>", "<unk>"]
    # size of the vocabulary
    vocabulary_size = len(vocabulary)
    # build the probability dictionary
    probabilities = {}
    # iterate over all words
    for word in vocabulary:
        # probability of this word following the previous n-gram
        probability = prob_for_single_word(word, previous_n_gram,
                                           n_gram_counts, nplus1_gram_counts,
                                           vocabulary_size, k=k)
        # store it in the distribution
        probabilities[word] = probability
    return probabilities
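
# Hedged usage sketch (not part of the original exercise): a toy corpus run.
if __name__ == "__main__":
    corpus = [["i", "like", "a", "cat"],
              ["this", "dog", "is", "like", "a", "cat"]]
    unigram_counts = count_n_grams(corpus, 1)
    bigram_counts = count_n_grams(corpus, 2)
    vocabulary = sorted({word for sentence in corpus for word in sentence})
    # add-1-smoothed distribution of the word following "a"
    distribution = probs(["a"], unigram_counts, bigram_counts, vocabulary, k=1.0)
    print(max(distribution, key=distribution.get))  # expected: "cat"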
Course outline: https://github.com/sawyerbutton
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "9a1ddf95",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ddca5972",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([9, 32, 8])\n",
"torch.Size([2, 3, 32, 8])\n"
]
}
],
"source": [
"#[class1-3, student, scores]\n",
"\n",
"a = torch.rand(3, 32, 8)\n",
"b = torch.rand(6, 32, 8)\n",
"c = torch.rand(3, 32, 8)\n",
"\n",
"print(torch.cat([a, b], dim=0).shape)\n",
"\n",
"print(torch.stack([a,c], dim =0).shape)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d0fd26fc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([2, 32, 8])\n",
"torch.Size([2, 32, 8])\n",
"torch.Size([1, 32, 8])\n",
"3\n",
"torch.Size([3, 32, 8])\n",
"torch.Size([2, 32, 8])\n",
"2\n"
]
}
],
"source": [
"a = torch.rand(5, 32, 8)\n",
"b = torch.split(a, 2, 0)\n",
"print(b[0].shape)\n",
"print(b[1].shape)\n",
"print(b[2].shape)\n",
"print(len(b))\n",
"\n",
"\n",
"c = torch.chunk(a, 2, 0)\n",
"print(c[0].shape)\n",
"print(c[1].shape)\n",
"print(len(c))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "5b02c65c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(3.)\n",
"tensor(4.)\n",
"tensor(3.)\n",
"tensor(3.)\n",
"tensor(0.1416)\n"
]
}
],
"source": [
"a = torch.tensor(3.1415926)\n",
"\n",
"# floor\n",
"print(a.floor())\n",
"\n",
"# ceil\n",
"print(a.ceil())\n",
"\n",
"# round\n",
"print(a.round())\n",
"\n",
"# trunc\n",
"print(a.trunc())\n",
"\n",
"# frac\n",
"print(a.frac())"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "b214751c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([1., 2., 3., 4., 5., 6., 7.])\n",
"tensor(4.)\n",
"tensor(7.)\n",
"tensor(1.)\n",
"tensor(28.)\n",
"tensor(5040.)\n",
"tensor(6)\n",
"tensor(0)\n"
]
}
],
"source": [
"a = torch.tensor([1.,2.,3.,4.,5.,6.,7.])\n",
"print(a)\n",
"\n",
"print(a.mean())\n",
"print(a.max())\n",
"print(a.min())\n",
"print(a.sum())\n",
"print(a.prod())\n",
"\n",
"# argmax / argmin\n",
"print(a.argmax())\n",
"print(a.argmin())"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2f89ff69",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[1., 1., 1.],\n",
" [1., 1., 1.],\n",
" [1., 1., 1.]])\n",
"tensor([[1., 0., 0.],\n",
" [0., 1., 0.],\n",
" [0., 0., 1.]])\n",
"tensor([[ True, False, False],\n",
" [False, True, False],\n",
" [False, False, True]])\n",
"False\n"
]
}
],
"source": [
"a = torch.ones(3,3)\n",
"b = torch.eye(3,3)\n",
"print(a)\n",
"print(b)\n",
"\n",
"print(torch.eq(a,b))\n",
"print(torch.equal(a, b))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "14e15af9",
"metadata": {},
"outputs": [],
"source": [
"# 基于pytorch 实现 手写数字的识别问题 mnist"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "d40ebab2",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"import torch.optim as optim"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "d4a79df5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.22868333333333332 0.3601\n",
"0.47163333333333335 0.5569\n",
"0.5959666666666666 0.6348\n",
"0.6523833333333333 0.6823\n",
"0.68475 0.7109\n",
"done\n"
]
}
],
"source": [
"if torch.cuda.is_available():\n",
" device = 'cuda'\n",
"else: \n",
" device = 'cpu'\n",
"\n",
"class Net(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
" # 28*28 = 784\n",
" self.fc1 = nn.Linear(784, 100)\n",
" self.fc2 = nn.Linear(100, 10)\n",
" # hook\n",
" def forward(self, x):\n",
" x = torch.flatten(x, start_dim = 1)\n",
" x = torch.relu(self.fc1(x))\n",
" x = self.fc2(x)\n",
" \n",
" return x\n",
" \n",
" \n",
"max_epochs = 5\n",
"batch_size = 16\n",
"\n",
"# data\n",
"transform = transforms.Compose([transforms.ToTensor()])\n",
"# 55000\n",
"trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)\n",
"train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)\n",
"testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)\n",
"test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)\n",
"\n",
"# net init\n",
"\n",
"net = Net()\n",
"net.to(device)\n",
"\n",
"# nn. MSE\n",
"loss = nn.CrossEntropyLoss()\n",
"optimizer = optim.SGD(net.parameters(), lr = 0.0001)\n",
"\n",
"def train():\n",
" acc_num=0\n",
" for epoch in range(max_epochs):\n",
" for i,(data, label) in enumerate(train_loader):\n",
" data = data.to(device)\n",
" label = label.to(device)\n",
" optimizer.zero_grad()\n",
" output = net(data)\n",
" Loss = loss(output, label)\n",
" Loss.backward()\n",
" optimizer.step()\n",
" \n",
" pred_class = torch.max(output, dim=1)[1]\n",
" acc_num += torch.eq(pred_class, label.to(device)).sum().item()\n",
" train_acc = acc_num / len(trainset)\n",
" \n",
" net.eval()\n",
" acc_num = 0.0\n",
" best_acc =0\n",
" with torch.no_grad():\n",
" for val_data in test_loader:\n",
" val_image, val_label = val_data\n",
" output = net(val_image.to(device))\n",
" predict_y = torch.max(output, dim=1)[1]\n",
" acc_num += torch.eq(predict_y, val_label.to(device)).sum().item()\n",
" val_acc = acc_num / len(testset)\n",
" print(train_acc, val_acc)\n",
" if val_acc > best_acc:\n",
" torch.save(net.state_dict(), './minst.pth')\n",
" best_acc = val_acc\n",
" \n",
" acc_num = 0\n",
" train_acc = 0\n",
" test_acc = 0\n",
" print('done')\n",
"\n",
"train()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c675ece6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:.conda-pytorch_dl] *",
"language": "python",
"name": "conda-env-.conda-pytorch_dl-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
import torch
import torch.nn as nn
from torch.nn import functional as F
from utils.CustomLayers import ConvActivation, ConvBNActivation, ConvBatchNormalization


class SmallResidual(nn.Module):
    expansion = 1

    def __init__(self, input_channels, output_channels, stride=1, downsample=None, **kwargs):
        super().__init__()
        self.conv1 = ConvBNActivation(input_channels, output_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.conv2 = ConvBatchNormalization(output_channels, output_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)
        # F.relu() is a function call, normally used inside forward();
        # nn.ReLU() is a module, normally instantiated when defining layers.
        out = self.conv1(x)
        out = self.conv2(out)
        out += identity
        out = F.relu(out, inplace=True)
        return out


class BigResidual(nn.Module):
    expansion = 4

    # `groups` selects grouped convolution; used to implement ResNeXt
    def __init__(self, input_channels, output_channels, stride=1, downsample=None, groups=1, width_per_group=64, **kwargs):
        super().__init__()
        width = int(output_channels * (width_per_group / 64)) * groups
        self.conv1 = ConvBNActivation(input_channels=input_channels, output_channels=width, kernel_size=1, stride=1, padding=0)
        self.conv2 = ConvBNActivation(input_channels=width, output_channels=width, kernel_size=3, stride=stride, padding=1, groups=groups)
        self.conv3 = ConvBatchNormalization(input_channels=width, output_channels=output_channels * self.expansion, kernel_size=1, stride=1, padding=0)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = out + identity
        out = F.relu(out, inplace=True)
        return out


class ResNet(nn.Module):
    def __init__(self, method_type, num_blocks, num_classes=None, include_top=None, groups=1, width_per_group=64):
        super().__init__()
        self.include_top = include_top
        self.in_nc = 64
        self.groups = groups
        self.width_per_group = width_per_group
        self.conv1 = nn.Conv2d(3, self.in_nc, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_nc)
        self.relu = nn.ReLU(True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # arguments: residual class, the block's base channel count, the number of
        # layers in the block, and the conv stride (in ResNet the stride determines
        # the block's output resolution)
        self.Block1 = self._make_Block(method_type, 64, num_blocks[0])
        self.Block2 = self._make_Block(method_type, 128, num_blocks[1], stride=2)
        self.Block3 = self._make_Block(method_type, 256, num_blocks[2], stride=2)
        self.Block4 = self._make_Block(method_type, 512, num_blocks[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(512 * method_type.expansion, num_classes)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.Block1(x)
        x = self.Block2(x)
        x = self.Block3(x)
        x = self.Block4(x)
        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, start_dim=1)
            x = self.fc(x)
        return x

    def _make_Block(self, method_type, in_channels, num_layers, stride=1):
        downsample = None
        if stride != 1 or self.in_nc != in_channels * method_type.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_nc, in_channels * method_type.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(in_channels * method_type.expansion))
        Block = []
        Block.append(method_type(self.in_nc,
                                 in_channels,
                                 downsample=downsample,
                                 stride=stride,
                                 groups=self.groups,
                                 width_per_group=self.width_per_group))
        self.in_nc = in_channels * method_type.expansion
        # only the first layer of a block changes the dimensions, which is why it is
        # created separately above with the downsample branch for the shortcut
        for _ in range(1, num_layers):
            Block.append(method_type(self.in_nc, in_channels, groups=self.groups, width_per_group=self.width_per_group))
        return nn.Sequential(*Block)


def ResNet34(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnet34-333f7ec4.pth
    return ResNet(SmallResidual, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


def ResNet50(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnet50-19c8e357.pth
    return ResNet(BigResidual, [3, 4, 6, 3], num_classes=num_classes, include_top=include_top)


def ResNet101(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    return ResNet(BigResidual, [3, 4, 23, 3], num_classes=num_classes, include_top=include_top)


def resnext50_32x4d(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    groups = 32
    width_per_group = 4
    return ResNet(BigResidual, [3, 4, 6, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=groups,
                  width_per_group=width_per_group)


def resnext101_32x8d(num_classes=1000, include_top=True):
    # https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth
    groups = 32
    width_per_group = 8
    return ResNet(BigResidual, [3, 4, 23, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  groups=groups,
                  width_per_group=width_per_group)
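
# Hedged usage sketch (not part of the original file; assumes utils.CustomLayers
# is on the import path):
if __name__ == "__main__":
    net = ResNet34(num_classes=5)
    print(net(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 5])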
from typing import List, Callable

import torch
from torch import Tensor
import torch.nn as nn


def channel_shuffle(x: Tensor, groups: int):
    batch_size, num_channels, height, width = x.size()
    channels_per_group = num_channels // groups

    # reshape: [batch_size, num_channels, height, width]
    #       -> [batch_size, groups, channels_per_group, height, width]
    x = x.view(batch_size, groups, channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()

    # flatten back to [batch_size, num_channels, height, width]
    x = x.view(batch_size, -1, height, width)
    return x


class InvertedResidual(nn.Module):
    def __init__(self, input_c: int, output_c: int, stride: int):
        super(InvertedResidual, self).__init__()

        if stride not in [1, 2]:
            raise ValueError("illegal stride value.")
        self.stride = stride

        assert output_c % 2 == 0
        branch_features = output_c // 2
        # when stride is 1, input_c must be twice branch_features;
        # '<<' is Python's bitwise left shift, a quick way to multiply by 2
        assert (self.stride != 1) or (input_c == branch_features << 1)

        if self.stride == 2:
            self.branch1 = nn.Sequential(
                self.depthwise_conv(input_c, input_c, kernel_s=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(input_c),
                nn.Conv2d(input_c, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(branch_features),
                nn.ReLU(inplace=True)
            )
        else:
            self.branch1 = nn.Sequential()

        self.branch2 = nn.Sequential(
            nn.Conv2d(input_c if self.stride > 1 else branch_features, branch_features, kernel_size=1,
                      stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True),
            self.depthwise_conv(branch_features, branch_features, kernel_s=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(branch_features),
            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.ReLU(inplace=True)
        )

    @staticmethod
    def depthwise_conv(input_c: int, output_c: int, kernel_s: int, stride: int = 1, padding: int = 0, bias: bool = False):
        return nn.Conv2d(in_channels=input_c, out_channels=output_c, kernel_size=kernel_s,
                         stride=stride, padding=padding, bias=bias, groups=input_c)

    def forward(self, x: Tensor):
        if self.stride == 1:
            x1, x2 = x.chunk(2, dim=1)
            out = torch.cat((x1, self.branch2(x2)), dim=1)
        else:
            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
        out = channel_shuffle(out, 2)
        return out


class ShuffleNetV2(nn.Module):
    def __init__(self, stages_repeats: List[int], stages_out_channels: List[int], num_classes: int = 1000,
                 inverted_residual: Callable[..., nn.Module] = InvertedResidual):
        super(ShuffleNetV2, self).__init__()

        if len(stages_repeats) != 3:
            raise ValueError("expected stages_repeats as list of 3 positive ints")
        if len(stages_out_channels) != 5:
            raise ValueError("expected stages_out_channels as list of 5 positive ints")
        self._stage_out_channels = stages_out_channels

        # input RGB image
        input_channels = 3
        output_channels = self._stage_out_channels[0]

        self.conv1 = nn.Sequential(
            nn.Conv2d(input_channels, output_channels, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace=True)
        )
        input_channels = output_channels

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # static annotations for TorchScript
        self.stage2: nn.Sequential
        self.stage3: nn.Sequential
        self.stage4: nn.Sequential

        stage_names = ["stage{}".format(i) for i in [2, 3, 4]]
        for name, repeats, output_channels in zip(stage_names, stages_repeats, self._stage_out_channels[1:]):
            seq = [inverted_residual(input_channels, output_channels, 2)]
            for i in range(repeats - 1):
                seq.append(inverted_residual(output_channels, output_channels, 1))
            setattr(self, name, nn.Sequential(*seq))
            input_channels = output_channels

        output_channels = self._stage_out_channels[-1]
        self.conv5 = nn.Sequential(
            nn.Conv2d(input_channels, output_channels, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace=True)
        )

        self.fc = nn.Linear(output_channels, num_classes)

    def _forward_impl(self, x: Tensor):
        # See note [TorchScript super()]
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.stage4(x)
        x = self.conv5(x)
        x = x.mean([2, 3])  # global average pool
        x = self.fc(x)
        return x

    def forward(self, x: Tensor):
        return self._forward_impl(x)


def shufflenet_v2_x1_0(num_classes=1000):
    """
    Constructs a ShuffleNetV2 with 1.0x output channels, as described in
    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
    <https://arxiv.org/abs/1807.11164>`.
    weight: https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth
    :param num_classes:
    :return:
    """
    model = ShuffleNetV2(stages_repeats=[4, 8, 4],
                         stages_out_channels=[24, 116, 232, 464, 1024],
                         num_classes=num_classes)
    return model


def shufflenet_v2_x0_5(num_classes=1000):
    """
    Constructs a ShuffleNetV2 with 0.5x output channels, as described in
    `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
    <https://arxiv.org/abs/1807.11164>`.
    weight: https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth
    :param num_classes:
    :return:
    """
    model = ShuffleNetV2(stages_repeats=[4, 8, 4],
                         stages_out_channels=[24, 48, 96, 192, 1024],
                         num_classes=num_classes)
    return model
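
# Hedged usage sketch (not part of the original file):
if __name__ == "__main__":
    net = shufflenet_v2_x1_0(num_classes=10)
    print(net(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 10])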
Swin Transformer introduces a mechanism called Relative Position Encoding. Position information usually matters a great deal in natural images, yet the traditional Transformer, with its global-attention design, has no explicit sense of position. To address this, Swin Transformer adds relative position encoding to the self-attention computation inside each window.
The core idea of relative position encoding is that the relationship between any two pixels in a window should depend only on their relative offset, not on their absolute positions in the full image. For example, the influence of the top-left pixel on the bottom-right pixel in one window should be the same as the influence of the top-left pixel on the bottom-right pixel in any other window.
In implementation, relative position encoding in Swin Transformer is usually realized by learning a two-dimensional embedding matrix. The size of this matrix is determined by the W-MSA window size; for any two pixels in a window, their relative position encoding is the bias stored at the corresponding entry of the matrix. When self-attention is computed, the relative position bias is taken into account together with the conventional attention scores, which strengthens the model's awareness of position information.
The biases in the embedding matrix are learnable parameters. At the start of training they are usually initialized to zero, meaning that in the early stage the model makes no explicit use of position information. Because the matrix is learnable, however, as training proceeds the model automatically adjusts the biases to exploit position information more effectively, via backpropagation and gradient-descent-style optimization. Simply put, if the model finds that a particular positional relationship helps its predictions, it adjusts the corresponding encoding to represent that relationship more strongly. This is an end-to-end learning approach and an important idea in deep learning.
In practice these bias parameters are treated as part of the model and are saved and loaded together with the other parameters (such as the Transformer's weight matrices). After training is complete, they can be applied to new images to provide position-related contextual information.
In this way, Swin Transformer successfully extends the traditional Transformer's global attention to natural images while retaining an awareness of position, achieving excellent performance.
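
To make the mechanism concrete, the sketch below is a minimal, hypothetical PyTorch module for the relative position bias of one square window. The class name and shapes are illustrative simplifications, not Swin Transformer's actual implementation:

import torch
import torch.nn as nn

class WindowRelativePositionBias(nn.Module):
    # Learns one bias per attention head for each possible relative offset
    # inside a window_size x window_size window, and adds it to the raw
    # attention scores.
    def __init__(self, window_size: int, num_heads: int):
        super().__init__()
        self.window_size = window_size
        # (2*Ws - 1)^2 distinct (dy, dx) offsets; initialized to zero as
        # described above, then learned end-to-end
        self.bias_table = nn.Parameter(
            torch.zeros((2 * window_size - 1) ** 2, num_heads))
        # precompute, for every ordered pair of window positions, the index
        # of its relative offset in the table
        coords = torch.stack(torch.meshgrid(
            torch.arange(window_size), torch.arange(window_size),
            indexing="ij"))                                  # [2, Ws, Ws]
        coords = torch.flatten(coords, 1)                    # [2, N] with N = Ws*Ws
        rel = coords[:, :, None] - coords[:, None, :]        # [2, N, N]
        rel = rel.permute(1, 2, 0).contiguous()              # [N, N, 2]
        rel[:, :, 0] += window_size - 1                      # shift offsets to be >= 0
        rel[:, :, 1] += window_size - 1
        rel[:, :, 0] *= 2 * window_size - 1                  # flatten (dy, dx) to one index
        self.register_buffer("relative_index", rel.sum(-1))  # [N, N]

    def forward(self, attn_scores: torch.Tensor) -> torch.Tensor:
        # attn_scores: [batch * num_windows, num_heads, N, N]
        n = self.window_size ** 2
        bias = self.bias_table[self.relative_index.view(-1)]  # [N*N, heads]
        bias = bias.view(n, n, -1).permute(2, 0, 1)           # [heads, N, N]
        return attn_scores + bias.unsqueeze(0)

A table of (2W-1)^2 entries suffices because the row and column offsets between any two pixels of a W x W window each take only 2W-1 possible values; for Swin's default 7x7 window that is 169 rows per head, which is exactly the cross-window sharing described above.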
import torch
import torch.nn as nn
from utils.CustomLayers import ConvActivation

# official pretrained weights
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth'
}

# note: if using pretrained parameters, subtract the ImageNet channel means
# (123.68, 116.78, 103.94) to normalize the dataset
cfgs_feature = {
    'vgg11': [64, 'Pooling', 128, 'Pooling', 256, 256, 'Pooling', 512, 512, 'Pooling', 512, 512, 'Pooling'],
    'vgg13': [64, 64, 'Pooling', 128, 128, 'Pooling', 256, 256, 'Pooling', 512, 512, 'Pooling', 512, 512, 'Pooling'],
    'vgg16': [64, 64, 'Pooling', 128, 128, 'Pooling', 256, 256, 256, 'Pooling', 512, 512, 512, 'Pooling', 512, 512, 512, 'Pooling'],
    'vgg19': [64, 64, 'Pooling', 128, 128, 'Pooling', 256, 256, 256, 256, 'Pooling', 512, 512, 512, 512, 'Pooling', 512, 512, 512, 512, 'Pooling'],
}


def create_feature_layers(cfgs: list, input_channels=3):
    feature_layers = []
    for layer in cfgs:
        if layer == 'Pooling':
            feature_layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            feature_layers += [ConvActivation(input_channels, layer, kernel_size=3, stride=1, padding=1)]
            input_channels = layer
    return nn.Sequential(*feature_layers)


class VggNet(nn.Module):
    def __init__(self, num_classes, feature_layers_type='vgg16', init_weights=True):
        super().__init__()
        assert feature_layers_type in cfgs_feature, "Warning: feature_layers_type not in cfgs dict!"
        self.feature_layers = create_feature_layers(cfgs=cfgs_feature[feature_layers_type])
        self.classifier_layers = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.4),
            nn.Linear(4096, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.feature_layers(x)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier_layers(x)
        return x
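
# Hedged usage sketch (not part of the original file; assumes utils.CustomLayers
# is on the import path and a 224x224 input so the final feature map is 7x7):
if __name__ == "__main__":
    net = VggNet(num_classes=10, feature_layers_type='vgg16')
    print(net(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 10])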