YOLO Series Basic Model Structure
YOLO Series Basic Model Structure
General YOLO model structure
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
class BackBone(nn.Module):
    """Feature extractor that taps intermediate outputs of a layered model.

    The direct children of ``model`` are chained in order. During the forward
    pass the outputs of the children whose positions appear in ``extract_ids``
    are collected and returned in reverse layer order.

    Args:
        model: Module whose direct children are applied one after another.
        extract_ids: 0-based positions (in ``model.children()`` order) of the
            children whose outputs should be returned.
    """

    def __init__(self,
                 model: nn.Module,
                 extract_ids: list):
        super().__init__()
        self.model = model
        self.extract_ids = extract_ids
        # Chain the direct children so they can be run and indexed one by one.
        self.net = nn.Sequential(*model.children())

    def forward(self, x):
        """Run ``x`` through the layers and return the tapped outputs.

        Layers located after the last requested index are not executed: their
        results would be discarded anyway, and skipping them lets the wrapper
        be used with models that carry trailing layers which cannot consume
        the feature map.

        Args:
            x: Input passed to the first child layer.

        Returns:
            list: Outputs of the tapped layers in reverse layer order (the
            output of the deepest tapped layer comes first). Empty when
            ``extract_ids`` is empty.
        """
        # Built per call so later changes to ``self.extract_ids`` are honored.
        wanted = set(self.extract_ids)
        if not wanted:
            return []

        last_wanted = max(wanted)
        outputs = []
        for index, layer in enumerate(self.net):
            x = layer(x)
            if index in wanted:
                outputs.append(x)
            if index >= last_wanted:
                # Nothing past this point can contribute to the result.
                break
        return outputs[::-1]
class Neck(nn.Module):
    """Fuses a list of feature maps, passing information from one level to the next.

    The first feature map is processed on its own. Every later level takes the
    previous level's output, reduces its channels, resizes it to the current
    feature map's spatial size, concatenates the two along the channel axis
    and processes the result.

    ``ConvSet`` and ``CNNBlock`` are defined elsewhere in the project; they are
    presumed to take ``(in_channels, out_channels, kernel_size)`` -- TODO
    confirm against their definitions.

    Args:
        input_channels: Channel count of each incoming feature map, in the
            order ``forward`` receives them.
        output_channels: Channel count produced for each level. Must have the
            same length as ``input_channels``.
    """

    def __init__(self, input_channels: list, output_channels: list):
        super().__init__()
        assert len(input_channels) == len(output_channels), \
            "input_channels and output_channels must have the same length"
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.net = self._create_layers()

    def forward(self, Xs: list):
        """Fuse the feature maps level by level.

        The input list is left untouched; results are returned in a new list.

        Args:
            Xs: Feature maps, one per level, in the same order as
                ``input_channels``.

        Returns:
            list: One fused feature map per entry of ``Xs``.
        """
        outputs = []
        for i, x in enumerate(Xs):
            stage = self.net[i]
            if i == 0:
                fused = stage[0](x)
            else:
                # Bring the previous level's result to this level's resolution
                # before merging it with the current feature map.
                lateral = stage[0](outputs[-1])
                lateral = F.interpolate(
                    lateral,
                    size=(x.size(-2), x.size(-1)),
                    mode='bilinear',
                    align_corners=False,  # the documented default, made explicit
                )
                fused = stage[1](torch.cat((lateral, x), dim=1))
            outputs.append(fused)
        return outputs

    def _create_layers(self):
        """Build the per-level layers.

        Returns:
            nn.ModuleList: One inner ``nn.ModuleList`` per level. Level 0 holds
            a single ``ConvSet``; every later level holds a channel-reducing
            ``CNNBlock`` followed by a ``ConvSet`` for the concatenated tensor.
        """
        # nn.ModuleList (rather than a plain list) registers the layers with
        # this module, so their parameters show up in parameters() and
        # state_dict() and follow .to(device) / train() / eval().
        layers = nn.ModuleList()
        for i, (in_ch, out_ch) in enumerate(zip(self.input_channels, self.output_channels)):
            if i == 0:
                stage = [ConvSet(in_ch, out_ch, 1)]
            else:
                reduced = in_ch // 2
                stage = [
                    CNNBlock(self.output_channels[i - 1], reduced, (1, 1)),
                    # Input is the current map concatenated with the reduced map.
                    ConvSet(in_ch + reduced, out_ch, 1),
                ]
            layers.append(nn.ModuleList(stage))
        return layers
class Head(nn.Module):
    """Applies one prediction layer to each incoming feature map.

    ``ScalePrediction`` is defined elsewhere in the project; one instance is
    created per entry of ``input_channels`` as
    ``ScalePrediction(channel, num_class)``.

    Args:
        input_channels: Channel count of each feature map that ``forward``
            will receive, in order.
        num_class: Value forwarded to every ``ScalePrediction`` (default 20).
    """

    def __init__(self, input_channels: list, num_class=20):
        super().__init__()
        self.input_channels = input_channels
        self.num_class = num_class
        self.net = self._create_layers()

    def forward(self, Xs: list):
        """Run each feature map through its matching prediction layer.

        Args:
            Xs: Feature maps, in the same order as ``input_channels``.

        Returns:
            list: One prediction per (feature map, layer) pair; pairing stops
            at the shorter of ``Xs`` and the layer list.
        """
        return [layer(x) for x, layer in zip(Xs, self.net)]

    def _create_layers(self):
        """Build one ``ScalePrediction`` per input channel count.

        Returns:
            nn.ModuleList: The prediction layers, in ``input_channels`` order.
        """
        # nn.ModuleList (rather than a plain list) registers the layers with
        # this module, so their parameters show up in parameters() and
        # state_dict() and follow .to(device) / train() / eval().
        return nn.ModuleList(
            [ScalePrediction(channel, self.num_class) for channel in self.input_channels]
        )
def _demo():
    """Push a random image through backbone, neck and head, printing shapes."""
    ids = [3, 4, 5]
    # `bone` is defined elsewhere in the project; its `.net` supplies the layers.
    backbone = BackBone(bone().net, ids)
    x = torch.randn((1, 3, 416, 416))
    outs = backbone(x)
    for o in outs:
        print(o.shape)
    print('------------')

    # Neck input channels are read from the backbone outputs.
    inc = [o.size(1) for o in outs]
    oc = [512, 256, 128]
    # Neck requires both channel lists; the original call omitted `oc`.
    neck = Neck(inc, oc)
    ne_outs = neck(outs)
    for no in ne_outs:
        print(no.shape)
    print('--------------')

    heads = Head(oc)
    he_outs = heads(ne_outs)
    for ho in he_outs:
        print(ho.shape)


if __name__ == '__main__':
    _demo()

# Output recorded by the original author for this demo (not re-verified here):
#
# torch.Size([1, 1024, 13, 13])
# torch.Size([1, 512, 26, 26])
# torch.Size([1, 256, 52, 52])
# ------------
# torch.Size([1, 512, 13, 13])
# torch.Size([1, 256, 26, 26])
# torch.Size([1, 128, 52, 52])
# --------------
# torch.Size([1, 3, 13, 13, 25])
# torch.Size([1, 3, 26, 26, 25])
# torch.Size([1, 3, 52, 52, 25])
This post is licensed under CC BY 4.0 by the author.