Cowboy Detection with SSD (Jupyter Notebook)
Import Packages
# install pycocotools and download the torchvision detection reference utilities
# (engine.py provides train_one_epoch/evaluate; the other files are its helpers)
!pip install pycocotools
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/engine.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/utils.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_utils.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_eval.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/transforms.py
import torch
from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn, FasterRCNN, FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader, Subset
import os
from PIL import Image
from torchvision.transforms import v2 as T
from engine import train_one_epoch, evaluate
from pycocotools.coco import COCO
Dataset
class CowBoyDataSet(Dataset):
    def __init__(self, coco, img_dir, transforms):
        self.coco = coco
        self.img_dir = img_dir
        self.transforms = transforms
        self.img_ids = list(sorted(coco.imgs.keys()))

    def __getitem__(self, idx):
        # returns the image tensor plus its boxes and category labels
        img_id = self.img_ids[idx]
        img_name = self.coco.loadImgs(img_id)[0]['file_name']
        img_path = os.path.join(self.img_dir, img_name)
        img = Image.open(img_path).convert("RGB")
        anno_ids = self.coco.getAnnIds(img_id)
        annos = self.coco.loadAnns(anno_ids)
        boxes = []
        labels = []
        areas = []
        iscrowds = []
        for anno in annos:
            # COCO annotations store boxes as x, y, w, h,
            # while torchvision detection models expect x_min, y_min, x_max, y_max
            x_min, y_min, w, h = anno['bbox']
            x_max, y_max = x_min + w, y_min + h
            boxes.append([x_min, y_min, x_max, y_max])
            cat_id = anno['category_id']
            label = catid_2_label[cat_id]
            labels.append(label)
            areas.append(anno['area'])
            # iscrowd marks a cluster of small objects; 0 means a clean single object
            iscrowds.append(anno['iscrowd'])
        img_id = torch.as_tensor([idx], dtype=torch.int64)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        areas = torch.tensor(areas)
        iscrowds = torch.tensor(iscrowds)
        # the model only uses boxes and labels during training;
        # the remaining fields are needed by the COCO evaluation utilities
        targets = {
            'boxes': boxes,
            'labels': labels,
            'image_id': int(img_id),
            'area': areas,
            'iscrowd': iscrowds
        }
        if self.transforms is not None:
            img, targets = self.transforms(img, targets)
        return img, targets

    def __len__(self):
        return len(self.img_ids)


# each image contains a different number of objects, so default batching would fail;
# this collate_fn lets the DataLoader return variable-sized targets as tuples
def collate_fn(batch):
    return tuple(zip(*batch))


def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
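One caveat with the v2 transform API: geometric transforms such as RandomHorizontalFlip only update bounding boxes that are wrapped as tv_tensors, while plain tensors inside the target dict pass through unchanged, so a flipped image can end up paired with unflipped boxes. A minimal sketch, assuming torchvision 0.16+ with the tv_tensors module, of wrapping the boxes inside __getitem__ so the flip also updates them:

from torchvision import tv_tensors

# inside CowBoyDataSet.__getitem__, just before calling self.transforms
# (illustrative; boxes is already an XYXY float tensor at this point)
targets['boxes'] = tv_tensors.BoundingBoxes(
    boxes, format="XYXY", canvas_size=(img.height, img.width))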
Config
class config:
    coco = COCO('/kaggle/input/cowboyoutfits/train.json')
    IMG_PATH = '/kaggle/input/cowboyoutfits/images'
    VAL_SIZE = 613
    NUM_WORKS = 2
    LR = 0.005
    MOMENTUM = 0.9
    weight_decay = 0.0005
    EPOCH = 3
    STEP_SIZE = 3
    GAMMA = 0.1


cat_map = {v['id']: v['name'] for v in config.coco.cats.values()}
# real object classes start at 1; label 0 is reserved for the background
catid_2_label = {cat_id: index + 1 for index, cat_id in enumerate(list(sorted(cat_map.keys())))}
label_2_catid = {v: k for k, v in catid_2_label.items()}
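A quick illustrative check of what the two mappings contain (the exact category ids and names depend on the train.json shipped with the competition data):

print(cat_map)          # original category id -> category name
print(catid_2_label)    # original category id -> contiguous training label (1..len(cat_map))
print(label_2_catid)    # inverse map, used to convert predictions back to category ids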
Split Dataset
Dataloader
# the train/val split here is re-randomized on every run; save the indices
# if you need a fixed split in order to resume training later
dataset_train = CowBoyDataSet(config.coco, config.IMG_PATH, get_transform(train=True))
dataset_eval = CowBoyDataSet(config.coco, config.IMG_PATH, get_transform(train=False))
indices = torch.randperm(len(dataset_train)).tolist()
dataset_train = Subset(dataset_train, indices[: -config.VAL_SIZE])
dataset_eval = Subset(dataset_eval, indices[-config.VAL_SIZE:])
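As the comment above notes, the split changes on every run. A minimal sketch, assuming a writable /kaggle/working directory, of persisting the indices so a later session can rebuild the exact same split:

# save once, right after creating the split
torch.save(indices, '/kaggle/working/split_indices.pt')

# in a later run, reuse the saved order instead of calling randperm again
# indices = torch.load('/kaggle/working/split_indices.pt')
# dataset_train = Subset(dataset_train, indices[: -config.VAL_SIZE])
# dataset_eval = Subset(dataset_eval, indices[-config.VAL_SIZE:])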
train_loader = DataLoader(
    dataset_train, 4, shuffle=True, num_workers=config.NUM_WORKS, collate_fn=collate_fn, drop_last=True)
val_loader = DataLoader(
    dataset_eval, 4, shuffle=False, num_workers=config.NUM_WORKS, collate_fn=collate_fn, drop_last=True)
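A quick sanity check on the loaders: thanks to collate_fn, one batch should come back as a tuple of 4 image tensors and a tuple of 4 target dicts (a sketch, run only to verify the pipeline):

images, targets = next(iter(train_loader))
print(len(images), images[0].shape)                      # 4 tensors, each C x H x W (sizes vary per image)
print(targets[0]['boxes'].shape, targets[0]['labels'])   # per-image boxes and labels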
Model
from torchvision.models.detection import ssdlite320_mobilenet_v3_large, ssd300_vgg16
!wget https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth
def get_model(num_class):
    model = ssdlite320_mobilenet_v3_large(weights=None, num_classes=num_class)
    # load the downloaded COCO checkpoint alongside the fresh model's state_dict
    mstate_dict = model.state_dict()
    cstate_dict = torch.load('/kaggle/working/ssdlite320_mobilenet_v3_large_coco-a79551df.pth', map_location='cpu')
    # drop checkpoint weights whose shape no longer matches
    # (typically the classification head, whose shape depends on num_class)
    for k in mstate_dict.keys():
        if mstate_dict[k].shape != cstate_dict[k].shape:
            print('key {} will be removed, checkpoint shape: {}, training shape: {}'.format(k, cstate_dict[k].shape, mstate_dict[k].shape))
            cstate_dict.pop(k)
    # load everything that is left; strict=False tolerates the removed keys
    model.load_state_dict(cstate_dict, strict=False)
    return model
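Before wiring up the optimizer, it can be worth a quick smoke test that the surgically loaded model accepts input and returns detection dicts. A sketch, assuming the checkpoint downloaded above sits in /kaggle/working; the dummy 320x320 image is arbitrary:

m = get_model(6)
m.eval()
with torch.no_grad():
    preds = m([torch.rand(3, 320, 320)])
print(preds[0].keys())   # expects 'boxes', 'scores' and 'labels'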
model = get_model(6)
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
param = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(param, lr=config.LR, momentum=config.MOMENTUM, weight_decay=config.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=config.STEP_SIZE, gamma=config.GAMMA)
Train and Eval
for epoch in range(config.EPOCH):
    # train for one epoch, printing every 225 iterations
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=225)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation split
    evaluate(model, val_loader, device=device)
Epoch: [0] [ 0/612] eta: 0:04:41 lr: 0.000013 loss: 15.8132 (15.8132) bbox_regression: 3.8578 (3.8578) classification: 11.9554 (11.9554) time: 0.4603 data: 0.2584 max mem: 703
Epoch: [0] [225/612] eta: 0:00:37 lr: 0.001853 loss: 6.2370 (8.6265) bbox_regression: 2.5555 (3.0052) classification: 3.5278 (5.6213) time: 0.0948 data: 0.0181 max mem: 708
Epoch: [0] [450/612] eta: 0:00:15 lr: 0.003692 loss: 5.8581 (7.2568) bbox_regression: 2.6145 (2.7386) classification: 3.3042 (4.5182) time: 0.0943 data: 0.0177 max mem: 710
Epoch: [0] [611/612] eta: 0:00:00 lr: 0.005000 loss: 6.2539 (6.9103) bbox_regression: 2.6714 (2.6927) classification: 3.2703 (4.2176) time: 0.0932 data: 0.0188 max mem: 710
Epoch: [0] Total time: 0:00:59 (0.0966 s / it)
creating index...
index created!
Test: [ 0/153] eta: 0:01:08 model_time: 0.1527 (0.1527) evaluator_time: 0.0523 (0.0523) time: 0.4477 data: 0.2242 max mem: 710
Test: [100/153] eta: 0:00:06 model_time: 0.0275 (0.0315) evaluator_time: 0.0499 (0.0586) time: 0.1313 data: 0.0190 max mem: 710
Test: [152/153] eta: 0:00:00 model_time: 0.0277 (0.0313) evaluator_time: 0.0462 (0.0575) time: 0.1330 data: 0.0382 max mem: 768
Test: Total time: 0:00:18 (0.1224 s / it)
Averaged stats: model_time: 0.0277 (0.0313) evaluator_time: 0.0462 (0.0575)
Accumulating evaluation results...
DONE (t=2.02s).
IoU metric: bbox
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.033
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.105
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.011
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.003
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.049
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.073
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.135
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.188
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.025
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.257
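After training, the weights can be saved so they survive the notebook session. A minimal sketch; the output path is an arbitrary choice:

# persist the fine-tuned SSDLite weights for later inference or continued training
torch.save(model.state_dict(), '/kaggle/working/ssdlite320_cowboy.pth')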