Cowboy Detection with Faster R-CNN (Jupyter Notebook)
Import Packages
!pip install pycocotools
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/engine.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/utils.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_utils.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_eval.py
!wget https://raw.githubusercontent.com/pytorch/vision/main/references/detection/transforms.py
import torch
from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn, FasterRCNN, FastRCNNPredictor
from engine import train_one_epoch, evaluate
from torch.utils.data import Dataset, DataLoader, Subset
import os
from PIL import Image
from torchvision.transforms import v2 as T
from pycocotools.coco import COCO
Dataset
class CowBoyDataSet(Dataset):
    def __init__(self, coco, img_dir, transforms):
        self.coco = coco
        self.img_dir = img_dir
        self.transforms = transforms
        self.img_ids = list(sorted(coco.imgs.keys()))

    def __getitem__(self, idx):
        # returns the image tensor plus its boxes and category labels
        img_id = self.img_ids[idx]
        img_name = self.coco.loadImgs(img_id)[0]['file_name']
        img_path = os.path.join(self.img_dir, img_name)
        img = Image.open(img_path).convert("RGB")
        anno_ids = self.coco.getAnnIds(img_id)
        annos = self.coco.loadAnns(anno_ids)
        boxes = []
        labels = []
        areas = []
        iscrowds = []
        for anno in annos:
            # COCO annotations store boxes as x, y, w, h;
            # torchvision's Faster R-CNN expects x_min, y_min, x_max, y_max
            x_min, y_min, w, h = anno['bbox']
            x_max, y_max = x_min + w, y_min + h
            boxes.append([x_min, y_min, x_max, y_max])
            cat_id = anno['category_id']
            label = catid_2_label[cat_id]
            labels.append(label)
            areas.append(anno['area'])
            # iscrowd marks a group of small objects annotated together; False means a single clean object
            iscrowds.append(anno['iscrowd'])
        img_id = torch.as_tensor([idx], dtype=torch.int64)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        areas = torch.tensor(areas)
        iscrowds = torch.tensor(iscrowds)
        # during training the model only uses boxes and labels;
        # the other fields are consumed by the COCO evaluation tools
        targets = {
            'boxes': boxes,
            'labels': labels,
            'image_id': int(img_id),
            'area': areas,
            'iscrowd': iscrowds
        }
        if self.transforms is not None:
            img, targets = self.transforms(img, targets)
        return img, targets

    def __len__(self):
        return len(self.img_ids)

# detection images (and their targets) come in different sizes, so the DataLoader needs
# a custom collate_fn to batch them; the default collation would raise an error
def collate_fn(batch):
    return tuple(zip(*batch))

def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)
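A note on the transforms: torchvision's v2 geometric transforms only update targets that are wrapped as tv_tensors; plain tensors inside the target dict pass through unchanged, so the RandomHorizontalFlip above flips the image but leaves the boxes as they are. If the boxes should follow the flip, one option (a sketch, not part of the original run) is to wrap them inside __getitem__:

from torchvision import tv_tensors

# inside CowBoyDataSet.__getitem__, after the annotation loop (sketch):
boxes = tv_tensors.BoundingBoxes(
    torch.as_tensor(boxes, dtype=torch.float32),
    format="XYXY",                         # x_min, y_min, x_max, y_max, as built above
    canvas_size=(img.height, img.width),   # (H, W) of the loaded PIL image
)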
Config
class config:
    coco = COCO('/kaggle/input/cowboyoutfits/train.json')
    IMG_PATH = '/kaggle/input/cowboyoutfits/images'
    VAL_SIZE = 613
    NUM_WORKERS = 2
    LR = 0.005
    MOMENTUM = 0.9
    WEIGHT_DECAY = 0.0005
    EPOCH = 15
    STEP_SIZE = 3
    GAMMA = 0.1

cat_map = {v['id']: v['name'] for k, v in config.coco.cats.items()}
# real object classes start from label 1; label 0 is reserved for the background class
catid_2_label = {cat_id: index + 1 for index, cat_id in enumerate(list(sorted(cat_map.keys())))}
label_2_catid = {v: k for k, v in catid_2_label.items()}
loading annotations into memory...
Done (t=0.04s)
creating index...
index created!
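A quick check of the mapping (the category ids and names live in train.json, so only the lookup is shown here):

# print the original COCO category id, its name, and the contiguous training label
for cat_id in sorted(cat_map):
    print(cat_id, cat_map[cat_id], '->', catid_2_label[cat_id])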
Split Dataset
# the train/val split is re-randomized on every run; to keep a fixed split
# (e.g. for resuming training), save the indices
dataset_train = CowBoyDataSet(config.coco, config.IMG_PATH, get_transform(train=True))
dataset_eval = CowBoyDataSet(config.coco, config.IMG_PATH, get_transform(train=False))
indices = torch.randperm(len(dataset_train)).tolist()
dataset_train = Subset(dataset_train, indices[: -config.VAL_SIZE])
dataset_eval = Subset(dataset_eval, indices[-config.VAL_SIZE:])
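To make the split reproducible across sessions, the indices can be dumped to disk and reloaded later (a minimal sketch; the file name is arbitrary):

import json

# save the permutation used for this split
with open('split_indices.json', 'w') as f:
    json.dump(indices, f)

# in a later session, restore the same split before building the Subsets:
# with open('split_indices.json') as f:
#     indices = json.load(f)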
Dataloader
train_loader = DataLoader(
    dataset_train, batch_size=2, shuffle=True, num_workers=config.NUM_WORKERS, collate_fn=collate_fn)
val_loader = DataLoader(
    dataset_eval, batch_size=2, shuffle=False, num_workers=config.NUM_WORKERS, collate_fn=collate_fn)
# quick sanity check on one batch
for images, targets in train_loader:
    print(images[0].shape, targets[0])
    break
torch.Size([3, 565, 1024]) {'boxes': tensor([[588.8000, 103.7800, 680.3200, 179.3700],
[866.5600, 406.1300, 901.7600, 429.8300]]), 'labels': tensor([3, 3]), 'image_id': 2041, 'area': tensor([6917.9702, 834.3000]), 'iscrowd': tensor([False, False])}
Model
def get_model(num_class):
    fasterrcnn = fasterrcnn_resnet50_fpn(weights="DEFAULT")
    in_features = fasterrcnn.roi_heads.box_predictor.cls_score.in_features
    # replace the head: num_class real categories + 1 for the background class
    fasterrcnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_class + 1)
    return fasterrcnn
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
fasterrcnn = get_model(5).to(device)  # the CowboyOutfits dataset has 5 object categories
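A quick way to confirm the new head predicts 6 classes (5 categories plus background) is to print it:

# FastRCNNPredictor holds cls_score (6 logits) and bbox_pred (6 * 4 = 24 box deltas)
print(fasterrcnn.roi_heads.box_predictor)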
params = [p for p in fasterrcnn.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=config.LR, momentum=config.MOMENTUM, weight_decay=config.WEIGHT_DECAY)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=config.STEP_SIZE, gamma=config.GAMMA)
Train and Eval
for epoch in range(config.EPOCH):
    # train for one epoch, printing every 225 iterations
    train_one_epoch(fasterrcnn, optimizer, train_loader, device, epoch, print_freq=225)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation dataset
    evaluate(fasterrcnn, val_loader, device=device)
Epoch: [0] [ 0/1225] eta: 0:54:38 lr: 0.000010 loss: 2.3367 (2.3367) loss_classifier: 2.0731 (2.0731) loss_box_reg: 0.0797 (0.0797) loss_objectness: 0.1657 (0.1657) loss_rpn_box_reg: 0.0182 (0.0182) time: 2.6761 data: 0.1969 max mem: 2364
Epoch: [0] [ 225/1225] eta: 0:09:59 lr: 0.001134 loss: 0.2126 (0.5220) loss_classifier: 0.0864 (0.2973) loss_box_reg: 0.0909 (0.1148) loss_objectness: 0.0334 (0.0927) loss_rpn_box_reg: 0.0056 (0.0171) time: 0.6091 data: 0.0084 max mem: 4128
Epoch: [0] [ 450/1225] eta: 0:07:58 lr: 0.002258 loss: 0.2499 (0.4043) loss_classifier: 0.1113 (0.2027) loss_box_reg: 0.0978 (0.1098) loss_objectness: 0.0274 (0.0754) loss_rpn_box_reg: 0.0078 (0.0164) time: 0.6615 data: 0.0079 max mem: 4314
Epoch: [0] [ 675/1225] eta: 0:05:40 lr: 0.003382 loss: 0.2278 (0.3743) loss_classifier: 0.0910 (0.1747) loss_box_reg: 0.0867 (0.1117) loss_objectness: 0.0289 (0.0721) loss_rpn_box_reg: 0.0049 (0.0158) time: 0.6045 data: 0.0080 max mem: 4314
Epoch: [0] [ 900/1225] eta: 0:03:22 lr: 0.004505 loss: 0.1977 (0.3444) loss_classifier: 0.0619 (0.1548) loss_box_reg: 0.0788 (0.1075) loss_objectness: 0.0310 (0.0665) loss_rpn_box_reg: 0.0065 (0.0156) time: 0.6353 data: 0.0081 max mem: 4314
Epoch: [0] [1125/1225] eta: 0:01:02 lr: 0.005000 loss: 0.2195 (0.3330) loss_classifier: 0.1018 (0.1461) loss_box_reg: 0.0816 (0.1073) loss_objectness: 0.0228 (0.0641) loss_rpn_box_reg: 0.0030 (0.0155) time: 0.6108 data: 0.0086 max mem: 4314
Epoch: [0] [1224/1225] eta: 0:00:00 lr: 0.005000 loss: 0.2440 (0.3281) loss_classifier: 0.0905 (0.1421) loss_box_reg: 0.1042 (0.1067) loss_objectness: 0.0156 (0.0638) loss_rpn_box_reg: 0.0080 (0.0155) time: 0.6164 data: 0.0076 max mem: 4314
Epoch: [0] Total time: 0:12:44 (0.6240 s / it)
creating index...
index created!
Test: [ 0/307] eta: 0:02:16 model_time: 0.2611 (0.2611) evaluator_time: 0.0126 (0.0126) time: 0.4431 data: 0.1613 max mem: 4314
Test: [100/307] eta: 0:01:00 model_time: 0.2534 (0.2699) evaluator_time: 0.0052 (0.0063) time: 0.2939 data: 0.0090 max mem: 4314
Test: [200/307] eta: 0:00:41 model_time: 0.2459 (0.2685) evaluator_time: 0.0052 (0.1076) time: 0.2846 data: 0.0086 max mem: 4314
Test: [300/307] eta: 0:00:02 model_time: 0.2415 (0.2684) evaluator_time: 0.0049 (0.0740) time: 0.2700 data: 0.0085 max mem: 4314
Test: [306/307] eta: 0:00:00 model_time: 0.2403 (0.2678) evaluator_time: 0.0051 (0.0727) time: 0.2607 data: 0.0081 max mem: 4314
Test: Total time: 0:01:49 (0.3557 s / it)
Averaged stats: model_time: 0.2403 (0.2678) evaluator_time: 0.0051 (0.0727)
Accumulating evaluation results...
DONE (t=0.41s).
IoU metric: bbox
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.223
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.476
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.173
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.029
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.101
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.269
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.213
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.367
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.374
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.091
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.246
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.418
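label_2_catid, built earlier but unused during training, comes into play at inference time: the model predicts the contiguous labels 1..5, which have to be mapped back to the original COCO category ids. A minimal inference sketch (the 0.5 score threshold is an arbitrary choice for illustration):

fasterrcnn.eval()
with torch.no_grad():
    img, _ = dataset_eval[0]                    # any image from the validation split
    pred = fasterrcnn([img.to(device)])[0]      # dict with 'boxes', 'labels', 'scores'

keep = pred['scores'] > 0.5                     # hypothetical confidence threshold
for box, label, score in zip(pred['boxes'][keep], pred['labels'][keep], pred['scores'][keep]):
    cat_id = label_2_catid[int(label)]          # contiguous label -> original COCO category id
    print(cat_map[cat_id], round(score.item(), 3), [round(c, 1) for c in box.tolist()])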
This post is licensed under CC BY 4.0 by the author.