Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6ea1b86

Browse files
add yolo2
1 parent 02d7162 commit 6ea1b86

File tree

7 files changed

+452
-0
lines changed

7 files changed

+452
-0
lines changed

‎ObjectDetections/yolo2/config.py‎

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""
2+
Yolov2 anchors and coco classes
3+
"""
4+
5+
"""
6+
anchors = [[0.738768, 0.874946],
7+
[2.42204, 2.65704],
8+
[4.30971, 7.04493],
9+
[10.246, 4.59428],
10+
[12.6868, 11.8741]]
11+
"""
12+
# Anchor box priors, one [width, height] pair per anchor. The decoder divides
# them by the feature-map size, so they are expressed in grid-cell units.
anchors = [
    [0.57273, 0.677385],
    [1.87446, 2.06253],
    [3.33843, 5.47434],
    [7.88282, 3.52778],
    [9.77052, 9.16828],
]
17+
18+
def read_coco_labels():
    """Load the class names stored one per line in ``./data/coco_classes.txt``.

    The path is resolved against the current working directory.

    Returns:
        list[str]: the class names in file order, without line terminators.

    Raises:
        OSError: if the labels file cannot be opened.
    """
    # The context manager closes the handle even if reading fails.
    with open("./data/coco_classes.txt") as f:
        # Strip only the newline. Slicing off the last character would
        # truncate the final name when the file has no trailing newline.
        return [line.rstrip("\n") for line in f]
24+
25+
# Loaded once at import time: importing this module reads the labels file.
class_names = read_coco_labels()

‎ObjectDetections/yolo2/demo.py‎

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
Demo for yolov2
3+
"""
4+
5+
import numpy as np
6+
import tensorflow as tf
7+
import cv2
8+
from PIL import Image
9+
10+
from model import darknet
11+
from detect_ops import decode
12+
from utils import preprocess_image, postprocess, draw_detection
13+
from config import anchors, class_names
14+
15+
16+
# Spatial size of the network input; also used to size the placeholder below.
input_size = (416, 416)
image_file = "./images/car.jpg"
image = cv2.imread(image_file)
# cv2.imread reports an unreadable or missing file by returning None instead
# of raising, so fail here with a clear message rather than on `.shape`.
if image is None:
    raise FileNotFoundError("Cannot read image file: {}".format(image_file))
image_shape = image.shape[:2]
image_cp = preprocess_image(image, input_size)
# Alternative PIL-based preprocessing, kept for reference (disabled):
#   image = Image.open(image_file)
#   image_cp = image.resize(input_size, Image.BICUBIC)
#   image_cp = np.array(image_cp, dtype=np.float32)/255.0
#   image_cp = np.expand_dims(image_cp, 0)
#   #print(image_cp)

# Build the inference graph for a single image.
images = tf.placeholder(tf.float32, [1, input_size[0], input_size[1], 3])
detection_feat = darknet(images)
# The detection feature map is 32 times smaller than the input on each side.
feat_sizes = input_size[0] // 32, input_size[1] // 32
detection_results = decode(detection_feat, feat_sizes, len(class_names), anchors)

# Restore the pretrained weights and run the network once.
checkpoint_path = "./checkpoint_dir/yolo2_coco.ckpt"
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    bboxes, obj_probs, class_probs = sess.run(detection_results, feed_dict={images: image_cp})

bboxes, scores, class_inds = postprocess(bboxes, obj_probs, class_probs,
                                         image_shape=image_shape)
img_detection = draw_detection(image, bboxes, scores, class_inds, class_names)
# Save the annotated image, then show it until a key is pressed.
cv2.imwrite("detection.jpg", img_detection)
cv2.imshow("detection results", img_detection)

cv2.waitKey(0)
48+
49+
50+

‎ObjectDetections/yolo2/detect_ops.py‎

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""
2+
Detection ops for Yolov2
3+
"""
4+
5+
import tensorflow as tf
6+
import numpy as np
7+
8+
9+
def decode(detection_feat, feat_sizes=(13, 13), num_classes=80,
           anchors=None):
    """Decode the raw detection feature map into boxes and probabilities.

    Args:
        detection_feat: network output tensor; it is reshaped to
            ``[-1, H * W, num_anchors, num_classes + 5]``.
        feat_sizes: ``(H, W)`` size of the detection feature map.
        num_classes: number of object classes.
        anchors: sequence of ``[width, height]`` anchor priors. Required in
            practice despite the ``None`` default, because its length is taken.

    Returns:
        A tuple ``(bboxes, obj_probs, class_probs)``:
        ``bboxes`` is ``[-1, H * W, num_anchors, 4]`` holding
        ``(x_min, y_min, x_max, y_max)`` normalised by the feature-map size,
        ``obj_probs`` is ``[-1, H * W, num_anchors]`` and ``class_probs`` is
        ``[-1, H * W, num_anchors, num_classes]``.
    """
    H, W = feat_sizes
    num_anchors = len(anchors)
    detection_results = tf.reshape(
        detection_feat, [-1, H * W, num_anchors, num_classes + 5])

    bbox_xy = tf.nn.sigmoid(detection_results[:, :, :, 0:2])
    bbox_wh = tf.exp(detection_results[:, :, :, 2:4])
    obj_probs = tf.nn.sigmoid(detection_results[:, :, :, 4])
    class_probs = tf.nn.softmax(detection_results[:, :, :, 5:])

    anchors = tf.constant(anchors, dtype=tf.float32)

    height_ind = tf.range(H, dtype=tf.float32)
    width_ind = tf.range(W, dtype=tf.float32)
    # With the default 'xy' indexing, meshgrid(width, height) returns [H, W]
    # grids, which flatten in the same row-major order as the H * W cell axis
    # above. Passing (height, width) is only correct when H == W.
    x_offset, y_offset = tf.meshgrid(width_ind, height_ind)
    x_offset = tf.reshape(x_offset, [1, -1, 1])
    y_offset = tf.reshape(y_offset, [1, -1, 1])

    # Box centres: cell offset plus in-cell position, normalised by grid size.
    bbox_x = (bbox_xy[:, :, :, 0] + x_offset) / W
    bbox_y = (bbox_xy[:, :, :, 1] + y_offset) / H
    # Half extents: anchor prior scaled by exp(t), normalised, then halved.
    bbox_w = bbox_wh[:, :, :, 0] * anchors[:, 0] / W * 0.5
    bbox_h = bbox_wh[:, :, :, 1] * anchors[:, 1] / H * 0.5

    bboxes = tf.stack([bbox_x - bbox_w, bbox_y - bbox_h,
                       bbox_x + bbox_w, bbox_y + bbox_h], axis=3)

    return bboxes, obj_probs, class_probs

‎ObjectDetections/yolo2/loss.py‎

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""
2+
Loss function for YOLOv2
3+
"""
4+
5+
import numpy as np
6+
import tensorflow as tf
7+
8+
def compute_loss(predictions, targets, anchors, scales, num_classes=20, feat_sizes=(13, 13)):
    """Compute the YOLOv2 training loss.

    Args:
        predictions: raw network output; it is reshaped to
            ``[-1, H, W, B, 5 + C]``.
        targets: dict with the ground truth:
            ``"coords"`` ``[-1, H*W, B, 4]`` as (x, y, sqrt(w), sqrt(h)),
            ``"probs"`` ``[-1, H*W, B, C]`` one-hot class probabilities,
            ``"confs"`` ``[-1, H*W, B]`` with 1 for object, 0 for background.
        anchors: sequence of ``B`` ``[width, height]`` anchor priors.
        scales: ``(sprob, sconf, snoob, scoor)``, the weights of the class,
            object-confidence, no-object-confidence and coordinate terms.
        num_classes: number of classes ``C``.
        feat_sizes: ``(H, W)`` of the detection feature map.

    Returns:
        Scalar tensor: half the batch mean of the weighted squared error.
    """
    H, W = feat_sizes
    C = num_classes
    B = len(anchors)
    anchors = tf.constant(anchors, dtype=tf.float32)
    anchors = tf.reshape(anchors, [1, 1, B, 2])
    # (W, H) laid out to broadcast against [..., 2] width/height tensors.
    grid_wh = np.reshape([W, H], [1, 1, 1, 2])

    sprob, sconf, snoob, scoor = scales  # the scales for different parts

    _coords = targets["coords"]  # ground truth [-1, H*W, B, 4]
    _probs = targets["probs"]    # class probability [-1, H*W, B, C] one hot
    _confs = targets["confs"]    # 1 for object, 0 for background, [-1, H*W, B]

    # decode the net output
    predictions = tf.reshape(predictions, [-1, H, W, B, (5 + C)])
    coords = predictions[:, :, :, :, 0:4]  # t_x, t_y, t_w, t_h
    coords = tf.reshape(coords, [-1, H * W, B, 4])
    coords_xy = tf.nn.sigmoid(coords[:, :, :, 0:2])  # (0, 1) relative to cell top left
    # sqrt of the normalised width/height, matching the ground-truth encoding
    coords_wh = tf.sqrt(tf.exp(coords[:, :, :, 2:4]) * anchors / grid_wh)
    coords = tf.concat([coords_xy, coords_wh], axis=3)  # [batch_size, H*W, B, 4]

    confs = tf.nn.sigmoid(predictions[:, :, :, :, 4])  # object confidence
    confs = tf.reshape(confs, [-1, H * W, B, 1])

    probs = tf.nn.softmax(predictions[:, :, :, :, 5:])  # class probability
    probs = tf.reshape(probs, [-1, H * W, B, C])

    preds = tf.concat([coords, confs, probs], axis=3)  # [-1, H*W, B, (4+1+C)]

    # Match ground truths with predictions: in each cell the anchor whose
    # prediction has the best IOU with the ground truth is made responsible.
    wh = tf.pow(coords[:, :, :, 2:4], 2) * grid_wh
    areas = wh[:, :, :, 0] * wh[:, :, :, 1]
    centers = coords[:, :, :, 0:2]
    up_left, down_right = centers - (wh * 0.5), centers + (wh * 0.5)

    # the ground truth
    _wh = tf.pow(_coords[:, :, :, 2:4], 2) * grid_wh
    _areas = _wh[:, :, :, 0] * _wh[:, :, :, 1]
    _centers = _coords[:, :, :, 0:2]
    _up_left, _down_right = _centers - (_wh * 0.5), _centers + (_wh * 0.5)

    # compute IOU
    inter_upleft = tf.maximum(up_left, _up_left)
    inter_downright = tf.minimum(down_right, _down_right)
    inter_wh = tf.maximum(inter_downright - inter_upleft, 0.0)
    intersects = inter_wh[:, :, :, 0] * inter_wh[:, :, :, 1]
    ious = tf.truediv(intersects, areas + _areas - intersects)

    best_iou_mask = tf.equal(ious, tf.reduce_max(ious, axis=2, keep_dims=True))
    best_iou_mask = tf.cast(best_iou_mask, tf.float32)
    mask = best_iou_mask * _confs    # [-1, H*W, B]
    mask = tf.expand_dims(mask, -1)  # [-1, H*W, B, 1]

    # Compute weight terms. Each weight needs one column per element it
    # scales (4 coords, 1 confidence, C class probs) so that `weights` has
    # the same last dimension as `preds`. A 3-column tensor would not
    # broadcast against [..., 4+1+C].
    confs_w = snoob * (1 - mask) + sconf * mask
    coords_w = tf.tile(scoor * mask, [1, 1, 1, 4])
    probs_w = tf.tile(sprob * mask, [1, 1, 1, C])
    weights = tf.concat([coords_w, confs_w, probs_w], axis=3)

    truths = tf.concat([_coords, tf.expand_dims(_confs, -1), _probs], 3)

    loss = tf.pow(preds - truths, 2) * weights
    loss = tf.reduce_sum(loss, axis=[1, 2, 3])
    loss = 0.5 * tf.reduce_mean(loss)
    return loss
78+
79+
80+
81+
82+
83+
84+
85+
86+

‎ObjectDetections/yolo2/model.png‎

645 KB
Loading[フレーム]

‎ObjectDetections/yolo2/model.py‎

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
"""
2+
YOLOv2 implemented by Tensorflow, only for predicting
3+
"""
4+
import os
5+
6+
import numpy as np
7+
import tensorflow as tf
8+
9+
10+
11+
######## basic layers #######
12+
13+
def leaky_relu(x):
    """Apply a leaky ReLU with a negative-side slope of 0.1."""
    activated = tf.nn.leaky_relu(x, alpha=0.1, name="leaky_relu")
    return activated
15+
16+
# Conv2d
17+
def conv2d(x, filters, size, pad=0, stride=1, batch_normalize=1,
           activation=leaky_relu, use_bias=False, name="conv2d"):
    """Convolution with explicit padding, optional batch norm and activation.

    Args:
        x: input tensor, padded on its two middle axes (NHWC layout assumed).
        filters: number of output channels.
        size: convolution kernel size.
        pad: zeros added on every spatial border before the VALID
            convolution; no padding is applied when it is not positive.
        stride: convolution stride.
        batch_normalize: batch norm (in inference mode) is applied only when
            this equals 1.
        activation: callable applied last; skipped when falsy.
        use_bias: whether the convolution has a bias term.
        name: layer name; the batch-norm layer is named ``name + "_bn"``.

    Returns:
        The output tensor.
    """
    inputs = x
    if pad > 0:
        border = [pad, pad]
        inputs = tf.pad(x, [[0, 0], border, border, [0, 0]])

    out = tf.layers.conv2d(
        inputs,
        filters,
        size,
        strides=stride,
        padding="VALID",
        activation=None,
        use_bias=use_bias,
        name=name,
    )

    if batch_normalize == 1:
        out = tf.layers.batch_normalization(
            out, axis=-1, momentum=0.9, training=False, name=name+"_bn")

    return activation(out) if activation else out
29+
30+
# maxpool2d
31+
def maxpool(x, size=2, stride=2, name="maxpool"):
    """Apply 2-D max pooling.

    Args:
        x: input tensor.
        size: pooling window size.
        stride: pooling stride.
        name: name given to the pooling layer. It is now forwarded to the
            layer; previously it was accepted but ignored.

    Returns:
        The pooled tensor.
    """
    return tf.layers.max_pooling2d(x, size, stride, name=name)
33+
34+
# reorg layer
35+
def reorg(x, stride):
    """Rearrange each ``stride`` x ``stride`` spatial patch into channels.

    Non-overlapping patches are extracted (window size equals stride, VALID
    padding), so each spatial side shrinks by a factor of ``stride``.
    """
    window = [1, stride, stride, 1]
    unit_rates = [1, 1, 1, 1]
    return tf.extract_image_patches(x, window, window, unit_rates,
                                    padding="VALID")
38+
39+
40+
def darknet(images, n_last_channels=425):
    """Build the Darknet19 backbone and detection head for YOLOv2.

    Args:
        images: input image tensor.
        n_last_channels: number of channels produced by the final 1x1
            detection convolution.

    Returns:
        The raw detection feature map (no batch norm or activation on the
        last layer).
    """
    def _stack(net, specs):
        # Each spec is (name, filters, kernel); filters=None marks a max-pool.
        # 3x3 convolutions are padded by 1, 1x1 convolutions are unpadded.
        for layer_name, filters, kernel in specs:
            if filters is None:
                net = maxpool(net, name=layer_name)
            elif kernel == 3:
                net = conv2d(net, filters, 3, 1, name=layer_name)
            else:
                net = conv2d(net, filters, 1, name=layer_name)
        return net

    # Layers up to the point where the passthrough branch is taken.
    trunk = [
        ("conv1", 32, 3),
        ("pool1", None, None),
        ("conv2", 64, 3),
        ("pool2", None, None),
        ("conv3_1", 128, 3),
        ("conv3_2", 64, 1),
        ("conv3_3", 128, 3),
        ("pool3", None, None),
        ("conv4_1", 256, 3),
        ("conv4_2", 128, 1),
        ("conv4_3", 256, 3),
        ("pool4", None, None),
        ("conv5_1", 512, 3),
        ("conv5_2", 256, 1),
        ("conv5_3", 512, 3),
        ("conv5_4", 256, 1),
        ("conv5_5", 512, 3),
    ]
    # Remaining backbone layers plus the two extra 3x3 convolutions.
    tail = [
        ("pool5", None, None),
        ("conv6_1", 1024, 3),
        ("conv6_2", 512, 1),
        ("conv6_3", 1024, 3),
        ("conv6_4", 512, 1),
        ("conv6_5", 1024, 3),
        ("conv7_1", 1024, 3),
        ("conv7_2", 1024, 3),
    ]

    passthrough = _stack(images, trunk)
    net = _stack(passthrough, tail)

    # Passthrough branch: squeeze channels, then fold space into depth so it
    # can be concatenated with the coarser main branch.
    passthrough = conv2d(passthrough, 64, 1, name="conv_shortcut")
    passthrough = reorg(passthrough, 2)
    net = tf.concat([passthrough, net], axis=-1)
    net = conv2d(net, 1024, 3, 1, name="conv8")

    # Detection layer: plain biased 1x1 convolution.
    return conv2d(net, n_last_channels, 1, batch_normalize=0,
                  activation=None, use_bias=True, name="conv_dec")
78+
79+
80+
81+
if __name__ == "__main__":
    # Smoke test: build the network on a random input, restore the
    # checkpoint and print the shape of the output.
    dummy_input = tf.random_normal([1, 416, 416, 3])
    output = darknet(dummy_input)

    restorer = tf.train.Saver()
    with tf.Session() as session:
        restorer.restore(session, "./checkpoint_dir/yolo2_coco.ckpt")
        result = session.run(output)
        print(result.shape)
89+

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /