Commit 0d047f4

small yolov1 by tf
1 parent 0355edc commit 0d047f4

File tree

1 file changed: +237 −0 lines changed


ObjectDetections/yolo/yolo_tf.py

Lines changed: 237 additions & 0 deletions
@@ -0,0 +1,237 @@
"""
YOLO v1 implemented with TensorFlow
"""

import numpy as np
import tensorflow as tf
import cv2


def leak_relu(x, alpha=0.1):
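    # leaky ReLU; YOLO v1 applies it (alpha = 0.1) after every layer except the final linear fc layer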
    return tf.maximum(alpha * x, x)

class Yolo(object):
    def __init__(self, weights_file, verbose=True):
        self.verbose = verbose
        # detection params
        self.S = 7  # grid size: the input image is divided into S x S cells
        self.B = 2  # boxes per cell
        self.classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
                        "bus", "car", "cat", "chair", "cow", "diningtable",
                        "dog", "horse", "motorbike", "person", "pottedplant",
                        "sheep", "sofa", "train", "tvmonitor"]
        self.C = len(self.classes)  # number of classes
        # offsets for box centers (top-left corner of each cell)
        self.x_offset = np.transpose(np.reshape(np.array([np.arange(self.S)] * self.S * self.B),
                                                [self.B, self.S, self.S]), [1, 2, 0])
        self.y_offset = np.transpose(self.x_offset, [1, 0, 2])
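        # x_offset[row, col, b] == col and y_offset[row, col, b] == row, i.e. the grid-cell
        # indices; they are added to the per-cell x, y predictions in _build_detector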

        self.threshold = 0.2  # confidence score threshold
        self.iou_threshold = 0.4
        # the maximum number of boxes to be selected by non max suppression
        self.max_output_size = 10

        self.sess = tf.Session()
        self._build_net()
        self._build_detector()
        self._load_weights(weights_file)

    def _build_net(self):
        """Build the network"""
        if self.verbose:
            print("Start to build the network ...")
        self.images = tf.placeholder(tf.float32, [None, 448, 448, 3])
        net = self._conv_layer(self.images, 1, 64, 7, 2)
        net = self._maxpool_layer(net, 1, 2, 2)
        net = self._conv_layer(net, 2, 192, 3, 1)
        net = self._maxpool_layer(net, 2, 2, 2)
        net = self._conv_layer(net, 3, 128, 1, 1)
        net = self._conv_layer(net, 4, 256, 3, 1)
        net = self._conv_layer(net, 5, 256, 1, 1)
        net = self._conv_layer(net, 6, 512, 3, 1)
        net = self._maxpool_layer(net, 6, 2, 2)
        net = self._conv_layer(net, 7, 256, 1, 1)
        net = self._conv_layer(net, 8, 512, 3, 1)
        net = self._conv_layer(net, 9, 256, 1, 1)
        net = self._conv_layer(net, 10, 512, 3, 1)
        net = self._conv_layer(net, 11, 256, 1, 1)
        net = self._conv_layer(net, 12, 512, 3, 1)
        net = self._conv_layer(net, 13, 256, 1, 1)
        net = self._conv_layer(net, 14, 512, 3, 1)
        net = self._conv_layer(net, 15, 512, 1, 1)
        net = self._conv_layer(net, 16, 1024, 3, 1)
        net = self._maxpool_layer(net, 16, 2, 2)
        net = self._conv_layer(net, 17, 512, 1, 1)
        net = self._conv_layer(net, 18, 1024, 3, 1)
        net = self._conv_layer(net, 19, 512, 1, 1)
        net = self._conv_layer(net, 20, 1024, 3, 1)
        net = self._conv_layer(net, 21, 1024, 3, 1)
        net = self._conv_layer(net, 22, 1024, 3, 2)
        net = self._conv_layer(net, 23, 1024, 3, 1)
        net = self._conv_layer(net, 24, 1024, 3, 1)
        net = self._flatten(net)
        net = self._fc_layer(net, 25, 512, activation=leak_relu)
        net = self._fc_layer(net, 26, 4096, activation=leak_relu)
        net = self._fc_layer(net, 27, self.S * self.S * (self.C + 5 * self.B))
        self.predicts = net
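        # predicts has length S*S*(C + 5*B) = 7*7*30 = 1470 per image, laid out as
        # [7*7*20 class probs | 7*7*2 box confidences | 7*7*2*4 box coordinates]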

    def _build_detector(self):
        """Interpret the net output and get the predicted boxes"""
        # the width and height of the original image
        self.width = tf.placeholder(tf.float32, name="img_w")
        self.height = tf.placeholder(tf.float32, name="img_h")
        # get class probs, confidences and boxes from the net output
        idx1 = self.S * self.S * self.C
        idx2 = idx1 + self.S * self.S * self.B
        # class prediction
        class_probs = tf.reshape(self.predicts[0, :idx1], [self.S, self.S, self.C])
        # confidence
        confs = tf.reshape(self.predicts[0, idx1:idx2], [self.S, self.S, self.B])
        # boxes -> (x, y, w, h)
        boxes = tf.reshape(self.predicts[0, idx2:], [self.S, self.S, self.B, 4])

        # convert x, y from cell-relative values to pixel coordinates (relative to the
        # top-left corner of the image); w, h are predicted as square roots, so square
        # them and multiply by the image width/height
        boxes = tf.stack([(boxes[:, :, :, 0] + tf.constant(self.x_offset, dtype=tf.float32)) / self.S * self.width,
                          (boxes[:, :, :, 1] + tf.constant(self.y_offset, dtype=tf.float32)) / self.S * self.height,
                          tf.square(boxes[:, :, :, 2]) * self.width,
                          tf.square(boxes[:, :, :, 3]) * self.height], axis=3)

        # class-specific confidence scores [S, S, B, C]
        scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)

        scores = tf.reshape(scores, [-1, self.C])  # [S*S*B, C]
        boxes = tf.reshape(boxes, [-1, 4])  # [S*S*B, 4]

        # for each box keep only the class with the highest score
        box_classes = tf.argmax(scores, axis=1)
        box_class_scores = tf.reduce_max(scores, axis=1)

        # filter the boxes by the score threshold
        filter_mask = box_class_scores >= self.threshold
        scores = tf.boolean_mask(box_class_scores, filter_mask)
        boxes = tf.boolean_mask(boxes, filter_mask)
        box_classes = tf.boolean_mask(box_classes, filter_mask)

        # non max suppression (does not distinguish between classes)
        # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
        # box (x, y, w, h) -> box (x1, y1, x2, y2)
        _boxes = tf.stack([boxes[:, 0] - 0.5 * boxes[:, 2], boxes[:, 1] - 0.5 * boxes[:, 3],
                           boxes[:, 0] + 0.5 * boxes[:, 2], boxes[:, 1] + 0.5 * boxes[:, 3]], axis=1)
        nms_indices = tf.image.non_max_suppression(_boxes, scores,
                                                   self.max_output_size, self.iou_threshold)
        self.scores = tf.gather(scores, nms_indices)
        self.boxes = tf.gather(boxes, nms_indices)
        self.box_classes = tf.gather(box_classes, nms_indices)
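        # after NMS: self.boxes hold (x_center, y_center, w, h) in pixels of the original
        # image; self.scores and self.box_classes are the matching scores and class indices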

    def _conv_layer(self, x, id, num_filters, filter_size, stride):
        """Conv layer"""
        in_channels = x.get_shape().as_list()[-1]
        weight = tf.Variable(tf.truncated_normal([filter_size, filter_size,
                                                  in_channels, num_filters], stddev=0.1))
        bias = tf.Variable(tf.zeros([num_filters, ]))
        # pad the input symmetrically by filter_size // 2, then convolve with
        # padding="VALID" instead of relying on TF's "SAME" padding
        pad_size = filter_size // 2
        pad_mat = np.array([[0, 0], [pad_size, pad_size], [pad_size, pad_size], [0, 0]])
        x_pad = tf.pad(x, pad_mat)
        conv = tf.nn.conv2d(x_pad, weight, strides=[1, stride, stride, 1], padding="VALID")
        output = leak_relu(tf.nn.bias_add(conv, bias))
        if self.verbose:
            print(" Layer %d: type=Conv, num_filter=%d, filter_size=%d, stride=%d, output_shape=%s"
                  % (id, num_filters, filter_size, stride, str(output.get_shape())))
        return output

    def _fc_layer(self, x, id, num_out, activation=None):
        """Fully connected layer"""
        num_in = x.get_shape().as_list()[-1]
        weight = tf.Variable(tf.truncated_normal([num_in, num_out], stddev=0.1))
        bias = tf.Variable(tf.zeros([num_out, ]))
        output = tf.nn.xw_plus_b(x, weight, bias)
        if activation:
            output = activation(output)
        if self.verbose:
            print(" Layer %d: type=Fc, num_out=%d, output_shape=%s"
                  % (id, num_out, str(output.get_shape())))
        return output

    def _maxpool_layer(self, x, id, pool_size, stride):
        output = tf.nn.max_pool(x, [1, pool_size, pool_size, 1],
                                strides=[1, stride, stride, 1], padding="SAME")
        if self.verbose:
            print(" Layer %d: type=MaxPool, pool_size=%d, stride=%d, output_shape=%s"
                  % (id, pool_size, stride, str(output.get_shape())))
        return output

    def _flatten(self, x):
        """Flatten the feature map x"""
        tran_x = tf.transpose(x, [0, 3, 1, 2])  # channel-first order
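        # NOTE: the channel-first transpose is presumably meant to match the flattening
        # order used when the pretrained YOLO_small weights were exported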
        nums = np.prod(x.get_shape().as_list()[1:])
        return tf.reshape(tran_x, [-1, nums])

    def _load_weights(self, weights_file):
        """Load weights from file"""
        if self.verbose:
            print("Start to load weights from file: %s" % (weights_file))
        saver = tf.train.Saver()
        saver.restore(self.sess, weights_file)

    def detect_from_file(self, image_file, detected_boxes_file="boxes.txt",
                         detected_image_file="detected_image.jpg"):
        """Do detection given an image file"""
        # read image
        image = cv2.imread(image_file)
        img_h, img_w, _ = image.shape
        scores, boxes, box_classes = self._detect_from_image(image)
        predict_boxes = []
        for i in range(len(scores)):
            predict_boxes.append((self.classes[box_classes[i]], boxes[i, 0],
                                  boxes[i, 1], boxes[i, 2], boxes[i, 3], scores[i]))
        # pass the file names as keyword arguments so they are not bound to imshow
        self.show_results(image, predict_boxes, detected_boxes_file=detected_boxes_file,
                          detected_image_file=detected_image_file)

    def _detect_from_image(self, image):
        """Do detection given a cv2 image (BGR, uint8)"""
        img_h, img_w, _ = image.shape
        img_resized = cv2.resize(image, (448, 448))
        img_RGB = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
        img_resized_np = np.asarray(img_RGB)
        _images = np.zeros((1, 448, 448, 3), dtype=np.float32)
        _images[0] = (img_resized_np / 255.0) * 2.0 - 1.0  # scale pixel values to [-1, 1]
        scores, boxes, box_classes = self.sess.run([self.scores, self.boxes, self.box_classes],
                                                   feed_dict={self.images: _images, self.width: img_w, self.height: img_h})
        return scores, boxes, box_classes

    def show_results(self, image, results, imshow=True, detected_boxes_file=None,
                     detected_image_file=None):
        """Show the detection boxes"""
        img_cp = image.copy()
        if detected_boxes_file:
            f = open(detected_boxes_file, "w")
        # draw boxes
        for i in range(len(results)):
            x = int(results[i][1])
            y = int(results[i][2])
            w = int(results[i][3]) // 2
            h = int(results[i][4]) // 2
            if self.verbose:
                print(" class: %s, [x, y, w, h]=[%d, %d, %d, %d], confidence=%f" % (results[i][0],
                      x, y, w, h, results[i][-1]))

            cv2.rectangle(img_cp, (x - w, y - h), (x + w, y + h), (0, 255, 0), 2)
            cv2.rectangle(img_cp, (x - w, y - h - 20), (x + w, y - h), (125, 125, 125), -1)
            cv2.putText(img_cp, results[i][0] + ' : %.2f' % results[i][5], (x - w + 5, y - h - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
            if detected_boxes_file:
                f.write(results[i][0] + ',' + str(x) + ',' + str(y) + ',' +
                        str(w) + ',' + str(h) + ',' + str(results[i][5]) + '\n')
        if imshow:
            cv2.imshow('YOLO_small detection', img_cp)
            cv2.waitKey(1)
        if detected_image_file:
            cv2.imwrite(detected_image_file, img_cp)
        if detected_boxes_file:
            f.close()


if __name__ == "__main__":
    yolo_net = Yolo("./weights/YOLO_small.ckpt")
    yolo_net.detect_from_file("./test/car.jpg")
