Commit 0d047f4

small yolov1 by tf
1 parent 0355edc commit 0d047f4

File tree

1 file changed: +237 −0 lines changed


ObjectDetections/yolo/yolo_tf.py

Lines changed: 237 additions & 0 deletions
@@ -0,0 +1,237 @@
"""
YOLO v1 implemented with TensorFlow
"""

import numpy as np
import tensorflow as tf
import cv2


def leak_relu(x, alpha=0.1):
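    # leaky ReLU; YOLO v1 applies it (alpha = 0.1) after every layer except the final linear fc layer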
    return tf.maximum(alpha * x, x)

class Yolo(object):
    def __init__(self, weights_file, verbose=True):
        self.verbose = verbose
        # detection params
        self.S = 7  # grid size: the input image is divided into S x S cells
        self.B = 2  # boxes per cell
        self.classes = ["aeroplane", "bicycle", "bird", "boat", "bottle",
                        "bus", "car", "cat", "chair", "cow", "diningtable",
                        "dog", "horse", "motorbike", "person", "pottedplant",
                        "sheep", "sofa", "train", "tvmonitor"]
        self.C = len(self.classes)  # number of classes
        # offsets for box centers (top-left corner of each cell)
        self.x_offset = np.transpose(np.reshape(np.array([np.arange(self.S)] * self.S * self.B),
                                                [self.B, self.S, self.S]), [1, 2, 0])
        self.y_offset = np.transpose(self.x_offset, [1, 0, 2])
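        # x_offset[row, col, b] == col and y_offset[row, col, b] == row, i.e. the grid-cell
        # indices; they are added to the per-cell x, y predictions in _build_detector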

        self.threshold = 0.2  # confidence score threshold
        self.iou_threshold = 0.4
        # the maximum number of boxes to be selected by non max suppression
        self.max_output_size = 10

        self.sess = tf.Session()
        self._build_net()
        self._build_detector()
        self._load_weights(weights_file)

    def _build_net(self):
        """Build the network"""
        if self.verbose:
            print("Start to build the network ...")
        self.images = tf.placeholder(tf.float32, [None, 448, 448, 3])
        net = self._conv_layer(self.images, 1, 64, 7, 2)
        net = self._maxpool_layer(net, 1, 2, 2)
        net = self._conv_layer(net, 2, 192, 3, 1)
        net = self._maxpool_layer(net, 2, 2, 2)
        net = self._conv_layer(net, 3, 128, 1, 1)
        net = self._conv_layer(net, 4, 256, 3, 1)
        net = self._conv_layer(net, 5, 256, 1, 1)
        net = self._conv_layer(net, 6, 512, 3, 1)
        net = self._maxpool_layer(net, 6, 2, 2)
        net = self._conv_layer(net, 7, 256, 1, 1)
        net = self._conv_layer(net, 8, 512, 3, 1)
        net = self._conv_layer(net, 9, 256, 1, 1)
        net = self._conv_layer(net, 10, 512, 3, 1)
        net = self._conv_layer(net, 11, 256, 1, 1)
        net = self._conv_layer(net, 12, 512, 3, 1)
        net = self._conv_layer(net, 13, 256, 1, 1)
        net = self._conv_layer(net, 14, 512, 3, 1)
        net = self._conv_layer(net, 15, 512, 1, 1)
        net = self._conv_layer(net, 16, 1024, 3, 1)
        net = self._maxpool_layer(net, 16, 2, 2)
        net = self._conv_layer(net, 17, 512, 1, 1)
        net = self._conv_layer(net, 18, 1024, 3, 1)
        net = self._conv_layer(net, 19, 512, 1, 1)
        net = self._conv_layer(net, 20, 1024, 3, 1)
        net = self._conv_layer(net, 21, 1024, 3, 1)
        net = self._conv_layer(net, 22, 1024, 3, 2)
        net = self._conv_layer(net, 23, 1024, 3, 1)
        net = self._conv_layer(net, 24, 1024, 3, 1)
        net = self._flatten(net)
        net = self._fc_layer(net, 25, 512, activation=leak_relu)
        net = self._fc_layer(net, 26, 4096, activation=leak_relu)
        net = self._fc_layer(net, 27, self.S * self.S * (self.C + 5 * self.B))
        self.predicts = net
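        # predicts has length S*S*(C + 5*B) = 7*7*30 = 1470 per image, laid out as
        # [7*7*20 class probs | 7*7*2 box confidences | 7*7*2*4 box coordinates]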

    def _build_detector(self):
        """Interpret the net output and get the predicted boxes"""
        # the width and height of the original image
        self.width = tf.placeholder(tf.float32, name="img_w")
        self.height = tf.placeholder(tf.float32, name="img_h")
        # get class probs, confidences and boxes from the net output
        idx1 = self.S * self.S * self.C
        idx2 = idx1 + self.S * self.S * self.B
        # class prediction
        class_probs = tf.reshape(self.predicts[0, :idx1], [self.S, self.S, self.C])
        # confidence
        confs = tf.reshape(self.predicts[0, idx1:idx2], [self.S, self.S, self.B])
        # boxes -> (x, y, w, h)
        boxes = tf.reshape(self.predicts[0, idx2:], [self.S, self.S, self.B, 4])

        # convert x, y from cell-relative values to pixel coordinates (relative to the
        # top-left corner of the image); w, h are predicted as square roots, so square
        # them and multiply by the image width/height
        boxes = tf.stack([(boxes[:, :, :, 0] + tf.constant(self.x_offset, dtype=tf.float32)) / self.S * self.width,
                          (boxes[:, :, :, 1] + tf.constant(self.y_offset, dtype=tf.float32)) / self.S * self.height,
                          tf.square(boxes[:, :, :, 2]) * self.width,
                          tf.square(boxes[:, :, :, 3]) * self.height], axis=3)

        # class-specific confidence scores [S, S, B, C]
        scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)

        scores = tf.reshape(scores, [-1, self.C])  # [S*S*B, C]
        boxes = tf.reshape(boxes, [-1, 4])  # [S*S*B, 4]

        # for each box keep only the class with the highest score
        box_classes = tf.argmax(scores, axis=1)
        box_class_scores = tf.reduce_max(scores, axis=1)

        # filter the boxes by the score threshold
        filter_mask = box_class_scores >= self.threshold
        scores = tf.boolean_mask(box_class_scores, filter_mask)
        boxes = tf.boolean_mask(boxes, filter_mask)
        box_classes = tf.boolean_mask(box_classes, filter_mask)

        # non max suppression (does not distinguish between classes)
        # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
        # box (x, y, w, h) -> box (x1, y1, x2, y2)
        _boxes = tf.stack([boxes[:, 0] - 0.5 * boxes[:, 2], boxes[:, 1] - 0.5 * boxes[:, 3],
                           boxes[:, 0] + 0.5 * boxes[:, 2], boxes[:, 1] + 0.5 * boxes[:, 3]], axis=1)
        nms_indices = tf.image.non_max_suppression(_boxes, scores,
                                                   self.max_output_size, self.iou_threshold)
        self.scores = tf.gather(scores, nms_indices)
        self.boxes = tf.gather(boxes, nms_indices)
        self.box_classes = tf.gather(box_classes, nms_indices)
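        # after NMS: self.boxes hold (x_center, y_center, w, h) in pixels of the original
        # image; self.scores and self.box_classes are the matching scores and class indices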

    def _conv_layer(self, x, id, num_filters, filter_size, stride):
        """Conv layer"""
        in_channels = x.get_shape().as_list()[-1]
        weight = tf.Variable(tf.truncated_normal([filter_size, filter_size,
                                                  in_channels, num_filters], stddev=0.1))
        bias = tf.Variable(tf.zeros([num_filters, ]))
        # pad the input symmetrically by filter_size // 2, then convolve with
        # padding="VALID" instead of relying on TF's "SAME" padding
        pad_size = filter_size // 2
        pad_mat = np.array([[0, 0], [pad_size, pad_size], [pad_size, pad_size], [0, 0]])
        x_pad = tf.pad(x, pad_mat)
        conv = tf.nn.conv2d(x_pad, weight, strides=[1, stride, stride, 1], padding="VALID")
        output = leak_relu(tf.nn.bias_add(conv, bias))
        if self.verbose:
            print(" Layer %d: type=Conv, num_filter=%d, filter_size=%d, stride=%d, output_shape=%s"
                  % (id, num_filters, filter_size, stride, str(output.get_shape())))
        return output

    def _fc_layer(self, x, id, num_out, activation=None):
        """Fully connected layer"""
        num_in = x.get_shape().as_list()[-1]
        weight = tf.Variable(tf.truncated_normal([num_in, num_out], stddev=0.1))
        bias = tf.Variable(tf.zeros([num_out, ]))
        output = tf.nn.xw_plus_b(x, weight, bias)
        if activation:
            output = activation(output)
        if self.verbose:
            print(" Layer %d: type=Fc, num_out=%d, output_shape=%s"
                  % (id, num_out, str(output.get_shape())))
        return output

    def _maxpool_layer(self, x, id, pool_size, stride):
        output = tf.nn.max_pool(x, [1, pool_size, pool_size, 1],
                                strides=[1, stride, stride, 1], padding="SAME")
        if self.verbose:
            print(" Layer %d: type=MaxPool, pool_size=%d, stride=%d, output_shape=%s"
                  % (id, pool_size, stride, str(output.get_shape())))
        return output

    def _flatten(self, x):
        """Flatten the feature map x"""
        tran_x = tf.transpose(x, [0, 3, 1, 2])  # channel-first order
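        # NOTE: the channel-first transpose is presumably meant to match the flattening
        # order used when the pretrained YOLO_small weights were exported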
        nums = np.prod(x.get_shape().as_list()[1:])
        return tf.reshape(tran_x, [-1, nums])

    def _load_weights(self, weights_file):
        """Load weights from file"""
        if self.verbose:
            print("Start to load weights from file: %s" % (weights_file))
        saver = tf.train.Saver()
        saver.restore(self.sess, weights_file)

    def detect_from_file(self, image_file, detected_boxes_file="boxes.txt",
                         detected_image_file="detected_image.jpg"):
        """Do detection given an image file"""
        # read image
        image = cv2.imread(image_file)
        img_h, img_w, _ = image.shape
        scores, boxes, box_classes = self._detect_from_image(image)
        predict_boxes = []
        for i in range(len(scores)):
            predict_boxes.append((self.classes[box_classes[i]], boxes[i, 0],
                                  boxes[i, 1], boxes[i, 2], boxes[i, 3], scores[i]))
        # pass the file names as keyword arguments so they are not bound to imshow
        self.show_results(image, predict_boxes, detected_boxes_file=detected_boxes_file,
                          detected_image_file=detected_image_file)

    def _detect_from_image(self, image):
        """Do detection given a cv2 image (BGR, uint8)"""
        img_h, img_w, _ = image.shape
        img_resized = cv2.resize(image, (448, 448))
        img_RGB = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
        img_resized_np = np.asarray(img_RGB)
        _images = np.zeros((1, 448, 448, 3), dtype=np.float32)
        _images[0] = (img_resized_np / 255.0) * 2.0 - 1.0  # scale pixel values to [-1, 1]
        scores, boxes, box_classes = self.sess.run([self.scores, self.boxes, self.box_classes],
                                                   feed_dict={self.images: _images, self.width: img_w, self.height: img_h})
        return scores, boxes, box_classes

    def show_results(self, image, results, imshow=True, detected_boxes_file=None,
                     detected_image_file=None):
        """Show the detection boxes"""
        img_cp = image.copy()
        if detected_boxes_file:
            f = open(detected_boxes_file, "w")
        # draw boxes
        for i in range(len(results)):
            x = int(results[i][1])
            y = int(results[i][2])
            w = int(results[i][3]) // 2
            h = int(results[i][4]) // 2
            if self.verbose:
                print(" class: %s, [x, y, w, h]=[%d, %d, %d, %d], confidence=%f" % (results[i][0],
                      x, y, w, h, results[i][-1]))

            cv2.rectangle(img_cp, (x - w, y - h), (x + w, y + h), (0, 255, 0), 2)
            cv2.rectangle(img_cp, (x - w, y - h - 20), (x + w, y - h), (125, 125, 125), -1)
            cv2.putText(img_cp, results[i][0] + ' : %.2f' % results[i][5], (x - w + 5, y - h - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
            if detected_boxes_file:
                f.write(results[i][0] + ',' + str(x) + ',' + str(y) + ',' +
                        str(w) + ',' + str(h) + ',' + str(results[i][5]) + '\n')
        if imshow:
            cv2.imshow('YOLO_small detection', img_cp)
            cv2.waitKey(1)
        if detected_image_file:
            cv2.imwrite(detected_image_file, img_cp)
        if detected_boxes_file:
            f.close()


if __name__ == "__main__":
    yolo_net = Yolo("./weights/YOLO_small.ckpt")
    yolo_net.detect_from_file("./test/car.jpg")
