1
+ import numpy as np
2
+ import argparse
3
+ import tensorflow as tf
4
+ import cv2
5
+ import pathlib
6
+ import os
7
+ import datetime
8
+ import pandas as pd
9
+ from PIL import Image
10
+
11
+ from object_detection .utils import ops as utils_ops
12
+ from object_detection .utils import label_map_util
13
+ from object_detection .utils import visualization_utils as vis_util
14
+
15
# Point the `tf` name used inside `object_detection.utils.ops` at the
# TensorFlow 1.x compatibility API.
utils_ops.tf = tf.compat.v1

# Make `tf.io.gfile` reachable under the older `tf.gfile` attribute
# (presumably for library code that still uses the old path -- confirm).
tf.gfile = tf.io.gfile
20
+
21
+
22
def load_model(model_path):
    """Load a TensorFlow SavedModel.

    Args:
        model_path: Location passed straight to ``tf.saved_model.load``.

    Returns:
        Whatever ``tf.saved_model.load`` returns for ``model_path``.
    """
    return tf.saved_model.load(model_path)
25
+
26
+
27
def run_inference_for_single_image(model, image):
    """Run the detection model on one image and return NumPy results.

    Args:
        model: Callable that takes a batched image tensor and returns a dict
            of batched output tensors including ``'num_detections'`` and
            ``'detection_classes'``.
        image: Single image; converted with ``np.asarray`` before use.

    Returns:
        Dict of NumPy arrays with the batch dimension removed and truncated
        to the first ``num_detections`` entries, plus ``'num_detections'`` as
        an ``int``. When the model outputs ``'detection_masks'``, an extra
        ``'detection_masks_reframed'`` entry holds the image-sized uint8
        masks.
    """
    frame = np.asarray(image)

    # The model is called on a batch, so prepend a batch axis of size one.
    batch = tf.convert_to_tensor(frame)[tf.newaxis, ...]
    raw_outputs = model(batch)

    # Every remaining output is batched: keep element 0 of the batch and only
    # the first `count` detections, converted to NumPy.
    count = int(raw_outputs.pop('num_detections'))
    results = {}
    for name, tensor in raw_outputs.items():
        results[name] = tensor[0, :count].numpy()
    results['num_detections'] = count

    # Class ids are cast to integers.
    results['detection_classes'] = results['detection_classes'].astype(np.int64)

    if 'detection_masks' not in results:
        return results

    # Mask models: map the per-box masks onto the full image, then binarise
    # them at 0.5.
    height, width = frame.shape[0], frame.shape[1]
    full_size_masks = utils_ops.reframe_box_masks_to_image_masks(
        results['detection_masks'], results['detection_boxes'],
        height, width)
    binary_masks = tf.cast(full_size_masks > 0.5, tf.uint8)
    results['detection_masks_reframed'] = binary_masks.numpy()

    return results
58
+
59
+
60
def run_inference(model, category_index, cap, threshold, show_video_steam, label_to_look_for, output_directory):
    """Detect objects frame by frame and save a crop for each watched-label hit.

    Frames are read from ``cap`` until a read fails or, when the preview
    window is shown, the user presses ``q``. Every detection whose score is at
    least ``threshold`` and whose label equals ``label_to_look_for`` is
    cropped from the unannotated frame, written to
    ``<output_directory>/images/<n>.jpg`` and logged with a timestamp in
    ``<output_directory>/results.csv``. An existing ``results.csv`` is
    extended rather than overwritten.

    Args:
        model: Detection model passed to ``run_inference_for_single_image``.
        category_index: Mapping from class id to a dict with a ``'name'`` key.
        cap: Frame source with ``read()`` and ``release()``; the script passes
            a ``cv2.VideoCapture``. It is released before this function
            returns or raises.
        threshold: Minimum detection score for a detection to be considered.
        show_video_steam: When true, show the annotated frames in a window.
            (The parameter name is kept as-is for existing callers.)
        label_to_look_for: Label name that triggers saving a crop.
        output_directory: Directory for ``results.csv`` and ``images/``;
            created if missing.
    """
    images_directory = output_directory + '/images'
    results_path = output_directory + '/results.csv'

    # Create output directory if not already created
    os.makedirs(output_directory, exist_ok=True)
    os.makedirs(images_directory, exist_ok=True)

    if os.path.exists(results_path):
        df = pd.read_csv(results_path)
    else:
        df = pd.DataFrame(columns=['timestamp', 'img_path'])

    try:
        while True:
            ret, image_np = cap.read()
            if not ret or image_np is None:
                # Camera disconnected or stream ended: stop instead of
                # failing on a missing frame.
                break

            # The visualisation below draws on image_np in place, so keep an
            # untouched copy for cropping.
            image_show = np.copy(image_np)
            image_height, image_width = image_np.shape[:2]

            # Actual detection.
            # NOTE(review): the frame is fed to the model in the channel order
            # delivered by `cap` (BGR for cv2.VideoCapture) -- confirm this is
            # what the model expects.
            output_dict = run_inference_for_single_image(model, image_np)

            if show_video_steam:
                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index,
                    instance_masks=output_dict.get('detection_masks_reframed', None),
                    use_normalized_coordinates=True,
                    line_thickness=8)
                cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break

            # Save incident (could be extended to send a email or something)
            crop_source = None
            saved_any = False
            for index, score in enumerate(output_dict['detection_scores']):
                if score < threshold:
                    continue

                # Class ids missing from the label map are skipped rather
                # than raising KeyError.
                category = category_index.get(output_dict['detection_classes'][index])
                if category is None or category['name'] != label_to_look_for:
                    continue

                # Boxes are normalised (ymin, xmin, ymax, xmax); scale to pixels.
                ymin, xmin, ymax, xmax = output_dict['detection_boxes'][index]
                x_min = int(xmin * image_width)
                y_min = int(ymin * image_height)
                x_max = int(xmax * image_width)
                y_max = int(ymax * image_height)
                if x_max <= x_min or y_max <= y_min:
                    # A zero-area crop cannot be written as a JPEG.
                    continue

                if crop_source is None:
                    # Swap the channel order once per frame so PIL sees RGB.
                    # (COLOR_BGR2RGB performs the same swap as the
                    # COLOR_RGB2BGR used previously.)
                    crop_source = Image.fromarray(
                        cv2.cvtColor(image_show, cv2.COLOR_BGR2RGB))

                cropped_img = crop_source.crop((x_min, y_min, x_max, y_max))
                file_path = output_directory + '/images/' + str(len(df)) + '.jpg'
                cropped_img.save(file_path, "JPEG", icc_profile=cropped_img.info.get('icc_profile'))
                df.loc[len(df)] = [datetime.datetime.now(), file_path]
                saved_any = True

            if saved_any:
                # Persist once per frame instead of once per detection.
                df.to_csv(results_path, index=False)
    finally:
        # Always free the capture device, including on errors and Ctrl-C.
        cap.release()
        if show_video_steam:
            cv2.destroyAllWindows()
117
+
118
+
119
def build_arg_parser():
    """Build the command-line parser for the webcam detection script.

    Returns:
        An ``argparse.ArgumentParser`` with the model, label map, threshold,
        preview-window, label, output-directory and camera options.
    """
    parser = argparse.ArgumentParser(description='Detect objects inside webcam videostream')
    parser.add_argument('-m', '--model', type=str, required=True, help='Model Path')
    parser.add_argument('-l', '--labelmap', type=str, required=True, help='Path to Labelmap')
    parser.add_argument('-t', '--threshold', type=float, default=0.5, help='Threshold for bounding boxes')
    # The window is on by default; `-s/--show` is kept so existing command
    # lines still work, and `--no-show` is the way to turn the window off.
    parser.add_argument('-s', '--show', dest='show', default=True, action='store_true', help='Show window')
    parser.add_argument('--no-show', dest='show', action='store_false', help='Do not show window')
    parser.add_argument('-la', '--label', default='person', type=str, help='Label name to detect')
    parser.add_argument('-o', '--output_directory', default='results', type=str, help='Directory for the outputs')
    parser.add_argument('-c', '--camera', type=int, default=2, help='Index of the camera to read from')
    return parser


if __name__ == '__main__':
    parser = build_arg_parser()
    args = parser.parse_args()

    detection_model = load_model(args.model)
    category_index = label_map_util.create_category_index_from_labelmap(args.labelmap, use_display_name=True)

    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        # Fail with a clear message instead of crashing on the first frame.
        parser.error('Could not open camera with index %d' % args.camera)
    run_inference(detection_model, category_index, cap, args.threshold, args.show, args.label, args.output_directory)
0 commit comments