I wrote this code to track small moving objects falling down a chute. The code works but runs too slowly: using 60 FPS 1920 by 1080 footage, the code only runs at about 10 FPS. The problem is somewhat self-explanatory: I need the program to process footage accurately at real-time speeds, and a very high FPS is necessary because the parts move extremely rapidly. Is there anything I can do to improve the run time? I initially tried using a simple neural network, but training it proved excessively time-consuming, while this approach yielded an accurate result in much less time.
I'm a mechanical engineer and had to learn this in about a week, so sorry for any obvious mistakes.
Video footage can be seen here: https://www.youtube.com/watch?v=Zs5YekjqhxA&feature=youtu.be
import cv2
import numpy as np
import time
start_time = time.time()  # wall-clock start; used for the runtime report at exit
count = 0  # total parts counted so far
# Pool of currently-free tracker IDs.  IDs are recycled back into this list
# when a track is retired, so at most 10 objects are tracked at once.
ID = list(range(10))
# Active tracks; each entry is [id, 0, cv2 tracker object, [box history]].
TrackList = []
def nothing(x):
    """Do-nothing placeholder callback; accepts and ignores *x*.

    Presumably intended as a callback for a cv2 trackbar/GUI hook —
    TODO confirm, as no caller is visible in this file.
    """
    return None
def isolate(img, vertices):
    """Return a copy of *img* with everything outside the given polygon(s) blacked out.

    Parameters
    ----------
    img : numpy.ndarray
        Frame to mask; either a colour image of shape (H, W, C) or a
        single-channel image of shape (H, W).
    vertices : numpy.ndarray
        Polygon vertex array in the form cv2.fillPoly accepts, e.g.
        np.array([[(x0, y0), (x1, y1), ...]], np.int32).

    Returns
    -------
    numpy.ndarray
        Same shape and dtype as *img*; pixels outside the polygon are zero.
    """
    mask = np.zeros_like(img)
    # One 255 per channel so the polygon fill is "white" in any colour depth.
    # The original unconditionally read img.shape[2] and crashed on grayscale
    # input; this keeps identical behaviour for 3-D arrays and additionally
    # accepts single-channel frames.
    channel_count = img.shape[2] if img.ndim == 3 else 1
    match = (255,) * channel_count
    cv2.fillPoly(mask, vertices, match)
    return cv2.bitwise_and(img, mask)
# ---------------------------------------------------------------------------
# Main pipeline (module-level script): read frames in pairs, background-
# subtract, detect new moving blobs, hand them to CSRT trackers, and count
# parts that either stop lower down the chute or leave the frame.
# ---------------------------------------------------------------------------
# read video input
cap=cv2.VideoCapture('testGood.mp4')
# background removal initiation, either KNN or MOG2; KNN yielded best results in testing
back=cv2.createBackgroundSubtractorKNN()
# grab initial frames (processing always works on a consecutive pair)
_,frameCap1=cap.read()
check , frameCap2=cap.read()
# main loop
# NOTE(review): cap.isOpened is a bound method object, which is always
# truthy — this was probably meant to be cap.isOpened().  The loop still
# terminates via the `check` test below once frames run out.
while cap.isOpened:
    # ensure there are frames to read
    if check == False:
        break
    # image preprocessing
    # declare region of interest, eliminating some background issues
    # (top-left, bottom-left, bottom-right, top-right corners, in pixels)
    tlX,tlY,blX,blY,brX,brY,trX,trY=400,0,400,800,1480,800,1480,0
    region=[(tlX,tlY), (blX, blY),(brX,brY) , (trX, trY) ]
    # NOTE(review): `grab`/`frame` duplicate the roi1/frame1 computation just
    # below; `frame` is kept only as the colour canvas the overlays are drawn
    # on.  Removing the duplicate isolate/pyrDown would save work per frame.
    grab=isolate(frameCap1,np.array([region],np.int32))
    frame=cv2.pyrDown(grab)
    # isolate region of interest in both frames of the pair
    roi1=isolate(frameCap1,np.array([region],np.int32))
    roi2=isolate(frameCap2,np.array([region],np.int32))
    # drop resolution of working frames (pyrDown halves each dimension)
    frame1=cv2.pyrDown(roi1)
    frame2=cv2.pyrDown(roi2)
    # apply background subtraction
    fgmask1=back.apply(frame1)
    fgmask2=back.apply(frame2)
    # remove shadow pixels (the subtractor marks shadows as 127) by zeroing them
    fgmask1[fgmask1==127]=0
    fgmask2[fgmask2==127]=0
    # apply a threshold; not strictly necessary but cleans up some grey noise
    _,thresh1=cv2.threshold(fgmask1,200,255,cv2.THRESH_BINARY)
    _,thresh2=cv2.threshold(fgmask2,200,255,cv2.THRESH_BINARY)
    # find movement: pixels that differ between the two consecutive masks
    diff=cv2.absdiff(thresh1,thresh2)
    contours, _=cv2.findContours(diff,cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    movement=False
    moveBox=[]
    for contour in contours:
        # keep only blobs whose area plausibly matches one part (1350..3500 px)
        if cv2.contourArea(contour)<1350 or cv2.contourArea(contour)>3500:
            continue
        #cv2.rectangle(frame,(x,y), (x+w,y+h),(0,255,0),2)
        (x,y,w,h)=cv2.boundingRect(contour)
        moveBox.append([x,y,w,h])
        movement=True
        # NOTE(review): this trailing `continue` is a no-op and can be removed
        continue
        #cv2.putText(frame, 'Status: ()'.format('Movement'),(x,y),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),3)
    # update existing IDs: advance every active CSRT tracker by one frame
    for tracked in TrackList:
        success, box=tracked[2].update(frame)
        if success:
            x,y,w,h=[int(v) for v in box]
            cv2.rectangle(frame, (x,y), (x+w, y+h),(0,0,255),2)
            cv2.rectangle(thresh1, (x,y), (x+w, y+h),(255,255,255),2)
            tracked[3].append([x,y,w,h])
        else:
            # tracker lost the object this frame; record the gap as None
            tracked[3].append(None)
    # check for tracking which has stopped, or tracking which hasn't moved
    delList=[]
    p=0
    for tracked in TrackList:
        if len(tracked[3])==1:
            # NOTE(review): this `continue` skips the p+=1 at the bottom of the
            # loop, so `p` falls out of sync with the list index whenever a
            # brand-new track precedes older ones — later delList entries then
            # point at the wrong element.  TODO confirm and fix.
            continue
        moved=True
        n=len(tracked[3])-1
        # box identical to the previous frame but different from the first box:
        # the part moved and then stopped
        if tracked[3][n]==tracked[3][n-1] and tracked[3][0]!=tracked[3][n]:
            if tracked[3][n][1]>tracked[3][0][1]:
                # it ended up lower than it started (y grew): count it
                count+=1
                print('count1: ',count)
                ID.append(tracked[0])
                cv2.putText(frame, 'Counted',(tracked[3][-2][0],tracked[3][-2][1]),cv2.FONT_HERSHEY_SIMPLEX,1,(0,200,255),3)
                delList.append(p)
            else:
                # moved but not downward: spurious track, discard without counting
                ID.append(tracked[0])
                delList.append(p)
                print('discard 1')
                cv2.putText(frame, 'discard 1',(tracked[3][-2][0],tracked[3][-2][1]),cv2.FONT_HERSHEY_SIMPLEX,1,(0,200,255),3)
                print(tracked)
        # never moved at all after more than 5 updates: discard
        elif n>5 and tracked[3][n]==tracked[3][n-1] and tracked[3][0]==tracked[3][n]:
            ID.append(tracked[0])
            delList.append(p)
            cv2.putText(frame, 'discard 1',(tracked[3][-2][0],tracked[3][-2][1]),cv2.FONT_HERSHEY_SIMPLEX,1,(0,200,255),3)
            print('discard 2')
        # tracker lost the part (presumably it left the frame): count it
        elif tracked[3][-1]==None:
            count+=1
            print('count2: ',count)
            ID.append(tracked[0])
            cv2.putText(frame, 'Counted',(tracked[3][-2][0],tracked[3][-2][1]),cv2.FONT_HERSHEY_SIMPLEX,1,(0,200,255),3)
            delList.append(p)
        p+=1
    cv2.putText(frame, 'Count: '+str(count),(50,50),cv2.FONT_HERSHEY_SIMPLEX,1,(0,200,255),3)
    # mark retired tracks as None so they can be removed below
    if len(delList)>0:
        for a in delList:
            TrackList[a]=None
    # remove dead IDs (list.remove raises ValueError once no None is left)
    cleaned=False
    while cleaned==False:
        try:
            TrackList.remove(None)
        except ValueError:
            cleaned=True
    # check if movement was being tracked; collect moving blobs not yet tracked
    untracked=[]
    if movement==True:
        checkContours,_=cv2.findContours(thresh1,cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        for contour in checkContours:
            tracked=False
            if 3500>cv2.contourArea(contour)>1350:
                (x,y,w,h)=cv2.boundingRect(contour)
                # is the blob's centre already inside an existing track's last box?
                for box in TrackList:
                    if box[3][-1][0]<x+w/2<box[3][-1][0]+box[3][-1][2] and box[3][-1][1]<y+h/2<box[3][-1][1]+box[3][-1][3]:
                        tracked=True
                if tracked==False:
                    #print('false')
                    (x,y,w,h)=cv2.boundingRect(contour)
                    cv2.rectangle(frame, (x,y), (x+w, y+h),(255,0,0),2)
                    cv2.rectangle(frame, (x,y), (x+w, y+h),(255,255,255),2)
                    untracked.append([x,y,w,h])
    # assign tracking: give each untracked blob the lowest free ID and a new tracker
    ID.sort()
    for unt in untracked:
        idtemp=ID.pop(0)
        tempFrame=frame
        temp=[idtemp, 0, cv2.TrackerCSRT_create(),[unt]]
        # start the tracker on a box 10% larger than the detection
        temp[2].init(tempFrame,(unt[0],unt[1],1.10*unt[2],1.10*unt[3]))
        TrackList.append(temp)
    # show frames
    cv2.imshow('frame 1',frame)
    #cv2.imshow('frame 2',thresh1)
    # slide the frame pair forward by one
    frameCap1=frameCap2
    check, frameCap2=cap.read()
    # wait for a key before continuing the loop; 27 == Esc exits
    key=cv2.waitKey(1) & 0xFF
    if key==27:
        break
cap.release()
cv2.destroyAllWindows()
print(count)
print("runtime: %s seconds" % (time.time() - start_time))
-
1\$\begingroup\$ Welcome to Code Review! I changed your title to actually state what your code is supposed to do and also added the beginner tag, since you said you don't have a lot of experience (this is not meant to offend you, merely as a hint towards reviewers). \$\endgroup\$AlexV– AlexV2019年10月09日 10:58:19 +00:00Commented Oct 9, 2019 at 10:58
-
1\$\begingroup\$ Maybe you should also upload the original video file somewhere, because YouTube runs your video through yet another codec, which usually decreases the video quality (especially if fast moving things are involved). \$\endgroup\$AlexV– AlexV2019年10月09日 11:00:16 +00:00Commented Oct 9, 2019 at 11:00
-
1\$\begingroup\$ I can't see this as a code review, so it's a comment. An obvious place to start improving the performance of the code is reducing the preprocessing. The input is 124,416,000 three byte pixels second. It's all getting uncompressed from mp4. Then 80% of the IO and decompression is discarded when downsized to 1080x400 before thresholding discards more work. For a machine learning pipeline, all that data massaging can be done offline. For a real-time system it has a direct impact on performance. A monochrome lower resolution camera designed for computer vision, might be the way to go. \$\endgroup\$ben rudgers– ben rudgers2019年10月10日 04:37:01 +00:00Commented Oct 10, 2019 at 4:37
-
\$\begingroup\$ @benrudgers It's a good point. You should be able to reconfigure the camera to simply capture in a lower resolution (and probably even in greyscale), which will not require post-processing. \$\endgroup\$Reinderien– Reinderien2019年10月10日 14:04:15 +00:00Commented Oct 10, 2019 at 14:04
-
\$\begingroup\$ @Reinderien Since the code mentions .mp4, a camera that does not compress the video stream might also be appropriate. It might be possible to reconfigure a consumer oriented camera, but selecting the right camera for the job probably makes more sense if labor costs are ordinary. 0.5 megapixels at 60 fps in monochrome with a c-mount lens of the right focal length is likely less than 1ドルk. Code that never runs is faster than anything a person can write. \$\endgroup\$ben rudgers– ben rudgers2019年10月10日 15:02:21 +00:00Commented Oct 10, 2019 at 15:02
1 Answer 1
First I'll say that it's very impressive for you to have gotten this working and with mostly reasonable methods, given one week's experience.
Range usage
ID=[0,1,2,3,4,5,6,7,8,9]
can be
ID = list(range(10))
No-op function
Based on what you've shown us, nothing
does, well... nothing. It can be deleted.
Formatting
There's a code formatting standard called PEP8 that attempts to make Python code more legible, mostly by defining spacing. Software such as PyCharm or the various linters will suggest that
- You should have spaces around
=
in assignments such as mask=np.zeros_like(img)
- Variable names like
channelcount
should be channel_count
- There should be a space after
#
in comment lines - etc.
Tuple unpacking
_,frameCap1=cap.read()
This is fine. If you don't want to unpack, and you don't want to use the _
, you could also
frameCap1 = cap.read()[1]
The same applies to your threshold
calls.
Boolean comparison
if check == False:
should be
if not check:
Similarly,
while cleaned==False:
should be
while not cleaned:
This:
if movement==True:
should be
if movement:
No-op continue
This loop:
for contour in contours:
if cv2.contourArea(contour)<1350 or cv2.contourArea(contour)>3500:
continue
#cv2.rectangle(frame,(x,y), (x+w,y+h),(0,255,0),2)
(x,y,w,h)=cv2.boundingRect(contour)
moveBox.append([x,y,w,h])
movement=True
continue
does not need a continue
at the end; you can delete that.
Except-or-break
This loop:
#remove dead IDs
cleaned=False
while cleaned==False:
try:
TrackList.remove(None)
except ValueError:
cleaned=True
shouldn't use a termination flag. Instead:
while True:
try:
TrackList.remove(None)
except ValueError:
break
Magic numbers
They abound in this program. Rather than writing literals like 3500
, you should assign them to named constants for legibility's sake, e.g.
MAX_CONTOUR = 3500