Track objects

Object tracking detects multiple objects in an input video and tracks each of them across frames.

Use the standard model

The following code samples demonstrate how to perform object tracking using the streaming client library. The first request on the stream must contain only the video configuration; subsequent requests carry the video content in chunks.

Java

To authenticate to Video Intelligence, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.
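
For local development, Application Default Credentials are typically established with the gcloud CLI (shown here as a common approach, not as part of the sample):

gcloud auth application-default login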


import com.google.api.gax.rpc.BidiStream;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingAnnotation;
import com.google.cloud.videointelligence.v1p3beta1.ObjectTrackingFrame;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoRequest;
import com.google.cloud.videointelligence.v1p3beta1.StreamingAnnotateVideoResponse;
import com.google.cloud.videointelligence.v1p3beta1.StreamingFeature;
import com.google.cloud.videointelligence.v1p3beta1.StreamingLabelDetectionConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoAnnotationResults;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoConfig;
import com.google.cloud.videointelligence.v1p3beta1.StreamingVideoIntelligenceServiceClient;
import com.google.protobuf.ByteString;
import io.grpc.StatusRuntimeException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.concurrent.TimeoutException;

class StreamingObjectTracking {

  // Perform streaming video object tracking.
  static void streamingObjectTracking(String filePath)
      throws IOException, TimeoutException, StatusRuntimeException {
    // String filePath = "path_to_your_video_file";
    try (StreamingVideoIntelligenceServiceClient client =
        StreamingVideoIntelligenceServiceClient.create()) {
      Path path = Paths.get(filePath);
      byte[] data = Files.readAllBytes(path);
      // Set the chunk size to 5MB (recommended to be less than 10MB).
      int chunkSize = 5 * 1024 * 1024;
      int numChunks = (int) Math.ceil((double) data.length / chunkSize);

      StreamingLabelDetectionConfig labelConfig =
          StreamingLabelDetectionConfig.newBuilder().setStationaryCamera(false).build();

      StreamingVideoConfig streamingVideoConfig =
          StreamingVideoConfig.newBuilder()
              .setFeature(StreamingFeature.STREAMING_OBJECT_TRACKING)
              .setLabelDetectionConfig(labelConfig)
              .build();

      BidiStream<StreamingAnnotateVideoRequest, StreamingAnnotateVideoResponse> call =
          client.streamingAnnotateVideoCallable().call();

      // The first request must **only** contain the video configuration:
      call.send(
          StreamingAnnotateVideoRequest.newBuilder().setVideoConfig(streamingVideoConfig).build());

      // Subsequent requests must **only** contain the video data.
      // Send the requests in chunks. The last chunk is clamped to the end of
      // the file so no zero-padded bytes are sent.
      for (int i = 0; i < numChunks; i++) {
        call.send(
            StreamingAnnotateVideoRequest.newBuilder()
                .setInputContent(
                    ByteString.copyFrom(
                        Arrays.copyOfRange(
                            data, i * chunkSize, Math.min(data.length, (i + 1) * chunkSize))))
                .build());
      }

      // Tell the service you are done sending data.
      call.closeSend();

      for (StreamingAnnotateVideoResponse response : call) {
        StreamingVideoAnnotationResults annotationResults = response.getAnnotationResults();
        for (ObjectTrackingAnnotation objectAnnotations :
            annotationResults.getObjectAnnotationsList()) {
          String entity = objectAnnotations.getEntity().getDescription();
          float confidence = objectAnnotations.getConfidence();
          long trackId = objectAnnotations.getTrackId();
          System.out.format("%s: %f (ID: %d)\n", entity, confidence, trackId);

          // In streaming, there is always one frame.
          ObjectTrackingFrame frame = objectAnnotations.getFrames(0);
          double offset =
              frame.getTimeOffset().getSeconds() + frame.getTimeOffset().getNanos() / 1e9;
          System.out.format("Offset: %f\n", offset);
          System.out.println("Bounding Box:");
          System.out.format("\tLeft: %f\n", frame.getNormalizedBoundingBox().getLeft());
          System.out.format("\tTop: %f\n", frame.getNormalizedBoundingBox().getTop());
          System.out.format("\tRight: %f\n", frame.getNormalizedBoundingBox().getRight());
          System.out.format("\tBottom: %f\n", frame.getNormalizedBoundingBox().getBottom());
        }
      }
    }
  }
}

Node.js

To authenticate to Video Intelligence, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const path = 'Local file to analyze, e.g. ./my-file.mp4';

const {StreamingVideoIntelligenceServiceClient} =
  require('@google-cloud/video-intelligence').v1p3beta1;
const fs = require('fs');

// Instantiates a client.
const client = new StreamingVideoIntelligenceServiceClient();

// Streaming configuration.
const configRequest = {
  videoConfig: {
    feature: 'STREAMING_OBJECT_TRACKING',
  },
};

const readStream = fs.createReadStream(path, {
  highWaterMark: 5 * 1024 * 1024, // Chunk size set to 5MB (recommended to be less than 10MB).
  encoding: 'base64',
});

// Load the file content.
const chunks = [];
readStream
  .on('data', chunk => {
    const request = {
      inputContent: chunk.toString(),
    };
    chunks.push(request);
  })
  .on('close', () => {
    // configRequest should be the first request in the stream.
    stream.write(configRequest);
    for (let i = 0; i < chunks.length; i++) {
      stream.write(chunks[i]);
    }
    stream.end();
  });

const options = {timeout: 120000};

// Open the bidirectional stream and handle responses as they arrive.
const stream = client.streamingAnnotateVideo(options).on('data', response => {
  // Gets annotations for the video.
  const annotations = response.annotationResults;
  const objects = annotations.objectAnnotations;
  objects.forEach(object => {
    console.log(`Entity description: ${object.entity.description}`);
    console.log(`Entity id: ${object.entity.entityId}`);
    console.log(`Track id: ${object.trackId}`);
    console.log(`Confidence: ${object.confidence}`);
    console.log(
      `Time offset for the frame: ${
        object.frames[0].timeOffset.seconds || 0
      }` + `.${(object.frames[0].timeOffset.nanos / 1e6).toFixed(0)}s`
    );
    // Every annotation has only one frame.
    const box = object.frames[0].normalizedBoundingBox;
    console.log('Bounding box position:');
    console.log(` left  : ${box.left}`);
    console.log(` top   : ${box.top}`);
    console.log(` right : ${box.right}`);
    console.log(` bottom: ${box.bottom}`);
  });
});

Python

To authenticate to Video Intelligence, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import io

from google.cloud import videointelligence_v1p3beta1 as videointelligence

# path = 'path_to_file'

client = videointelligence.StreamingVideoIntelligenceServiceClient()

# Set the streaming config.
config = videointelligence.StreamingVideoConfig(
    feature=videointelligence.StreamingFeature.STREAMING_OBJECT_TRACKING
)

# config_request should be the first request in the stream of requests.
config_request = videointelligence.StreamingAnnotateVideoRequest(video_config=config)

# Set the chunk size to 5MB (recommended to be less than 10MB).
chunk_size = 5 * 1024 * 1024

# Load the file content.
stream = []
with io.open(path, "rb") as video_file:
    while True:
        data = video_file.read(chunk_size)
        if not data:
            break
        stream.append(data)


def stream_generator():
    yield config_request
    for chunk in stream:
        yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk)


requests = stream_generator()

# streaming_annotate_video returns a generator.
# The default timeout is about 300 seconds.
# To process longer videos, set the timeout to a value
# larger than the length (in seconds) of the stream.
responses = client.streaming_annotate_video(requests, timeout=900)

# Each response corresponds to about one second of video.
for response in responses:
    # Check for errors.
    if response.error.message:
        print(response.error.message)
        break

    object_annotations = response.annotation_results.object_annotations

    # object_annotations could be empty.
    if not object_annotations:
        continue

    for annotation in object_annotations:
        # Each annotation has only one frame, which has a time offset.
        frame = annotation.frames[0]
        time_offset = (
            frame.time_offset.seconds + frame.time_offset.microseconds / 1e6
        )

        description = annotation.entity.description
        confidence = annotation.confidence
        # track_id tracks the same object in the video.
        track_id = annotation.track_id

        print("{}s".format(time_offset))
        print("\tEntity description: {}".format(description))
        print("\tTrack Id: {}".format(track_id))
        if annotation.entity.entity_id:
            print("\tEntity id: {}".format(annotation.entity.entity_id))
        print("\tConfidence: {}".format(confidence))

        box = frame.normalized_bounding_box
        print("\tBounding box position:")
        print("\tleft  : {}".format(box.left))
        print("\ttop   : {}".format(box.top))
        print("\tright : {}".format(box.right))
        print("\tbottom: {}\n".format(box.bottom))
