// dlib C++ Library - object_detector_abstract.h

// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
#ifdef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
#include "../geometry.h"
#include <vector>
#include "box_overlap_testing_abstract.h"
#include "full_object_detection_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
 struct rect_detection
 {
 /*!
     WHAT THIS OBJECT REPRESENTS
         This is the element type of the std::vector<rect_detection> output by
         object_detector::operator().  Each instance describes one detected
         object as a bounding rectangle together with its score and the index
         of the weight vector that produced it.
 !*/

 // The strength of the detection.  Larger values indicate the detector is
 // more confident that this is a correct detection rather than a false alarm.
 double detection_confidence;
 // The index of the weight vector (i.e. the idx accepted by
 // object_detector::get_w()) that generated this detection.
 unsigned long weight_index;
 // The bounding box of the detected object.
 rectangle rect;
 };
 struct full_detection
 {
 /*!
     WHAT THIS OBJECT REPRESENTS
         This is the element type of the std::vector<full_detection> output by
         object_detector::operator().  It carries the same information as
         rect_detection, except that the detected object is reported as a
         full_object_detection (so the output includes part locations) rather
         than as a plain rectangle.
 !*/

 // The strength of the detection.  Larger values indicate the detector is
 // more confident that this is a correct detection rather than a false alarm.
 double detection_confidence;
 // The index of the weight vector (i.e. the idx accepted by
 // object_detector::get_w()) that generated this detection.
 unsigned long weight_index;
 // The detected object.  Note that, despite the member's name, this is a
 // full_object_detection and not a rectangle.
 full_object_detection rect;
 };
// ----------------------------------------------------------------------------------------
 template <
 typename image_scanner_type_
 >
 class object_detector
 {
 /*!
     REQUIREMENTS ON image_scanner_type_
         image_scanner_type_ must be an implementation of 
         dlib/image_processing/scan_image_pyramid_abstract.h or 
         dlib/image_processing/scan_fhog_pyramid.h or 
         dlib/image_processing/scan_image_custom.h or 
         dlib/image_processing/scan_image_boxes_abstract.h 

     WHAT THIS OBJECT REPRESENTS
         This object is a tool for detecting the positions of objects in an image.
         In particular, it is a simple container to aggregate an instance of an image 
         scanner (i.e. scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or
         scan_image_boxes), the weight vector needed by one of these image scanners,
         and finally an instance of test_box_overlap.  The test_box_overlap object
         is used to perform non-max suppression on the output of the image scanner
         object.  

         Note further that this object can contain multiple weight vectors.  In this
         case, it will run the image scanner multiple times, once with each of the
         weight vectors.  Then it will aggregate the results from all runs, perform
         non-max suppression and then return the results.  Therefore, the object_detector 
         can also be used as a container for a set of object detectors that all use
         the same image scanner but different weight vectors.  This is useful since
         the object detection procedure has two parts.  A loading step where the
         image is loaded into the scanner, then a detect step which uses the weight
         vector to locate objects in the image.  Since the loading step is independent 
         of the weight vector it is most efficient to run multiple detectors by
         performing one load into a scanner followed by multiple detect steps.  This
         avoids unnecessarily loading the same image into the scanner multiple times.  
 !*/
 public:
 // The type of image scanner aggregated by this object (the template argument).
 typedef image_scanner_type_ image_scanner_type;
 // The type of the weight vectors, as defined by the image scanner.
 typedef typename image_scanner_type::feature_vector_type feature_vector_type;
 object_detector (
 );
 /*!
     ensures
         - This detector won't generate any detections when
           presented with an image.
         - #num_detectors() == 0
 !*/
 object_detector (
 const object_detector& item 
 );
 /*!
     ensures
         - #*this is a copy of item
         - #get_scanner() == item.get_scanner()
           (note that only the "configuration" of item.get_scanner() is copied.
           I.e. the copy is done using copy_configuration())
 !*/
 object_detector (
 const image_scanner_type& scanner, 
 const test_box_overlap& overlap_tester,
 const feature_vector_type& w 
 );
 /*!
     requires
         - w.size() == scanner.get_num_dimensions() + 1
           (the extra, final, element of w is the detection threshold.  It is
           the value passed as the last argument of detect() below.)
         - scanner.get_num_detection_templates() > 0
     ensures
         - When the operator() member function is called it will
           invoke get_scanner().detect(w,dets,w(w.size()-1)), suppress
           overlapping detections, and then report the results.
         - when #*this is used to detect objects, the set of
           output detections will never contain any overlaps
           with respect to overlap_tester.  That is, for all 
           pairs of returned detections A and B, we will always
           have: overlap_tester(A,B) == false
         - #get_w() == w
         - #get_overlap_tester() == overlap_tester
         - #get_scanner() == scanner
           (note that only the "configuration" of scanner is copied.
           I.e. the copy is done using copy_configuration())
         - #num_detectors() == 1
 !*/
 object_detector (
 const image_scanner_type& scanner, 
 const test_box_overlap& overlap_tester,
 const std::vector<feature_vector_type>& w 
 );
 /*!
     requires
         - for all valid i:
             - w[i].size() == scanner.get_num_dimensions() + 1
               (the extra, final, element of each w[i] is the detection
               threshold.  It is the value passed as the last argument of
               detect() below.)
         - scanner.get_num_detection_templates() > 0
         - w.size() > 0
     ensures
         - When the operator() member function is called it will invoke
           get_scanner().detect(w[i],dets,w[i](w[i].size()-1)) for all valid i.  Then it
           will take all the detections output by the calls to detect() and suppress
           overlapping detections, and finally report the results.
         - when #*this is used to detect objects, the set of output detections will
           never contain any overlaps with respect to overlap_tester.  That is, for
           all pairs of returned detections A and B, we will always have:
             overlap_tester(A,B) == false
         - for all valid i:
             - #get_w(i) == w[i]
         - #num_detectors() == w.size()
         - #get_overlap_tester() == overlap_tester
         - #get_scanner() == scanner
           (note that only the "configuration" of scanner is copied.
           I.e. the copy is done using copy_configuration())
 !*/
 explicit object_detector (
 const std::vector<object_detector>& detectors
 );
 /*!
     requires
         - detectors.size() != 0
         - All the detectors must use compatibly configured scanners.  That is, it
           must make sense for the weight vector from one detector to be used with
           the scanner from any other.
         - for all valid i:
             - detectors[i].get_scanner().get_num_dimensions() == detectors[0].get_scanner().get_num_dimensions()
               (i.e. all the detectors use scanners that use the same kind of feature vectors.)
     ensures
         - Very much like the above constructor, this constructor takes all the
           given detectors and packs them into #*this.  That is, invoking operator()
           on #*this will run all the detectors, perform non-max suppression, and
           then report the results.
         - When #*this is used to detect objects, the set of output detections will
           never contain any overlaps with respect to #get_overlap_tester() (i.e.
           detectors[0].get_overlap_tester()).  That is, for all pairs of returned
           detections A and B, we will always have:
             #get_overlap_tester()(A,B) == false
         - #num_detectors() == The sum of detectors[i].num_detectors() for all valid i.  
         - #get_overlap_tester() == detectors[0].get_overlap_tester()
         - #get_scanner() == detectors[0].get_scanner()
           (note that only the "configuration" of the scanner is copied.  I.e. the
           copy is done using copy_configuration())
 !*/
 unsigned long num_detectors (
 ) const; 
 /*!
     ensures
         - returns the number of weight vectors in this object.  Since each weight
           vector logically represents an object detector, this returns the number
           of object detectors contained in this object.
 !*/
 const feature_vector_type& get_w (
 unsigned long idx = 0
 ) const;
 /*!
     requires
         - idx < num_detectors()
           (note that this means calling get_w(), with the default idx of 0, is
           not allowed on a detector for which num_detectors() == 0)
     ensures
         - returns the idx-th weight vector loaded into this object.  All the weight vectors
           have the same dimension and logically each represents a different detector.
 !*/
 const test_box_overlap& get_overlap_tester (
 ) const;
 /*!
     ensures
         - returns the overlap tester used by this object.  This is the object
           used to perform non-max suppression on the detections.
 !*/
 const image_scanner_type& get_scanner (
 ) const;
 /*!
     ensures
         - returns the image scanner used by this object.  
 !*/
 object_detector& operator= (
 const object_detector& item 
 );
 /*!
     ensures
         - #*this is a copy of item
         - #get_scanner() == item.get_scanner()
           (note that only the "configuration" of item.get_scanner() is 
           copied.  I.e. the copy is done using copy_configuration())
         - returns #*this
 !*/
 template <
 typename image_type
 >
 void operator() (
 const image_type& img,
 std::vector<rect_detection>& dets,
 double adjust_threshold = 0
 );
 /*!
     requires
         - img == an object which can be accepted by image_scanner_type::load()
     ensures
         - Performs object detection on the given image and stores the detected
           objects into #dets.  In particular, we will have that:
             - #dets is sorted such that the highest confidence detections come
               first.  E.g. element 0 is the best detection, element 1 the next
               best, and so on.
             - #dets.size() == the number of detected objects.
             - #dets[i].detection_confidence == The strength of the i-th detection.
               Larger values indicate that the detector is more confident that
               #dets[i] is a correct detection rather than being a false alarm.
               Moreover, the detection_confidence is equal to the detection value
               output by the scanner minus the threshold value stored at the end of
               the weight vector in get_w(#dets[i].weight_index). 
             - #dets[i].weight_index == the index for the weight vector that
               generated this detection.  
             - #dets[i].rect == the bounding box for the i-th detection.
         - #get_scanner() will have been loaded with img.  Therefore, you can call
           #get_scanner().get_feature_vector() to obtain the feature vectors or
           #get_scanner().get_full_object_detection() to get the
           full_object_detections for the resulting object detection boxes.
         - The detection threshold is adjusted by having adjust_threshold added to
           it.  Therefore, an adjust_threshold value > 0 makes detecting objects
           harder while a negative value makes it easier.  Moreover, the following
           will be true for all valid i:
             - #dets[i].detection_confidence >= adjust_threshold
           This means that, for example, you can obtain the maximum possible number
           of detections by setting adjust_threshold equal to negative infinity.
 !*/
 template <
 typename image_type
 >
 void operator() (
 const image_type& img,
 std::vector<full_detection>& dets,
 double adjust_threshold = 0
 );
 /*!
     requires
         - img == an object which can be accepted by image_scanner_type::load()
     ensures
         - This function is identical to the above operator() routine (the one
           that outputs rect_detections), except that it outputs
           full_object_detections instead of rectangles.  This means that the
           output includes part locations.  In particular, calling this function
           is the same as calling the above operator() routine and then using
           get_scanner().get_full_object_detection() to resolve all the rectangles
           into full_object_detections.  Therefore, this version of operator() is
           simply a convenience function for performing this set of operations.
 !*/
 template <
 typename image_type
 >
 std::vector<rectangle> operator() (
 const image_type& img,
 double adjust_threshold = 0
 );
 /*!
     requires
         - img == an object which can be accepted by image_scanner_type::load()
     ensures
         - This function is identical to the first operator() routine above (the
           one that outputs rect_detections), except that it returns a
           std::vector<rectangle> which contains just the bounding boxes of all
           the detections. 
 !*/
 template <
 typename image_type
 >
 void operator() (
 const image_type& img,
 std::vector<std::pair<double, rectangle> >& dets,
 double adjust_threshold = 0
 );
 /*!
     requires
         - img == an object which can be accepted by image_scanner_type::load()
     ensures
         - performs object detection on the given image and stores the
           detected objects into #dets.  In particular, we will have that:
             - #dets is sorted such that the highest confidence detections 
               come first.  E.g. element 0 is the best detection, element 1 
               the next best, and so on.
             - #dets.size() == the number of detected objects.
             - #dets[i].first gives the "detection confidence" of the i-th
               detection.  This is the detection value output by the scanner minus
               the threshold value stored at the end of the weight vector in get_w(). 
               (NOTE(review): when num_detectors() > 1 this is presumably the
               weight vector that generated the detection -- confirm.  The output
               of this overload has no field reporting which weight vector that
               was; the rect_detection version of operator() reports it in
               weight_index.)
             - #dets[i].second == the bounding box for the i-th detection.
         - #get_scanner() will have been loaded with img.  Therefore, you can call
           #get_scanner().get_feature_vector() to obtain the feature vectors or
           #get_scanner().get_full_object_detection() to get the
           full_object_detections for the resulting object detection boxes.
         - The detection threshold is adjusted by having adjust_threshold added to
           it.  Therefore, an adjust_threshold value > 0 makes detecting objects
           harder while a negative value makes it easier.  Moreover, the following
           will be true for all valid i:
             - #dets[i].first >= adjust_threshold
           This means that, for example, you can obtain the maximum possible number
           of detections by setting adjust_threshold equal to negative infinity.
 !*/
 template <
 typename image_type
 >
 void operator() (
 const image_type& img,
 std::vector<std::pair<double, full_object_detection> >& dets,
 double adjust_threshold = 0
 );
 /*!
     requires
         - img == an object which can be accepted by image_scanner_type::load()
     ensures
         - This function is identical to the above operator() routine (the one
           that outputs std::pair<double, rectangle> objects), except that it
           outputs full_object_detections instead of rectangles.  This means that
           the output includes part locations.  In particular, calling this function
           is the same as calling the above operator() routine and then using
           get_scanner().get_full_object_detection() to resolve all the rectangles
           into full_object_detections.  Therefore, this version of operator() is
           simply a convenience function for performing this set of operations.
 !*/
 template <
 typename image_type
 >
 void operator() (
 const image_type& img,
 std::vector<full_object_detection>& dets,
 double adjust_threshold = 0
 );
 /*!
     requires
         - img == an object which can be accepted by image_scanner_type::load()
     ensures
         - This function is identical to the above operator() routine (the one
           that outputs std::pair<double, full_object_detection> objects), except
           that it doesn't include a double valued score.  That is, it just
           outputs the full_object_detections.
 !*/
 };
// ----------------------------------------------------------------------------------------
 template <typename T>
 void serialize (
 const object_detector<T>& item,
 std::ostream& out
 );
 /*!
     provides serialization support (item is the detector to save and out is
     the stream it is written to).  

     Note that this function only saves the configuration part of
     item.get_scanner().  That is, we use the scanner's copy_configuration()
     function to get a copy of the scanner that doesn't contain any loaded image
     data and we then save just the configuration part of the scanner.  This
     means that any serialized object_detectors won't remember any images they
     have processed but will otherwise contain all their state and be able to
     detect objects in new images.
 !*/
// ----------------------------------------------------------------------------------------
 template <typename T>
 void deserialize (
 object_detector<T>& item,
 std::istream& in 
 );
 /*!
     provides deserialization support (in is the stream to read from and item
     is the detector that receives the loaded state).  

     This is the counterpart of serialize() for object_detector objects.  Note
     that serialize() saves only the configuration part of the detector's
     scanner, so the data read here does not include any image previously
     loaded into the scanner.
 !*/
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_