// Copyright (C) 2008 Davis E. King (davis@dlib.net)
// License: Boost Software License.  See LICENSE.txt for the full license.
#ifndef DLIB_KERNEL_FEATURE_RANKINg_H_
#define DLIB_KERNEL_FEATURE_RANKINg_H_

#include <vector>
#include <limits>

#include "feature_ranking_abstract.h"
#include "kcentroid.h"
#include "../optimization.h"
#include "../statistics.h"
#include <iostream>

namespace dlib
{
// ----------------------------------------------------------------------------------------
    template <
        typename kernel_type,
        typename sample_matrix_type,
        typename label_matrix_type
        >
    matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl (
        const kcentroid<kernel_type>& kc,
        const sample_matrix_type& samples,
        const label_matrix_type& labels
    )
    {
        /*
            This function ranks features by doing recursive feature elimination:
            it repeatedly removes the feature whose removal reduces the
            separation between the two class centroids the least.  Row 0 of the
            returned matrix holds the index of the best feature, and column 1
            holds the normalized separation achieved when using each feature
            together with all the features ranked above it.
        */
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::mem_manager_type mm;

        // make sure requires clause is not broken
        DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true,
            "\tmatrix rank_features()"
            << "\n\t you have given invalid arguments to this function"
            );

        matrix<scalar_type,0,2,mm> results(samples(0).nr(), 2);
        matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr());
        set_all_elements(mask,1);

        // figure out what the separation is between the two centroids when all the
        // features are present.
        scalar_type first_separation;
        {
            kcentroid<kernel_type> c1(kc);
            kcentroid<kernel_type> c2(kc);

            // find the centers of each class
            for (long s = 0; s < samples.size(); ++s)
            {
                if (labels(s) < 0)
                {
                    c1.train(samples(s));
                }
                else
                {
                    c2.train(samples(s));
                }
            }

            first_separation = c1(c2);
        }
        for (long i = results.nr()-1; i >= 0; --i)
        {
            long worst_feature_idx = 0;
            scalar_type worst_feature_score = -std::numeric_limits<scalar_type>::infinity();

            // figure out which feature to remove next
            for (long j = 0; j < mask.size(); ++j)
            {
                // skip features we have already removed
                if (mask(j) == 0)
                    continue;

                kcentroid<kernel_type> c1(kc);
                kcentroid<kernel_type> c2(kc);

                // temporarily remove this feature from the working set of features
                mask(j) = 0;

                // find the centers of each class
                for (long s = 0; s < samples.size(); ++s)
                {
                    if (labels(s) < 0)
                    {
                        c1.train(pointwise_multiply(samples(s),mask));
                    }
                    else
                    {
                        c2.train(pointwise_multiply(samples(s),mask));
                    }
                }

                // find the distance between the two centroids and use that
                // as the score
                const double score = c1(c2);

                if (score > worst_feature_score)
                {
                    worst_feature_score = score;
                    worst_feature_idx = j;
                }

                // add this feature back to the working set of features
                mask(j) = 1;
            }

            // now that we know what the next worst feature is, record it
            mask(worst_feature_idx) = 0;
            results(i,0) = worst_feature_idx;
            results(i,1) = worst_feature_score;
        }
        // now normalize the results
        const scalar_type max_separation = std::max(max(colm(results,1)), first_separation);
        set_colm(results,1) = colm(results,1)/max_separation;
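        // Each score recorded above is the separation measured after that row's
        // feature was removed, i.e. using only the features ranked above it.  Shift
        // the scores so that row r instead holds the separation obtained when the
        // features in rows 0 through r are all present, with the last row getting
        // the separation measured with every feature included.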
        for (long r = 0; r < results.nr()-1; ++r)
        {
            results(r,1) = results(r+1,1);
        }
        results(results.nr()-1,1) = first_separation/max_separation;

        return results;
    }
// ----------------------------------------------------------------------------------------
    template <
        typename kernel_type,
        typename sample_matrix_type,
        typename label_matrix_type
        >
    matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features (
        const kcentroid<kernel_type>& kc,
        const sample_matrix_type& samples,
        const label_matrix_type& labels
    )
    {
        return rank_features_impl(kc, mat(samples), mat(labels));
    }
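
    /*
        A minimal usage sketch (not part of the library), assuming dense
        column-vector samples, +1/-1 labels, and a linear kernel; the kcentroid
        tolerance and dictionary size below are illustrative guesses:

            typedef matrix<double,0,1> sample_type;
            typedef linear_kernel<sample_type> kernel_type;

            std::vector<sample_type> samples;
            std::vector<double> labels;
            // ... fill samples and labels with a binary classification problem ...

            kcentroid<kernel_type> kc(kernel_type(), 0.001, 25);
            matrix<double> ranks = rank_features(kc, samples, labels);
            // ranks(r,0) is the index of the r-th best feature and ranks(r,1)
            // is its normalized centroid separation score.
    */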
// ----------------------------------------------------------------------------------------
    template <
        typename kernel_type,
        typename sample_matrix_type,
        typename label_matrix_type
        >
    matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl (
        const kcentroid<kernel_type>& kc,
        const sample_matrix_type& samples,
        const label_matrix_type& labels,
        const long num_features
    )
    {
        /*
            This function ranks features by doing recursive feature addition:
            starting from an empty feature set, it repeatedly adds the feature
            that most increases the separation between the two class centroids,
            stopping after num_features features have been picked.  Row 0 of the
            returned matrix holds the index of the best feature, and column 1
            holds the normalized separation achieved when using each feature
            together with all the features ranked above it.
        */
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::mem_manager_type mm;

        // make sure requires clause is not broken
        DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true,
            "\tmatrix rank_features()"
            << "\n\t you have given invalid arguments to this function"
            );
        DLIB_ASSERT(0 < num_features && num_features <= samples(0).nr(),
            "\tmatrix rank_features()"
            << "\n\t you have given invalid arguments to this function"
            << "\n\t num_features:    " << num_features
            << "\n\t samples(0).nr(): " << samples(0).nr()
            );

        matrix<scalar_type,0,2,mm> results(num_features, 2);
        matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr());
        set_all_elements(mask,0);
        for (long i = 0; i < results.nr(); ++i)
        {
            long best_feature_idx = 0;
            scalar_type best_feature_score = -std::numeric_limits<scalar_type>::infinity();

            // figure out which feature to add next
            for (long j = 0; j < mask.size(); ++j)
            {
                // skip features we have already added
                if (mask(j) == 1)
                    continue;

                kcentroid<kernel_type> c1(kc);
                kcentroid<kernel_type> c2(kc);

                // temporarily add this feature to the working set of features
                mask(j) = 1;

                // find the centers of each class
                for (long s = 0; s < samples.size(); ++s)
                {
                    if (labels(s) < 0)
                    {
                        c1.train(pointwise_multiply(samples(s),mask));
                    }
                    else
                    {
                        c2.train(pointwise_multiply(samples(s),mask));
                    }
                }

                // find the distance between the two centroids and use that
                // as the score
                const double score = c1(c2);

                if (score > best_feature_score)
                {
                    best_feature_score = score;
                    best_feature_idx = j;
                }

                // take this feature back out of the working set of features
                mask(j) = 0;
            }

            // now that we know what the next best feature is, record it
            mask(best_feature_idx) = 1;
            results(i,0) = best_feature_idx;
            results(i,1) = best_feature_score;
        }
        // now normalize the results
        set_colm(results,1) = colm(results,1)/max(colm(results,1));

        return results;
    }
// ----------------------------------------------------------------------------------------
    template <
        typename kernel_type,
        typename sample_matrix_type,
        typename label_matrix_type
        >
    matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features (
        const kcentroid<kernel_type>& kc,
        const sample_matrix_type& samples,
        const label_matrix_type& labels,
        const long num_features
    )
    {
        if (mat(samples).nr() > 0 && num_features == mat(samples)(0).nr())
        {
            // if we are going to rank all the features then we might as well use the
            // recursive feature elimination version
            return rank_features_impl(kc, mat(samples), mat(labels));
        }
        else
        {
            return rank_features_impl(kc, mat(samples), mat(labels), num_features);
        }
    }
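
    /*
        For example, rank_features(kc, samples, labels, 5) uses forward selection to
        pick out just the 5 best features, which is much cheaper than ranking every
        feature when only a small subset is needed.
    */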
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
    namespace rank_features_helpers
    {
        template <
            typename K,
            typename sample_matrix_type,
            typename label_matrix_type
            >
        typename K::scalar_type centroid_gap (
            const kcentroid<K>& kc,
            const sample_matrix_type& samples,
            const label_matrix_type& labels
        )
        {
            kcentroid<K> kc1(kc);
            kcentroid<K> kc2(kc);

            // toss all the samples into our kcentroids
            for (long i = 0; i < samples.size(); ++i)
            {
                if (labels(i) > 0)
                    kc1.train(samples(i));
                else
                    kc2.train(samples(i));
            }

            // now return the separation between these two centroids
            return kc1(kc2);
        }
        template <
            typename sample_matrix_type,
            typename label_matrix_type
            >
        class test
        {
            typedef typename sample_matrix_type::type sample_type;
            typedef typename sample_type::type scalar_type;
            typedef typename sample_type::mem_manager_type mem_manager_type;

        public:
            test (
                const sample_matrix_type& samples_,
                const label_matrix_type& labels_,
                unsigned long num_sv_,
                bool verbose_
            ) : samples(samples_), labels(labels_), num_sv(num_sv_), verbose(verbose_)
            {
            }

            double operator() (
                double gamma
            ) const
            {
                // we are doing the optimization in log space so don't forget to
                // convert back to normal space
                gamma = std::exp(gamma);

                typedef radial_basis_kernel<sample_type> kernel_type;

                // Make a kcentroid and find out what the gap is at the current gamma.
                // Try to pick a reasonable tolerance.
                const double tolerance = std::min(gamma*0.01, 0.01);
                const kernel_type kern(gamma);
                kcentroid<kernel_type> kc(kern, tolerance, num_sv);
                scalar_type temp = centroid_gap(kc, samples, labels);

                if (verbose)
                {
                    std::cout << "\rChecking goodness of gamma = " << gamma << ".  Goodness = "
                              << temp << " " << std::flush;
                }

                return temp;
            }

            const sample_matrix_type& samples;
            const label_matrix_type& labels;
            unsigned long num_sv;
            bool verbose;
        };
        template <
            typename sample_matrix_type,
            typename label_matrix_type
            >
        double find_gamma_with_big_centroid_gap_impl (
            const sample_matrix_type& samples,
            const label_matrix_type& labels,
            double initial_gamma,
            unsigned long num_sv,
            bool verbose
        )
        {
            if (verbose)
            {
                std::cout << std::endl;
            }

            test<sample_matrix_type, label_matrix_type> funct(samples, labels, num_sv, verbose);
            double best_gamma = std::log(initial_gamma);
            double goodness = find_max_single_variable(funct, best_gamma, -15, 15, 1e-3, 100);

            if (verbose)
            {
                std::cout << "\rBest gamma = " << std::exp(best_gamma) << ".  Goodness = "
                          << goodness << " " << std::endl;
            }

            return std::exp(best_gamma);
        }
    }
// ----------------------------------------------------------------------------------------
    template <
        typename sample_matrix_type,
        typename label_matrix_type
        >
    double find_gamma_with_big_centroid_gap (
        const sample_matrix_type& samples,
        const label_matrix_type& labels,
        double initial_gamma = 0.1,
        unsigned long num_sv = 40
    )
    {
        DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels),
            "\t double find_gamma_with_big_centroid_gap()"
            << "\n\t initial_gamma: " << initial_gamma
            << "\n\t num_sv:        " << num_sv
            << "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels)
            );

        return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples),
                                                                            mat(labels),
                                                                            initial_gamma,
                                                                            num_sv,
                                                                            false);
    }
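
    /*
        A minimal usage sketch (not part of the library): feed the returned gamma
        into a radial_basis_kernel kcentroid and then rank features with it.  The
        tolerance and dictionary size below are illustrative guesses:

            typedef matrix<double,0,1> sample_type;
            typedef radial_basis_kernel<sample_type> kernel_type;

            const double gamma = find_gamma_with_big_centroid_gap(samples, labels);
            kcentroid<kernel_type> kc(kernel_type(gamma), 0.001, 25);
            matrix<double> ranks = rank_features(kc, samples, labels);
    */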
// ----------------------------------------------------------------------------------------
    template <
        typename sample_matrix_type,
        typename label_matrix_type
        >
    double verbose_find_gamma_with_big_centroid_gap (
        const sample_matrix_type& samples,
        const label_matrix_type& labels,
        double initial_gamma = 0.1,
        unsigned long num_sv = 40
    )
    {
        DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels),
            "\t double verbose_find_gamma_with_big_centroid_gap()"
            << "\n\t initial_gamma: " << initial_gamma
            << "\n\t num_sv:        " << num_sv
            << "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels)
            );

        return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples),
                                                                            mat(labels),
                                                                            initial_gamma,
                                                                            num_sv,
                                                                            true);
    }
// ----------------------------------------------------------------------------------------
    template <
        typename vector_type
        >
    double compute_mean_squared_distance (
        const vector_type& samples
    )
    {
        running_stats<double> rs;
        for (unsigned long i = 0; i < samples.size(); ++i)
        {
            for (unsigned long j = i+1; j < samples.size(); ++j)
            {
                rs.add(length_squared(samples[i] - samples[j]));
            }
        }

        return rs.mean();
    }
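
    /*
        One way to seed find_gamma_with_big_centroid_gap() is with the inverse of
        the mean squared distance between samples.  An illustrative heuristic, not
        a library recommendation:

            const double initial_gamma = 1.0/compute_mean_squared_distance(samples);
            const double gamma = find_gamma_with_big_centroid_gap(samples, labels, initial_gamma);
    */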
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_KERNEL_FEATURE_RANKINg_H_
