// Copyright (C) 2012 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include <dlib/svm.h>
#include <dlib/rand.h>
#include <dlib/statistics.h>
#include "tester.h"
namespace 
{
 using namespace test;
 using namespace dlib;
 using namespace std;
 logger dlog("test.svm_c_linear_dcd");
// ----------------------------------------------------------------------------------------
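 // Test the dual coordinate descent (DCD) trainer against the cutting-plane
 // trainer (svm_c_linear_trainer) on randomly generated sparse samples. Both
 // solve the same convex C-SVM problem, so as samples accumulate the two
 // trainers should agree on the learned weights and bias to high precision.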
 void test_sparse()
 {
 typedef std::map<unsigned long,double> sample_type;
 typedef sparse_linear_kernel<sample_type> kernel_type;
 svm_c_linear_trainer<kernel_type> linear_trainer_cpa;
 svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
 svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
 const double C = 0.2;
 linear_trainer.set_epsilon(1e-10);
 linear_trainer_cpa.set_epsilon(1e-10);
 linear_trainer_cpa.set_relative_epsilon(1e-10);
 std::vector<sample_type> samples;
 std::vector<double> labels;
 // make an instance of a sample vector so we can use it below
 sample_type sample;
 decision_function<kernel_type> df, df2, df3;
 dlib::rand rnd;
 // Now let's go into a loop and randomly generate 100 samples.
 double label = +1;
 for (int i = 0; i < 100; ++i)
 {
 // alternate the label between +1 and -1
 label *= -1;
 sample.clear();
 // now make a random sparse sample with at most 5 non-zero elements
 for (int j = 0; j < 5; ++j)
 {
 int idx = rnd.get_random_32bit_number()%10;
 double value = rnd.get_random_double();
 sample[idx] = label*value;
 }
 // Also save the samples we are generating so we can let the svm_c_linear_trainer
 // learn from them below. 
 samples.push_back(sample);
 labels.push_back(label);
 if (samples.size() > 1)
 {
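 // The two trainers normalize C differently. Judging by the scaling used
 // throughout this test, svm_c_linear_trainer's C multiplies the average
 // loss while the DCD trainer's C multiplies each sample's loss, so the
 // DCD C is divided by the number of samples to make the objectives match.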
 linear_trainer_cpa.set_c_class1(C);
 linear_trainer_cpa.set_c_class2(1.5*C);
 linear_trainer.set_c_class1(C/samples.size());
 linear_trainer.set_c_class2(1.5*C/samples.size());
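 // Train three ways: a DCD run warm-started from the persistent "state"
 // object (per the dlib docs, this lets the solver resume from the previous
 // call as new samples are appended), the cutting-plane trainer as a
 // reference, and a cold-start DCD run. All three should produce
 // essentially the same decision function.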
 df = linear_trainer.train(samples, labels, state);
 df2 = linear_trainer_cpa.train(samples, labels);
 df3 = linear_trainer.train(samples, labels);
 DLIB_TEST_MSG( dlib::distance(df.basis_vectors(0), df2.basis_vectors(0)) < 1e-8, dlib::distance(df.basis_vectors(0), df2.basis_vectors(0)));
 DLIB_TEST( std::abs(df.b - df2.b) < 1e-8);
 DLIB_TEST( dlib::distance(df.basis_vectors(0), df3.basis_vectors(0)) < 1e-8);
 DLIB_TEST( std::abs(df.b - df3.b) < 1e-8);
 }
 }
 }
// ----------------------------------------------------------------------------------------
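 // Same comparison as test_sparse, but with dense column-vector samples and
 // with the DCD trainer's automatic bias disabled. The bias is instead
 // simulated by appending a constant -1 feature (element 9), so the DCD
 // trainer's last learned weight should match the cutting-plane trainer's
 // bias term b.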
 void test_normal_no_bias()
 {
 typedef matrix<double,10,1> sample_type;
 typedef linear_kernel<sample_type> kernel_type;
 svm_c_linear_trainer<kernel_type> linear_trainer_cpa;
 svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
 svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
 const double C = 1.0;
 linear_trainer.set_epsilon(1e-10);
 linear_trainer_cpa.set_epsilon(1e-10);
 linear_trainer_cpa.set_relative_epsilon(1e-10);
 linear_trainer.include_bias(false);
 std::vector<sample_type> samples, samples_explicit_bias;
 std::vector<double> labels;
 // make an instance of a sample vector so we can use it below
 sample_type sample;
 decision_function<kernel_type> df, df2, df3;
 dlib::rand rnd;
 // Now let's go into a loop and randomly generate 100 samples.
 double label = +1;
 for (int i = 0; i < 100; ++i)
 {
 // alternate the label between +1 and -1
 label *= -1;
 sample = 0;
 // now make a random sample with at most 5 non-zero elements, leaving
 // element 9 free for the explicit bias term added below
 for (int j = 0; j < 5; ++j)
 {
 int idx = rnd.get_random_32bit_number()%9;
 double value = rnd.get_random_double();
 sample(idx) = label*value;
 }
 // Also save the samples we are generating so we can let the svm_c_linear_trainer
 // learn from them below. 
 samples.push_back(sample);
 labels.push_back(label);
 // simulate a bias term with a constant -1 feature in the last element
 sample(9) = -1;
 samples_explicit_bias.push_back(sample);
 if (samples.size() > 1)
 {
 linear_trainer_cpa.set_c_class1(C);
 linear_trainer_cpa.set_c_class2(1.5*C);
 linear_trainer.set_c_class1(C/samples.size());
 linear_trainer.set_c_class2(1.5*C/samples.size());
 df = linear_trainer.train(samples_explicit_bias, labels, state);
 df2 = linear_trainer_cpa.train(samples, labels);
 df3 = linear_trainer.train(samples_explicit_bias, labels);
 DLIB_TEST( std::abs(df2.basis_vectors(0)(9)) < 1e-7);
 DLIB_TEST_MSG( max(abs(colm(df.basis_vectors(0),0,9) - colm(df2.basis_vectors(0),0,9))) < 1e-6, max(abs(colm(df.basis_vectors(0),0,9) - colm(df2.basis_vectors(0),0,9))));
 DLIB_TEST( std::abs(df.basis_vectors(0)(9) - df2.b) < 1e-6);
 DLIB_TEST( max(abs(df.basis_vectors(0) - df3.basis_vectors(0))) < 1e-6);
 DLIB_TEST( std::abs(df.b - df3.b) < 1e-7);
 }
 }
 }
// ----------------------------------------------------------------------------------------
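 // Dense version of test_sparse: the DCD trainer (warm-started and
 // cold-started) and the cutting-plane trainer should all agree on the
 // weights and bias as the training set grows.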
 void test_normal()
 {
 typedef matrix<double,10,1> sample_type;
 typedef linear_kernel<sample_type> kernel_type;
 svm_c_linear_trainer<kernel_type> linear_trainer_cpa;
 svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
 svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
 const double C = 1;
 linear_trainer.set_epsilon(1e-10);
 linear_trainer_cpa.set_epsilon(1e-10);
 linear_trainer_cpa.set_relative_epsilon(1e-10);
 std::vector<sample_type> samples;
 std::vector<double> labels;
 // make an instance of a sample vector so we can use it below
 sample_type sample;
 decision_function<kernel_type> df, df2, df3;
 dlib::rand rnd;
 // Now let's go into a loop and randomly generate 100 samples.
 double label = +1;
 for (int i = 0; i < 100; ++i)
 {
 // alternate the label between +1 and -1
 label *= -1;
 sample = 0;
 // now make a random sample with at most 5 non-zero elements
 for (int j = 0; j < 5; ++j)
 {
 int idx = rnd.get_random_32bit_number()%10;
 double value = rnd.get_random_double();
 sample(idx) = label*value;
 }
 // Also save the samples we are generating so we can let the svm_c_linear_trainer
 // learn from them below. 
 samples.push_back(sample);
 labels.push_back(label);
 if (samples.size() > 1)
 {
 linear_trainer_cpa.set_c_class1(C);
 linear_trainer_cpa.set_c_class2(1.5*C);
 linear_trainer.set_c_class1(C/samples.size());
 linear_trainer.set_c_class2(1.5*C/samples.size());
 df = linear_trainer.train(samples, labels, state);
 df2 = linear_trainer_cpa.train(samples, labels);
 df3 = linear_trainer.train(samples, labels);
 DLIB_TEST_MSG( max(abs(df.basis_vectors(0) - df2.basis_vectors(0))) < 1e-7, max(abs(df.basis_vectors(0) - df2.basis_vectors(0))));
 DLIB_TEST( std::abs(df.b - df2.b) < 1e-7);
 DLIB_TEST( max(abs(df.basis_vectors(0) - df3.basis_vectors(0))) < 1e-7);
 DLIB_TEST( std::abs(df.b - df3.b) < 1e-7);
 }
 }
 }
// ----------------------------------------------------------------------------------------
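 // Exercise force_last_weight_to_1() in all four combinations with
 // include_bias(). When the last weight is forced, the learned w must have
 // w(9) == 1 and b == 0, and must still match the cutting-plane trainer.
 // The data is built to be separable, so the accumulated statistics check
 // that, across all rounds, the smallest functional margin
 // labels[k]*df(samples[k]) is 1 to within 1e-7, i.e. no training sample
 // ever violates the margin.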
 void test_normal_force_last_weight(bool have_bias, bool force_weight)
 {
 typedef matrix<double,10,1> sample_type;
 dlog << LINFO << "have_bias: "<< have_bias << " force_weight: "<< force_weight;
 typedef linear_kernel<sample_type> kernel_type;
 svm_c_linear_trainer<kernel_type> linear_trainer_cpa;
 svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
 svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
 const double C = 1;
 linear_trainer.set_epsilon(1e-10);
 linear_trainer_cpa.set_epsilon(1e-11);
 linear_trainer_cpa.set_relative_epsilon(1e-11);
 linear_trainer_cpa.force_last_weight_to_1(force_weight);
 linear_trainer.force_last_weight_to_1(force_weight);
 linear_trainer.include_bias(have_bias);
 std::vector<sample_type> samples;
 std::vector<double> labels;
 // make an instance of a sample vector so we can use it below
 sample_type sample;
 decision_function<kernel_type> df, df2;
 running_stats<double> rs;
 dlib::rand rnd;
 // Now let's go into a loop and randomly generate 40 samples.
 double label = +1;
 for (int i = 0; i < 40; ++i)
 {
 // alternate the label between +1 and -1
 label *= -1;
 sample = 0;
 // now make a random sample with at most 5 non-zero elements (element 9
 // is reserved for the constant feature set just below)
 for (int j = 0; j < 5; ++j)
 {
 int idx = rnd.get_random_32bit_number()%9;
 double value = rnd.get_random_double();
 sample(idx) = label*value + label;
 }
 sample(9) = 4;
 // Also save the samples we are generating so we can let the svm_c_linear_trainer
 // learn from them below. 
 samples.push_back(sample);
 labels.push_back(label);
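 // Same C-convention rescaling as in the tests above, written from the
 // other direction: here the cutting-plane trainer's C is multiplied by
 // the sample count instead of dividing the DCD trainer's C.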
 linear_trainer.set_c(C);
 linear_trainer_cpa.set_c(C*samples.size());
 df = linear_trainer.train(samples, labels, state);
 if (force_weight)
 {
 DLIB_TEST(std::abs(df.basis_vectors(0)(9) - 1) < 1e-8);
 DLIB_TEST(std::abs(df.b) < 1e-8);
 if (samples.size() > 1)
 {
 df2 = linear_trainer_cpa.train(samples, labels);
 DLIB_TEST_MSG( max(abs(df.basis_vectors(0) - df2.basis_vectors(0))) < 1e-7, max(abs(df.basis_vectors(0) - df2.basis_vectors(0))));
 DLIB_TEST( std::abs(df.b - df2.b) < 1e-7);
 }
 }
 if (!have_bias)
 DLIB_TEST(std::abs(df.b) < 1e-8);
 for (unsigned long k = 0; k < samples.size(); ++k)
 {
 //cout << "pred: "<< labels[k]*df(samples[k]) << endl;
 rs.add(labels[k]*df(samples[k]));
 }
 }
 DLIB_TEST_MSG(std::abs(rs.min()-1) < 1e-7, std::abs(rs.min()-1));
 }
// ----------------------------------------------------------------------------------------
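 // Degenerate case: a single training sample, with the last weight forced
 // to 1 and no bias. For the positive label, the fixed weight w(9) = 1
 // already gives a margin of 4 >= 1, so the free weights stay at zero and
 // df(x) = 4. For the negative label, the optimizer must grow the free
 // weights until the margin constraint is active, which puts the sample
 // exactly on the margin with df(x) = -1. Training is repeated several
 // times to check the result is stable.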
 void test_normal_1_sample(double label)
 {
 typedef matrix<double,10,1> sample_type;
 typedef linear_kernel<sample_type> kernel_type;
 svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
 svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
 const double C = 10;
 linear_trainer.set_epsilon(1e-10);
 linear_trainer.set_c(C);
 linear_trainer.force_last_weight_to_1(true);
 linear_trainer.include_bias(false);
 std::vector<sample_type> samples;
 std::vector<double> labels;
 // make an instance of a sample vector so we can use it below
 sample_type sample;
 sample = 0;
 sample(0) = -1;
 sample(1) = -1;
 sample(9) = 4;
 samples.push_back(sample);
 labels.push_back(label);
 for (int i = 0; i < 4; ++i)
 {
 decision_function<kernel_type> df;
 df = linear_trainer.train(samples, labels);
 if (label > 0)
 {
 DLIB_TEST(std::abs(df(samples[0])-4) < 1e-8);
 }
 else
 {
 DLIB_TEST(std::abs(df(samples[0])+1) < 1e-8);
 }
 }
 }
// ----------------------------------------------------------------------------------------
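 // Sparse counterpart of test_normal_1_sample. Each round appends another
 // copy of the same sample; the expected outputs are unchanged.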
 void test_sparse_1_sample(double label)
 {
 typedef std::vector<std::pair<unsigned long,double> > sample_type;
 typedef sparse_linear_kernel<sample_type> kernel_type;
 svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
 svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
 const double C = 10;
 linear_trainer.set_epsilon(1e-10);
 linear_trainer.set_c(C);
 linear_trainer.force_last_weight_to_1(true);
 linear_trainer.include_bias(false);
 std::vector<sample_type> samples;
 std::vector<double> labels;
 // make an instance of a sample vector so we can use it below
 sample_type sample;
 sample.push_back(make_pair(0,-1));
 sample.push_back(make_pair(1,1));
 sample.push_back(make_pair(9,4));
 for (int i = 0; i < 4; ++i)
 {
 samples.push_back(sample);
 labels.push_back(label);
 decision_function<kernel_type> df;
 df = linear_trainer.train(samples, labels);
 if (label > 0)
 {
 DLIB_TEST(std::abs(df(samples[0])-4) < 1e-8);
 }
 else
 {
 DLIB_TEST(std::abs(df(samples[0])+1) < 1e-8);
 }
 }
 }
// ----------------------------------------------------------------------------------------
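 // A larger randomized sanity check: train the DCD trainer on 1000 random
 // sparse samples in a 100-dimensional feature space and verify that the
 // resulting decision function classifies a few hand-built positive and
 // negative samples correctly.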
 void test_l2_version ()
 {
 typedef std::map<unsigned long,double> sample_type;
 typedef sparse_linear_kernel<sample_type> kernel_type;
 svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
 linear_trainer.set_c(10);
 linear_trainer.set_epsilon(1e-5);
 std::vector<sample_type> samples;
 std::vector<double> labels;
 // make an instance of a sample vector so we can use it below
 sample_type sample;
 // Now let's go into a loop and randomly generate 1000 samples.
 double label = +1;
 for (int i = 0; i < 1000; ++i)
 {
 // alternate the label between +1 and -1
 label *= -1;
 sample.clear();
 // now make a random sparse sample with at most 10 non-zero elements
 for (int j = 0; j < 10; ++j)
 {
 int idx = std::rand()%100;
 double value = static_cast<double>(std::rand())/RAND_MAX;
 sample[idx] = label*value;
 }
 // Save the samples and labels so we can train on them below.
 samples.push_back(sample);
 labels.push_back(label);
 }
 decision_function<kernel_type> df = linear_trainer.train(samples, labels);
 sample.clear();
 sample[4] = 0.3;
 sample[10] = 0.9;
 DLIB_TEST(df(sample) > 0);
 sample.clear();
 sample[83] = -0.3;
 sample[26] = -0.9;
 sample[58] = -0.7;
 DLIB_TEST(df(sample) < 0);
 sample.clear();
 sample[0] = -0.2;
 sample[9] = -0.8;
 DLIB_TEST(df(sample) < 0);
 }
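// ----------------------------------------------------------------------------------------
 // This object registers the tests above with dlib's test harness: the
 // tester base class constructor adds the test to the global list, and
 // perform_test() runs each test function, printing a spinner between them.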
 class tester_svm_c_linear_dcd : public tester
 {
 public:
 tester_svm_c_linear_dcd (
 ) :
 tester ("test_svm_c_linear_dcd",
 "Runs tests on the svm_c_linear_dcd_trainer.")
 {}
 void perform_test (
 )
 {
 test_normal();
 print_spinner();
 test_normal_no_bias();
 print_spinner();
 test_sparse();
 print_spinner();
 test_normal_force_last_weight(false,false);
 print_spinner();
 test_normal_force_last_weight(false,true);
 print_spinner();
 test_normal_force_last_weight(true,false);
 print_spinner();
 test_normal_force_last_weight(true,true);
 print_spinner();
 test_normal_1_sample(+1);
 print_spinner();
 test_normal_1_sample(-1);
 print_spinner();
 test_sparse_1_sample(+1);
 print_spinner();
 test_sparse_1_sample(-1);
 print_spinner();
 test_l2_version();
 }
 } a;
}
