// dlib C++ Library - lspi.cpp

// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#include "tester.h"
#include <dlib/control.h>
#include <vector>
#include <sstream>
#include <ctime>
namespace 
{
 // Bring the test, dlib, and std namespaces into scope for everything in this
 // file-local (anonymous) namespace.
 using namespace test;
 using namespace dlib;
 using namespace std;
 // Logger used by the tests below; it is constructed with the name "test.lspi".
 dlib::logger dlog("test.lspi");
 // Feature model for a small 4-state chain, used to drive the lspi tests below.
 //
 // Each (state, action) pair owns one indicator feature at index state*2 + action.
 // When have_prior is true an extra trailing feature carries a fixed per-pair
 // value taken from `offset`, and force_last_weight_to_1 is set so that value is
 // used as-is.  When have_prior is false the offsets are all zero and the
 // trailing feature does not exist.
 template <bool have_prior>
 struct chain_model
 {
     typedef int state_type;
     typedef int action_type; // 0 is move left, 1 is move right

     const static bool force_last_weight_to_1 = have_prior;
     const static int num_states = 4; // not required in the model interface

     // Prior value for each (state, action) pair, indexed by state*2 + action.
     matrix<double,8,1> offset;

     chain_model()
     {
         if (have_prior)
         {
             // NOTE(review): each entry is 0.8 times its right-hand neighbour's
             // best value, which looks like the chain's optimal Q-values under a
             // 0.8 discount -- confirm against the trainer's discount setting.
             offset = 2.048, 2.56,   // state 0: left, right
                      2.048, 3.2,    // state 1: left, right
                      2.56,  4,      // state 2: left, right
                      3.2,   5;      // state 3: left, right
         }
         else
         {
             // No prior knowledge: every offset is zero.
             offset = 0;
         }
     }

     // Number of entries in the feature vector produced by get_features().
     unsigned long num_features() const
     {
         // One indicator per (state, action) pair, plus one slot for the prior
         // when it is enabled.
         const unsigned long indicator_count = num_states*2;
         return have_prior ? indicator_count + 1 : indicator_count;
     }

     // Returns the action (0 or 1) with the larger weight-plus-offset score in
     // the given state.  Ties go to action 0.
     action_type find_best_action (
         const state_type& state,
         const matrix<double,0,1>& w
     ) const
     {
         const long left = state*2;
         const long right = left + 1;
         // The offsets are added to the weights here; without a prior they are
         // all zero, so this reduces to comparing the weights directly.
         const bool prefer_left = w(left)+offset(left) >= w(right)+offset(right);
         return prefer_left ? 0 : 1;
     }

     // Fills feats with the feature vector for taking `action` in `state`:
     // all zeros except a 1 at the pair's indicator slot and, when the prior is
     // enabled, the pair's offset in the last slot.
     void get_features (
         const state_type& state,
         const action_type& action,
         matrix<double,0,1>& feats
     ) const
     {
         const unsigned long dims = num_features();
         const long active = state*2 + action;

         feats.set_size(dims);
         feats = 0;
         feats(active) = 1;
         if (have_prior)
             feats(dims-1) = offset(active);
     }
 };
 void test_lspi_prior1()
 {
 print_spinner();
 typedef process_sample<chain_model<true> > sample_type;
 std::vector<sample_type> samples;
 samples.push_back(sample_type(0,0,0,0));
 samples.push_back(sample_type(0,1,1,0));
 samples.push_back(sample_type(1,0,0,0));
 samples.push_back(sample_type(1,1,2,0));
 samples.push_back(sample_type(2,0,1,0));
 samples.push_back(sample_type(2,1,3,0));
 samples.push_back(sample_type(3,0,2,0));
 samples.push_back(sample_type(3,1,3,1));
 lspi<chain_model<true> > trainer;
 //trainer.be_verbose();
 trainer.set_lambda(0);
 policy<chain_model<true> > pol = trainer.train(samples);
 dlog << LINFO << pol.get_weights();
 matrix<double,0,1> w = pol.get_weights();
 DLIB_TEST(pol.get_weights().size() == 9);
 DLIB_TEST(w(w.size()-1) == 1);
 w(w.size()-1) = 0;
 DLIB_TEST_MSG(length(w) < 1e-12, length(w));
 dlog << LINFO << "action: " << pol(0);
 dlog << LINFO << "action: " << pol(1);
 dlog << LINFO << "action: " << pol(2);
 dlog << LINFO << "action: " << pol(3);
 DLIB_TEST(pol(0) == 1);
 DLIB_TEST(pol(1) == 1);
 DLIB_TEST(pol(2) == 1);
 DLIB_TEST(pol(3) == 1);
 }
 void test_lspi_prior2()
 {
 print_spinner();
 typedef process_sample<chain_model<true> > sample_type;
 std::vector<sample_type> samples;
 samples.push_back(sample_type(0,0,0,0));
 samples.push_back(sample_type(0,1,1,0));
 samples.push_back(sample_type(1,0,0,0));
 samples.push_back(sample_type(1,1,2,0));
 samples.push_back(sample_type(2,0,1,0));
 samples.push_back(sample_type(2,1,3,1));
 samples.push_back(sample_type(3,0,2,0));
 samples.push_back(sample_type(3,1,3,0));
 lspi<chain_model<true> > trainer;
 //trainer.be_verbose();
 trainer.set_lambda(0);
 policy<chain_model<true> > pol = trainer.train(samples);
 dlog << LINFO << "action: " << pol(0);
 dlog << LINFO << "action: " << pol(1);
 dlog << LINFO << "action: " << pol(2);
 dlog << LINFO << "action: " << pol(3);
 DLIB_TEST(pol(0) == 1);
 DLIB_TEST(pol(1) == 1);
 DLIB_TEST(pol(2) == 1);
 DLIB_TEST(pol(3) == 0);
 }
 void test_lspi_noprior1()
 {
 print_spinner();
 typedef process_sample<chain_model<false> > sample_type;
 std::vector<sample_type> samples;
 samples.push_back(sample_type(0,0,0,0));
 samples.push_back(sample_type(0,1,1,0));
 samples.push_back(sample_type(1,0,0,0));
 samples.push_back(sample_type(1,1,2,0));
 samples.push_back(sample_type(2,0,1,0));
 samples.push_back(sample_type(2,1,3,0));
 samples.push_back(sample_type(3,0,2,0));
 samples.push_back(sample_type(3,1,3,1));
 lspi<chain_model<false> > trainer;
 //trainer.be_verbose();
 trainer.set_lambda(0.01);
 policy<chain_model<false> > pol = trainer.train(samples);
 dlog << LINFO << pol.get_weights();
 DLIB_TEST(pol.get_weights().size() == 8);
 dlog << LINFO << "action: " << pol(0);
 dlog << LINFO << "action: " << pol(1);
 dlog << LINFO << "action: " << pol(2);
 dlog << LINFO << "action: " << pol(3);
 DLIB_TEST(pol(0) == 1);
 DLIB_TEST(pol(1) == 1);
 DLIB_TEST(pol(2) == 1);
 DLIB_TEST(pol(3) == 1);
 }
 void test_lspi_noprior2()
 {
 print_spinner();
 typedef process_sample<chain_model<false> > sample_type;
 std::vector<sample_type> samples;
 samples.push_back(sample_type(0,0,0,0));
 samples.push_back(sample_type(0,1,1,0));
 samples.push_back(sample_type(1,0,0,0));
 samples.push_back(sample_type(1,1,2,1));
 samples.push_back(sample_type(2,0,1,0));
 samples.push_back(sample_type(2,1,3,0));
 samples.push_back(sample_type(3,0,2,0));
 samples.push_back(sample_type(3,1,3,0));
 lspi<chain_model<false> > trainer;
 //trainer.be_verbose();
 trainer.set_lambda(0.01);
 policy<chain_model<false> > pol = trainer.train(samples);
 dlog << LINFO << pol.get_weights();
 DLIB_TEST(pol.get_weights().size() == 8);
 dlog << LINFO << "action: " << pol(0);
 dlog << LINFO << "action: " << pol(1);
 dlog << LINFO << "action: " << pol(2);
 dlog << LINFO << "action: " << pol(3);
 DLIB_TEST(pol(0) == 1);
 DLIB_TEST(pol(1) == 1);
 DLIB_TEST(pol(2) == 0);
 DLIB_TEST(pol(3) == 0);
 }
 // Test-suite entry for the lspi checks: forwards the test's command line name
 // and description to the tester base class and runs every lspi test in turn.
 class lspi_tester : public tester
 {
 public:
     lspi_tester () :
         tester ("test_lspi",                     // the command line argument name for this test
                 "Run tests on the lspi object.", // the command line argument description
                 0)                               // the number of command line arguments for this test
     {}

     // Runs the prior-based tests first, then the tests without a prior.
     void perform_test ()
     {
         test_lspi_prior1();
         test_lspi_prior2();
         test_lspi_noprior1();
         test_lspi_noprior2();
     }
 };

 // The single file-local instance of the tester.
 // NOTE(review): presumably constructing it is what registers the test with the
 // test driver -- confirm in tester.h.
 lspi_tester a;
}