dlib C++ Library - optimization.h

// Copyright (C) 2008 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_OPTIMIZATIOn_H_
#define DLIB_OPTIMIZATIOn_H_
#include <cmath>
#include <limits>
#include "optimization_abstract.h"
#include "optimization_search_strategies.h"
#include "optimization_stop_strategies.h"
#include "optimization_line_search.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Functions that transform other functions 
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
 template <typename funct>
 class central_differences
 {
 public:
 central_differences(const funct& f_, double eps_ = 1e-7) : f(f_), eps(eps_){}
 template <typename T>
 typename T::matrix_type operator()(const T& x) const
 {
 // T must be some sort of dlib matrix 
 COMPILE_TIME_ASSERT(is_matrix<T>::value);
 typename T::matrix_type der(x.size());
 typename T::matrix_type e(x);
 for (long i = 0; i < x.size(); ++i)
 {
 const double old_val = e(i);
 e(i) += eps;
 const double delta_plus = f(e);
 e(i) = old_val - eps;
 const double delta_minus = f(e);
 der(i) = (delta_plus - delta_minus)/((old_val+eps)-(old_val-eps)); 
 // and finally restore the old value of this element
 e(i) = old_val;
 }
 return der;
 }
 template <typename T, typename U>
 typename U::matrix_type operator()(const T& item, const U& x) const
 {
 // U must be some sort of dlib matrix 
 COMPILE_TIME_ASSERT(is_matrix<U>::value);
 typename U::matrix_type der(x.size());
 typename U::matrix_type e(x);
 for (long i = 0; i < x.size(); ++i)
 {
 const double old_val = e(i);
 e(i) += eps;
 const double delta_plus = f(item,e);
 e(i) = old_val - eps;
 const double delta_minus = f(item,e);
 der(i) = (delta_plus - delta_minus)/((old_val+eps)-(old_val-eps)); 
 // and finally restore the old value of this element
 e(i) = old_val;
 }
 return der;
 }
 
 double operator()(const double& x) const
 {
 return (f(x+eps)-f(x-eps))/((x+eps)-(x-eps));
 }
 private:
 const funct& f;
 const double eps;
 };
 template <typename funct>
 const central_differences<funct> derivative(const funct& f) { return central_differences<funct>(f); }
 template <typename funct>
 const central_differences<funct> derivative(const funct& f, double eps) 
 { 
 DLIB_ASSERT (
 eps > 0,
 "\tcentral_differences derivative(f,eps)"
 << "\n\tYou must give an epsilon > 0"
 << "\n\teps: " << eps 
 );
 return central_differences<funct>(f,eps); 
 }
// ----------------------------------------------------------------------------------------
 template <typename funct, typename EXP1, typename EXP2>
 struct clamped_function_object
 {
 clamped_function_object(
 const funct& f_,
 const matrix_exp<EXP1>& x_lower_,
 const matrix_exp<EXP2>& x_upper_ 
 ) : f(f_), x_lower(x_lower_), x_upper(x_upper_)
 {
 }
 template <typename T>
 double operator() (
 const T& x
 ) const
 {
 return f(clamp(x,x_lower,x_upper));
 }
 
 const funct& f;
 const matrix_exp<EXP1>& x_lower;
 const matrix_exp<EXP2>& x_upper; 
 };
 template <typename funct, typename EXP1, typename EXP2>
 clamped_function_object<funct,EXP1,EXP2> clamp_function(
 const funct& f,
 const matrix_exp<EXP1>& x_lower,
 const matrix_exp<EXP2>& x_upper 
 ) { return clamped_function_object<funct,EXP1,EXP2>(f,x_lower,x_upper); }
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Functions that perform unconstrained optimization 
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
 template <
 typename search_strategy_type,
 typename stop_strategy_type,
 typename funct, 
 typename funct_der, 
 typename T
 >
 double find_min (
 search_strategy_type search_strategy,
 stop_strategy_type stop_strategy,
 const funct& f, 
 const funct_der& der, 
 T& x, 
 double min_f
 )
 {
 COMPILE_TIME_ASSERT(is_matrix<T>::value);
 // The starting point (i.e. x) must be a column vector. 
 COMPILE_TIME_ASSERT(T::NC <= 1);
 DLIB_CASSERT (
 is_col_vector(x),
 "\tdouble find_min()"
 << "\n\tYou have to supply column vectors to this function"
 << "\n\tx.nc(): " << x.nc()
 );
 T g, s;
 double f_value = f(x);
 g = der(x);
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 while(stop_strategy.should_continue_search(x, f_value, g) && f_value > min_f)
 {
 s = search_strategy.get_next_direction(x, f_value, g);
 double alpha = line_search(
 make_line_search_function(f,x,s, f_value),
 f_value,
 make_line_search_function(der,x,s, g),
 dot(g,s), // compute initial gradient for the line search
 search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), min_f,
 search_strategy.get_max_line_search_iterations());
 // Take the search step indicated by the above line search
 x += alpha*s;
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 }
 return f_value;
 }
// ----------------------------------------------------------------------------------------
 template <
 typename search_strategy_type,
 typename stop_strategy_type,
 typename funct, 
 typename funct_der, 
 typename T
 >
 double find_max (
 search_strategy_type search_strategy,
 stop_strategy_type stop_strategy,
 const funct& f, 
 const funct_der& der, 
 T& x, 
 double max_f
 )
 {
 COMPILE_TIME_ASSERT(is_matrix<T>::value);
 // The starting point (i.e. x) must be a column vector. 
 COMPILE_TIME_ASSERT(T::NC <= 1);
 DLIB_CASSERT (
 is_col_vector(x),
 "\tdouble find_max()"
 << "\n\tYou have to supply column vectors to this function"
 << "\n\tx.nc(): " << x.nc()
 );
 T g, s;
 // This function is basically just a copy of find_min() but with - put in the right places
 // to flip things around so that it ends up looking for the max rather than the min.
 double f_value = -f(x);
 g = -der(x);
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 while(stop_strategy.should_continue_search(x, f_value, g) && f_value > -max_f)
 {
 s = search_strategy.get_next_direction(x, f_value, g);
 double alpha = line_search(
 negate_function(make_line_search_function(f,x,s, f_value)),
 f_value,
 negate_function(make_line_search_function(der,x,s, g)),
 dot(g,s), // compute initial gradient for the line search
 search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), -max_f,
 search_strategy.get_max_line_search_iterations()
 );
 // Take the search step indicated by the above line search
 x += alpha*s;
 // Don't forget to negate these outputs from the line search since they are 
 // from the unnegated versions of f() and der()
 g *= -1;
 f_value *= -1;
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 // Gradient is zero, no more progress is possible. So stop.
 if (alpha == 0)
 break;
 }
 return -f_value;
 }
// ----------------------------------------------------------------------------------------
 template <
 typename search_strategy_type,
 typename stop_strategy_type,
 typename funct,
 typename T
 >
 double find_min_using_approximate_derivatives (
 search_strategy_type search_strategy,
 stop_strategy_type stop_strategy,
 const funct& f,
 T& x,
 double min_f,
 double derivative_eps = 1e-7
 )
 {
 COMPILE_TIME_ASSERT(is_matrix<T>::value);
 // The starting point (i.e. x) must be a column vector. 
 COMPILE_TIME_ASSERT(T::NC <= 1);
 DLIB_CASSERT (
 is_col_vector(x) && derivative_eps > 0,
 "\tdouble find_min_using_approximate_derivatives()"
 << "\n\tYou have to supply column vectors to this function"
 << "\n\tx.nc(): " << x.nc()
 << "\n\tderivative_eps: " << derivative_eps 
 );
 T g, s;
 double f_value = f(x);
 g = derivative(f,derivative_eps)(x);
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 while(stop_strategy.should_continue_search(x, f_value, g) && f_value > min_f)
 {
 s = search_strategy.get_next_direction(x, f_value, g);
 double alpha = line_search(
 make_line_search_function(f,x,s,f_value),
 f_value,
 derivative(make_line_search_function(f,x,s),derivative_eps),
 dot(g,s), // Sometimes the following line is a better way of determining the initial gradient. 
 //derivative(make_line_search_function(f,x,s),derivative_eps)(0),
 search_strategy.get_wolfe_rho(), search_strategy.get_wolfe_sigma(), min_f,
 search_strategy.get_max_line_search_iterations()
 );
 // Take the search step indicated by the above line search
 x += alpha*s;
 g = derivative(f,derivative_eps)(x);
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 }
 return f_value;
 }
// ----------------------------------------------------------------------------------------
 template <
 typename search_strategy_type,
 typename stop_strategy_type,
 typename funct,
 typename T
 >
 double find_max_using_approximate_derivatives (
 search_strategy_type search_strategy,
 stop_strategy_type stop_strategy,
 const funct& f,
 T& x,
 double max_f,
 double derivative_eps = 1e-7
 )
 {
 COMPILE_TIME_ASSERT(is_matrix<T>::value);
 // The starting point (i.e. x) must be a column vector. 
 COMPILE_TIME_ASSERT(T::NC <= 1);
 DLIB_CASSERT (
 is_col_vector(x) && derivative_eps > 0,
 "\tdouble find_max_using_approximate_derivatives()"
 << "\n\tYou have to supply column vectors to this function"
 << "\n\tx.nc(): " << x.nc()
 << "\n\tderivative_eps: " << derivative_eps 
 );
 // Just negate the necessary things and call the find_min version of this function.
 return -find_min_using_approximate_derivatives(
 search_strategy, 
 stop_strategy, 
 negate_function(f),
 x,
 -max_f,
 derivative_eps
 );
 }
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Functions for box constrained optimization
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
 template <typename T, typename U, typename V>
 T zero_bounded_variables (
 const double eps,
 T vect,
 const T& x,
 const T& gradient,
 const U& x_lower,
 const V& x_upper
 )
 {
 for (long i = 0; i < gradient.size(); ++i)
 {
 const double tol = eps*std::abs(x(i));
 // if x(i) is an active bound constraint
 if (x_lower(i)+tol >= x(i) && gradient(i) > 0)
 vect(i) = 0;
 else if (x_upper(i)-tol <= x(i) && gradient(i) < 0)
 vect(i) = 0;
 }
 return vect;
 }
// ----------------------------------------------------------------------------------------
 template <typename T, typename U, typename V>
 T gap_step_assign_bounded_variables (
 const double eps,
 T vect,
 const T& x,
 const T& gradient,
 const U& x_lower,
 const V& x_upper
 )
 {
 for (long i = 0; i < gradient.size(); ++i)
 {
 const double tol = eps*std::abs(x(i));
 // If x(i) is an active bound constraint then we should set its search
 // direction such that a single step along the direction either does nothing or
 // closes the gap of size tol before hitting the bound exactly.
 if (x_lower(i)+tol >= x(i) && gradient(i) > 0)
 vect(i) = x_lower(i)-x(i);
 else if (x_upper(i)-tol <= x(i) && gradient(i) < 0)
 vect(i) = x_upper(i)-x(i);
 }
 return vect;
 }
// ----------------------------------------------------------------------------------------
 template <
 typename search_strategy_type,
 typename stop_strategy_type,
 typename funct, 
 typename funct_der, 
 typename T,
 typename EXP1,
 typename EXP2
 >
 double find_min_box_constrained (
 search_strategy_type search_strategy,
 stop_strategy_type stop_strategy,
 const funct& f, 
 const funct_der& der, 
 T& x,
 const matrix_exp<EXP1>& x_lower,
 const matrix_exp<EXP2>& x_upper
 )
 {
 /*
 The implementation of this function is more or less based on the discussion in
 the paper Projected Newton-type Methods in Machine Learning by Mark Schmidt, et al.
 */
 // make sure the requires clause is not violated
 COMPILE_TIME_ASSERT(is_matrix<T>::value);
 // The starting point (i.e. x) must be a column vector. 
 COMPILE_TIME_ASSERT(T::NC <= 1);
 DLIB_CASSERT (
 is_col_vector(x) && is_col_vector(x_lower) && is_col_vector(x_upper) &&
 x.size() == x_lower.size() && x.size() == x_upper.size(),
 "\tdouble find_min_box_constrained()"
 << "\n\t The inputs to this function must be equal length column vectors."
 << "\n\t is_col_vector(x): " << is_col_vector(x)
 << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
 << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
 << "\n\t x.size(): " << x.size()
 << "\n\t x_lower.size(): " << x_lower.size()
 << "\n\t x_upper.size(): " << x_upper.size()
 );
 DLIB_ASSERT (
 min(x_upper-x_lower) >= 0,
 "\tdouble find_min_box_constrained()"
 << "\n\t You have to supply proper box constraints to this function."
 << "\n\r min(x_upper-x_lower): " << min(x_upper-x_lower)
 );
 T g, s;
 double f_value = f(x);
 g = der(x);
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 // gap_eps determines how close we have to get to a bound constraint before we
 // start basically dropping it from the optimization and consider it to be an
 // active constraint.
 const double gap_eps = 1e-8;
 double last_alpha = 1;
 while(stop_strategy.should_continue_search(x, f_value, g))
 {
 s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper));
 s = gap_step_assign_bounded_variables(gap_eps, s, x, g, x_lower, x_upper);
 double alpha = backtracking_line_search(
 make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value),
 f_value,
 dot(g,s), // compute gradient for the line search
 last_alpha, 
 search_strategy.get_wolfe_rho(), 
 search_strategy.get_max_line_search_iterations());
 // Do a trust region style thing for alpha. The idea is that if we take a
 // small step then we are likely to take another small step. So we reuse the
 // alpha from the last iteration unless the line search didn't shrink alpha at
 // all, in that case, we start with a bigger alpha next time.
 if (alpha == last_alpha)
 last_alpha = std::min(last_alpha*10,1.0);
 else
 last_alpha = alpha;
 // Take the search step indicated by the above line search
 x = dlib::clamp(x + alpha*s, x_lower, x_upper);
 g = der(x);
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 }
 return f_value;
 }
// ----------------------------------------------------------------------------------------
 template <
 typename search_strategy_type,
 typename stop_strategy_type,
 typename funct, 
 typename funct_der, 
 typename T
 >
 double find_min_box_constrained (
 search_strategy_type search_strategy,
 stop_strategy_type stop_strategy,
 const funct& f, 
 const funct_der& der, 
 T& x,
 double x_lower,
 double x_upper
 )
 {
 // The starting point (i.e. x) must be a column vector. 
 COMPILE_TIME_ASSERT(T::NC <= 1);
 typedef typename T::type scalar_type;
 return find_min_box_constrained(search_strategy,
 stop_strategy,
 f,
 der,
 x,
 uniform_matrix<scalar_type>(x.size(),1,x_lower),
 uniform_matrix<scalar_type>(x.size(),1,x_upper) );
 }
// ----------------------------------------------------------------------------------------
 template <
 typename search_strategy_type,
 typename stop_strategy_type,
 typename funct, 
 typename funct_der, 
 typename T,
 typename EXP1,
 typename EXP2
 >
 double find_max_box_constrained (
 search_strategy_type search_strategy,
 stop_strategy_type stop_strategy,
 const funct& f, 
 const funct_der& der, 
 T& x,
 const matrix_exp<EXP1>& x_lower,
 const matrix_exp<EXP2>& x_upper
 )
 {
 // make sure the requires clause is not violated
 COMPILE_TIME_ASSERT(is_matrix<T>::value);
 // The starting point (i.e. x) must be a column vector. 
 COMPILE_TIME_ASSERT(T::NC <= 1);
 DLIB_CASSERT (
 is_col_vector(x) && is_col_vector(x_lower) && is_col_vector(x_upper) &&
 x.size() == x_lower.size() && x.size() == x_upper.size(),
 "\tdouble find_max_box_constrained()"
 << "\n\t The inputs to this function must be equal length column vectors."
 << "\n\t is_col_vector(x): " << is_col_vector(x)
 << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
 << "\n\t is_col_vector(x_upper): " << is_col_vector(x_upper)
 << "\n\t x.size(): " << x.size()
 << "\n\t x_lower.size(): " << x_lower.size()
 << "\n\t x_upper.size(): " << x_upper.size()
 );
 DLIB_ASSERT (
 min(x_upper-x_lower) >= 0,
 "\tdouble find_max_box_constrained()"
 << "\n\t You have to supply proper box constraints to this function."
 << "\n\r min(x_upper-x_lower): " << min(x_upper-x_lower)
 );
 // This function is basically just a copy of find_min_box_constrained() but with - put 
 // in the right places to flip things around so that it ends up looking for the max
 // rather than the min.
 T g, s;
 double f_value = -f(x);
 g = -der(x);
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 // gap_eps determines how close we have to get to a bound constraint before we
 // start basically dropping it from the optimization and consider it to be an
 // active constraint.
 const double gap_eps = 1e-8;
 double last_alpha = 1;
 while(stop_strategy.should_continue_search(x, f_value, g))
 {
 s = search_strategy.get_next_direction(x, f_value, zero_bounded_variables(gap_eps, g, x, g, x_lower, x_upper));
 s = gap_step_assign_bounded_variables(gap_eps, s, x, g, x_lower, x_upper);
 double alpha = backtracking_line_search(
 negate_function(make_line_search_function(clamp_function(f,x_lower,x_upper), x, s, f_value)),
 f_value,
 dot(g,s), // compute gradient for the line search
 last_alpha, 
 search_strategy.get_wolfe_rho(), 
 search_strategy.get_max_line_search_iterations());
 // Do a trust region style thing for alpha. The idea is that if we take a
 // small step then we are likely to take another small step. So we reuse the
 // alpha from the last iteration unless the line search didn't shrink alpha at
 // all, in that case, we start with a bigger alpha next time.
 if (alpha == last_alpha)
 last_alpha = std::min(last_alpha*10,1.0);
 else
 last_alpha = alpha;
 // Take the search step indicated by the above line search
 x = dlib::clamp(x + alpha*s, x_lower, x_upper);
 g = -der(x);
 // Don't forget to negate the output from the line search since it is from the
 // unnegated version of f() 
 f_value *= -1;
 if (!is_finite(f_value))
 throw error("The objective function generated non-finite outputs");
 if (!is_finite(g))
 throw error("The objective function generated non-finite outputs");
 }
 return -f_value;
 }
// ----------------------------------------------------------------------------------------
 template <
 typename search_strategy_type,
 typename stop_strategy_type,
 typename funct, 
 typename funct_der, 
 typename T
 >
 double find_max_box_constrained (
 search_strategy_type search_strategy,
 stop_strategy_type stop_strategy,
 const funct& f, 
 const funct_der& der, 
 T& x,
 double x_lower,
 double x_upper
 )
 {
 // The starting point (i.e. x) must be a column vector. 
 COMPILE_TIME_ASSERT(T::NC <= 1);
 typedef typename T::type scalar_type;
 return find_max_box_constrained(search_strategy,
 stop_strategy,
 f,
 der,
 x,
 uniform_matrix<scalar_type>(x.size(),1,x_lower),
 uniform_matrix<scalar_type>(x.size(),1,x_upper) );
 }
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATIOn_H_