diff options
Diffstat (limited to 'methods/lbfgs/lbfgs.h')
-rw-r--r-- | methods/lbfgs/lbfgs.h | 385 |
1 files changed, 385 insertions, 0 deletions
diff --git a/methods/lbfgs/lbfgs.h b/methods/lbfgs/lbfgs.h new file mode 100644 index 0000000..7be84a7 --- /dev/null +++ b/methods/lbfgs/lbfgs.h @@ -0,0 +1,385 @@ +#ifndef __LBFGS_H__ +#define __LBFGS_H__ + +#ifndef LBFGS_FLOAT +#define LBFGS_FLOAT 64 +#endif/*LBFGS_FLOAT*/ + +/* + * Activate optimization routines for IEEE754 floating point values. + */ +#ifndef LBFGS_IEEE_FLOAT +#define LBFGS_IEEE_FLOAT 1 +#endif/*LBFGS_IEEE_FLOAT*/ + +#if LBFGS_FLOAT == 32 +typedef float lbfgsfloatval_t; + +#elif LBFGS_FLOAT == 64 +typedef double lbfgsfloatval_t; + +#else +#error "libLBFGS supports single (float; LBFGS_FLOAT = 32) or double (double; LBFGS_FLOAT=64) precision only." + +#endif + + +/** + * \addtogroup liblbfgs_api libLBFGS API + * @{ + * + * The libLBFGS API. + */ + +/** + * Return values of lbfgs(). + * + * Roughly speaking, a negative value indicates an error. + */ +enum { + /** L-BFGS reaches convergence. */ + LBFGS_SUCCESS = 0, + LBFGS_CONVERGENCE = 0, + LBFGS_STOP, + /** The initial variables already minimize the objective function. */ + LBFGS_ALREADY_MINIMIZED, + + /** Unknown error. */ + LBFGSERR_UNKNOWNERROR = -1024, + /** Logic error. */ + LBFGSERR_LOGICERROR, + /** Insufficient memory. */ + LBFGSERR_OUTOFMEMORY, + /** The minimization process has been canceled. */ + LBFGSERR_CANCELED, + /** Invalid number of variables specified. */ + LBFGSERR_INVALID_N, + /** Invalid number of variables (for SSE) specified. */ + LBFGSERR_INVALID_N_SSE, + /** The array x must be aligned to 16 (for SSE). */ + LBFGSERR_INVALID_X_SSE, + /** Invalid parameter lbfgs_parameter_t::epsilon specified. */ + LBFGSERR_INVALID_EPSILON, + /** Invalid parameter lbfgs_parameter_t::past specified. */ + LBFGSERR_INVALID_TESTPERIOD, + /** Invalid parameter lbfgs_parameter_t::delta specified. */ + LBFGSERR_INVALID_DELTA, + /** Invalid parameter lbfgs_parameter_t::linesearch specified. */ + LBFGSERR_INVALID_LINESEARCH, + /** Invalid parameter lbfgs_parameter_t::max_step specified. */ + LBFGSERR_INVALID_MINSTEP, + /** Invalid parameter lbfgs_parameter_t::max_step specified. */ + LBFGSERR_INVALID_MAXSTEP, + /** Invalid parameter lbfgs_parameter_t::ftol specified. */ + LBFGSERR_INVALID_FTOL, + /** Invalid parameter lbfgs_parameter_t::wolfe specified. */ + LBFGSERR_INVALID_WOLFE, + /** Invalid parameter lbfgs_parameter_t::gtol specified. */ + LBFGSERR_INVALID_GTOL, + /** Invalid parameter lbfgs_parameter_t::xtol specified. */ + LBFGSERR_INVALID_XTOL, + /** Invalid parameter lbfgs_parameter_t::max_linesearch specified. */ + LBFGSERR_INVALID_MAXLINESEARCH, + /** Invalid parameter lbfgs_parameter_t::orthantwise_c specified. */ + LBFGSERR_INVALID_ORTHANTWISE, + /** Invalid parameter lbfgs_parameter_t::orthantwise_start specified. */ + LBFGSERR_INVALID_ORTHANTWISE_START, + /** Invalid parameter lbfgs_parameter_t::orthantwise_end specified. */ + LBFGSERR_INVALID_ORTHANTWISE_END, + /** The line-search step went out of the interval of uncertainty. */ + LBFGSERR_OUTOFINTERVAL, + /** A logic error occurred; alternatively, the interval of uncertainty + became too small. */ + LBFGSERR_INCORRECT_TMINMAX, + /** A rounding error occurred; alternatively, no line-search step + satisfies the sufficient decrease and curvature conditions. */ + LBFGSERR_ROUNDING_ERROR, + /** The line-search step became smaller than lbfgs_parameter_t::min_step. */ + LBFGSERR_MINIMUMSTEP, + /** The line-search step became larger than lbfgs_parameter_t::max_step. */ + LBFGSERR_MAXIMUMSTEP, + /** The line-search routine reaches the maximum number of evaluations. */ + LBFGSERR_MAXIMUMLINESEARCH, + /** The algorithm routine reaches the maximum number of iterations. */ + LBFGSERR_MAXIMUMITERATION, + /** Relative width of the interval of uncertainty is at most + lbfgs_parameter_t::xtol. */ + LBFGSERR_WIDTHTOOSMALL, + /** A logic error (negative line-search step) occurred. */ + LBFGSERR_INVALIDPARAMETERS, + /** The current search direction increases the objective function value. */ + LBFGSERR_INCREASEGRADIENT, +}; + +/** + * Line search algorithms. + */ +enum { + /** The default algorithm (MoreThuente method). */ + LBFGS_LINESEARCH_DEFAULT = 0, + /** MoreThuente method proposd by More and Thuente. */ + LBFGS_LINESEARCH_MORETHUENTE = 0, + /** + * Backtracking method with the Armijo condition. + * The backtracking method finds the step length such that it satisfies + * the sufficient decrease (Armijo) condition, + * - f(x + a * d) <= f(x) + lbfgs_parameter_t::ftol * a * g(x)^T d, + * + * where x is the current point, d is the current search direction, and + * a is the step length. + */ + LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1, + /** The backtracking method with the defualt (regular Wolfe) condition. */ + LBFGS_LINESEARCH_BACKTRACKING = 2, + /** + * Backtracking method with regular Wolfe condition. + * The backtracking method finds the step length such that it satisfies + * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) + * and the curvature condition, + * - g(x + a * d)^T d >= lbfgs_parameter_t::wolfe * g(x)^T d, + * + * where x is the current point, d is the current search direction, and + * a is the step length. + */ + LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2, + /** + * Backtracking method with strong Wolfe condition. + * The backtracking method finds the step length such that it satisfies + * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) + * and the following condition, + * - |g(x + a * d)^T d| <= lbfgs_parameter_t::wolfe * |g(x)^T d|, + * + * where x is the current point, d is the current search direction, and + * a is the step length. + */ + LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3, +}; + +/** + * L-BFGS optimization parameters. + * Call lbfgs_parameter_init() function to initialize parameters to the + * default values. + */ +typedef struct { + /** + * The number of corrections to approximate the inverse hessian matrix. + * The L-BFGS routine stores the computation results of previous \ref m + * iterations to approximate the inverse hessian matrix of the current + * iteration. This parameter controls the size of the limited memories + * (corrections). The default value is \c 6. Values less than \c 3 are + * not recommended. Large values will result in excessive computing time. + */ + int m; + + /** + * Epsilon for convergence test. + * This parameter determines the accuracy with which the solution is to + * be found. A minimization terminates when + * ||g|| < \ref epsilon * max(1, ||x||), + * where ||.|| denotes the Euclidean (L2) norm. The default value is + * \c 1e-5. + */ + lbfgsfloatval_t epsilon; + + /** + * Distance for delta-based convergence test. + * This parameter determines the distance, in iterations, to compute + * the rate of decrease of the objective function. If the value of this + * parameter is zero, the library does not perform the delta-based + * convergence test. The default value is \c 0. + */ + int past; + + /** + * Delta for convergence test. + * This parameter determines the minimum rate of decrease of the + * objective function. The library stops iterations when the + * following condition is met: + * (f' - f) / f < \ref delta, + * where f' is the objective value of \ref past iterations ago, and f is + * the objective value of the current iteration. + * The default value is \c 0. + */ + lbfgsfloatval_t delta; + + /** + * The maximum number of iterations. + * The lbfgs() function terminates an optimization process with + * ::LBFGSERR_MAXIMUMITERATION status code when the iteration count + * exceedes this parameter. Setting this parameter to zero continues an + * optimization process until a convergence or error. The default value + * is \c 0. + */ + int max_iterations; + + /** + * The line search algorithm. + * This parameter specifies a line search algorithm to be used by the + * L-BFGS routine. + */ + int linesearch; + + /** + * The maximum number of trials for the line search. + * This parameter controls the number of function and gradients evaluations + * per iteration for the line search routine. The default value is \c 20. + */ + int max_linesearch; + + /** + * The minimum step of the line search routine. + * The default value is \c 1e-20. This value need not be modified unless + * the exponents are too large for the machine being used, or unless the + * problem is extremely badly scaled (in which case the exponents should + * be increased). + */ + lbfgsfloatval_t min_step; + + /** + * The maximum step of the line search. + * The default value is \c 1e+20. This value need not be modified unless + * the exponents are too large for the machine being used, or unless the + * problem is extremely badly scaled (in which case the exponents should + * be increased). + */ + lbfgsfloatval_t max_step; + + /** + * A parameter to control the accuracy of the line search routine. + * The default value is \c 1e-4. This parameter should be greater + * than zero and smaller than \c 0.5. + */ + lbfgsfloatval_t ftol; + + /** + * A coefficient for the Wolfe condition. + * This parameter is valid only when the backtracking line-search + * algorithm is used with the Wolfe condition, + * ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or + * ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE . + * The default value is \c 0.9. This parameter should be greater + * the \ref ftol parameter and smaller than \c 1.0. + */ + lbfgsfloatval_t wolfe; + + /** + * A parameter to control the accuracy of the line search routine. + * The default value is \c 0.9. If the function and gradient + * evaluations are inexpensive with respect to the cost of the + * iteration (which is sometimes the case when solving very large + * problems) it may be advantageous to set this parameter to a small + * value. A typical small value is \c 0.1. This parameter shuold be + * greater than the \ref ftol parameter (\c 1e-4) and smaller than + * \c 1.0. + */ + lbfgsfloatval_t gtol; + + /** + * The machine precision for floating-point values. + * This parameter must be a positive value set by a client program to + * estimate the machine precision. The line search routine will terminate + * with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width + * of the interval of uncertainty is less than this parameter. + */ + lbfgsfloatval_t xtol; + + /** + * Coeefficient for the L1 norm of variables. + * This parameter should be set to zero for standard minimization + * problems. Setting this parameter to a positive value activates + * Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which + * minimizes the objective function F(x) combined with the L1 norm |x| + * of the variables, {F(x) + C |x|}. This parameter is the coeefficient + * for the |x|, i.e., C. As the L1 norm |x| is not differentiable at + * zero, the library modifies function and gradient evaluations from + * a client program suitably; a client program thus have only to return + * the function value F(x) and gradients G(x) as usual. The default value + * is zero. + */ + lbfgsfloatval_t orthantwise_c; + + /** + * Start index for computing L1 norm of the variables. + * This parameter is valid only for OWL-QN method + * (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N) + * specifies the index number from which the library computes the + * L1 norm of the variables x, + * |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| . + * In other words, variables x_1, ..., x_{b-1} are not used for + * computing the L1 norm. Setting b (0 < b < N), one can protect + * variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic + * regression) from being regularized. The default value is zero. + */ + int orthantwise_start; + + /** + * End index for computing L1 norm of the variables. + * This parameter is valid only for OWL-QN method + * (i.e., \ref orthantwise_c != 0). This parameter e (0 < e <= N) + * specifies the index number at which the library stops computing the + * L1 norm of the variables x, + */ + int orthantwise_end; +} lbfgs_parameter_t; + + +/** + * Callback interface to provide objective function and gradient evaluations. + * + * The lbfgs() function call this function to obtain the values of objective + * function and its gradients when needed. A client program must implement + * this function to evaluate the values of the objective function and its + * gradients, given current values of variables. + * + * @param instance The user data sent for lbfgs() function by the client. + * @param x The current values of variables. + * @param g The gradient vector. The callback function must compute + * the gradient values for the current variables. + * @param n The number of variables. + * @param step The current step of the line search routine. + * @retval lbfgsfloatval_t The value of the objective function for the current + * variables. + */ +typedef lbfgsfloatval_t (*lbfgs_evaluate_t)( + void *instance, + const lbfgsfloatval_t *x, + lbfgsfloatval_t *g, + const int n, + const lbfgsfloatval_t step + ); + +/** + * Callback interface to receive the progress of the optimization process. + * + * The lbfgs() function call this function for each iteration. Implementing + * this function, a client program can store or display the current progress + * of the optimization process. + * + * @param instance The user data sent for lbfgs() function by the client. + * @param x The current values of variables. + * @param g The current gradient values of variables. + * @param fx The current value of the objective function. + * @param xnorm The Euclidean norm of the variables. + * @param gnorm The Euclidean norm of the gradients. + * @param step The line-search step used for this iteration. + * @param n The number of variables. + * @param k The iteration count. + * @param ls The number of evaluations called for this iteration. + * @retval int Zero to continue the optimization process. Returning a + * non-zero value will cancel the optimization process. + */ +typedef int (*lbfgs_progress_t)( + void *instance, + const lbfgsfloatval_t *x, + const lbfgsfloatval_t *g, + const lbfgsfloatval_t fx, + const lbfgsfloatval_t xnorm, + const lbfgsfloatval_t gnorm, + const lbfgsfloatval_t step, + int n, + int k, + int ls + ); + +#endif + |