local abs, min, max, sqrt = math.abs, math.min, math.max, math.sqrt local ffi = require 'ffi' local gsl = require 'gsl' local gsl_check = require 'gsl-check' # GSL_DBL_EPSILON = 2.2204460492503131e-16 # GSL_DBL_MIN = 2.2250738585072014e-308 # GSL_DBL_MAX = 1.7976931348623157e+308 # p1, p25, p5, p75, p0001 = 0.1, 0.25, 0.5, 0.75, 0.0001 local signum_result = ffi.new('int[1]') local function vector_size(v) return tonumber(v.size) end local matrix_dim = matrix.dim local function scaled_enorm(d, f) local e2 = 0 local n = vector_size(f) for i= 0, n-1 do local fi = gsl.gsl_vector_get (f, i) local di = gsl.gsl_vector_get (d, i) local u = di * fi e2 = e2 + u * u end return sqrt(e2) end local function compute_delta (diag, x) local Dx = scaled_enorm (diag, x) local factor = 100 -- generally recommended value from MINPACK return (Dx> 0 and factor * Dx or factor) end local function compute_actual_reduction (fnorm, fnorm1) local actred if 0.1 * fnorm1 < fnorm then local u = fnorm1 / fnorm actred = 1 - u * u else actred = -1 end return actred end local function compute_diag (J, diag) local n, p = matrix_dim(J) for j = 0, p-1 do local sum = 0 for i = 0, n-1 do local Jij = gsl.gsl_matrix_get (J, i, j) sum = sum + Jij * Jij end if sum == 0 then sum = 1 end gsl.gsl_vector_set (diag, j, sqrt (sum)) end end local function update_diag (J, diag) local n = vector_size(diag) for j = 0, n-1 do local sum = 0 for i =0, n-1 do local Jij = gsl.gsl_matrix_get (J, i, j) sum = sum + Jij * Jij end if sum == 0 then sum = 1 end local cnorm = sqrt (sum) local diagj = gsl.gsl_vector_get (diag, j) if cnorm> diagj then gsl.gsl_vector_set (diag, j, cnorm) end end end local function compute_rptdx (r, p, dx, rptdx) local n = vector_size(dx) for i = 0, n-1 do local sum = 0 for j = i, n-1 do local pj = gsl.gsl_permutation_get (p, j) sum = sum + gsl.gsl_matrix_get (r, i, j) * gsl.gsl_vector_get (dx, pj) end gsl.gsl_vector_set (rptdx, i, sum) end end local function compute_trial_step (x, dx, x_trial) local n = vector_size(x) for i = 0, n-1 do local pi = gsl.gsl_vector_get (dx, i) local xi = gsl.gsl_vector_get (x, i) gsl.gsl_vector_set (x_trial, i, xi + pi) end end -- This function computes the solution to the least squares system -- phi = [ A x = b , lambda D x = 0 ]^2 -- -- where A is an M by N matrix, D is an N by N diagonal matrix, lambda -- is a scalar parameter and b is a vector of length M. -- The function requires the factorization of A into A = Q R P^T, -- where Q is an orthogonal matrix, R is an upper triangular matrix -- with diagonal elements of non-increasing magnitude and P is a -- permuation matrix. The system above is then equivalent to -- [ R z = Q^T b, P^T (lambda D) P z = 0 ] -- where x = P z. If this system does not have full rank then a least -- squares solution is obtained. On output the function also provides -- an upper triangular matrix S such that -- P^T (A^T A + lambda^2 D^T D) P = S^T S -- Parameters, -- -- r: On input, contains the full upper triangle of R. On output the -- strict lower triangle contains the transpose of the strict upper -- triangle of S, and the diagonal of S is stored in sdiag. The full -- upper triangle of R is not modified. -- p: the encoded form of the permutation matrix P. column j of P is -- column p[j] of the identity matrix. -- lambda, diag: contains the scalar lambda and the diagonal elements -- of the matrix D -- qtb: contains the product Q^T b -- x: on output contains the least squares solution of the system -- wa: is a workspace of length N local function qrsolv(r, p, lambda, diag, qtb, x, sdiag, wa) local n = tonumber(r.size2) -- Copy r and qtb to preserve input and initialise s. In particular, -- save the diagonal elements of r in x for j = 0, n-1 do local rjj = gsl.gsl_matrix_get (r, j, j) local qtbj = gsl.gsl_vector_get (qtb, j) for i = j+1, n-1 do local rji = gsl.gsl_matrix_get (r, j, i) gsl.gsl_matrix_set (r, i, j, rji) end gsl.gsl_vector_set (x, j, rjj) gsl.gsl_vector_set (wa, j, qtbj) end -- Eliminate the diagonal matrix d using a Givens rotation for j = 0, n-1 do local qtbpj local pj = gsl.gsl_permutation_get (p, j) local diagpj = lambda * gsl.gsl_vector_get (diag, pj) if diagpj ~= 0 then gsl.gsl_vector_set (sdiag, j, diagpj) for k = j+1, n-1 do gsl.gsl_vector_set (sdiag, k, 0) end -- The transformations to eliminate the row of d modify only a -- single element of qtb beyond the first n, which is initially -- zero qtbpj = 0 for k = j, n-1 do -- Determine a Givens rotation which eliminates the -- appropriate element in the current row of d local wak = gsl.gsl_vector_get (wa, k) local rkk = gsl.gsl_matrix_get (r, k, k) local sdiagk = gsl.gsl_vector_get (sdiag, k) local sine, cosine if sdiagk ~= 0 then if abs(rkk) < abs(sdiagk) then local cotangent = rkk / sdiagk sine = 0.5 / sqrt (0.25 + 0.25 * cotangent * cotangent) cosine = sine * cotangent else local tangent = sdiagk / rkk cosine = 0.5 / sqrt (0.25 + 0.25 * tangent * tangent) sine = cosine * tangent end -- Compute the modified diagonal element of r and the -- modified element of [qtb,0] do local new_rkk = cosine * rkk + sine * sdiagk local new_wak = cosine * wak + sine * qtbpj qtbpj = -sine * wak + cosine * qtbpj gsl.gsl_matrix_set(r, k, k, new_rkk) gsl.gsl_vector_set(wa, k, new_wak) end -- Accumulate the transformation in the row of s for i = k + 1, n-1 do local rik = gsl.gsl_matrix_get (r, i, k) local sdiagi = gsl.gsl_vector_get (sdiag, i) local new_rik = cosine * rik + sine * sdiagi local new_sdiagi = -sine * rik + cosine * sdiagi gsl.gsl_matrix_set(r, i, k, new_rik) gsl.gsl_vector_set(sdiag, i, new_sdiagi) end end end -- Store the corresponding diagonal element of s and restore the -- corresponding diagonal element of r do local rjj = gsl.gsl_matrix_get (r, j, j) local xj = gsl.gsl_vector_get(x, j) gsl.gsl_vector_set (sdiag, j, rjj) gsl.gsl_matrix_set (r, j, j, xj) end end end -- Solve the triangular system for z. If the system is singular then -- obtain a least squares solution local nsing = n for j = 0, n-1 do local sdiagj = gsl.gsl_vector_get (sdiag, j) if sdiagj == 0 then nsing = j break end end for j = nsing, n-1 do gsl.gsl_vector_set (wa, j, 0) end for k = 0, nsing-1 do local sum = 0 local j = (nsing - 1) - k for i = j + 1, nsing-1 do sum = sum + gsl.gsl_matrix_get(r, i, j) * gsl.gsl_vector_get(wa, i) end do local waj = gsl.gsl_vector_get (wa, j) local sdiagj = gsl.gsl_vector_get (sdiag, j) gsl.gsl_vector_set (wa, j, (waj - sum) / sdiagj) end end -- Permute the components of z back to the components of x for j = 0, n-1 do local pj = gsl.gsl_permutation_get (p, j) local waj = gsl.gsl_vector_get (wa, j) gsl.gsl_vector_set (x, pj, waj) end end local function count_nsing (r) -- Count the number of nonsingular entries. Returns the index of the -- first entry which is singular. local n = tonumber(r.size2) local j = n for i = 0, n-1 do local rii = gsl.gsl_matrix_get (r, i, i) if rii == 0 then j = i break end end return j end local function compute_newton_direction (r, perm, qtf, x) -- Compute and store in x the Gauss-Newton direction. If the -- Jacobian is rank-deficient then obtain a least squares -- solution. local n = tonumber(r.size2) for i = 0, n-1 do local qtfi = gsl.gsl_vector_get (qtf, i) gsl.gsl_vector_set (x, i, qtfi) end local nsing = count_nsing (r) for i = nsing, n-1 do gsl.gsl_vector_set (x, i, 0) end if nsing> 0 then for j = nsing-1, 0, -1 do local rjj = gsl.gsl_matrix_get (r, j, j) local temp = gsl.gsl_vector_get (x, j) / rjj gsl.gsl_vector_set (x, j, temp) for i = 0, j-1 do local rij = gsl.gsl_matrix_get (r, i, j) local xi = gsl.gsl_vector_get (x, i) gsl.gsl_vector_set (x, i, xi - rij * temp) end end end gsl.gsl_permute_vector_inverse (perm, x) end local function compute_newton_correction (r, sdiag, p, x, dxnorm, diag, w) local n = tonumber(r.size2) for i=0, n-1 do local pi = gsl.gsl_permutation_get (p, i) local dpi = gsl.gsl_vector_get (diag, pi) local xpi = gsl.gsl_vector_get (x, pi) gsl.gsl_vector_set (w, i, dpi * (dpi * xpi) / dxnorm) end for j=0, n-1 do local sj = gsl.gsl_vector_get (sdiag, j) local wj = gsl.gsl_vector_get (w, j) local tj = wj / sj gsl.gsl_vector_set (w, j, tj) for i=j+1, n-1 do local rij = gsl.gsl_matrix_get (r, i, j) local wi = gsl.gsl_vector_get (w, i) gsl.gsl_vector_set (w, i, wi - rij * tj) end end end local function compute_newton_bound (r, x, dxnorm, perm, diag, w) -- If the jacobian is not rank-deficient then the Newton step -- provides a lower bound for the zero of the function. Otherwise -- set this bound to zero. local n = tonumber(r.size2) local nsing = count_nsing (r) if nsing < n then gsl.gsl_vector_set_zero (w) return end for i= 0, n-1 do local pi = gsl.gsl_permutation_get (perm, i) local dpi = gsl.gsl_vector_get (diag, pi) local xpi = gsl.gsl_vector_get (x, pi) gsl.gsl_vector_set (w, i, dpi * (dpi * xpi / dxnorm)) end for j= 0, n-1 do local sum = 0 for i= 0, j-1 do sum = sum + gsl.gsl_matrix_get (r, i, j) * gsl.gsl_vector_get (w, i) end do local rjj = gsl.gsl_matrix_get (r, j, j) local wj = gsl.gsl_vector_get (w, j) gsl.gsl_vector_set (w, j, (wj - sum) / rjj) end end end local function compute_gradient_direction (r, p, qtf, diag, g) local n = tonumber(r.size2) for j=0, n-1 do local sum = 0 for i = 0, j do sum = sum + gsl.gsl_matrix_get (r, i, j) * gsl.gsl_vector_get (qtf, i) end do local pj = gsl.gsl_permutation_get (p, j) local dpj = gsl.gsl_vector_get (diag, pj) gsl.gsl_vector_set (g, j, sum / dpj) end end end local function lmpar (r, perm, qtf, diag, delta, par, newton, gradient, sdiag, x, w) local par_lower, par_upper compute_newton_direction (r, perm, qtf, newton) -- Evaluate the function at the origin and test for acceptance of -- the Gauss-Newton direction. local dxnorm = scaled_enorm (diag, newton) local fp = dxnorm - delta if fp <= 0.1 * delta then gsl.gsl_vector_memcpy (x, newton) return 0 end compute_newton_bound (r, newton, dxnorm, perm, diag, w) do local wnorm = gsl.gsl_blas_dnrm2 (w) local phider = wnorm * wnorm -- w == zero if r rank-deficient, -- then set lower bound to zero form MINPACK, lmder.f -- Hans E. Plesser 2002-02-25 (hans.plesser@itf.nlh.no) */ par_lower = (wnorm> 0 and fp / (delta * phider) or 0) end compute_gradient_direction (r, perm, qtf, diag, gradient) local gnorm = gsl.gsl_blas_dnrm2 (gradient) par_upper = gnorm / delta if par_upper == 0 then par_upper = $(GSL_DBL_MIN) / min(delta, 0.1) end if par> par_upper then par = par_upper elseif par < par_lower then par = par_lower end if par == 0 then par = gnorm / dxnorm end -- Beginning of iteration for iter= 1, 10 do -- Evaluate the function at the current value of par if par == 0 then par = max(0.001 * par_upper, $(GSL_DBL_MIN)) end -- Compute the least squares solution of [ R P x - Q^T f, sqrt(par) D x] -- for A = Q R P^T do local sqrt_par = sqrt(par) qrsolv (r, perm, sqrt_par, diag, qtf, x, sdiag, w) end dxnorm = scaled_enorm (diag, x) local fp_old = fp fp = dxnorm - delta -- If the function is small enough, accept the current value of par if abs (fp) <= 0.1 * delta then return par end if par_lower == 0 and fp <= fp_old and fp_old < 0 then return par end -- Check for maximum number of iterations */ if iter == 10 then return par end -- Compute the Newton correction compute_newton_correction (r, sdiag, perm, x, dxnorm, diag, w) local par_c do local wnorm = gsl.gsl_blas_dnrm2 (w) par_c = fp / (delta * wnorm * wnorm) end -- Depending on the sign of the function, update par_lower or par_upper if fp> 0 then if par> par_lower then par_lower = par end elseif fp < 0 then if par < par_upper then par_upper = par end end -- Compute an improved estimate for par par = max (par_lower, par + par_c) end end local M = { _data = {} } local function store(obj, cdata) local t = M._data t[cdata] = obj return cdata end local function object(cdata) local t = M._data return t[cdata] end local function get_vector(nr) local m = matrix.new(nr, 1) local s = gsl.gsl_matrix_column (m, 0) return store(m, s) end local function get_permutation(nr) return ffi.gc(gsl.gsl_permutation_calloc(nr), gsl.gsl_permutation_free) end local function call_f(fdf, xc, fc) local x, f = object(xc), object(fc) return fdf(x, f, nil) end local function call_df(fdf, xc, Jc) local x, J = object(xc), Jc return fdf(x, nil, J) end local function call_fdf(fdf, xc, fc, Jc) local x, f, J = object(xc), object(fc), Jc return fdf(x, f, J) end local state_iter local xnorm, fnorm local delta, par # MINNP = (N < P and N or P) local r = matrix.new($(N), $(P)) local tau = get_vector($(MINNP)) local diag = get_vector($(P)) local qtf = get_vector($(N)) local newton = get_vector($(P)) local gradient = get_vector($(P)) local x_trial = get_vector($(P)) local f_trial = get_vector($(N)) local df = get_vector($(N)) local sdiag = get_vector($(P)) local rptdx = get_vector($(N)) local w = get_vector($(N)) local work1 = get_vector($(P)) local perm = get_permutation($(P)) local state_x = get_vector($(P)) local state_f = get_vector($(N)) local state_J = matrix.new($(N), $(P)) local state_dx = get_vector($(P)) local system_fdf local function lm_set (fdf, x, f, J, dx, scale) -- Evaluate function at x -- return immediately if evaluation raised error */ call_fdf (fdf, x, f, J) par = 0 state_iter = 1 fnorm = gsl.gsl_blas_dnrm2 (f) gsl.gsl_vector_set_all (dx, 0) -- store column norms in diag if scale then compute_diag (J, diag) else gsl.gsl_vector_set_all (diag, 1) end -- set delta to 100 |D x| or to 100 if |D x| is zero xnorm = scaled_enorm (diag, x) delta = compute_delta (diag, x) -- Factorize J into QR decomposition gsl.gsl_matrix_memcpy (r, J) gsl.gsl_linalg_QRPT_decomp (r, tau, perm, signum_result, work1) gsl.gsl_vector_set_zero (rptdx) gsl.gsl_vector_set_zero (w) -- Zero the trial vector, as in the alloc function gsl.gsl_vector_set_zero (f_trial) end local function lm_iterate(fdf, x, f, J, dx, scale) local prered, actred local pnorm, fnorm1, fnorm1p, gnorm local ratio local dirder if fnorm == 0 then return end -- Compute qtf = Q^T f gsl.gsl_vector_memcpy (qtf, f) gsl.gsl_linalg_QR_QTvec (r, tau, qtf) -- Compute norm of scaled gradient compute_gradient_direction (r, perm, qtf, diag, gradient) do local iamax = gsl.gsl_blas_idamax (gradient) gnorm = abs(gsl.gsl_vector_get (gradient, iamax) / fnorm) end -- Determine the Levenberg-Marquardt parameter for iter = 1, 10 do par = lmpar (r, perm, qtf, diag, delta, par, newton, gradient, sdiag, dx, w) -- Take a trial step gsl.gsl_vector_scale (dx, -1) -- reverse the step to go downhill compute_trial_step (x, dx, x_trial) pnorm = scaled_enorm (diag, dx) if state_iter == 1 then delta = min(delta, pnorm) end -- Evaluate function at x + p -- return immediately if evaluation raised error call_f (fdf, x_trial, f_trial) fnorm1 = gsl.gsl_blas_dnrm2 (f_trial) -- Compute the scaled actual reduction actred = compute_actual_reduction (fnorm, fnorm1) -- Compute rptdx = R P^T dx, noting that |J dx| = |R P^T dx| compute_rptdx (r, perm, dx, rptdx) fnorm1p = gsl.gsl_blas_dnrm2 (rptdx) -- Compute the scaled predicted reduction = |J dx|^2 + 2 par |D dx|^2 do local t1 = fnorm1p / fnorm local t2 = (sqrt(par) * pnorm) / fnorm prered = t1 * t1 + t2 * t2 / $(p5) dirder = -(t1 * t1 + t2 * t2) end -- compute the ratio of the actual to predicted reduction ratio = (prered> 0 and actred / prered or 0) -- update the step bound if ratio> $(p25) then if par == 0 or ratio>= $(p75) then delta = pnorm / $(p5) par = par * $(p5) end else local temp = (actred>= 0 and $(p5) or $(p5)*dirder / (dirder + $(p5) * actred)) if $(p1) * fnorm1>= fnorm or temp < $(p1) then temp = $(p1) end delta = temp * min (delta, pnorm/$(p1)) par = par / temp end -- test for successful iteration, termination and stringent tolerances if ratio>= $(p0001) then gsl.gsl_vector_memcpy (x, x_trial) gsl.gsl_vector_memcpy (f, f_trial) -- return immediately if evaluation raised error call_df (fdf, x_trial, J) -- wa2_j = diag_j * x_j xnorm = scaled_enorm(diag, x) fnorm = fnorm1 state_iter = state_iter + 1 -- Rescale if necessary if scale then update_diag (J, diag) end do gsl.gsl_matrix_memcpy (r, J) gsl.gsl_linalg_QRPT_decomp (r, tau, perm, signum_result, work1) end return elseif abs(actred) <= $(GSL_DBL_EPSILON) and prered <= $(GSL_DBL_EPSILON) and $(p5) * ratio <= 1.0 then return 'error ETOLF' elseif delta <= $(GSL_DBL_EPSILON) * xnorm then return 'error ETOLX' elseif gnorm <= $(GSL_DBL_EPSILON) then return 'error ETOLG' end end end local function test_delta (dx, x, epsabs, epsrel) local n = vector_size(x) if epsrel < 0 then error "relative tolerance is negative" end for i = 0, n-1 do local xi = gsl.gsl_vector_get (x, i) local dxi = gsl.gsl_vector_get (dx, i) local tolerance = epsabs + epsrel * abs(xi) if abs(dxi)>= tolerance then return false end end return true end M.set = function(fdf, x0) system_fdf = fdf gsl_check(gsl.gsl_matrix_memcpy(object(state_x), x0)) lm_set (fdf, state_x, state_f, state_J, state_dx, true) end M.iterate = function() return lm_iterate (system_fdf, state_x, state_f, state_J, state_dx, true) end M.test = function(epsabs, epsrel) return test_delta(state_dx, state_x, epsabs, epsrel) end M.chisq = function() return gsl.gsl_blas_dnrm2 (state_f) end M.x, M.f = object(state_x), object(state_f) return M