Hi all, after a good amount of work I've finalized the writing of the ODE integration routine. The method that I've implemented is the Embedded Runge-Kutta-Fehlberg (4, 5) method. This latter method is already much better then the simple Runge-Kutta method and the idea is that the following step would be to implement the Embedded Runge-Kutta Prince-Dormand (8,9) method as someone suggested. The implementation I've done is Lua is virtually identical to those given in the GSL library. I've implemented the same methodology to control the step size to limit the error accordingly to the user input. The difference is that I don't use vector but everything is expanded to local variables using a template preprocessor. To develop the interface I've further refined the Lua preprocessor that Steve Donovan made based on Rici Lake's original code snippet. I've changed the implementation to avoid to write in the global namespace and I've also adde a function to include other files during pre processing. The resulting file is "template.lua". In order to test the algorithm both for accuracy I've taken a basic GSL example to show ODE evolution. I've changed the integration method to rkf45, in the original examples was rk8pd (runge-kutte prince-dormand). Then I've augmented the integration time and repeated the whole process 10 times. The results are just perfect in term of accuracy. Results produced with LuaJIT2 are the same of those given by the C code. For the other size it seems that there is a small problem because the performance of LuaJIT2 are in this case below my expectations. Here what I've got: LuaJIT2: real 0m14.498s user 0m14.497s sys 0m0.000s C code (-O2) with GSL library: real 0m1.094s user 0m1.088s sys 0m0.000s so the C code in this case is approx 13.5x times faster. I hope I've made a big stupid error in my implementation because my hope was to have better results :-) You will find in attachment all the files, if someone want to give a look. The most important one is the preprocessed file, "rkf45-out.lua". This file is generated from "rkf45.lua.in" and "ode-defs.lua.in" by using the template module. Otherwise if you want to reproduce the example with LuaJIT2 you will need to add to math functions like sin, cos etc the "math." prefix. The reason is that GSL shell put all the mathematical functions in the common namespace. You can easily tests everyting by taking the luajit2 branch in the GSL shell git repository. -- Francesco
Attachment:
rkf45.lua.in
Description: Binary data
local ffi = require 'ffi'
ffi.cdef[[
typedef struct {
double t;
double h;
double y[2];
double dydt[2];
} ode_state;
]]
local function ode_new()
return ffi.new('ode_state')
end
local function ode_init(s, t0, h0, f, y_0,y_1)
s.y[0],s.y[1] = y_0,y_1
s.dydt[0],s.dydt[1] = f(t0, y_0,y_1)
s.t = t0
s.h = h0
end
local function hadjust(rmax, h)
local S = 0.9
if rmax > 1.1 then
local r = S / rmax^(1/5)
r = max(0.2, r)
return r * h, -1
elseif rmax < 0.5 then
local r = S / rmax^(1/(5+1))
r = max(1, min(r, 5))
return r * h, 1
end
return h, 0
end
-- These are the differences of fifth and fourth order coefficients
-- for error estimation */
function rkf45_evolve(s, f, t1)
local t, h = s.t, s.h
local dydt = s.dydt
local hadj, inc
local y_0,y_1
local k1_0,k1_1 = dydt[0],dydt[1]
if t + h > t1 then h = t1 - t end
while h > 0 do
y_0,y_1 = s.y[0],s.y[1]
ytmp_0 = y_0 + 0.25 * h * k1_0
ytmp_1 = y_1 + 0.25 * h * k1_1
-- k2 step
local k2_0,k2_1 = f(t + 0.25 * h, ytmp_0,ytmp_1)
ytmp_0 = y_0 + h * (0.09375 * k1_0 + 0.28125 * k2_0)
ytmp_1 = y_1 + h * (0.09375 * k1_1 + 0.28125 * k2_1)
-- k3 step
local k3_0,k3_1 = f(t + 0.375 * h, ytmp_0,ytmp_1)
ytmp_0 = y_0 + h * (0.87938097405553 * k1_0 + -3.2771961766045 * k2_0 + 3.3208921256259 * k3_0)
ytmp_1 = y_1 + h * (0.87938097405553 * k1_1 + -3.2771961766045 * k2_1 + 3.3208921256259 * k3_1)
-- k4 step
local k4_0,k4_1 = f(t + 0.92307692307692 * h, ytmp_0,ytmp_1)
ytmp_0 = y_0 + h * (2.0324074074074 * k1_0 + -8 * k2_0 + 7.1734892787524 * k3_0 + -0.20589668615984 * k4_0)
ytmp_1 = y_1 + h * (2.0324074074074 * k1_1 + -8 * k2_1 + 7.1734892787524 * k3_1 + -0.20589668615984 * k4_1)
-- k5 step
local k5_0,k5_1 = f(t + 1 * h, ytmp_0,ytmp_1)
ytmp_0 = y_0 + h * (-0.2962962962963 * k1_0 + 2 * k2_0 + -1.3816764132554 * k3_0 + 0.45297270955166 * k4_0 + -0.275 * k5_0)
ytmp_1 = y_1 + h * (-0.2962962962963 * k1_1 + 2 * k2_1 + -1.3816764132554 * k3_1 + 0.45297270955166 * k4_1 + -0.275 * k5_1)
-- k6 step and final sum
-- since k2 is no more used we can use k2 to store k6
local k6_0,k6_1 = f(t + 0.5 * h, ytmp_0,ytmp_1)
local di
di = 0.11851851851852 * k1_0 + 0.51898635477583 * k3_0 + 0.50613149034202 * k4_0 + -0.18 * k5_0 + 0.036363636363636 * k6_0
y_0 = y_0 + h * di
di = 0.11851851851852 * k1_1 + 0.51898635477583 * k3_1 + 0.50613149034202 * k4_1 + -0.18 * k5_1 + 0.036363636363636 * k6_1
y_1 = y_1 + h * di
local yerr, r, d0
local rmax = 0
yerr = h * (0.0027777777777778 * k1_0 + -0.029941520467836 * k3_0 + -0.029199893673578 * k4_0 + 0.02 * k5_0 + 0.036363636363636 * k6_0)
d0 = 0 * (1 * abs(y_0)) + 1e-06
r = abs(yerr) / abs(d0)
rmax = max(r, rmax)
yerr = h * (0.0027777777777778 * k1_1 + -0.029941520467836 * k3_1 + -0.029199893673578 * k4_1 + 0.02 * k5_1 + 0.036363636363636 * k6_1)
d0 = 0 * (1 * abs(y_1)) + 1e-06
r = abs(yerr) / abs(d0)
rmax = max(r, rmax)
hadj, inc = hadjust(rmax, h)
if inc >= 0 then break end
h = hadj
end
dydt[0],dydt[1] = f(t + h, y_0,y_1)
s.y[0],s.y[1] = y_0,y_1
s.t = t + h
s.h = hadj
return h
end
return {new= ode_new, init= ode_init, evolve= rkf45_evolve}
local template = require 'template'
local ode_spec = {N = 2, eps_abs = 1e-6, eps_rel = 0, a_y = 1, a_dydt = 0}
local codegen = template.compile('rkf45.lua.in', ode_spec)
local ode = codegen()
function f_vanderpol_gen(mu)
return function(t, x, y) return y, -x + mu * y * (1-x^2) end
end
local f = f_vanderpol_gen(10.0)
local s = ode.new()
local x, y = 1, 0
local t0, t1, h0 = 0, 20000, 0.01
local init, evol = ode.init, ode.evolve
for k=1, 10 do
init(s, t0, h0, f, x, y)
while s.t < t1 do
evol(s, f, t1)
end
print(s.t, s.y[0], s.y[1])
end
#include <stdio.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_odeiv.h>
int
func (double t, const double y[], double f[],
void *params)
{
double mu = *(double *)params;
f[0] = y[1];
f[1] = -y[0] - mu*y[1]*(y[0]*y[0] - 1);
return GSL_SUCCESS;
}
int
jac (double t, const double y[], double *dfdy,
double dfdt[], void *params)
{
double mu = *(double *)params;
gsl_matrix_view dfdy_mat
= gsl_matrix_view_array (dfdy, 2, 2);
gsl_matrix * m = &dfdy_mat.matrix;
gsl_matrix_set (m, 0, 0, 0.0);
gsl_matrix_set (m, 0, 1, 1.0);
gsl_matrix_set (m, 1, 0, -2.0*mu*y[0]*y[1] - 1.0);
gsl_matrix_set (m, 1, 1, -mu*(y[0]*y[0] - 1.0));
dfdt[0] = 0.0;
dfdt[1] = 0.0;
return GSL_SUCCESS;
}
int
main (void)
{
const gsl_odeiv_step_type * T = gsl_odeiv_step_rkf45;
int k;
for (k=0; k < 10; k++)
{
gsl_odeiv_step * s = gsl_odeiv_step_alloc (T, 2);
gsl_odeiv_control * c = gsl_odeiv_control_y_new (1e-6, 0.0);
gsl_odeiv_evolve * e = gsl_odeiv_evolve_alloc (2);
double mu = 10;
gsl_odeiv_system sys = {func, jac, 2, &mu};
double t = 0.0, t1 = 20000.0;
double h = 1e-6;
double y[2] = { 1.0, 0.0 };
while (t < t1)
{
int status = gsl_odeiv_evolve_apply (e, c, s,
&sys,
&t, t1,
&h, y);
if (status != GSL_SUCCESS)
break;
}
printf ("%g %g %g\n", t, y[0], y[1]);
gsl_odeiv_evolve_free (e);
gsl_odeiv_control_free (c);
gsl_odeiv_step_free (s);
}
return 0;
}
Attachment:
ode-defs.lua.in
Description: Binary data
--
-- A Lua preprocessor for template code specialization.
-- Adapted by Steve Donovan, based on original code of Rici Lake.
--
local M = {}
-------------------------------------------------------------------------------
local function preprocess(chunk, name, defs)
local function parseDollarParen(pieces, chunk, s, e)
local append, format = table.insert, string.format
local s = 1
for term, executed, e in chunk:gmatch("()$(%b())()") do
append(pieces,
format("%q..(%s or '')..", chunk:sub(s, term - 1), executed))
s = e
end
append(pieces, format("%q", chunk:sub(s)))
end
local function parseHashLines(chunk)
local append = table.insert
local pieces, s, args = chunk:find("^\n*#ARGS%s*(%b())[ \t]*\n")
if not args or find(args, "^%(%s*%)$") then
pieces, s = {"return function(_put) ", n = 1}, s or 1
else
pieces = {"return function(_put, ", args:sub(2), n = 2}
end
while true do
local ss, e, lua = chunk:find("^#+([^\n]*\n?)", s)
if not e then
ss, e, lua = chunk:find("\n#+([^\n]*\n?)", s)
append(pieces, "_put(")
parseDollarParen(pieces, chunk:sub(s, ss))
append(pieces, ")")
if not e then break end
end
append(pieces, lua)
s = e + 1
end
append(pieces, " end")
return table.concat(pieces)
end
local ppenv
if defs._self then
ppenv = defs._self
else
ppenv = {string= string, table= table, template= M}
for k, v in pairs(defs) do ppenv[k] = v end
ppenv._self = ppenv
local include = function(filename)
return M.process(filename, ppenv)
end
setfenv(include, ppenv)
ppenv.include = include
end
local code = parseHashLines(chunk)
local fcode = loadstring(code, name)
if fcode then
setfenv(fcode, ppenv)
return fcode()
end
end
local function read_file(filename)
local f = io.open(filename)
local content = f:read('*a')
f:close()
return content
end
local function process(filename, defs)
local template = read_file(filename)
local codegen = preprocess(template, 'ode_codegen', defs)
local code = {}
local add = function(s) code[#code+1] = s end
codegen(add)
return table.concat(code)
end
local function compile(filename, defs)
return loadstring(process(filename, defs), 'ode_out')
end
M.process = process
M.compile = compile
return M