gsl-shell.git - gsl-shell

diff --git a/agg-plot/agg_font_freetype.cpp b/agg-plot/agg_font_freetype.cpp
index 45fdf46c..f9d82a3a 100644
--- a/agg-plot/agg_font_freetype.cpp
+++ b/agg-plot/agg_font_freetype.cpp

@@ -909,8 +909,7 @@ namespace agg

m_glyph_index = FT_Get_Char_Index(m_cur_face, glyph_code);

m_last_error = FT_Load_Glyph(m_cur_face,

m_glyph_index,

-// m_hinting ? FT_LOAD_DEFAULT : FT_LOAD_NO_HINTING);

- m_hinting ? FT_LOAD_FORCE_AUTOHINT : FT_LOAD_NO_HINTING);

+ m_hinting ? (FT_LOAD_TARGET_LCD|FT_LOAD_FORCE_AUTOHINT) : FT_LOAD_NO_HINTING);

if(m_last_error == 0)

{

switch(m_glyph_rendering)

diff --git a/agg-plot/agg_pixfmt_rgb24_lcd.h b/agg-plot/agg_pixfmt_rgb24_lcd.h
index 3f8f7313..885bd8b3 100644
--- a/agg-plot/agg_pixfmt_rgb24_lcd.h
+++ b/agg-plot/agg_pixfmt_rgb24_lcd.h

@@ -116,8 +116,8 @@ namespace agg

void copy_hline(int x, int y, unsigned len, const color_type& c)

{

- int xr = x / 3;

- int8u* p = m_rbuf->row_ptr(x, y, len) + xr;

+ int xr = x - (x % 3);

+ int8u* p = m_rbuf->row_ptr(y) + xr;

for (int ilen = len; ilen > 0; p += 3, ilen -= 3)

{

p[0] = c.r;

@@ -188,8 +188,8 @@ namespace agg

void copy_hline(int x, int y, unsigned len, const color_type& c)

{

- int xr = x / 3;

- int8u* p = m_rbuf->row_ptr(x, y, len) + xr;

+ int xr = x - (x % 3);

+ int8u* p = m_rbuf->row_ptr(y) + xr;

for (int ilen = len; ilen > 0; p += 3, ilen -= 3)

{

p[0] = c.r;

diff --git a/agg-plot/canvas-window-cpp.h b/agg-plot/canvas-window-cpp.h
index 29ccc75e..2d29fef5 100644
--- a/agg-plot/canvas-window-cpp.h
+++ b/agg-plot/canvas-window-cpp.h

@@ -22,7 +22,7 @@ extern "C" {

class canvas_window : public platform_support_ext {

protected:

canvas *m_canvas;

- agg::rgba m_bgcolor;

+ agg::rgba8 m_bgcolor;

agg::trans_affine m_matrix;

@@ -43,7 +43,7 @@ public:

enum win_status_e status;

- canvas_window(gsl_shell_state* gs, agg::rgba& bgcol):

+ canvas_window(gsl_shell_state* gs, agg::rgba8 bgcol):

platform_support_ext(gslshell::pixel_format, true),

m_canvas(NULL), m_bgcolor(bgcol), m_matrix(), m_gsl_shell(gs),

status(not_ready)

diff --git a/agg-plot/canvas.h b/agg-plot/canvas.h
index 3a35a501..eba25b61 100644
--- a/agg-plot/canvas.h
+++ b/agg-plot/canvas.h

@@ -20,7 +20,7 @@ template <class Pixel>

class renderer_gray_aa

{

public:

- renderer_gray_aa(agg::rendering_buffer& ren_buf, agg::rgba bg_color):

+ renderer_gray_aa(agg::rendering_buffer& ren_buf, agg::rgba8 bg_color):

m_pixbuf(ren_buf), m_ren_base(m_pixbuf), m_ren_solid(m_ren_base),

m_bgcol(bg_color)

{ }

@@ -40,13 +40,12 @@ public:

void color(agg::rgba8 c) { m_ren_solid.color(c); }

- void clear(agg::rgba c) { m_ren_base.clear(c); }

+ void clear(agg::rgba8 c) { m_ren_base.clear(c); }

void clear_box(const agg::rect_base<int>& r)

{

- unsigned len = r.x2 - r.x1;

for (int y = r.y1; y < r.y2; y++)

- m_ren_base.copy_hline (r.x1, y, len, m_bgcol);

+ m_ren_base.copy_hline (r.x1, y, r.x2, m_bgcol);

}

void clip_box(const agg::rect_base<int>& clip)

@@ -65,7 +64,7 @@ private:

Pixel m_pixbuf;

agg::renderer_base<Pixel> m_ren_base;

agg::renderer_scanline_aa_solid<agg::renderer_base<Pixel> > m_ren_solid;

- agg::rgba m_bgcol;

+ agg::rgba8 m_bgcol;

};

template <class Pixel>

@@ -79,7 +78,7 @@ class renderer_subpixel_aa

};

public:

- renderer_subpixel_aa(agg::rendering_buffer& ren_buf, agg::rgba bg_color):

+ renderer_subpixel_aa(agg::rendering_buffer& ren_buf, agg::rgba8 bg_color):

m_pixbuf(ren_buf), m_ren_base(m_pixbuf), m_ren_solid(m_ren_base),

m_bgcol(bg_color)

{ }

@@ -91,13 +90,13 @@ public:

double width() const { return m_pixbuf.width() / subpixel_scale; };

double height() const { return m_pixbuf.height(); };

- void clear(agg::rgba c) { m_ren_base.clear(c); }

+ void clear(agg::rgba8 c) { m_ren_base.clear(c); }

void clear_box(const agg::rect_base<int>& r)

{

- unsigned len = subpixel_scale * (r.x2 - r.x1);

+ int x1 = subpixel_scale * r.x1, x2 = subpixel_scale * r.x2;

for (int y = r.y1; y < r.y2; y++)

- m_ren_base.copy_hline (subpixel_scale * r.x1, y, len, m_bgcol);

+ m_ren_base.copy_hline (x1, y, x2, m_bgcol);

}

void clip_box(const agg::rect_base<int>& clip)

@@ -128,7 +127,7 @@ private:

pixfmt_type m_pixbuf;

agg::renderer_base<pixfmt_type> m_ren_base;

agg::renderer_scanline_aa_solid<agg::renderer_base<pixfmt_type> > m_ren_solid;

- agg::rgba m_bgcol;

+ agg::rgba8 m_bgcol;

};

template <class Renderer>

@@ -143,7 +142,7 @@ class canvas_gen : public Renderer {

public:

canvas_gen(agg::rendering_buffer& ren_buf, double width, double height,

- agg::rgba bgcol):

+ agg::rgba8 bgcol):

Renderer(ren_buf, bgcol), ras(), sl()

{ }

diff --git a/agg-plot/colors.cpp b/agg-plot/colors.cpp
index adef7ea9..9cee633e 100644
--- a/agg-plot/colors.cpp
+++ b/agg-plot/colors.cpp

@@ -70,7 +70,7 @@ color_arg_lookup (lua_State *L, int index)

return rgba8_lookup (L, cstr);

}

-agg::rgba colors::white(1, 1, 1);

-agg::rgba colors::black(0, 0, 0);

+agg::rgba8 colors::white(0xff, 0xff, 0xff);

+agg::rgba8 colors::black(0, 0, 0);

agg::rgba8 colors::cdefault(180, 0, 0, 255);

diff --git a/agg-plot/colors.h b/agg-plot/colors.h
index ceba512e..a507c729 100644
--- a/agg-plot/colors.h
+++ b/agg-plot/colors.h

@@ -12,8 +12,8 @@ extern agg::rgba8 color_arg_lookup (lua_State *L, int index);

namespace colors {

- extern agg::rgba white;

- extern agg::rgba black;

+ extern agg::rgba8 white;

+ extern agg::rgba8 black;

extern agg::rgba8 cdefault;

};

diff --git a/agg-plot/plot.h b/agg-plot/plot.h
index 9cfb159b..fbc6707c 100644
--- a/agg-plot/plot.h
+++ b/agg-plot/plot.h

@@ -796,7 +796,7 @@ void plot<RM>::draw_axis(canvas_type& canvas, agg::trans_affine& canvas_mtx,

{

draw::text* label = labels[j];

label->apply_transform(m, 1.0);

- canvas.draw(*label, agg::rgba(0, 0, 0));

+ canvas.draw(*label, colors::black);

}

lndash.add_dash(7.0, 3.0);

diff --git a/agg-plot/support_x11.cpp b/agg-plot/support_x11.cpp
index cc40d3d2..a489c730 100644
--- a/agg-plot/support_x11.cpp
+++ b/agg-plot/support_x11.cpp

@@ -2,7 +2,7 @@

#include "agg-pixfmt-config.h"

-const char *ttf_names[] = {"freefont/FreeSans.ttf", "ttf-dejavu/DejaVuSans.ttf", 0};

+const char *ttf_names[] = {"ubuntu-font-family/Ubuntu-R.ttf", "freefont/FreeSans.ttf", "ttf-dejavu/DejaVuSans.ttf", 0};

const char *gslshell::get_font_name()

{

diff --git a/agg-plot/text-shape.h b/agg-plot/text-shape.h
index 38abf892..e8dff0ce 100644
--- a/agg-plot/text-shape.h
+++ b/agg-plot/text-shape.h

@@ -10,11 +10,12 @@ namespace draw {

public:

text_shape(double x, double y, const char* text,

double _size = 10.0, double hjustif = 0.0, double vjustif = 0.0):

- m_text_label(text, _size), m_scaling(0), m_size(_size)

+ m_text_label(text, round(_size)), m_x(x), m_y(y), m_font_size(_size), m_size(_size)

{

- m_matrix.tx = x;

- m_matrix.ty = y;

+ m_matrix.tx = m_x;

+ m_matrix.ty = m_y;

m_text_label.model_mtx(m_matrix);

+ compute_bounding_box();

}

virtual void rewind(unsigned path_id)

@@ -24,20 +25,25 @@ namespace draw {

virtual unsigned vertex(double* x, double* y)

{

- unsigned cmd = m_text_label.vertex(x, y);

- if (m_scaling && agg::is_vertex(cmd))

- m_scaling->transform(x, y);

- return cmd;

+ return m_text_label.vertex(x, y);

}

virtual void bounding_box(double *x1, double *y1, double *x2, double *y2)

{

+ *x1 = m_bbox.x1;

+ *y1 = m_bbox.y1;

+ *x2 = m_bbox.x2;

+ *y2 = m_bbox.y2;

+ }

+ void compute_bounding_box()

+ {

const double pad = 1.0;

const double tx = m_matrix.tx, ty = m_matrix.ty;

- *x1 = tx - pad;

- *y1 = ty - pad;

- *x2 = tx + m_text_label.get_text_width() + pad;

- *y2 = ty + m_text_label.get_text_height() + pad;

+ m_bbox.x1 = tx - pad;

+ m_bbox.y1 = ty - pad;

+ m_bbox.x2 = tx + m_text_label.get_text_width() + pad;

+ m_bbox.y2 = ty + m_text_label.get_text_height() + pad;

}

virtual str write_svg(int id, agg::rgba8 c, double h)

@@ -48,11 +54,6 @@ namespace draw {

const agg::trans_affine& m = m_matrix;

double x = m.tx, y = m.ty;

- if (m_scaling)

- {

- m_scaling->transform(&x, &y);

- txt_size *= m_scaling->sy;

- }

str s = str::print("<text x=\"%g\" y=\"%g\" id=\"text%i\"" \

" style=\"font-size:%i\">" \

@@ -65,15 +66,23 @@ namespace draw {

virtual void apply_transform(const agg::trans_affine& m, double as)

{

- m_scaling = &m;

+ m_text_label.font_size(m.sx * m_font_size, round(m.sy * m_font_size));

+ double x = m_x, y = m_y;

+ m.transform(&x, &y);

+ m_matrix.tx = x;

+ m_matrix.ty = y;

m_text_label.approximation_scale(m.scale());

}

private:

text_label m_text_label;

+ double m_x, m_y;

+ double m_font_size;

agg::trans_affine m_matrix;

- const agg::trans_affine* m_scaling;

double m_size;

+ agg::rect_base<double> m_bbox;

};

}

diff --git a/agg-plot/text.cpp b/agg-plot/text.cpp
index 90e55129..0582b9e0 100644
--- a/agg-plot/text.cpp
+++ b/agg-plot/text.cpp

@@ -11,10 +11,10 @@ namespace draw {

void

text::apply_transform(const agg::trans_affine& m, double as)

{

- m_matrix.tx = m_x;

- m_matrix.ty = m_y;

- m.transform(&m_matrix.tx, &m_matrix.ty);

+ double x = m_x, y = m_y;

+ m.transform(&x, &y);

+ m_matrix.tx = x;

+ m_matrix.ty = round(y);

}

void

diff --git a/agg-plot/text.h b/agg-plot/text.h
index 96957bbf..c6c62e6e 100644
--- a/agg-plot/text.h
+++ b/agg-plot/text.h

@@ -23,7 +23,7 @@ class text : public sg_object

public:

text(const char* text, double size = 10.0, double hjustif = 0.0, double vjustif = 0.0):

m_x(0.0), m_y(0.0), m_angle(0.0),

- m_hjustif(hjustif), m_vjustif(vjustif), m_text_label(text, size)

+ m_hjustif(hjustif), m_vjustif(vjustif), m_text_label(text, round(size))

{

m_text_label.model_mtx(m_matrix);

}

@@ -54,7 +54,7 @@ class text : public sg_object

const char * get_text() const { return m_text_label.text().cstr(); }

- double text_height() const { return m_text_label.text_height(); }

+ double text_height() const { return m_text_label.get_text_height(); }

void set_point(double x, double y)

{

diff --git a/agg-plot/text_label.h b/agg-plot/text_label.h
index d5ed932d..8d5d7293 100644
--- a/agg-plot/text_label.h
+++ b/agg-plot/text_label.h

@@ -10,6 +10,12 @@

#include "sg_object.h"

+struct grid_fit_y_only {

+ static void adjust(double& x, double& y) { y = round(y); }

+};

+typedef grid_fit_y_only grid_fit;

class text_label

{

enum { scale_x = 100 };

@@ -20,7 +26,8 @@ class text_label

str m_text_buf;

double m_width;

- double m_height;

+ double m_font_height;

+ double m_font_width;

unsigned m_pos;

double m_x, m_y;

@@ -36,25 +43,23 @@ class text_label

public:

text_label(const char* text, double size):

- m_text_buf(text), m_height(size), m_pos(0),

+ m_text_buf(text), m_font_height(size), m_font_width(size),

m_font_eng(gslshell::font_engine()), m_font_man(gslshell::font_manager()),

m_model_mtx(&identity_matrix),

m_text_curve(m_font_man.path_adaptor()), m_text_trans(m_text_curve, m_text_mtx)

{

- set_font_size();

+ update_font_size();

m_width = get_text_width();

}

void model_mtx(const agg::trans_affine& m) { m_model_mtx = &m; }

- void set_font_size()

+ void font_size(double height, double width)

{

- m_font_eng.height(m_height);

- m_font_eng.width(m_height * scale_x);

+ m_font_height = height;

+ m_font_width = width;

}

- double text_height() const { return m_height; }

const str& text() const { return m_text_buf; }

bool load_glyph()

@@ -72,9 +77,16 @@ class text_label

if(glyph->data_type == agg::glyph_data_outline)

{

- m_text_mtx.tx = m_x / scale_x;

- m_text_mtx.ty = floor(m_y + 0.5);

- m_model_mtx->transform(&m_text_mtx.tx, &m_text_mtx.ty);

+ agg::trans_affine& m = m_text_mtx;

+ m.tx = m_x / scale_x;

+ m.ty = m_y;

+ m_model_mtx->transform(&m.tx, &m.ty);

+ if (fabs(m.sx * m.sy) > fabs(m.shx * m.shy))

+ grid_fit::adjust(m.tx, m.ty);

+ else

+ grid_fit::adjust(m.ty, m.tx);

m_advance_x = glyph->advance_x;

m_advance_y = glyph->advance_y;

@@ -88,7 +100,7 @@ class text_label

void rewind(double hjustif, double vjustif)

{

m_x = scale_x * (- hjustif * m_width);

- m_y = - 0.86 * vjustif * m_height;

+ m_y = - 0.86 * vjustif * m_font_height;

m_advance_x = 0;

m_advance_y = 0;

m_pos = 0;

@@ -97,7 +109,7 @@ class text_label

agg::trans_affine_scaling scale_mtx(1.0 / double(scale_x), 1.0);

trans_affine_compose (m_text_mtx, scale_mtx);

- set_font_size();

+ update_font_size();

load_glyph();

}

@@ -118,7 +130,7 @@ class text_label

void approximation_scale(double as) { m_text_curve.approximation_scale(as); }

- double get_text_height() const { return m_height; }

+ double get_text_height() const { return m_font_height; }

double get_text_width()

{

@@ -139,6 +151,13 @@ class text_label

return x / double(scale_x);

}

+private:

+ void update_font_size()

+ {

+ m_font_eng.height(m_font_height);

+ m_font_eng.width(m_font_width * scale_x);

+ }

};

#endif

diff --git a/agg-plot/window-cpp.h b/agg-plot/window-cpp.h
index 53afe152..20ba4744 100644
--- a/agg-plot/window-cpp.h
+++ b/agg-plot/window-cpp.h

@@ -60,7 +60,7 @@ private:

ref::node* m_tree;

public:

- window(gsl_shell_state* gs, agg::rgba bgcol= colors::white):

+ window(gsl_shell_state* gs, agg::rgba8 bgcol= colors::white):

canvas_window(gs, bgcol), m_tree(0)

{

this->split(".");

diff --git a/doc/user-manual/bsplines.rst b/doc/user-manual/bsplines.rst
index 1036d53f..7f30d84d 100644
--- a/doc/user-manual/bsplines.rst
+++ b/doc/user-manual/bsplines.rst

@@ -45,7 +45,7 @@ B-Splines functions

.. function:: bspline(a, b, N[, order])

bspline(knots[, order])

- Create an object of type :class:`BSpline`. In the first form it will create a basis splines in the interval from ``a`` to ``b`` with ``N`` uniformly spaced breaks. The ``order`` is 4 if unspecified, it does correspond to cubic splines. In the second form you should provide a non-decreasing list ``knots`` with all the points.

+ Create an object of type :class:`BSpline`. In the first form it will create basis splines in the interval from ``a`` to ``b`` with ``N`` uniformly spaced breaks. The ``order`` is 4 if unspecified, which corresponds to cubic splines. In the second form you should provide a non-decreasing list ``knots`` with all the points.

.. class:: BSpline

diff --git a/doc/user-manual/complex.rst b/doc/user-manual/complex.rst
index 66e7a9ab..372279e3 100644
--- a/doc/user-manual/complex.rst
+++ b/doc/user-manual/complex.rst

@@ -17,7 +17,7 @@ Complex Functions

In this section we describe the mathematical functions on complex numbers.

.. note::

- Please note that the standard functions from the :mod:`math` does *not* accept complex numbers.

+ Please note that the standard functions from the :mod:`math` module do *not* accept complex numbers.

Basic Functions

~~~~~~~~~~~~~~~

@@ -37,12 +37,12 @@ Basic Functions

.. function:: abs(z)

Returns the norm of the complex number ``z`` calculated as :math:`\sqrt{z \, z^\star}`.

- This function accept real or complex number as argument.

+ This function accepts a real or complex number as argument.

.. function:: norm2(z)

Returns the square norm of the complex number ``z`` calculated as :math:`z \, z^\star`.

- This function accept real or complex number as argument.

+ This function accepts a real or complex number as argument.

.. function:: rect(z)

@@ -196,10 +196,6 @@ Inverse Complex Hyperbolic Functions

The branch cut is on the real axis, less than 1.

Note that in this case we use the negative square root in formula 4.6.21 of Abramowitz & Stegun giving :math:`\textrm{acosh}(z)=\log(z-\sqrt{z^2-1})`.

-.. function:: real(z)

- This function returns the complex hyperbolic arccosine of the real number z, :math:`\textrm{acosh}(z)`.

.. function:: atanh(z)

This function returns the complex hyperbolic arctangent of the complex number z, :math:`\textrm{atanh}(z)`.

diff --git a/doc/user-manual/contour.rst b/doc/user-manual/contour.rst
index d656fad3..87c09232 100644
--- a/doc/user-manual/contour.rst
+++ b/doc/user-manual/contour.rst

@@ -10,9 +10,9 @@ Contour Plots

Overview

--------

-GSL shell offer a contour plot function to draw contour curve of bidimensional functions. The current algorithm works correctly only for continuous functions and it may gives bad results if the function have discontinuities.

+GSL shell offers a contour plot function to draw contour curves of bidimensional functions. The current algorithm works correctly only for continuous functions and it may give bad results if the function has discontinuities.

-Here an example of its utilization to plot the function :math:`f(x,y) = x^2 - y^2`::

+Here is an example of its utilization to plot the function :math:`f(x,y) = x^2 - y^2`::

contour.plot(|x,y| x^2 - y^2, -8, -8, 8, 8)

@@ -24,17 +24,17 @@ Here an example of its utilization to plot the function :math:`f(x,y) = x^2 - y^

Plot a contour plot of the function ``f`` in the rectangle delimited by (xmin, ymin), (xmax, ymax) and return the plot itself.

- The ``options`` argument is an optional table that can contain the following field:

+ The ``options`` argument is an optional table that can contain the following fields:

* ``gridx``, number of subdivision along x

* ``gridy``, number of subdivision along y

* ``levels``, number of contour levels or a list of the level values in monotonic order.

- * ``colormap`` a function that return a color for the contour region. The argument of the function will be a number between 0 and 1.

+ * ``colormap`` a function that returns a color for the contour region. The argument of the function will be a number between 0 and 1.

* ``show``, specify if the plot should be shown. By default it is ``true``.

.. function:: polar_plot(f, R[, options]])

- Plot a contour plot of the function ``f(x, y)`` over the circular domain or radius ``R`` and centered at the origin. The ``options`` table accept the same field of the function :func:`contour`.

+ Plot a contour plot of the function ``f(x, y)`` over the circular domain of radius ``R`` and centered at the origin. The ``options`` table accepts the same fields as the function :func:`contour`.

Example::

diff --git a/doc/user-manual/csv.rst b/doc/user-manual/csv.rst
index 4158fd8d..95aa2abe 100644
--- a/doc/user-manual/csv.rst
+++ b/doc/user-manual/csv.rst

@@ -7,17 +7,17 @@ Comma Separated Values Parsing Utilities

.. module:: csv

-The module ``csv`` offer a few useful functions to read data files in 'csv' format. This latter is a quite popular format used to easily save simple tabular data as a comma-separated values with several lines. This format can be used to exchange data with spreadsheets applications.

+The module ``csv`` offers a few useful functions to read data files in 'csv' format. The latter is quite a popular format used to easily save simple tabular data as comma-separated values with several lines. This format can be used to exchange data with spreadsheet applications.

.. function:: read(filename)

- Read the given filename and returns a table that store the values from the given file (``filename`` argument). The table returned is in the form ``{{row1_v1, row1_v2, ...}, {row2_v1, row2_v2, ...}, ...}`` so that you can obtain the number of lines read by using the ``#`` operator (number of element in a table). The table can contains both numeric values or strings accordingly to the csv specifications. Each row can potentially contain a different number of elements. The user can check the number of number of elements of each row if needed.

+ Reads the given filename and returns a table that stores the values from the given file (``filename`` argument). The table returned is in the form ``{{row1_v1, row1_v2, ...}, {row2_v1, row2_v2, ...}, ...}`` so that you can obtain the number of lines read by using the ``#`` operator (number of elements in a table). The table can contain both numeric values and strings in accordance with the csv specifications. Each row can potentially contain a different number of elements. The user can check the number of elements of each row if needed.

Here an example of utilization::

csv = require 'csv'

- -- load some data and save the results is a table

+ -- load some data and save the results in a table

t = csv.read('examples/data/sige-sims-prof.csv')

-- if the data contains only numbers it can be easily converted

@@ -29,8 +29,8 @@ The module ``csv`` offer a few useful functions to read data files in 'csv' form

p:addline(graph.xyline(m:col(1), m:col(2)), 'blue')

p:show()

- Please note the function returns a table, not a matrix. If the table is in rectangular form and it does contains only number it can be easily converted into a matrix using the function :func:`matrix` as shown in the example above.

+ Please note that the function returns a table, not a matrix. If the table is in rectangular form and if it contains only numbers, it can easily be converted into a matrix using the function :func:`matrix` as shown in the example above.

.. function:: line(str)

- This function split the string ``str`` using commas as separators accordingly to the csv format specifications. This function can be useful to build a customized csv parser.

+ This function splits the string ``str`` using commas as separators in accordance with the csv format specifications. This function can be useful to build a customized csv parser.

diff --git a/doc/user-manual/examples.rst b/doc/user-manual/examples.rst
index 17e3a499..9ad61ff5 100644
--- a/doc/user-manual/examples.rst
+++ b/doc/user-manual/examples.rst

@@ -7,12 +7,12 @@

GSL Shell Examples

==================

-In this chapter we gives some examples about the usage of GSL Shell.

+In this chapter we give some usage examples of GSL Shell.

Home-made Bessel Functions

--------------------------

-The Bessel's function J\ :sub:`n` for integer values of n can be defined with the following integral:

+The Bessel function J\ :sub:`n` for integer values of n can be defined with the following integral:

.. math::

J_n(x) = {1 \over \pi} \int_0^\pi \cos(n \tau - x \sin \tau) \textrm{d}\tau

@@ -29,7 +29,7 @@ This is easy like eating a piece of cake::

return 1/pi * num.integ(f, 0, pi, epsabs, epsrel)

end

-The definition of ``bessJ`` takes x and n as arguments and calculate the definite integral between 0 and |pgr|. Then we can plot the results for various values of n::

+The definition of ``bessJ`` takes x and n as arguments and calculates the definite integral between 0 and |pgr|. Then we can plot the results for various values of n::

p = graph.plot('Bessel Functions Jn, n=0 ... 5')

for n=0, 5 do

@@ -41,7 +41,7 @@ to obtain the following result:

.. figure:: example-bessJ-plot.png

-Then we can also calculate a matrix with the tabulated values. For examples we can use the columns of the matrix to span different values of n. We write then::

+Then we can also calculate a matrix with the tabulated values. For example, we can use the columns of the matrix to span different values of n. We write::

m = matrix.new(200, 6, |k,n| bessJ((k-1)/10, n-1))

@@ -100,7 +100,7 @@ Here an example to plot it with GSL Shell. First we need a function to produce t

end

-Then we need to produce the plot. Since we want to make something cool we produce a closed Von Koch triangle by using always the same curve and adding it to the plot with some rotations and translations. We also produce a nice semi-transparent background to have something more beautiful. Here the code::

+Then we need to produce the plot. Since we want to make something cool, we produce a closed Von Koch triangle by always using the same curve and adding it to the plot with some rotations and translations. We also produce a nice semi-transparent background to have something more beautiful. Here is the code::

p = graph.plot()

@@ -125,37 +125,37 @@ Then we need to produce the plot. Since we want to make something cool we produc

p:show()

-And here the result:

+And this is the result:

.. figure:: examples-von-koch-complete.png

-With a similar procedure, the code is in ``demos/fractals.lua`` we can produce beautiful Levy C curve:

+With a similar procedure, for which the code is in ``demos/fractals.lua``, we can produce the beautiful Levy C curve:

.. figure:: examples-levy-c-curve-1.png

Reading and plotting data from files

------------------------------------

-In this example we show how to load some data stored in a file in CSV format and to make some plot. The CSV is a very simple format that can be used to exchange data with spreadsheets applications. It is just a plain text with several lines where each line consists of a comma-separated list of numbers or strings.

+In this example we show how to load some data stored in a file in CSV format and use it to make a plot. The CSV is a very simple format that can be used to exchange data with spreadsheet applications. It is just plain text with several lines where each line consists of a comma-separated list of numbers or strings.

-In order to load the data you need to charge the module :mod:`csv` and the to use the function :func:`~csv.read`. In this example we will use the data stored in the file ``examples/data/sige-sims-prof.csv`` this set of data contains just two columns, the first one is the x and the second column represent the y. Here the simple code to load the data::

+In order to load the data, you need to load the module :mod:`csv` and use the function :func:`~csv.read`. In this example, we will use the data stored in the file ``examples/data/sige-sims-prof.csv``. This set of data contains just two columns, of which the first one is the x and the second column represents the y. Here is the simple code to load the data::

csv = require 'csv'

t = csv.read('examples/data/sige-sims-prof.csv')

-we can then print the number of lines in the table:

+We can then print the number of lines in the table:

>>> #t

316

-If you want to plot the data in the table there isn't actually any function that will do that right way but you can do it by using a few functions::

+If you want to plot the data in the table, there is actually no function that will do that right away, but you can do it by using a few functions::

p = graph.plot()

dget = function(i) return t[i][1], t[i][2] end

p:addline(graph.ipath(iter.sequence(dget, #t)))

p:show()

-the idea is that, in order to plot the curve we need to *build* the curve before.

+The idea is that, in order to plot the curve, we need to *build* the curve beforehand.

What we want is actually a line that connects the points ``(x[i], y[i])`` where ``x[i]`` and ``y[i]`` are taken from the rows of the table ``t``.

The last resort to obtain that would be to create a :class:`Path` object and to give all the points in a procedural way like this::

@@ -166,9 +166,9 @@ The last resort to obtain that would be to create a :class:`Path` object and to

ln:line_to(t[i][1], t[i][2])

end

-but it can be more handy to use the :func:`ipath` function to build the curve. This latter function build a curve using an iterator that returns values in the form ``(x, y)``. Then to obtain the iterator we use the :func:`sequence` function that let us easily build an iterator over a sequence of integer numbers.

+but it can be more handy to use the :func:`ipath` function to build the curve. This latter function builds a curve using an iterator that returns values in the form ``(x, y)``. To obtain the iterator, we use the :func:`sequence` function that lets us easily build an iterator over a sequence of integer numbers.

-So to make more clear the code given above we can separate the curve and the iterator instantiations like in the following example::

+So to clarify the code given above, we can separate the curve and the iterator instantiations as in the following example::

p = graph.plot()

@@ -218,11 +218,11 @@ For m = 0, the even definition is used which reduces to R\ :sub:`n`\ :sup:`0`\ (

Implementation

~~~~~~~~~~~~~~

-The above formula can be implemented quite straightforwardly in GSL shell with only a subtle point about the factorials in the denominator. The problem is that in same cases you can have the factorial of negative number and if you feed a negative number to the :func:`fact` function you will get an error.

+The above formula can be implemented quite straightforwardly in GSL Shell with only a subtle point about the factorials in the denominator. The problem is that in some cases you can have the factorial of a negative number and if you feed a negative number to the :func:`fact` function, you will get an error.

-Actually the meaning of the formula is that the factorial of a negative number if :math:`\infty` and so, since it does appear in the denominator, its contribution to the sum is null. So, in order to implement this behavior we just define an auxiliary function that return the inverse of the factorial and zero when the argument is negative.

+Actually the meaning of the formula is that the factorial of a negative number is :math:`\infty` and so, since it appears in the denominator, its contribution to the sum is null. So, in order to implement this behavior we just define an auxiliary function that returns the inverse of the factorial and zero when the argument is negative.

-So here the code for the radial part::

+So here is the code for the radial part::

use 'math'

@@ -242,14 +242,14 @@ So here the code for the radial part::

return z

end

-and the we define the Zernike's function completed with the angular part::

+Next, we define Zernike's function completed with the angular part::

function zernicke(n, m, p, phi, even)

local pf = even and cos(m*phi) or sin(-m*phi)

return zerR(n, m, p) * pf

end

-Now we are just ready to draw our function, the only missing piece is the relation between ρ, φ and the Cartesian coordinates but this is trivial:

+Now we are ready to draw our function. The only missing piece is the relation between ρ, φ and the Cartesian coordinates but this is trivial:

.. math::

\begin{array}{ll}

@@ -257,7 +257,7 @@ Now we are just ready to draw our function, the only missing piece is the relati

\phi = & \tan^{-1}(y, x)

\end{array}

-let us therefore define our sample function in term of x and y and use it to call the function :func:`polar_contour`::

+Let us therefore define our sample function in terms of x and y and use it to call the function :func:`polar_contour`::

require 'contour'

N, M = 8, -2

diff --git a/doc/user-manual/fft.rst b/doc/user-manual/fft.rst
index 7bd00856..5bdce72f 100644
--- a/doc/user-manual/fft.rst
+++ b/doc/user-manual/fft.rst

@@ -39,50 +39,50 @@ Numerical Recipes uses the opposite convention, a positive exponential in the fo

GSL Shell interface

-------------------

-GSL Shell provide a simple interface to perform Fourier transform of real data with the functions :func:`num.fft` and :func:`num.fftinv`.

-The first function perform the Fourier transform of a column matrix and the second is the inverse Fourier transform.

+GSL Shell provides a simple interface to perform Fourier transforms of real data with the functions :func:`num.fft` and :func:`num.fftinv`.

+The first function performs the Fourier transform of a column matrix and the second is the inverse Fourier transform.

-The function :func:`num.fft` returns an half-complex array.

-This latter is similar to a column matrix of complex numbers but it is actually a different object because the numbers are packed together following some specific rules related to the algorithm.

+The function :func:`num.fft` returns a half-complex array.

+This latter is similar to a column matrix of complex numbers, but it is actually a different object because the numbers are packed together following some specific rules related to the algorithm.

-The idea is that you can access to the element of this vector for reading or writing by using a simple indexing.

+The idea is that you can access the elements of this vector for reading or writing simply by indexing it.

You can also obtain the size of the vector using the operator '#'.

-The valid indexes for an half complex object range from 0 to N-1 where N is the size if the vector.

-Each element of the vector correspond to the coefficient :math:`z_k` defined above.

+The valid indices for a half-complex object range from 0 to N-1 where N is the size of the vector.

+Each element of the vector corresponds to the coefficient :math:`z_k` defined above.

-When performing Fourier transform it is important to know that the computation speed can be greatly influenced by the size of the vector. In the size is a power of two then a very efficient algorithm can be used and we can talk in this case of Fast Fourier Transform (FFT). In addition the algorithm has the advantage that it does not require any additional workspace. When the size of the vector is not a power of two we can have two different cases:

+When performing Fourier transforms, it is important to know that the computation speed can be greatly influenced by the size of the vector. If the size is a power of two, a very efficient algorithm can be used and we can talk in this case of a Fast Fourier Transform (FFT). In addition, the algorithm has the advantage that it does not require any additional workspace. When the size of the vector is not a power of two, we can have two different cases:

- * the size if a product of small prime numbers

- * the size contain a big (> 7) prime number in its factorization

+ * the size is a product of small prime numbers

+ * the size contains a big (> 7) prime number in its factorization

-This detail is important because when the size is a product of small prime numbers a fast algorithm is still available but it is still somewhat slower and it does require some additional workspace.

-In the worst case when the size cannot be factorized to small prime numbers the Fourier transform can still be computed but the calculation is slower especially for large array.

+This detail is important because if the size is a product of small prime numbers, a fast algorithm is still available but it is still somewhat slower and it does require some additional workspace.

+In the worst case when the size cannot be factorized to small prime numbers, the Fourier transform can still be computed but the calculation is slower, especially for large arrays.

-GSL Shell hide all the details and take cares of choosing the appropriate algorithm based on the size of the vector.

-It does also provide transparently any additional workspace that may be needed for the algorithm.

-In order to avoid repeated allocation of workspace memory the workspace allocated is kept in memory and reused *if the size of the array does not change*.

-This mean that the approach of GSL Shell is quite optimal if you perform many times Fourier transforms (direct or inverse) of the same size.

+GSL Shell hides all the details and takes care of choosing the appropriate algorithm based on the size of the vector.

+It also transparently provides any additional workspace that may be needed for the algorithm.

+In order to avoid repeated allocation of workspace memory, the workspace allocated is kept in memory and reused *if the size of the array does not change*.

+This means that the approach of GSL Shell is quite optimal if you perform many Fourier transforms (direct or inverse) of the same size.

-Even if GSL Shell take cares of the details automatically you should be aware of these performance notice because it can make a big difference in real applications.

-From the practical point of view it is useful in most of the case to always provides samples whose size is a power of two.

+Even though GSL Shell takes care of the details automatically, you should be aware of these performance notices because they can make a big difference in real applications.

+From a practical point of view, it is useful in most cases to always provide samples whose size is a power of two.

-Another specificity of the functions :func:`num.fft` and :func:`num.fftinv` is that they can optionally perform the transformation *in place* by modifying the original data instead of creating a copy.

-When a transformation *in place* is requested the routine still return a new vector (either a real matrix or an half-complex array) but this latter will point to the same underlying data of the original vector.

-The transformation *in place* can be useful in same cases to avoid unnecessary data copying and memory allocation.

+Another property of the functions :func:`num.fft` and :func:`num.fftinv` is that they can optionally perform the transformation *in place* by modifying the original data instead of creating a copy.

+When a transformation *in place* is requested, the routine still returns a new vector (either a real matrix or a half-complex array) but this latter will point to the same underlying data of the original vector.

+The transformation *in place* can be useful in some cases to avoid unnecessary data copying and memory allocation.

Fourier Transform of Real Data

------------------------------

-For real data the Fourier coefficients satisfies the relation

+For real data, the Fourier coefficients satisfy the relation

.. math::

z_k = z_{N-k}^*

where N is the size of the vector and k is any integer number from 0 to N-1.

-Because of this relation the data is packed in a special type of object called an half-complex array.

+Because of this relation, the data is packed in a special type of object called a half-complex array.

-To access element in half-complex array you can use the indexing with an integer number between 0 and N-1, inclusive. So, for example::

+To access an element in a half-complex array, you can index it with an integer number between 0 and N-1, inclusive. So, for example::

-- get a random number generator

r = rng.new()

@@ -96,15 +96,15 @@ To access element in half-complex array you can use the indexing with an integer

-- print all the coefficients of the Fourier transform

for k=0, #ft-1 do print(ft[k]) end

-As shown in the example above you can use the Lua operator '#' to obtain the size of an half-complex array.

+As shown in the example above, you can use the Lua operator '#' to obtain the size of a half-complex array.

.. function:: fft(v[, in_place])

- Perform the Fourier transform of the real valued column matrix ``x``.

+ Perform the Fourier transform of the real-valued column matrix ``v``.

If ``in_place`` is ``true`` then the original data is altered and the resulting vector will point to the same underlying data of the original vector.

- Please note that the value you obtain is not an ordinary matrix but an half-complex array.

- You can access the element of such kind of array by indexing the vector.

+ Please note that the value you obtain is not an ordinary matrix but a half-complex array.

+ You can access the elements of such an array by indexing the vector.

If you want to have an ordinary matrix you can easily build it with the following instructions::

-- we suppose that f is an half-complex array

@@ -112,18 +112,18 @@ As shown in the example above you can use the Lua operator '#' to obtain the siz

.. function:: fftinv(hc[, in_place])

- Return a column matrix that contains then inverse Fourier transform of the half-complex vector ``hc``.

+ Return a column matrix that contains the inverse Fourier transform of the half-complex vector ``hc``.

If ``in_place`` is ``true`` then the original data is altered and the resulting vector will point to the same underlying data of the original vector.

- This transformation is the inverse of the function :func:`num.fft` so that if you perform the two transformations consecutively you will obtain a vector identical to the initial one.

+ This transformation is the inverse of the function :func:`num.fft`, so that if you perform the two transformations consecutively you will obtain a vector identical to the initial one.

- A typical usage of :func:`fft_inv` is to revert the transformation made with :func:`fft` but by doing some transformations of the way.

+ A typical usage of :func:`fftinv` is to revert the transformation made with :func:`fft` but by doing some transformations along the way.

So a typical usage path could be::

-- we assume v is a column matrix with our data

ft = num.fft(v) -- Fourier transform

- -- here we can manipulate the half-complex array 'ft' with

+ -- here we can manipulate the half-complex array 'ft'

-- using the methods `get' and `set'

some code here

@@ -133,7 +133,7 @@ As shown in the example above you can use the Lua operator '#' to obtain the siz

FFT example

-----------

-In this example we will treat a square pulse in the temporal domain. To illustrate a typical example of FFT usage we perform the Fourier Transform of the signal and we cut the higher order frequencies. Than we perform the inverse transform and we compare the result with the original time signal.

+In this example we will treat a square pulse in the temporal domain. To illustrate a typical example of FFT usage we perform the Fourier Transform of the signal and we cut the higher order frequencies. Then we perform the inverse transform and we compare the result with the original time signal.

So, first we define our square pulse in the time domain. Actually it will be a matrix with just one column::

@@ -143,7 +143,7 @@ So, first we define our square pulse in the time domain. Actually it will be a m

-- we create a pulse signal in the time domain

y = matrix.new(n, 1, |i| i < n/3 and 0 or (i < 2*n/3 and 1 or 0))

-Than we create two new plots, one for the Fourier transform and one for the signal itself::

+Then we create two new plots, one for the Fourier transform and one for the signal itself::

pt = graph.plot('Original signal / reconstructed')

@@ -177,4 +177,4 @@ and plot the results::

Time signal before (black) and after (red) the transformation

-You can observe in the reconstructed signal (the red curve) that we obtain approximately the square pulse but with a lot of oscillations. Of course this is an artifact of our transformations. The reason is that in order to reproduce perfectly a sharp signal we need also all the high frequencies of the Fourier transform.

+You can observe in the reconstructed signal (the red curve) that we obtain approximately the square pulse, but with a lot of oscillations. Of course this is an artifact of our transformations. The reason is that in order to perfectly reproduce a sharp signal, we also need all the high frequencies of the Fourier transform.

diff --git a/doc/user-manual/general.rst b/doc/user-manual/general.rst
index 7ffc3e11..db7e4e41 100644
--- a/doc/user-manual/general.rst
+++ b/doc/user-manual/general.rst

@@ -11,8 +11,8 @@ Overview

.. function:: use(name)

Makes the functions defined in the module "name" accessible in the global namespace.

- If the function :func:`use` is used in a separate file its effect is limited to the file itself and the function that it calls.

- When the function is used from the interactive shell the global environment is affected.

+ If the function :func:`use` is used in a separate file, its effect is limited to the file itself and the functions that it calls.

+ When the function is used from the interactive shell, the global environment is affected.

As a special case When called with 'strict' as its argument the use of undeclared global variables will be checked.

When active all global variables must be declared through a regular assignment (even assigning nil will do) in a main chunk before being used anywhere or assigned to inside a function.

@@ -21,4 +21,4 @@ Overview

.. function:: restore_env()

- This function restore the original environment by removing the effect of all the :func:`use` calls made before.

+ This function restores the original environment by removing the effect of all the :func:`use` calls made before.

diff --git a/doc/user-manual/graphics.rst b/doc/user-manual/graphics.rst
index 512f587d..dbf05395 100644
--- a/doc/user-manual/graphics.rst
+++ b/doc/user-manual/graphics.rst

@@ -10,24 +10,24 @@ Graphics

Overview

--------

-When you are working with a lot of numbers and complicated functions it is often useful to be able to do some plotting of the data.

-GSL Shell offers a graphics module that allow to produce beautiful graphics with a very rich interface and almost unlimited possibilities.

+When you are working with a lot of numbers and complicated functions, it is often useful to visualize the data.

+GSL Shell offers a graphics module that allows you to produce beautiful graphics with a very rich interface and almost unlimited possibilities.

-The graphics module in GSL Shell use for its implementation the excellent `Anti-Grain Geometry <http://www.antigrain.com/>`_ library, version 2.5, of Maxim Shemanarev.

+The graphics module in GSL Shell is implemented using the excellent `Anti-Grain Geometry <http://www.antigrain.com/>`_ library, version 2.5, of Maxim Shemanarev.

-GSL shell offer a graphics interface with few but powerful functions that, if appropriately used allows to the user to produce a wide range of plot types.

+GSL Shell offers a graphics interface with few but powerful functions that, if used appropriately, allow the user to produce a wide range of plot types.

A First Example

---------------

.. module:: graph

-Let's start with a simple example, let us suppose that we want to plot the function:

+Let's start with a simple example. Suppose that we want to plot the function:

.. math::

f(x) = \exp(-\alpha \, t) \, \sin(\omega \, t)

-where |agr| and |ohgr| are constants and t vary from 0 to t1.

+where |agr| and |ohgr| are constants and t varies from 0 to t1.

Before starting we need to note a couple of things.

@@ -37,7 +37,7 @@ The graphical functions are in the module 'graph'.

In order to access them you can use the :func:`use` function or just prefix all the functions with the module name like, for example, ``graph.plot``.

Choose whatever option you prefer, it is just a matter of taste.

-So now we can came back to our mathematical functions.

+So now we can come back to our mathematical function.

We can plot this function with GSL Shell using the following instructions::

use 'math'

@@ -56,22 +56,22 @@ We can plot this function with GSL Shell using the following instructions::

return p

end

-Then to plot something you have just to call the 'myplot' function. For example::

+Now you just have to call the 'myplot' function. For example::

myplot(0.3, 5, 6)

.. figure:: graphics-example-1.png

-The function :func:`graph.fxline` takes three arguments, the function to plot and the initial and final values of the variable. By default the function will be sampled with 256 points but if you want you can provide a fourth arguments to give the number of sample points.

+The function :func:`graph.fxline` takes three arguments, the function to plot and the initial and final values of the variable. By default, the function will be sampled with 256 points, but if you want, you can provide the number of sample points as a fourth argument.

-In this example we have used the :func:`graph.plot` function to create a plot, the :func:`graph.fxline` function to create the line to draw and the method :func:`~Plot.addline` to add the line to the plot (in red). These three operations can be done with a single function, :func:`graph.fxplot`. It works like that::

+In this example, we have used the :func:`graph.plot` function to create a plot, the :func:`graph.fxline` function to create the line to draw and the method :func:`~Plot.addline` to add the line to the plot (in red). These three operations can be done with a single function, :func:`graph.fxplot`. It can be called as follows::

p = graph.fxplot(|x| sin(x), 0, 8*pi)

-where the first arguments is the function to plot and the following

-arguments are the extrema of variation of the independent variable x.

+where the first argument is the function to plot and the following

+arguments are the limits of the range of the independent variable x.

-You may wish to add a title to the plot by using the :attr:`~Plot.title` attribute and may be save it in a file as an image by using the :meth:`~Plot.save` method. Here how to do it in two lines of code::

+You may wish to add a title to the plot using the :attr:`~Plot.title` attribute and save it in a file as an image using the :meth:`~Plot.save` method. Here's how you do it in two lines of code::

p.title = "y = sin(x)"

p:save('sin-function', 640, 480)

@@ -79,20 +79,20 @@ You may wish to add a title to the plot by using the :attr:`~Plot.title` attribu

Graphics Functions

------------------

-To create many type of plots you don't really need to use always the graphics primitives but you can use the higher level plotting functions.

-We give in this section the description of all the higher level plotting functions.

+For many types of plots, you don't need to use the graphics primitives, as GSL Shell provides higher level plotting functions.

+In this section, we give an overview and a description of all the higher level plotting functions.

.. function:: fxplot(f, xi, xs[, color, n])

Produces a plot of the function ``f(x)`` for x going from ``xi`` to ``xs``.

- The last optional parameter ``n`` is the number of sampling point to use and, if not given, a default value will be used.

+ The last optional parameter ``n`` is the number of sampling points to use and, if not specified, the default value of 256 will be used.

The function returns the plot itself.

.. function:: fiplot(f, a, b[, color])

fiplot(f, b)

Produces a plot of the function ``f(i)`` where 'i' is an integer variable going from ``a`` to ``b``.

- In the second abbreviated form ``a`` take the default value of one.

+ In the second abbreviated form, ``a`` takes the default value of one.

The function returns the plot itself.

*Example*::

@@ -106,8 +106,8 @@ We give in this section the description of all the higher level plotting functio

.. function:: fibars(f, a, b[, color, fill_ratio])

Produces a bar plot of the function ``f(i)`` where ``i`` is an integer ranging from ``a`` to ``b``.

- The parameter ``fill_ratio`` determine the width of the bars and is by default equal to 1.

- When a smaller value is provided for ``fill_ratio`` the bars will be drawn with a smaller width along the x axis.

+ The parameter ``fill_ratio`` determines the width of the bars and is by default equal to 1.

+ When a smaller value is provided for ``fill_ratio``, the bars will be drawn with a smaller width along the x axis.

*Example*::

@@ -116,7 +116,7 @@ We give in this section the description of all the higher level plotting functio

.. function:: fxline(f, xi, xs[, n])

- This function returns an graphical object of type :class:`Path` given by the points (x, f(x)) for x going from ``xi`` to ``xs`` with ``n`` sampling point.

+ This function returns a graphical object of type :class:`Path` given by the points (x, f(x)) for x going from ``xi`` to ``xs`` with ``n`` sampling points.

*Example*::

@@ -131,7 +131,7 @@ We give in this section the description of all the higher level plotting functio

p:show()

*Example*:

- You can produce very easily a nice illustration of the "area below a curve" with the following commands::

+ You can very easily produce a nice illustration of the "area below a curve" with the following commands::

use 'math'

@@ -141,20 +141,20 @@ We give in this section the description of all the higher level plotting functio

p:addline(line)

p:show()

- and it will produce the following output:

+ which will produce the following output:

.. figure:: graphics-example-yellow-area.png

.. function:: filine(f, a, b)

filine(f, b)

- This function returns an graphical object of type :class:`Path` given by the points (i, f(i)) where 'i' is an integer variable going from ``a`` to ``b``. It ``a`` is omitted values will be in the interval 1 .. b.

+ This function returns a graphical object of type :class:`Path` given by the points (i, f(i)) where 'i' is an integer variable going from ``a`` to ``b``. If ``a`` is omitted, values will be in the interval 1 .. b.

- For its usage see the similar function :func:`graph.fxline`.

+ For usage, see the similar function :func:`graph.fxline`.

.. function:: xyline(x, y)

- This function takes two column matrix of dimensions N as arguments and returns a graphical object of type :class:`Path` given by the points (x[i], y[i]) where i goes from 1 to N.

+ This function takes two column matrices of dimension N as arguments and returns a graphical object of type :class:`Path` given by the points (x[i], y[i]) where i goes from 1 to N.

*Example*::

@@ -176,16 +176,16 @@ We give in this section the description of all the higher level plotting functio

ipathp(f)

This function takes an iterator function ``f`` and returns a :class:`Path` given by the points (x, y) returned by the iterator ``f``.

- The variant :func:`ipathp` is able to treat the case when the function ``f`` fails and it does continue by calling the iterator again.

+ The variant :func:`ipathp` is able to treat the case when the function ``f`` fails and continues by calling the iterator again.

*Example*

- In the following example we shows how to create a circle by using an iterator to generates the points.

- We define first an iterator that use a counter to generate the points and then we pass the iterators to the function :func:`ipath`.

- In this way we obtain an object of type :class:`Path` and we can add into a plot.::

+ In the following example, we show how to create a circle using an iterator to generate the points.

+ We first define an iterator that uses a counter to generate the points and then we pass the iterator to the function :func:`ipath`.

+ This way, we obtain an object of type :class:`Path` and we can add it to a plot::

use 'math'

- -- create a simple iterator that return n points uniformly spaced

- -- in a circle centerd in (x0, y0) with radius R

+ -- create a simple iterator that returns n points uniformly spaced

+ -- on a circle centered in (x0, y0) with radius R

circle = function(x0, y0, R, n)

local k = 0

return function()

@@ -204,11 +204,11 @@ We give in this section the description of all the higher level plotting functio

p:addline(line)

p:show()

- The example above show how to create the more generic iterator just using basic Lua constructs.

+ The example above shows how to create a more generic iterator using basic Lua constructs.

A generic iterator is very flexible because it can generate any sequence of values without restrictions of any kind.

- You will probably find that in many cases you want to build iterators that generate values over a range on integer numbers like we was doing for the circle in the example.

- In such cases it can be simpler to use the function :func:`iter.sequence` that creates an iterators over a range of integer.

- Here the same example of above but using the function :func:`iter.sequence`::

+ You will probably find that in many cases, you want to build iterators that generate values over a range of integer numbers, as we did for the circle in the example.

+ In such cases it can be simpler to use the function :func:`iter.sequence` that creates an iterator over a range of integers.

+ Here is the same example as above but using the function :func:`iter.sequence`::

use 'math'

-- create a simple iterator that return n points uniformly spaced

@@ -231,7 +231,7 @@ We give in this section the description of all the higher level plotting functio

.. function:: ibars(f)

This function takes an iterator function ``f`` and returns a :class:`Path` object that draws many adjacent rectangular boxes corresponding to the points (x, y) returned by the iterator ``f``.

- This function simplify the creation of histograms.

+ This function simplifies the creation of histograms.

*Example*::

@@ -256,12 +256,12 @@ We give in this section the description of all the higher level plotting functio

rgba(r, g, b, a)

Returns a color specified by the given ``r``, ``g``, ``b`` values.

- These latters should be numbers in the interval [0, 1].

- The second variant of the function let you specify an alpha value.

- This latter can range from 0 (completely transparent) to 1 (completely opaque).

+ These values should be numbers in the interval [0, 1].

+ The second variant of the function lets you specify an alpha value.

+ This value can range from 0 (completely transparent) to 1 (completely opaque).

- In GSL Shell a color is encoded as an integer number with 8 bit per channel and 4 channels, R, G, B, A in the given order.

- To express a pure green color you can for example write::

+ In GSL Shell, a color is encoded as an integer number with 8 bits per channel and 4 channels, R, G, B, A in the given order.

+ For example, to express a pure green color, you can write::

green = 0x00ff00ff

@@ -277,9 +277,9 @@ We give in this section the description of all the higher level plotting functio

Multiple plot window

--------------------

-With GSL shell it is possible to put several plots in a given window or also to put a given plot on several windows. To better understand what follows lets clarify a little bit the relations between plots and windows.

+With GSL Shell, it is possible to tile several plots in a given window, and also to display a given plot on several windows. To better understand what follows, we will first clarify the relations between plots and windows.

-In GSL shell a plot can exists independently of any window and vice versa, a window can exists without being associated to any plot. When you create a plot using the "plot" function the plot is not shown and is not associated to any window. When you call the method :meth:`~Plot.show` what happens is that:

+In GSL Shell, a plot can exist independently of any window and vice versa, a window can exist without being associated with any plot. When you create a plot using the "plot" function, the plot is not shown and it is not associated with any window. When you call the method :meth:`~Plot.show` what happens is that:

* a window is created and shown on the screen

* the plot is *attached* to the window

@@ -290,9 +290,9 @@ You can perform the operations above explicitly if you want. For example::

w = graph.window()

w:attach(p, '') -- attach the plot "p" to the default slot of "w"

-In this code snippet you can see the method :meth:`~Window.attach` at work. It is a method of the :class:`Window` used to tie a particular plot to a window. At this point you may wonder what is the second argument for. It is something which is not very useful for simple windows but it becomes important when the window is "subdivided" into subwindows.

+In this code snippet you can see the method :meth:`~Window.attach` at work. It is a method of the :class:`Window`, used to tie a particular plot to a window. At this point, you may wonder what the second argument is for. It is something which is not very useful for simple windows but it becomes important when the window is "subdivided" into subwindows.

-Let as see this at work with a second example::

+Let us see this at work with a second example::

use 'math'

@@ -310,7 +310,7 @@ Let as see this at work with a second example::

w:attach(p1, '1') -- attach plot "p1" to the first available slot

w:attach(p2, '2') -- attach plot "p2" to the second slot

-And here what the results will looks like:

+And this is what the result will look like:

.. figure:: example-vtiled-plots.png

@@ -361,24 +361,24 @@ A valid string layout is defined recursively and it is either:

* the character 'v' followed by a sequence of layout strings

* the same as above but enclosed between brackets: '(' ')'.

-With the sequence 'h' and 'v' you designate a subdivision along the horizontal or vertical direction respectively. Each subdivision is done in according to the sequence of cell layout that follows the 'h' or 'v'.

+With the sequence 'h' and 'v' you designate a subdivision along the horizontal or vertical direction respectively. Each subdivision is done according to the sequence of cell layout that follows the 'h' or 'v'.

The pattern described above is recursive and you can use brackets to group items where needed to avoid ambiguity.

Example::

w = window() -- create a window

- w:layout('v(h..).') -- split the windows in three drawing regions

+ w:layout('v(h..).') -- split the window into three drawing regions

-and here how the resulting window can look:

+and here's how the resulting window can look:

.. figure:: split-window-example.png

-we have added some empty plots so that you can see something inside the window.

+We have added some empty plots so that you can see something inside the window.

-Now let us see how to specify a particular drawing area once the :meth:`~Window.layout` is done. Drawing are specification is done with the method :meth:`~Window.attach` with a string that identifies the drawing area. The string should be a list of comma separated integer number in the form 'n1,n2,...,nk'. With each number you identify the element of the current subdivision and the following numbers will select recursively the nested subdivisions. As the subdivision can be nested at arbitrary depth you should specify as many numbers as are the depth level of the drawing are you want to address.

+Now let us see how to specify a particular drawing area once the :meth:`~Window.layout` is done. Drawing area specification is done with the method :meth:`~Window.attach` with a string that identifies the drawing area. The string should be a list of comma-separated integer numbers in the form 'n1,n2,...,nk'. With each number you identify the element of the current subdivision and the following numbers will select recursively the nested subdivisions. As the subdivision can be nested at arbitrary depth, you should specify as many numbers as required for the depth level of the drawing area you want to address.

-For examples, to identify the drawing area in the sample above the following string should be used: '1,1', '1,2', '2'. You can attach a plot to a particular drawing area by using the method :meth:`~Window.attach` as follows::

+For example, to identify the drawing areas in the sample above, the following strings should be used: '1,1', '1,2', '2'. You can attach a plot to a particular drawing area by using the method :meth:`~Window.attach` as follows::

w:attach(p1, '1,2')

@@ -386,11 +386,11 @@ For examples, to identify the drawing area in the sample above the following str

Graphics primitives

-------------------

-In order to better understand the way GSL shell graphics works it is better to take a step back. Let use suppose that we want to plot an equilateral triangle. We can proceed as follows:

+In order to better understand the way GSL Shell graphics work, it is better to take a step back. Let us suppose that we want to plot an equilateral triangle. We can proceed as follows:

- define a 'path' that describes the contour that we want to plot

- add the 'path' that we have defined to a 'plot' object to show it

-In order to create a 'path' we can use the function :func:`graph.path` which just creates an empty path. Then we add the the points by using the methods :meth:`~Path.move_to` and :meth:`~Path.line_to` methods. When you use :meth:`~Path.move_to` a new curve is started at the given points and with :meth:`~Path.line_to` you can draw your curve.

+In order to create a 'path', we can use the function :func:`graph.path` which just creates an empty path. Then we add the points using the :meth:`~Path.move_to` and :meth:`~Path.line_to` methods. When you use :meth:`~Path.move_to`, a new curve is started at the given point. With :meth:`~Path.line_to` you can draw your curve.

So to plot a triangle you can give the following instructions::

@@ -408,7 +408,7 @@ So to plot a triangle you can give the following instructions::

Please note that we have used the :meth:`~Plot.add` method instead of :meth:`~Plot.addline` to add the path.

-Now let us suppose that we want to plot only the contour of the triangle with a line 10 pixel thick and with round edges. Then what you have to do is to supply to the :meth:`~Plot.add` method a third argument where you specify a ``stroke`` transformation::

+Now let us suppose that we want to plot only the contour of the triangle with a line width of 10 pixels and with round edges. Then what you have to do is to supply the :meth:`~Plot.add` method with a third argument where you specify a ``stroke`` transformation::

p = graph.plot()

p:add(t, 'red', {{'stroke', width=10, cap='round'}})

@@ -416,7 +416,7 @@ Now let us suppose that we want to plot only the contour of the triangle with a

.. figure:: simpler-example-2.png

-As you can see we have used the 'stroke' transformation and we have provided two parameters: 'width' and 'cap' to specify of the stroke should be done.

+As you can see we have used the 'stroke' transformation and we have provided two parameters: 'width' and 'cap' to specify the stroke properties.

Now, to continue our example, let us suppose that we want to plot a dashed line just one pixel thick. In this case we have to cascade two transformations, the 'dash' transformation and the 'stroke' transformation. It can be done as follows::

@@ -426,48 +426,48 @@ Now, to continue our example, let us suppose that we want to plot a dashed line

.. figure:: simpler-example-3.png

-the 'a' and 'b' parameters specifies the lengths of the dash and of the blank space between the dashes. This length is calculated in pixel as it does depends on the size of the windows. Let us suppose now that you want to give the length of the dashes in the user coordinate system. In this case you have to provide it as a fourth arguments to the :meth:`~Plot.add`. So you should do something like that::

+The 'a' and 'b' parameters specify the lengths of the dash and of the blank space between the dashes. This length is calculated in pixels as it depends on the size of the windows. Let us suppose now that you want to give the length of the dashes in the user coordinate system. In this case you have to provide it as a fourth argument to :meth:`~Plot.add`. So you should do something like this::

p = graph.plot()

p:add(t, 'red', {{'stroke'}}, {{'dash', a= 0.5, b= 0.25}})

p:show()

-You can note that we have changed the size of the dashes because in the user coordinate system the value 10 is big as the edge of the triangle.

+You can note that we have changed the size of the dashes because in the user coordinate system the value 10 is as big as the edge of the triangle.

-So, if general, the :meth:`~Plot.add` methods takes two optional arguments, the list of the post-transforms and the list of the pre-transforms. The post-transforms are made when we are already in the window coordinate system while the pre-transforms are made in the user coordinate system.

+So, in general, the :meth:`~Plot.add` method takes two optional arguments, the list of the post-transforms and the list of the pre-transforms. The post-transforms are made when we are already in the window coordinate system while the pre-transforms are made in the user coordinate system.

-Some transformations are naturally expressed as post-transforms because they does operates naturally in term of pixel while other are usually expressed as pre-transforms because they operates in the user coordinates space.

+Some transformations are naturally expressed as post-transforms because they operate naturally in terms of pixels, while others are usually expressed as pre-transforms because they operate in the user coordinate space.

Plot

----

We have seen in the previous paragraph that you can add more graphical elements in a plot by using the methods :meth:`~Plot.add` and :meth:`~Plot.addline`. The method :meth:`~Plot.addline` is just a shortcut to add elements with a 'stroke' post transform of unitary width.

-You can add elements to a plot in any moments even when it is already shown. GSL Shell will automatically calculate the bounding box so that every elements is shown on the window.

+You can add elements to a plot at any time, even when it is already shown. GSL Shell will automatically calculate the bounding box so that every element is shown on the window.

.. function:: plot([title])

Create a new empty plot with an optional title. The plot is not

attached to any window and is therefore not visible. To show the

- plot on the screen use either the :func:`show` plot's method or

+ plot on the screen, use either the :func:`show` plot's method or

use the :meth:`~Window.attach` window's method to attach the plot to a

specific window.

- This kind of plot automatically update its limits when the

- graphical objects that are added. The ``sync`` property is also

- initialized to a ``true`` value so that every operation triggers

- an update of all the windows that shows the plot. If you want to

- perform animations you may want to set the ``sync`` property to

+ This kind of plot automatically updates its limits when

+ graphical objects are added. The ``sync`` property is also

+ initialized to ``true`` so that every operation triggers

+ an update of all the windows that show the plot. If you want to

+ perform animations, you may want to set the ``sync`` property to

``false`` and use the :meth:`~Plot.flush` method to update the windows

when all the drawing operations have been done.

.. function:: canvas([title])

- Like the function above it does create a new empty plot with

+ Like the function above, it creates a new empty plot with

fixed logical limits. This latter kind of plot differs in that

- it will not update automatically its limits to fit the graphical

+ it will not update its limits automatically to fit the graphical

objects. The method :func:`limits` should be used instead to set

- the logical limits of plotting area. The other difference with

+ the logical limits of the plotting area. The other difference with

the :func:`graph.plot` function is that the property ``sync`` will be

initialized to ``false``. This kind of plot is generally better

suited for animations.

@@ -484,10 +484,10 @@ You can add elements to a plot in any moments even when it is already shown. GSL

.. method:: addline(obj, color[, post_trans, pre_trans])

Add the :ref:`graphical object <graphics-objects>` ``obj`` to

- the plot by performing automatically a stroke of it. It is

+ the plot by automatically performing a stroke of it. It is

useful because you often need to draw lines and not filled

- polygons. It is equivalent to add a 'stroke' operations of

- unitary size in the viewport coordinates system.

+ polygons. It is equivalent to adding a 'stroke' operation of

+ unitary size in the viewport coordinate system.

.. method:: limits(x1, y1, x2, y2)

@@ -502,7 +502,7 @@ You can add elements to a plot in any moments even when it is already shown. GSL

.. method:: clear()

- Remove all the graphical elements into the current

+ Remove all the graphical elements in the current

:ref:`graphical layer <graphical-layer>`.

.. method:: flush()

@@ -513,7 +513,7 @@ You can add elements to a plot in any moments even when it is already shown. GSL

.. method:: pushlayer()

- Add a new :ref:`graphical layer <graphical-layer>` and into the

+ Add a new :ref:`graphical layer <graphical-layer>` to the

plot so that it becomes the current one and all the elements

added with methods :meth:`~Plot.add` or :meth:`~Plot.addline`

are associated with this new layer.

@@ -528,7 +528,7 @@ You can add elements to a plot in any moments even when it is already shown. GSL

Save the plot in a file in a bitmap image format. The first

argument is the file name without extension while the other

- optional arguments are the width and the height in pixel of the

+ optional arguments are the width and the height in pixels of the

image. The format used is BMP on Windows and PPM on Linux.

.. method:: save_svg(filename[, w, h])

@@ -536,15 +536,13 @@ You can add elements to a plot in any moments even when it is already shown. GSL

Save the plot in the given filename in SVG format.

Two optional parameters can be given to specify the width and height of the drawing area.

.. method:: set_legend(p[, placement])

- Add the plot ``p`` as a legend is the side area of the main plot.

- The argument ``placement`` is used to give the placement of the mini plot and should be on of the letters 'l', 'r', 'b', 't'.

- They stands for "left", "right", "bottom" and "top" respectively.

- By default the placement of the mini plot is on the right side.

+ Add the plot ``p`` as a legend in the side area of the main plot.

+ The argument ``placement`` is used to give the placement of the mini plot and should be one of the letters 'l', 'r', 'b', 't', which stand for "left", "right", "bottom" and "top" respectively.

+ By default, the placement of the legend is on the right side.

- The plot legend is drawn on the screen using an area that is equal, in pixal, to the logical size of the plot legend itself.

+ The plot legend is drawn on the screen using an area that is equal, in pixels, to the logical size of the plot legend itself.

.. method:: get_legend([placement])

@@ -554,10 +552,10 @@ You can add elements to a plot in any moments even when it is already shown. GSL

.. method:: legend(text, color, symbol[, trans])

Add to the plot a new legend item with the given ``text``.

- The symbol used is determinated by the string ``symbol``.

+ The symbol used is determined by the string ``symbol``.

Possible values are 'line', 'square' or anything accepted by :func:`graph.marker`.

The optional ``trans`` parameter should be a :ref:`graphical transform <graphics-transforms>`.

- If omitted the appropriate default is chosen based on the symbol type.

+ If omitted, the appropriate default is chosen based on the symbol type.

Example::

@@ -573,7 +571,7 @@ You can add elements to a plot in any moments even when it is already shown. GSL

.. method:: set_categories(axis, categories)

Configure the given ``axis`` (a letter, 'x' or 'y') to use a custom set of labels specified by ``categories``.

- This latter should be a list of that gives in a sequence, the values where the label should be placed and the label text itself.

+ This latter should be a list containing the values where the label should be placed and the label text itself.

The coordinate refers to the plot system of coordinates.

Example::

@@ -616,21 +614,21 @@ You can add elements to a plot in any moments even when it is already shown. GSL

.. attribute:: units

- A boolean value that define if the axis and grids should be

+ A boolean value that defines whether the axis and grids should be

drawn or not. By default it is true.

.. attribute:: sync

- This attribute can be either true or false. If true any changes

- in the plot will automatically update all the windows where the

+ This attribute can be either true or false. If true, any changes

+ made to the plot will automatically update all the windows where the

plot is shown. It is useful to set ``sync`` to false for

- animation so that many operations can be performed and the

+ animations so that many operations can be performed and the

window is updated only when the :meth:`~Plot.flush` method is called.

.. attribute:: pad

- This attribute determine if the padding is active or not for the plot.

- The padding determine if the viewport area should be larger than the actual plotting are to align with axis marks.

+ This attribute determines if the padding is active or not for the plot.

+ The padding determines if the viewport area should be larger than the actual plotting area to align with axis marks.

The default is ``false``.

.. attribute:: clip

@@ -643,9 +641,9 @@ You can add elements to a plot in any moments even when it is already shown. GSL

Graphical Layers

~~~~~~~~~~~~~~~~

-When you want to perform animations with plot you can take advantage of the :ref:`graphical layers <graphical-layer>` that allows to clear and redraw only some graphical elements while keeping other elements always present in the background. The idea is that if you want to make an animation you will probably clear and redraw over and over some graphical elements but you may want to keep some of them fixed in the background. In order to obtain that you can

+When you want to perform animations with plots you can take advantage of the :ref:`graphical layers <graphical-layer>`. These enable you to clear and redraw only some graphical elements while keeping other elements always present in the background. The idea is that if you want to make an animation, you will probably repeatedly clear and redraw some graphical elements, but you may want to keep some of them fixed in the background. In order to obtain that you can

- * add normally all the fixed graphical elements

+ * add all the fixed graphical elements as usual

* add a new :ref:`graphical layer <graphical-layer>` with the method :meth:`~Plot.pushlayer`

* clear and redraw all the elements using the new topmost layer

@@ -680,20 +678,20 @@ Graphical Objects

.. function:: path([x, y])

- Creates an empty path. If the two coordinates (x, y) are provided set the initial point of the path to (x, y).

+ Creates an empty path. If the two coordinates (x, y) are provided, set the initial point of the path to (x, y).

.. class:: Path

.. method:: move_to(x, y)

- Move the current point to the coordinates (x, y) and start here a new path.

+ Move the current point to the coordinates (x, y) and start a new path here.

.. method:: line_to(x, y)

Add a line into the path from the previous point to the specified (x, y) coordinates.

As a special case, if the path is empty, this method is equivalent to :meth:`~Path.move_to`.

.. hint::

- If you want to define a polygonal line you don't need to use the :meth:`~Path.move_to` method for the first point.

+ If you want to define a polygonal line, you don't need to use the :meth:`~Path.move_to` method for the first point.

Instead you can use the method :meth:`~Path.line_to` to add each point.

.. method:: close()

@@ -702,7 +700,7 @@ Graphical Objects

.. method:: arc_to(x, y, angle, large_arc, sweep, rad_x, rad_y)

- Add as arc or ellipse with radius rx and ry up to the point (x, y).

+ Add an arc or ellipse with radius rx and ry up to the point (x, y).

.. method:: curve3(x_ctrl, y_ctrl, x, y)

@@ -710,22 +708,22 @@ Graphical Objects

.. method:: curve4(x1_ctrl, y1_ctrl, x2_ctrl, y2_ctrl, x, y)

- Add a conic bezier curve up to (x, y) with two control points. The same remarks for the method :func:`curve3` applies to :func:`curve4`.

+ Add a cubic Bezier curve up to (x, y) with two control points. The same remarks for the method :func:`curve3` apply to :func:`curve4`.

.. function:: text(x, y, text, [height])

Create a text object with the given text at the position (x,y).

- The first optional argument ``height`` indicate the text height.

+ The first optional argument ``height`` indicates the text height.

.. class:: Text

A text object is used to display a text.

- This class of graphical object is special because it is not a shape that is resized rigidly with the window, the text is always displayed with the same size in pixel.

- Because of this difference a text object should be subject only to post-transforms.

+ This class of graphical object is special because it is not a shape that is resized rigidly with the window. Instead, the text is always displayed with the same size in pixels.

+ Because of this difference, a text object should be subject only to post-transforms.

.. attribute:: angle

- Rotate the text of the given angle (in radians).

+ Rotate the text by the given angle (in radians).

.. method:: justif(hv)

@@ -744,17 +742,17 @@ Graphical Objects

.. class:: TextShape

A text shape object is used to display a text.

- The difference with a simple text object is that a text shape has a well definite shape and extension in the plot system of coordinates.

- One of the implications is that the text shape will occupy a well definite area and the plot can adapt its area to include the text itself.

- For the other side text shape could be deformed if a different scale is used for x and y axis.

- If the aspect ration of coordinate system is not unitary a simple "text" object should be used instead.

+ The difference with a simple text object is that a text shape has a well-defined shape and extension in the plot system of coordinates.

+ One of the implications is that the text shape will occupy a well-defined area and the plot can adapt its area to include the text itself.

+ On the other hand, text shapes could be deformed if a different scale is used for the x and y axes.

+ If the aspect ratio of the coordinate system is not unitary, a simple "text" object should be used instead.

- The text shape has currently no methods and its properties are determined during the creation of the object.

+ The text shape currently has no methods and its properties are determined during the creation of the object.

.. tip::

Text shape objects are useful to create plot legends.

- In this case the size and position of the text shape can be expressed in screen coordinates (pixel).

- The reason is that when a mini plot is added to a plot the area of the screen used to display the mini plot is equal to the bounding box of the mini plot itself.

+ In this case the size and position of the text shape can be expressed in screen coordinates (pixels).

+ The reason is that when a mini plot is added to a plot, the area of the screen used to display the mini plot is equal to the bounding box of the mini plot itself.

.. function:: marker(x, y, symbol, size)

@@ -767,7 +765,7 @@ Graphical Objects

Graphical transformations

-------------------------

-A generic graphical transformation is expressed in the form of table with the following layout::

+A generic graphical transformation is expressed in the form of a table with the following layout::

{'name',

property1 = value1,

@@ -781,22 +779,22 @@ For example, to express a 'stroke' transform you can write::

to mean a stroke transformation with a stroke width of 5.

-Here a complete list of all the available transforms:

+Here is a complete list of all the available transforms:

**stroke**

- A stroke create an outline of the given path. The properties are:

+ A stroke creates an outline of the given path. The properties are:

* **width**, the width of the stroke, default value is width=1

* **cap**, can be 'round', 'butt' or 'square'. The default value is 'butt'.

* **join**, can be 'miter', 'miter.rev', 'miter.round', 'round' and 'bevel'

**dash**

- Transform the path to a sequence of dashes. The following elements in the table are the length of the dashes and gaps.

+ Transform the path to a sequence of dashes. The following elements in the table are the lengths of the dashes and gaps.

For example, to express a dash-dot line you can write ``{'dash', 7,3,3,3}``.

**curve**

- This transformation make the 'curve3' and 'curve4' path elements became real curves.

+ This transformation makes the 'curve3' and 'curve4' path elements become real curves.

**marker**

Replace each vertex of the path with a circular mark

@@ -804,10 +802,10 @@ Here a complete list of all the available transforms:

* **size**, the size of the marker

* **mark**, a string, an integer number or a graphical object indicating the symbol to be used for the markers.

Available symbols are 'circle', 'triangle', 'square', 'diamond', 'plus', 'cross'.

- If a number is given the symbol will be chosen in the list given above.

- If a graphical object is supplied its extension should be such that is contained in a box of size 1 and centered in (0, 0).

+ If a number is given, the symbol will be chosen from the list given above.

+ If a graphical object is supplied, its extension should be such that it is contained in a box of size 1 centered at (0, 0).

The object will be automatically scaled according to the parameter ``size``.

- * **outline**, if it is true draw the marker in outline

+ * **outline**, if it is true, draw the marker in outline

**translate**

A translation along the x and y axis. This transformation can be used only in the user coordinate system.

@@ -818,15 +816,15 @@ Here a complete list of all the available transforms:

**scale**

A scaling of the image around the point (0, 0). Only one numeric

parameter should be supplied to indicate the scaling factor. For

- example ``{'scale', 2}`` will scale the image of a factor two.

+ example ``{'scale', 2}`` will scale the image by a factor of two.

**extend**

- Shrink or expand a shape of a given amount.

+ Shrink or expand a shape by a given amount.

* **width**, the size of the shrink/expansion.

- If positive is an expansion otherwise is a shrink.

+ An expansion if positive, otherwise a shrink.

**rotate**

- A rotation of a given angle with respect of the origin. This transformation can be used only in the user coordinate system.

+ A rotation by a given angle about the origin. This transformation can be used only in the user coordinate system.

* **angle**, the angle of the rotation, in radians.

diff --git a/doc/user-manual/gsl-ffi.rst b/doc/user-manual/gsl-ffi.rst
index e2d43f28..939eae6e 100644
--- a/doc/user-manual/gsl-ffi.rst
+++ b/doc/user-manual/gsl-ffi.rst

@@ -5,29 +5,29 @@

GSL FFI interface

=================

-In this chapter we are going to explain the direct GSL interface to use directly the C functions provided by the GSL library.

+In this chapter we are going to explain the direct GSL interface to use the C functions provided by the GSL library directly.

Introduction

~~~~~~~~~~~~

-The access to the C GSL functions is possible by using the FFI module provided by LuaJIT2 on which GSL Shell is based.

-The FFI module allows to call directly from Lua code any C functions available from the dynamic libraries currently loaded.

+The access to the C GSL functions is possible thanks to the FFI module provided by LuaJIT2 on which GSL Shell is based.

+The FFI module allows one to call any C function available from the dynamic libraries currently loaded, directly from Lua code.

Some of the modules available in GSL Shell are reimplemented in Lua using the FFI interface and the basic GSL functions about BLAS and linear algebra.

-For example the module for non-linear fit have been completely reimplemented in Lua using the FFI interface.

-Thanks to the capability of LuaJIT2 to generate highly optimized code on the fly the Lua implementation run at a speed comparable to compiled optimized C code.

-For the ODE systems LuaJIT2 is actually, in some cases, faster then optimized C code and is in general comparable to C in term of speed.

-Another module re-implemented in Lua is the module about numerical integration, where the QAG adaptive routine have been ported with excellent results.

+For example, the module for non-linear fitting has been completely reimplemented in Lua using the FFI interface.

+Thanks to the capability of LuaJIT2 to generate highly optimized code on the fly, the Lua implementation runs at a speed comparable to compiled optimized C code.

+For the ODE systems LuaJIT2 is actually, in some cases, faster than optimized C code and is in general comparable to C in terms of speed.

+Another module re-implemented in Lua is the module for numerical integration, where the QAG adaptive routine has been ported with excellent results.

-The non-linear fit module re-implemented in Lua has been checked for correctness using a subset of the `NIST datasets <http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml>`_.

-You can run yourself the tests by giving the following commands::

+The non-linear fit module reimplemented in Lua has been checked for correctness using a subset of the `NIST datasets <http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml>`_.

+You can run the tests yourself by giving the following command::

dofile('benchmarks/lmfit/nist_test.lua')

Then you can compare the results and the plots to the official results published on the NIST website.

-Here an example of the plot produced for the ENSO dataset:

+Here is an example of the plot produced for the ENSO dataset:

.. figure:: lmfit-enso-dataset-plot.png

@@ -53,24 +53,24 @@ Let us see a simple example::

end

-As you can see we are just using the GSL function ``gsl_matrix_set`` to set the value each element of the matrix.

+As you can see we are just using the GSL function ``gsl_matrix_set`` to set the value of each element of the matrix.

There are a few things that are important to note.

The C function that we have used has the following signature ``int gsl_matrix_set(gsl_matrix *m, int i, int j, double x)``.

-When you call it from Lua the arguments are converted to the appropriate C types by using some set of specific rules specific of the `FFI semantics <http://luajit.org/ext_ffi_semantics.html>`_.

-The implementation of GSL Shell ensure that a matrix object can be actually converted to a ``gsl_matrix`` pointer.

-For the other arguments note that the Lua numbers are converted to integer or double as appropriate to match the C function signature.

-If the arguments cannot be converted to the appropriate type an error is raised so it is always safe to call a C function.

+When you call it from Lua, the arguments are converted to the appropriate C types using a set of rules specific to the `FFI semantics <http://luajit.org/ext_ffi_semantics.html>`_.

+The implementation of GSL Shell ensures that a matrix object can actually be converted to a ``gsl_matrix`` pointer.

+As for the other arguments, note that the Lua numbers are converted to integer or double as appropriate to match the C function signature.

+If the arguments cannot be converted to the appropriate type, an error is raised, so it is always safe to call a C function.

-Another things that you may note is that we have ignored the value returned by the GSL function.

-In general the return value can signal an error condition and it could be necessary to check the returned value.

-For this purpose GSL Shell offer a simple helper function that can be used like in the following example::

+Another thing that you may note is that we have ignored the value returned by the GSL function.

+In general, the return value can signal an error condition and it could be necessary to check the returned value.

+For this purpose, GSL Shell offers a simple helper function that can be used like in the following example::

gsl_check = require 'gsl-check'

gsl_check(gsl.gsl_matrix_set(m, i, j, 1/(i+j+1)))

-The function ``gsl_check`` above just check the value returned and raise an error with an appropriate message if needed.

+The function ``gsl_check`` above just checks the returned value and raises an error with an appropriate message if needed.

Finally note that the indexing convention when calling ``gsl_matrix_set`` is the C convention where the first index is 0.

This fact is a direct implication of the fact that we are directly calling the C function defined in the GSL library.

@@ -78,6 +78,6 @@ This fact is a direct implication of the fact that we are directly calling the C

GSL FFI examples

~~~~~~~~~~~~~~~~

-If you want to learn better the usage of the GSL FF interface you may take a look to the implementation file of the ``bspline`` module.

+If you want to learn more about the usage of the GSL FFI interface, you may take a look at the implementation file of the ``bspline`` module.

-The file is quite small and easy to understand and it does illustrate all the important aspect of the GSL FFI interface.

+The file is quite small and easy to understand, and it illustrates all the important aspects of the GSL FFI interface.

diff --git a/doc/user-manual/index.rst b/doc/user-manual/index.rst
index 0158c180..c49e4d85 100644
--- a/doc/user-manual/index.rst
+++ b/doc/user-manual/index.rst

@@ -2,13 +2,13 @@

Welcome to GSL shell documentation

##################################

-GSL shell is an interactive command line interface that gives easy access

+GSL shell is an interactive command line interface that provides easy access

to the GNU Scientific Library (GSL) collection of mathematical methods for

numerical computations.

GSL shell can be used interactively to perform calculations with matrices or

-vectors but it does allow also to write complex user defined functions with

-the Lua scripting interpreter.

+vectors but it also allows for complex user-defined functions with the Lua

+scripting interpreter.

Lua is a very interesting and easy to learn scripting language that features

advanced functionalities like closures and metamethods. Lua is very

diff --git a/doc/user-manual/integ.rst b/doc/user-manual/integ.rst
index 7405c137..b9478d38 100644
--- a/doc/user-manual/integ.rst
+++ b/doc/user-manual/integ.rst

@@ -13,7 +13,7 @@ FORTRAN code for QUADPACK is available on Netlib.

Currently only two integration methods are available in GSL Shell.

Both of them are based on Gauss-Kronrod integration rules.

The first method is non-adaptive and is called QNG while the second one, QAG, is adaptive.

-The adaptive method QAG is suitable as a general purpose integrator and the QNG method should be used only in particular cases where the function has a simple known smooth behavior.

+The adaptive method QAG is suitable as a general-purpose integrator, whereas the QNG method should be used only in particular cases where the function has a simple known smooth behavior.

Introduction

------------

@@ -29,8 +29,8 @@ which specify the following accuracy requirement,

.. math::

| \textrm{Result} - I | \le \max(\epsilon_{\textrm{abs}}, \epsilon_{\textrm{rel}} |I|)

-where RESULT is the numerical approximation obtained by the

-algorithm. The algorithms attempt to estimate the absolute error

+where Result is the numerical approximation obtained by the

+algorithm. The algorithm attempts to estimate the absolute error

AbsErr = | Result - I | in such a way that the following inequality

holds,

@@ -55,12 +55,12 @@ Functions

.. function:: integ(f, a, b, epsabs, epsrel)

Compute the definite integral of the function ``f`` in the interval specified by ``a`` and ``b`` within the requested precision given by ``epsabs`` and ``epsrel``.

- This function always use internally the adaptive QAG algorithm.

+ This function always uses the adaptive QAG algorithm internally.

.. function:: quad_prepare(spec)

Returns a function that can perform a numeric integration based on the options ``spec``.

- The argument ``spec`` is used to choose the quadrature algorithm the order and the limits for the adaptive search if applicable.

+ The argument ``spec`` is used to choose the quadrature algorithm, the order and the limits for the adaptive search if applicable.

The fields of ``spec`` that you should give are:

*method*

diff --git a/doc/user-manual/intro-first-step-plot-1.png b/doc/user-manual/intro-first-step-plot-1.png
new file mode 100644
index 00000000..d8ae7eac
--- /dev/null
+++ b/doc/user-manual/intro-first-step-plot-1.png

Binary files differ

diff --git a/doc/user-manual/intro-first-step-plot-2.png b/doc/user-manual/intro-first-step-plot-2.png
new file mode 100644
index 00000000..3c455a3a
--- /dev/null
+++ b/doc/user-manual/intro-first-step-plot-2.png

Binary files differ

diff --git a/doc/user-manual/intro-first-step-plot-3.png b/doc/user-manual/intro-first-step-plot-3.png
new file mode 100644
index 00000000..0595f8fe
--- /dev/null
+++ b/doc/user-manual/intro-first-step-plot-3.png

Binary files differ

diff --git a/doc/user-manual/intro.rst b/doc/user-manual/intro.rst
index 28ca530c..9bb7cf35 100644
--- a/doc/user-manual/intro.rst
+++ b/doc/user-manual/intro.rst

@@ -5,31 +5,89 @@

GSL Shell introduction

======================

-GSL Shell is an interactive interface that gives access to the GSL numerical routines by using Lua, an easy to learn and powerful programming language.

-With GSL Shell you can access very easily the functions provided by the GSL library without the need of writing and compile a stand-alone C application.

-In addition the power and expressiveness of the Lua language make easy to develop and tests complex procedures to treat your data and use effectively the GSL routines.

-You will be also able to create plots in real-time by using the powerful built-in graphical module.

+GSL Shell is an interactive interface that gives access to the GSL numerical routines using Lua, an easy to learn and powerful programming language.

+With GSL Shell, you can access the functions provided by the GSL library with great ease, without the need to write and compile a stand-alone C application.

+In addition, the power and expressiveness of the Lua language enables you to develop and test complex procedures to process your data and effectively use the GSL routines.

+You will also be able to create plots in real-time using the powerful built-in graphical module.

-.. note::

- GSL Shell is still a young project and it is currently not widely used.

- It still lacks some important features but the author believe that it is very promising because it is built on top of three outstanding free software projects: the GSL library, LuaJIT2 and the AGG library.

+The underlying programming language, Lua, itself is a very cleverly designed programming language. It is therefore an excellent platform for programming.

+In addition the LuaJIT2 implementation provides excellent execution speed that can be very competitive with compiled C or C++ code.

- By using and combining together these software components GSL Shell provides the possibility of doing advanced numerical calculations and produce beautiful plot of the data or mathematical functions.

- The underlying programming language, Lua, itself a very cleverly designed programming language it is therefore an excellent platform for programming.

- In addition the LuaJIT2 implementation provides excellent execution speed that can be very close to C compiled code.

-GSL Shell also introduces some extensions to the Lua language that will be explained in the following sections. Those features include :ref:`complex numbers<complex_numbers>`, easy-to-use :ref:`vector/matrix<matrices>` implementations, :ref:`short function syntax<short-func-notation>` and :ref:`iterators<lua_base>`. They are designed to facilitate the work with numbers and mathematical algorithms.

+Since GSL Shell is oriented toward mathematics, it provides an optional simplified syntax to express simple mathematical functions (see :ref:`short function syntax<short-func-notation>`).

+Otherwise the syntax and the semantics of the Lua programming language are fully preserved.

First steps

-----------

The most basic usage of GSL Shell is just like a calculator: you can type any expression and GSL Shell will print the results.

GSL Shell is designed to work mainly with floating point numbers stored internally in double precision.

-Sometimes we will refer to these kind of number as *real* number in opposition to *complex* number.

+Sometimes we will refer to this kind of number as a *real* number, as opposed to a *complex* number.

+For integer numbers, GSL Shell differs from many other programming environments because it does not have a distinct type to represent them.

+In other words, integer numbers are treated just like *real* numbers with all the implications that follow.

+To give the flavor of GSL Shell let us suppose that we want to plot a simple quadratic function like :math:`y = x^2 - 1`.

+You can define the function very easily:

+ >>> f = |x| x^2 - 1

+ >>> f(2)

+ 3

+So the first line means: let f be a function that given a value x returns :math:`f(x) = x^2 - 1`.

+This kind of notation for simple functions is an extension to the Lua syntax and is explained in a specific section about the :ref:`short function notation <short-func-notation>`.

+Now you may want to plot the function f.

+This is done very easily:

+ >>> p = graph.fxplot(f, -3, 3)

+To obtain the following plot:

+.. figure:: intro-first-step-plot-1.png

+Since the plot looks a little bit empty we can try to add at least a title:

+ >>> p.title = 'Function plot example'

+You have probably noted that we have kept a reference to the plot in a variable named "p".

+It is actually important to keep a reference to the plot to be able to make further modifications.

-For integer numbers GSL Shell differs from many other programming environment because it does not have a distinct type to represent them.

-In other words integer numbers are treated just like *real* number with all the implications that follows.

+In case you forgot the assignment to a variable you can still retrieve the last returned expression using the special variable name "_".

+So for example if you type:

+ >>> graph.fxplot(f, -3, 3)

+ <plot: 0xb770eed8>

+The plot is still available using the special variable "_":

+ >>> _

+ <plot: 0xb770eed8>

+Now let us suppose that we want to add to the same plot another curve to represent the function :math:`y = 1/x`.

+In this case we don't want to create another plot, but we need to create a "curve".

+We can do that by using the function :func:`graph.fxline` that works exactly like :func:`graph.fxplot` but returns a graphical object instead of a plot.

+Once the curve is created we add it to the plot using the method :meth:`~Plot.addline`.

+ >>> line = graph.fxline(|x| 1/x, 1/8, 3)

+ >>> p:addline(line, 'blue')

+If you bother to add also the other side of the hyperbola you will obtain the following plot:

+.. figure:: intro-first-step-plot-2.png

+At this point you can also add a legend.

+This can be done with a couple more commands:

+ >>> p:legend('parabola', 'red', 'line')

+ >>> p:legend('hyperbole', 'blue', 'line')

+To obtain the following plot:

+.. figure:: intro-first-step-plot-3.png

+As you can see the graphical system is very flexible and it does offer a lot of possibilities.

+If you want to learn more about the graphical system you can read the chapter about :ref:`graphics <graphics-chapter>`.

.. _complex_numbers:

@@ -43,24 +101,24 @@ When you need to define a complex number you can use a native syntax like in the

The rule is that when you write a number followed by an 'i' it will be considered as a pure imaginary number.

The imaginary number will be accepted only if the 'i' follows immediately the number without any interleaving spaces.

Note also that if you write 'i' alone this will not be interpreted as the imaginary unit but as the variable 'i'.

-The imaginary unit can be declared by writing '1i' because the '1' at the beginning force the interpreter to consider it like a number.

+The imaginary unit can be declared by writing '1i', because the '1' at the beginning forces the interpreter to consider it as a number.

-All the functions in the :mod:`math` like exp, sin, cos etc. works on *real* numbers.

-If you want to have operations that operates on complex numbers you should use the functions defined in the :mod:`complex` module.

+All the functions in :mod:`math` such as exp, sin, cos etc. work on *real* numbers.

+If you want to operate on complex numbers, you should use the functions defined in the :mod:`complex` module.

The other important mathematical types in GSL Shell are matrices, either of complex or real numbers.

-In addition Lua offers a native type called "table".

-This latter is very useful for general purpose programming because because it can store any kind of data or structures but you should be careful to not confuse Lua tables with matrices.

+In addition, Lua offers a native type called "table".

+This latter is very useful for general purpose programming because it can store any kind of data or structures. However, you should be careful to not confuse Lua tables with matrices.

You can work with both types as far as you understand the difference and use the appropriate functions to operate on them.

-Most of the GSL functions operate on real or complex matrix because of the nature of the GSL library itself.

+Most of the GSL functions operate on real or complex matrices because of the nature of the GSL library itself.

A couple of useful tricks

~~~~~~~~~~~~~~~~~~~~~~~~~

-When you are working in a interactive session GSL Shell will always remember the last result evaluated.

+When you are working in an interactive session, GSL Shell will always remember the last result evaluated.

You can access its value using the global variable "_".

-When the you evaluate a statement or an expression that returns no values the variable "_" is not modified.

+When you evaluate a statement or an expression that returns no values, the variable "_" is not modified.

Another useful thing to know is that you can suppress the returned value by adding a ';' character at the end of line.

This can be useful to avoid to show a large table or matrix if you don't want to see them on the screen.

@@ -70,8 +128,8 @@ This can be useful to avoid to show a large table or matrix if you don't want to

Working with matrices

~~~~~~~~~~~~~~~~~~~~~

-In order to define a matrix you have basically two options, you can enumerate all the values or you can provide a function that generate the terms of the matrix.

-In the first case you should use the :func:`matrix.def` like in the following example::

+In order to define a matrix you have basically two options: you can enumerate all the values or you can provide a function that generates the terms of the matrix.

+In the first case, you should use the :func:`matrix.def` like in the following example::

use 'math'

@@ -82,25 +140,26 @@ In the first case you should use the :func:`matrix.def` like in the following ex

You can remark that we have used the :func:`matrix.def` function without parentheses to enclose its arguments.

The reason is that, when a function is called with a single argument which is a literal table or string, you can omit the enclosing parentheses.

-In this case we have therefore omitted the parenthesize because :func:`matrix.def` has a single argument that is a literal table.

-Note that in our snippet of code we have used the function :func:`use` to make the function available in the module :mod:`math` available.

-If you don't use :func:`use` the function :func:`math.sin` and :func:`math.cos` should be accessed by specifying the explicitly the ``math`` namespace.

+In this case we have therefore omitted the parentheses because :func:`matrix.def` has a single argument that is a literal table.

+Note that in our snippet of code we have used the function :func:`use` to make the functions in the module :mod:`math` available.

+If you don't use :func:`use`, the functions :func:`math.sin` and :func:`math.cos` should be accessed by explicitly specifying the ``math`` namespace.

-You can define also a column matrix using the function :func:`matrix.vec` like follows::

+You can also define a column matrix using the function :func:`matrix.vec` as follows::

v = matrix.vec {cos(th), sin(th)}

-The other way to define a matrix is by using the :func:`matrix.new` function (or :func:`matrix.cnew` to create a complex matrix).

+The other way to define a matrix is through the :func:`matrix.new` function (or :func:`matrix.cnew` to create a complex matrix).

This latter function takes the number of rows and columns as the first two arguments and a function as an optional third argument.

-Let as see an example to illustrate how it works::

+Let us see an example to illustrate how it works::

- -- define a matrix whose (i, j) elements is 1/(i + j)

+ -- define a matrix whose (i, j) element is 1/(i + j)

m = matrix.new(4, 4, |i,j| 1/(i + j))

-In this example the third argument is a function expressed with the :ref:`short function notation <short-func-notation>`.

+In this example, the third argument is a function expressed with the :ref:`short function notation <short-func-notation>`.

This function takes two arguments, respectively the row and column number, and returns the value that should be assigned to the corresponding matrix element.

-Of course you are not forced to define the function in the same line, you can define it before and use it later with the :func:`matrix.new` function like in the following example::

+Of course, you are not forced to define the function in the same line; you can define it before and use it later with the :func:`matrix.new` function as in the following example::

-- define the binomial function

function binomial(n, k)

@@ -114,7 +173,7 @@ Of course you are not forced to define the function in the same line, you can de

-- define a matrix based on the function just defined

m = matrix.new(8, 8, binomial)

-and here the result:

+This is the result::

>>> m

[ 1 0 0 0 0 0 0 0 ]

@@ -158,7 +217,7 @@ Then the matrix ``minv`` will be equal to::

[ 1 -6 15 -20 15 -6 1 0 ]

[ -1 7 -21 35 -35 21 -7 1 ]

-If we want to check that ``minv`` is actually the inverse of ``m`` we can perform the matrix multiplication to check::

+If we want to check that ``minv`` is actually the inverse of ``m``, we can perform the matrix multiplication to check::

>>> minv * m

[ 1 0 0 0 0 0 0 0 ]

@@ -170,10 +229,10 @@ If we want to check that ``minv`` is actually the inverse of ``m`` we can perfor

[ 0 0 0 0 0 0 1 0 ]

[ 0 0 0 0 0 0 0 1 ]

-and as we should expect we have actually obtained the unit matrix.

+and as we should expect, we have actually obtained the unit matrix.

-The matrix inverse can be used to solve a linear system so let us try.

-First we define a column vector, fox example::

+The matrix inverse can be used to solve a linear system, so let us try that.

+First we define a column vector, for example::

b = matrix.new(8, 1, |i| sin(2*pi*(i-1)/8))

>>> b

@@ -214,7 +273,7 @@ Working with complex matrices

In the example above we have shown how to solve a linear system in the form ``m * x = b``.

We may wonder how to manage the case when ``m`` or ``b`` are complex.

-The answer is easy, since GSL Shell always check the type of the matrix and the appropriate algorithm is selected.

+The answer is easy, since GSL Shell always checks the type of the matrix, and the appropriate algorithm is selected.

So, to continue the example above, we can define b as a complex vector as follows::

@@ -229,18 +288,18 @@ So, to continue the example above, we can define b as a complex vector as follow

[ -i ]

[ 0.70710678-0.70710678i ]

-and then we can use the function :func:`matrix.solve` as above and we will obtain a complex matrix that solve the linear system.

+and then we can use the function :func:`matrix.solve` as above and we will obtain a complex matrix that solves the linear system.

Please note that above we have used the function :func:`matrix.cnew` to create a new complex matrix.

-The reason is that we need to inform GSL Shell in advance if we want a real or a complex matrix.

+The reason is that we need to inform GSL Shell in advance if we want a complex matrix.

-In general GSL Shell tries to ensure that all the common matrix operations are handle to transparently handle real or complex matrices.

+In general, GSL Shell tries to ensure that all the common matrix operations transparently handle real or complex matrices.

Matrix indexing

~~~~~~~~~~~~~~~

-You can index the matrix but only one index is permitted so the syntax ``m[2]`` is OK but ``m[2,3]`` will not be accepted.

-This is limitation of GSL Shell that is related to the Lua programming language on which it is based.

+When indexing the matrix, only one index is permitted, so the syntax ``m[2]`` is OK but ``m[2,3]`` will not be accepted.

+This is a limitation of GSL Shell that is related to the Lua programming language on which it is based.

So when you write ``m[2]`` you will obtain the second row of the matrix ``m`` but in *column* form.

So, if we use the matrix ``m`` defined above we could have:

@@ -255,10 +314,10 @@ So, if we use the matrix ``m`` defined above we could have:

[ 0 ]

-It may seems odd the the row is returned in column form but it is actually convenient because many function accept a column matrix in input.

-The idea is that in GSL Shell column matrices play the role of vectors.

+It may seem odd that the row is returned in column form but it is actually convenient because many functions accept a column matrix as input.

+The idea is that in GSL Shell, column matrices play the role of vectors.

-Following the same logic of above, if you index a column matrix you will just obtain its n-th element, to return a 1x1 matrix will be not very useful.

+Following the same logic as above, if you index a column matrix you will just obtain its n-th element (as returning a 1x1 matrix would not be very useful).

So you can have for example:

>>> m[5][4]

@@ -266,17 +325,17 @@ So you can have for example:

At this point it should be clear that, in general, you can access the elements of a matrix with the double indexing syntax ``m[i][j]``.

-Something that is important to know about the matrix indexing to obtain a row is that the column matrix refer to the same underlying data of the original matrix.

-As a consequence any change to the elements of the derived matrix will be effective also for the original matrix.

+Something that is important to know about the matrix indexing to obtain a row is that the column matrix refers to the same underlying data as the original matrix.

+As a consequence, any change to the elements of the derived matrix will also be effective for the original matrix.

-The indexing method that we have explained above can be used not only for retrieving the matrix elements or an entire row but it can be equally used for assignment.

+The indexing method that we have explained above can be used not only for retrieving the matrix elements or an entire row, but it can be equally used for assignment.

This means that you can use double indexing to change an element of a matrix.

-If you use a simple indexing you can assign the content of a whole row all at once.

+If you use simple indexing, you can assign the content of a whole row all at once.

Just a small note about efficiency.

The double indexing method can be slow and should probably be avoided in tight loops where performance is important.

In this case you should use the methods :meth:`~Matrix.get` and :meth:`~Matrix.set`.

-Another opportunity is to address directly matrix data by using its ``data`` field but this requires a particular attention since this kind of operations are not safe and you could easily crash the application.

+Another opportunity is to directly address matrix data by using its ``data`` field, but this requires particular attention since these kinds of operations are not safe and could easily crash the application.

You can find more details in the chapter about :ref:`GSL FFI interface <gsl-ffi-interface>`.

@@ -285,10 +344,10 @@ You can find more details in the chapter about :ref:`GSL FFI interface <gsl-ffi-

Plotting functions

~~~~~~~~~~~~~~~~~~

-The plotting functions lives in the ``graph`` module. The more common and useful functions are probably :func:`graph.fxplot` and :func:`graph.fxline`.

-The first one can used to create a plot while the second one just create a graphical object of type line.

+The plotting functions live in the ``graph`` module. The more common and useful functions are probably :func:`graph.fxplot` and :func:`graph.fxline`.

+The first one can be used to create a plot while the second one just creates a graphical object of type line.

A graphical object is visible only when it is added into a plot.

-The idea is that you can create the objects as needed and add them of the plot as it is more appropriate.

+The idea is that you can create the objects as needed and add them to the plot as it is more appropriate.

Here is a simple example to plot some simple functions::

@@ -297,7 +356,7 @@ Here a simple example to plot some simple functions::

-- we create a plot of a simple function

p = graph.fxplot(|x| exp(-0.1*x) * sin(x), 0, 8*pi)

- -- we create a graphical object that describe second function

+ -- we create a graphical object that describes the second function

-- and we add it to the previous plot

ln = graph.fxline(|x| exp(-0.1*x) * cos(x), 0, 8*pi)

p:addline(ln, 'blue')

@@ -308,15 +367,15 @@ Let us explain the example step by step.

To use the function :func:`graph.fxplot` we pass three arguments: the function that we want to plot and the minimum and maximum value of the abscissa.

The function will therefore produce a plot of the function y=f(x) for x that spans the given interval.

There is actually some magic that we have used to define the function on the fly.

-We have used the :ref:`short function syntax <short-func-notation>` that let us define a function using the syntax ``|x| f(x)`` or in the case of multiple variable ``|x,y| f(x,y)``.

-The short function syntax is very convenient to express simple function with a compact syntax.

+We have used the :ref:`short function syntax <short-func-notation>` that lets us define a function using the syntax ``|x| f(x)`` or in the case of multiple variables ``|x,y| f(x,y)``.

+The short function syntax is very convenient to express simple functions with a compact syntax.

-The second function :func:`graph.fxline` operates in a similar way but it does create a graphical object instead of a plot.

-Then in the following instruction we add the second line in the plot by using the :meth:`~Plot.addline` method.

+The second function :func:`graph.fxline` operates in a similar way, but it creates a graphical object instead of a plot.

+In the following instruction, we add the second line to the plot using the :meth:`~Plot.addline` method.

-We can also set the title of the plot by using the :attr:`~Plot.title` property of the plot.

+We can also set the title of the plot using the :attr:`~Plot.title` property of the plot.

-Here the plot that we obtain with the snippet given above:

+Here is the plot that we obtain with the snippet given above:

.. figure:: plot-intro-example.png

@@ -335,7 +394,7 @@ where ``expr`` is any expression is equivalent to::

function(a, b, ...) return expr end

-So, for example, to write the function that return a square of a number plus one you could write::

+So, for example, to write the function that returns a square of a number plus one, you could write::

f = |x| x^2+1

@@ -352,9 +411,9 @@ or, alternatively::

An Example

------------

-To illustrate most of the key features of GSL Shell, let us write a short script to calculate the volume of an n-dimensional unit sphere and compare it with the analytical solution of :math:`V_n=\pi^{n/2}/ \Gamma(1+n/2)`.

+To illustrate most of the key features of GSL Shell, let us write a short script to calculate the volume of an `n`-dimensional unit sphere and compare it with the analytical solution of :math:`V_n=\pi^{n/2}/ \Gamma(1+n/2)`.

-For the integration in high dimensions, we will the :ref:`Monte Carlo VEGAS <monte-vegas>` implementation, that is included in GSL Shell.

+For the integration in high dimensions, we will use the :ref:`Monte Carlo VEGAS <monte-vegas>` implementation, that is included in GSL Shell.

At the beginning of each script, you should think about which sections of GSL Shell you want to use.

If you utilize functions from certain modules more often, you might want to call those functions directly with the help of the :func:`use` directive::

@@ -362,15 +421,15 @@ If you utilize functions from certain modules more often, you might want to call

use 'iter'

use 'math'

-If you don't use the :func:`use` directive you can still access the functions from a module but you need to specify the full name.

+If you don't use the :func:`use` directive you can still access the functions from a module, but you need to specify the full name.

So, for example, you can refer to the VEGAS algorithm using its full name ``num.monte_vegas``.

-This latter approach is useful because avoids conflicts in the global namespace.

+This latter approach is useful because it avoids conflicts in the global namespace.

Now we need to define the integrand function.

-Since we want to calculate the volume of a `n`-dimensional sphere the function should accept a `n`-tuple of coordinates and return 1 if the sampling point is inside the unit sphere or 0 otherwise.

-To work correctly the VEGAS algorithm assume that the integrand function takes a single arguments that is a table with the `n` coordinates.

-Since the computation depends on the dimension `n` of the space we need to take this later intro account.

-The solution is to define a function that we can call `getunitsphere` that returns the integrand function for the `n`-dimension space.

+Since we want to calculate the volume of an `n`-dimensional sphere, the function should accept an `n`-tuple of coordinates and return 1 if the sampling point is inside the unit sphere or 0 otherwise.

+To work correctly, the VEGAS algorithm assumes that the integrand function takes a single argument that is a table with the `n` coordinates.

+Since the computation depends on the dimension `n` of the space, we need to take this into account.

+The solution is to define a function that we can call `getunitsphere`, that returns the integrand function for the `n`-dimension space.

The `n`-dimensional integrand function itself calculates the summed square of the table values for a given size which equals :math:`R^2=\sum_{i=1}^nx_i^2`.

So `getunitsphere` can be defined as follows::

@@ -390,7 +449,7 @@ Also ::

local ln = graph.path(1, 2) -- 1-sphere = [-1, 1] (length 2)

-Now we can start to calculate the volume of the unit sphere of the first 14 dimensions::

+Now, we can start to calculate the volume of the unit sphere of the first 14 dimensions::

for d=2, 14 do

@@ -410,16 +469,16 @@ Now we can start to calculate the volume of the unit sphere of the first 14 dime

end

The loop consists of three major parts.

-In the first part we initialize the important variables with the help of the `short function syntax` and the :func:`iter.ilist` function, which conveniently creates vectors of any size with a value provided by the function.

+In the first part, we initialize the important variables with the help of the `short function syntax` and the :func:`iter.ilist` function, which conveniently creates vectors of any size with a value provided by the function.

In this case `a` and `b` are the lower and the upper boundary for the integration.

-By calling :func:`num.monte_vegas` with the desired unitsphere function, the monte carlo vegas algorithm is being invoked for the first time.

+By calling :func:`num.monte_vegas` with the desired unitsphere function, the Monte Carlo VEGAS algorithm is being invoked for the first time.

It returns multiple arguments, namely the result itself, the precision, the number of iterations it took and a continuation function that can be called to recalculate the result with higher precision.

Depending on the relative precision `sig/res`, we continue to recalculate the integral with increasing numbers of iterations.

When it is done, we add the dimension and the result to our given path by :func:`~Path.line_to`.

-We can now continue to compare the data with analytical solutions and plot these results.

+We can now proceed to compare the data with analytical solutions and plot these results.

First we need to initialize a :func:`graph.plot` object.

Then we can add the data to the plot with :func:`~Plot.add` and the result of the analytical solution with :meth:`~Plot.addline`.

Notice that you can change the appearance of the data points at this moment.

@@ -434,12 +493,12 @@ At that point, we are using `short functions` again which greatly facilitates th

p.ytitle="V"

p:show()

-Also we are using :func:`sf.gamma` from the special functions section which offers all such functions that you can find in the GSL library.

+Also note that we use :func:`sf.gamma` from the special functions section, which offers all such functions that you can find in the GSL library.

After setting the axis-names with :func:`~Plot.xtitle` and :func:`~Plot.ytitle`, we are ready to show the plot with :func:`~Plot.show`:

.. figure:: vegas.png

-Here is the code in a whole:

+Here is the code as a whole:

.. literalinclude:: intro-example.lua

:language: lua

@@ -447,8 +506,8 @@ Here is the code in a whole:

This rather simple example showed quite a lot of important features of GSL Shell.

Creating data structures with `iterators` and `short functions` are both very common.

-With the function `getunitsphere` we have shown that some problems can be solved in an elegant way by returning a function.

-These kind of functions are called closures because they refer to local variables declared outside of the function body itself.

-In this particular case the function returned by `getunitsphere` is a closure because it does refer to the variable `n` defined outside of its body.

-The function `cont` returned my `num.monte_vegas` is also another example of closure since it does refer to the current state of the VEGAS integration.

+With the function `getunitsphere`, we have shown that some problems can be solved in an elegant way by returning a function.

+These kinds of functions are called closures because they refer to local variables declared outside of the function body itself.

+In this particular case, the function returned by `getunitsphere` is a closure because it refers to the variable `n` defined outside its body.

+The function `cont` returned by `num.monte_vegas` is also another example of closure since it refers to the current state of the VEGAS integration.

diff --git a/doc/user-manual/linalg.rst b/doc/user-manual/linalg.rst
index 8c235cee..6b01cebf 100644
--- a/doc/user-manual/linalg.rst
+++ b/doc/user-manual/linalg.rst

@@ -19,7 +19,7 @@ GSL Shell gives access to some functions of linear algebra based on GSL itself o

.. function:: solve(A, b)

Solve the square system A x = b where A is a square matrix, b

- is a column matrix. It does return the solution x of the system.

+ is a column matrix. It returns the solution x of the system.

.. function:: svd(m)

@@ -57,6 +57,6 @@ GSL Shell gives access to some functions of linear algebra based on GSL itself o

M-by-N diagonal matrix (with additional rows of zeros).

This function returns three values, in the order, U, |Sgr|, V. So you can

- write something like that::

+ write something like this::

u, s, v = svd(m)

diff --git a/doc/user-manual/linfit.rst b/doc/user-manual/linfit.rst
index f8d934c4..34165b2c 100644
--- a/doc/user-manual/linfit.rst
+++ b/doc/user-manual/linfit.rst

@@ -8,17 +8,17 @@ Linear Least Squares fit

Overview

--------

-This chapter describes routines for performing least squares fits to experimental data using linear combinations of functions. The data may be weighted or unweighted, i.e. with known or unknown errors. For weighted data the functions compute the best fit parameters and their associated covariance matrix. For unweighted data the covariance matrix is estimated from the scatter of the points, giving a variance-covariance matrix.

+This chapter describes routines for performing least squares fits to experimental data using linear combinations of functions. The data may be weighted or unweighted, i.e. with known or unknown errors. For weighted data, the functions compute the best fit parameters and their associated covariance matrix. For unweighted data, the covariance matrix is estimated from the scatter of the points, giving a variance-covariance matrix.

Linear Fit Functions

--------------------

.. function:: linfit(X, y[, w])

- Perform a linear fit for the observations ``y`` using the model matrix ``X``. It does return a vector with the coefficients of the fit, the residual chi square and the covariance matrix. You can optionally provides the weights ``w`` of the observations to obtain a weighted linear fit. The argument ``y`` should be a column matrix of length N while the model ``X`` should be a N x M matrix where M is the number of basis in the linear model.

+ Perform a linear fit for the observations ``y`` using the model matrix ``X``. It returns a vector with the coefficients of the fit, the residual chi square and the covariance matrix. You can optionally provide the weights ``w`` of the observations to obtain a weighted linear fit. The argument ``y`` should be a column matrix of length N while the model ``X`` should be an N x M matrix where M is the number of basis functions in the linear model.

*Example*

- Let us suppose that we have two column matrix, x and y, and we want to make a linear fit of y versus x. We build first the model matrix and then we use it to make the linear fit::

+ Let us suppose that we have two column matrices, x and y, and we want to make a linear fit of y versus x. We first build the model matrix and then we use it to perform the linear fit::

X = matrix.new(n, 2, |i,j| j == 1 and 1 or x[i])

c, chisq, cov = linfit(X, y)

diff --git a/doc/user-manual/lua-base.rst b/doc/user-manual/lua-base.rst
index 16ea768f..d46c7d39 100644
--- a/doc/user-manual/lua-base.rst
+++ b/doc/user-manual/lua-base.rst

@@ -7,18 +7,18 @@

Iterators

============================

-GSL Shell provides some simple functions to perform some common tasks related to iterators.

-The utilization of these functions allows to write more simple and compact code and to improve the readability.

-The functions describes below are available in the module :mod:`iter`.

+GSL Shell provides a number of simple functions to perform some common tasks related to iterators.

+These functions allow you to write simpler and more compact code and to improve readability.

+The functions described below are available in the module :mod:`iter`.

.. module:: iter

.. function:: sequence(f, a, b)

sequence(f, b)

- Return an "iterator" that gives the value (or the values) returned by the evaluation of ``f(i)`` where ``i`` is an integer that goes from ``a`` to ``b``. In the second form the generated values start from one.

+ Return an "iterator" that gives the value (or the values) returned by the evaluation of ``f(i)`` where ``i`` is an integer that goes from ``a`` to ``b``. In the second form, the generated values start from one.

- Generally, an iterator is a function that, each time that it is called, return one value from a sequence. The sequence is considered to be terminated when the iterator returns ``nil``. An iterator can be used directly in a ``for`` loop with the following syntax::

+ Generally, an iterator is a function that, each time that it is called, returns one value from a sequence. The sequence is considered to be terminated when the iterator returns ``nil``. An iterator can be used directly in a ``for`` loop with the following syntax::

for a, b, ... in f do

-- [ some code here]

@@ -28,7 +28,7 @@ The functions describes below are available in the module :mod:`iter`.

.. function:: sample(f, xi, xs, n)

- Return an iterators that gives the couple ``x, f(x)`` for ``x`` going from ``xi`` to ``xs`` with ``n`` uniformly spaced intervals. If ``f`` returns multiple values only the first one is retained.

+ Return an iterator that gives the couple ``x, f(x)`` for ``x`` going from ``xi`` to ``xs`` with ``n`` uniformly spaced intervals. If ``f`` returns multiple values, only the first one is retained.

Example::

@@ -40,19 +40,19 @@ The functions describes below are available in the module :mod:`iter`.

.. function:: isample(f, a, b)

isample(f, b)

- Return an iterators that gives the couple ``i, f(i)`` where ``i`` is an integer going from ``a`` to ``b``. In the second form the sequence will start from one. If ``f`` returns multiple values only the first one is retained.

+ Return an iterator that gives the couple ``i, f(i)`` where ``i`` is an integer going from ``a`` to ``b``. In the second form, the sequence will start from one. If ``f`` returns multiple values, only the first one is retained.

.. function:: ilist(f, a, b)

ilist(f, b)

Returns a list with the elements ``f(i)`` where ``i`` is an integer going from a to b.

- In the second form the sequence will start from one.

+ In the second form, the sequence will start from one.

.. function:: isum(f, a, b)

isum(f, b)

Returns the sum of ``f(i)`` for all integers ``i`` from a to b.

- In the second form the sequence will start from one.

+ In the second form, the sequence will start from one.

@@ -65,7 +65,7 @@ Actually the more general form of an iterator is the following::

-- [ some code here]

end

-In this latter form the iterator f is called in the form ``f(s, i)`` where ``s`` is the value provided in the ``for`` loop. The value of ``i`` changes every time, the value taken is the the first value returned by the function ``f`` the last times it was called or, for the first time only, ``i0``.

+In this latter form, the iterator f is called in the form ``f(s, i)``, where ``s`` is the value provided in the ``for`` loop. The value of ``i`` changes every time: the value taken is the first value returned by the function ``f`` the last time it was called or, for the first time only, ``i0``.

We give an example to build a stateless row "iterator" over the rows of a matrix.::

diff --git a/luajit2/doc/install.html b/luajit2/doc/install.html
index f487958d..19772da6 100644
--- a/luajit2/doc/install.html
+++ b/luajit2/doc/install.html

@@ -126,28 +126,28 @@ operating systems, CPUs and compilers:

<td class="compatos">MSVC + SDK v7.0 WinSDK v7.0</td>

</tr>

-<td class="compatcpu"><a href="#android">ARMv5+ ARM9E+</a></td>

+<td class="compatcpu"><a href="#cross2">ARMv5+ ARM9E+</a></td>

</tr>

-<td class="compatcpu"><a href="#ppc">PPC</a></td>

-<td class="compatos">GCC 4.3+</td>

+<td class="compatcpu"><a href="#cross2">PPC</a></td>

+<td class="compatos">GCC 4.3+ GCC 4.1 (<a href="#cross2">PS3</a>)</td>

</tr>

-<td class="compatcpu"><a href="#ppc">PPC/e500v2</a></td>

+<td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td>

</tr>

-<td class="compatcpu"><a href="#mips">MIPS</a></td>

+<td class="compatcpu"><a href="#cross2">MIPS</a></td>

@@ -341,32 +341,69 @@ directory where <tt>luajit.exe</tt> is installed

<h2 id="cross">Cross-compiling LuaJIT</h2>

-The build system has limited support for cross-compilation. For details

-check the comments in <tt>src/Makefile</tt>. Here are some popular examples:

+The GNU Makefile-based build system allows cross-compiling on any host

+for any supported target, as long as both architectures have the same

+pointer size. If you want to cross-compile to any 32 bit target on an

+x64 OS, you need to install the multilib development package (e.g.

+<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part

+(<tt>HOST_CC="gcc -m32"</tt>).

-You can cross-compile to a 32 bit binary on a multilib x64 OS by

-installing the multilib development packages (e.g. <tt>libc6-dev-i386</tt>

-on Debian/Ubuntu) and running:

+You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the

+target OS differ, or you'll get assembler or linker errors. E.g. if

+you're compiling on a Windows or OSX host for embedded Linux or Android,

+you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a

+minimal target OS, you may need to disable the built-in allocator in

+<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. The examples

+below only show some popular targets — please check the comments

+in <tt>src/Makefile</tt> for more details.

+# Cross-compile to a 32 bit binary on a multilib x64 OS

make CC="gcc -m32"

-</pre>

-

-You can cross-compile for a Windows target on Debian/Ubuntu by

-installing the <tt>mingw32</tt> package and running:

-

-<pre class="code">

+# Cross-compile on Debian/Ubuntu for Windows (mingw32 package)

make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows

</pre>

-

-You can cross-compile for an ARM target on an x86 or x64 host

-system using a standard GNU cross-compile toolchain (Binutils, GCC,

-EGLIBC). The <tt>CROSS</tt> prefix may vary depending on the

-<tt>--target</tt> of the toolchain:

+

+The <tt>CROSS</tt> prefix allows specifying a standard GNU cross-compile

+toolchain (Binutils, GCC and a matching libc). The prefix may vary

+depending on the <tt>--target</tt> the toolchain was built for (note the

+<tt>CROSS</tt> prefix has a trailing <tt>"-"</tt>). The examples below

+use the canonical toolchain triplets for Linux.

+

+

+Since there's often no easy way to detect CPU features at runtime, it's

+important to compile with the proper CPU or architecture settings. You

+can specify these when building the toolchain yourself. Or add

+<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For

+ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting,

+too. Otherwise LuaJIT may not run at the full performance of your target

+CPU.

-make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi-

+# ARM soft-float

+make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \

+ TARGET_CFLAGS="-mfloat-abi=soft"

+# ARM soft-float ABI with VFP (example for Cortex-a8)

+make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \

+ TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp"

+# ARM hard-float ABI with VFP (armhf, requires recent toolchain)

+make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-

+# PPC

+make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-

+# PPC/e500v2 (fast interpreter only)

+make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-

+# PS3 (fast interpreter only)

+make HOST_CC="gcc -m32" CROSS=ppu-lv2-

+# MIPS big-endian

+make HOST_CC="gcc -m32" CROSS=mips-linux-

+# MIPS little-endian

+make HOST_CC="gcc -m32" CROSS=mipsel-linux-

</pre>

You can cross-compile for Android (ARM) using the <a href="http://developer.android.com/sdk/ndk/index.html">» Android NDK</a>.

@@ -393,51 +430,14 @@ much slower than the JIT compiler. Please complain to Apple, not me.

Or use Android. :-p

-ISDK=/Developer/Platforms/iPhoneOS.platform/Developer

-ISDKVER=iPhoneOS4.3.sdk

+IXCODE=/Applications/Xcode45-DP4.app/Contents

+ISDK=$IXCODE/Developer/Platforms/iPhoneOS.platform/Developer

+ISDKVER=iPhoneOS6.0.sdk

ISDKP=$ISDK/usr/bin/

-ISDKF="-arch armv6 -isysroot $ISDK/SDKs/$ISDKVER"

+ISDKF="-arch armv7 -isysroot $ISDK/SDKs/$ISDKVER"

make HOST_CC="gcc -m32 -arch i386" CROSS=$ISDKP TARGET_FLAGS="$ISDKF" \

TARGET_SYS=iOS

</pre>

-

-You can cross-compile for a PPC target or a

-PPC/e500v2 target on x86 or x64 host systems using a standard

-GNU cross-compile toolchain (Binutils, GCC, EGLIBC).

-The <tt>CROSS</tt> prefix may vary depending on the <tt>--target</tt>

-of the toolchain:

-

-<pre class="code">

-# PPC

-make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-

-</pre>

-<pre class="code">

-# PPC/e500v2

-make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-

-</pre>

-

-You can cross-compile for a big-endian or little-endian

-MIPS target on x86 or x64 host systems using a standard

-GNU cross-compile toolchain (Binutils, GCC, EGLIBC).

-The <tt>CROSS</tt> prefix may vary depending on the <tt>--target</tt>

-of the toolchain:

-

-<pre class="code">

-# MIPS big-endian

-make HOST_CC="gcc -m32" CROSS=mips-linux-

-</pre>

-<pre class="code">

-# MIPS little-endian

-make HOST_CC="gcc -m32" CROSS=mipsel-linux-

-</pre>

-

-Whenever the host OS and the target OS differ, you need to specify

-<tt>TARGET_SYS</tt> or you'll get assembler or linker errors. E.g. if

-you're compiling on a Windows or OSX host for embedded Linux or Android,

-you need to add <tt>TARGET_SYS=Linux</tt> to the examples above. For a

-minimal target OS, you may need to disable the built-in allocator in

-<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.

-

<h2 id="embed">Embedding LuaJIT</h2>

diff --git a/luajit2/dynasm/dasm_arm.h b/luajit2/dynasm/dasm_arm.h
index b770c2df..d49ecae0 100644
--- a/luajit2/dynasm/dasm_arm.h
+++ b/luajit2/dynasm/dasm_arm.h

@@ -22,7 +22,7 @@ enum {

DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,

/* The following actions also have an argument. */

DASM_REL_PC, DASM_LABEL_PC,

- DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12,

+ DASM_IMM, DASM_IMM12, DASM_IMM16, DASM_IMML8, DASM_IMML12, DASM_IMMV8,

DASM__MAX

};

@@ -250,6 +250,9 @@ void dasm_put(Dst_DECL, int start, ...)

#endif

b[pos++] = n;

break;

+ case DASM_IMMV8:

+ CK((n & 3) == 0, RANGE_I);

+ n >>= 2;

case DASM_IMML8:

case DASM_IMML12:

CK(n >= 0 ? ((n>>((ins>>5)&31)) == 0) :

@@ -316,7 +319,7 @@ int dasm_link(Dst_DECL, size_t *szp)

case DASM_REL_LG: case DASM_REL_PC: pos++; break;

case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;

case DASM_IMM: case DASM_IMM12: case DASM_IMM16:

- case DASM_IMML8: case DASM_IMML12: pos++; break;

+ case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break;

}

stop: (void)0;

@@ -377,9 +380,13 @@ int dasm_encode(Dst_DECL, void *buffer)

} else if ((ins & 0x1000)) {

CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL);

goto patchimml8;

- } else {

+ } else if ((ins & 0x2000) == 0) {

CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL);

- goto patchimml12;

+ goto patchimml;

+ } else {

+ CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL);

+ n >>= 2;

+ goto patchimml;

}

break;

case DASM_LABEL_LG:

@@ -399,7 +406,7 @@ int dasm_encode(Dst_DECL, void *buffer)

cp[-1] |= n >= 0 ? (0x00800000 | (n & 0x0f) | ((n & 0xf0) << 4)) :

((-n & 0x0f) | ((-n & 0xf0) << 4));

break;

- case DASM_IMML12: patchimml12:

+ case DASM_IMML12: case DASM_IMMV8: patchimml:

cp[-1] |= n >= 0 ? (0x00800000 | n) : (-n);

break;

default: *cp++ = ins; break;

diff --git a/luajit2/dynasm/dasm_arm.lua b/luajit2/dynasm/dasm_arm.lua
index 4735f323..11701691 100644
--- a/luajit2/dynasm/dasm_arm.lua
+++ b/luajit2/dynasm/dasm_arm.lua

@@ -39,7 +39,7 @@ local wline, werror, wfatal, wwarn

local action_names = {

"STOP", "SECTION", "ESC", "REL_EXT",

"ALIGN", "REL_LG", "LABEL_LG",

- "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12",

+ "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8",

}

-- Maximum number of section buffer positions for dasm_put().

@@ -405,14 +405,14 @@ local map_op = {

strd_2 = "e00000f0DL", strd_3 = "e00000f0DL", -- v5TE

ldrsh_2 = "e01000f0DL", ldrsh_3 = "e01000f0DL",

- ldm_2 = "e8900000nR", ldmia_2 = "e8900000nR", ldmfd_2 = "e8900000nR",

- ldmda_2 = "e8100000nR", ldmfa_2 = "e8100000nR",

- ldmdb_2 = "e9100000nR", ldmea_2 = "e9100000nR",

- ldmib_2 = "e9900000nR", ldmed_2 = "e9900000nR",

- stm_2 = "e8800000nR", stmia_2 = "e8800000nR", stmfd_2 = "e8800000nR",

- stmda_2 = "e8000000nR", stmfa_2 = "e8000000nR",

- stmdb_2 = "e9000000nR", stmea_2 = "e9000000nR",

- stmib_2 = "e9800000nR", stmed_2 = "e9800000nR",

+ ldm_2 = "e8900000oR", ldmia_2 = "e8900000oR", ldmfd_2 = "e8900000oR",

+ ldmda_2 = "e8100000oR", ldmfa_2 = "e8100000oR",

+ ldmdb_2 = "e9100000oR", ldmea_2 = "e9100000oR",

+ ldmib_2 = "e9900000oR", ldmed_2 = "e9900000oR",

+ stm_2 = "e8800000oR", stmia_2 = "e8800000oR", stmfd_2 = "e8800000oR",

+ stmda_2 = "e8000000oR", stmfa_2 = "e8000000oR",

+ stmdb_2 = "e9000000oR", stmea_2 = "e9000000oR",

+ stmib_2 = "e9800000oR", stmed_2 = "e9800000oR",

pop_1 = "e8bd0000R", push_1 = "e92d0000R",

-- Branch instructions.

@@ -428,9 +428,89 @@ local map_op = {

svc_1 = "ef000000T", swi_1 = "ef000000T",

ud_0 = "e7f001f0",

- -- NYI: Advanced SIMD and VFP instructions.

- -- NYI instructions, since I have no need for them right now:

+ -- VFP instructions.

+ ["vadd.f32_3"] = "ee300a00dnm",

+ ["vadd.f64_3"] = "ee300b00Gdnm",

+ ["vsub.f32_3"] = "ee300a40dnm",

+ ["vsub.f64_3"] = "ee300b40Gdnm",

+ ["vmul.f32_3"] = "ee200a00dnm",

+ ["vmul.f64_3"] = "ee200b00Gdnm",

+ ["vnmul.f32_3"] = "ee200a40dnm",

+ ["vnmul.f64_3"] = "ee200b40Gdnm",

+ ["vmla.f32_3"] = "ee000a00dnm",

+ ["vmla.f64_3"] = "ee000b00Gdnm",

+ ["vmls.f32_3"] = "ee000a40dnm",

+ ["vmls.f64_3"] = "ee000b40Gdnm",

+ ["vnmla.f32_3"] = "ee100a40dnm",

+ ["vnmla.f64_3"] = "ee100b40Gdnm",

+ ["vnmls.f32_3"] = "ee100a00dnm",

+ ["vnmls.f64_3"] = "ee100b00Gdnm",

+ ["vdiv.f32_3"] = "ee800a00dnm",

+ ["vdiv.f64_3"] = "ee800b00Gdnm",

+ ["vabs.f32_2"] = "eeb00ac0dm",

+ ["vabs.f64_2"] = "eeb00bc0Gdm",

+ ["vneg.f32_2"] = "eeb10a40dm",

+ ["vneg.f64_2"] = "eeb10b40Gdm",

+ ["vsqrt.f32_2"] = "eeb10ac0dm",

+ ["vsqrt.f64_2"] = "eeb10bc0Gdm",

+ ["vcmp.f32_2"] = "eeb40a40dm",

+ ["vcmp.f64_2"] = "eeb40b40Gdm",

+ ["vcmpe.f32_2"] = "eeb40ac0dm",

+ ["vcmpe.f64_2"] = "eeb40bc0Gdm",

+ ["vcmpz.f32_1"] = "eeb50a40d",

+ ["vcmpz.f64_1"] = "eeb50b40Gd",

+ ["vcmpze.f32_1"] = "eeb50ac0d",

+ ["vcmpze.f64_1"] = "eeb50bc0Gd",

+ vldr_2 = "ed100a00dl|ed100b00Gdl",

+ vstr_2 = "ed000a00dl|ed000b00Gdl",

+ vldm_2 = "ec900a00or",

+ vldmia_2 = "ec900a00or",

+ vldmdb_2 = "ed100a00or",

+ vpop_1 = "ecbd0a00r",

+ vstm_2 = "ec800a00or",

+ vstmia_2 = "ec800a00or",

+ vstmdb_2 = "ed000a00or",

+ vpush_1 = "ed2d0a00r",

+ ["vmov.f32_2"] = "eeb00a40dm|eeb00a00dY", -- #imm is VFPv3 only

+ ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY", -- #imm is VFPv3 only

+ vmov_2 = "ee100a10Dn|ee000a10nD",

+ vmov_3 = "ec500a10DNm|ec400a10mDN|ec500b10GDNm|ec400b10GmDN",

+ vmrs_0 = "eef1fa10",

+ vmrs_1 = "eef10a10D",

+ vmsr_1 = "eee10a10D",

+ ["vcvt.s32.f32_2"] = "eebd0ac0dm",

+ ["vcvt.s32.f64_2"] = "eebd0bc0dGm",

+ ["vcvt.u32.f32_2"] = "eebc0ac0dm",

+ ["vcvt.u32.f64_2"] = "eebc0bc0dGm",

+ ["vcvtr.s32.f32_2"] = "eebd0a40dm",

+ ["vcvtr.s32.f64_2"] = "eebd0b40dGm",

+ ["vcvtr.u32.f32_2"] = "eebc0a40dm",

+ ["vcvtr.u32.f64_2"] = "eebc0b40dGm",

+ ["vcvt.f32.s32_2"] = "eeb80ac0dm",

+ ["vcvt.f64.s32_2"] = "eeb80bc0GdFm",

+ ["vcvt.f32.u32_2"] = "eeb80a40dm",

+ ["vcvt.f64.u32_2"] = "eeb80b40GdFm",

+ ["vcvt.f32.f64_2"] = "eeb70bc0dGm",

+ ["vcvt.f64.f32_2"] = "eeb70ac0GdFm",

+ -- VFPv4 only:

+ ["vfma.f32_3"] = "eea00a00dnm",

+ ["vfma.f64_3"] = "eea00b00Gdnm",

+ ["vfms.f32_3"] = "eea00a40dnm",

+ ["vfms.f64_3"] = "eea00b40Gdnm",

+ ["vfnma.f32_3"] = "ee900a40dnm",

+ ["vfnma.f64_3"] = "ee900b40Gdnm",

+ ["vfnms.f32_3"] = "ee900a00dnm",

+ ["vfnms.f64_3"] = "ee900b00Gdnm",

+ -- NYI: Advanced SIMD instructions.

+ -- NYI: I have no need for these instructions right now:

-- swp, swpb, strex, ldrex, strexd, ldrexd, strexb, ldrexb, strexh, ldrexh

-- msr, nopv6, yield, wfe, wfi, sev, dbg, bxj, smc, srs, rfe

-- cps, setend, pli, pld, pldw, clrex, dsb, dmb, isb

@@ -476,6 +556,18 @@ local function parse_gpr_pm(expr)

return parse_gpr(expr2), (pm == "-")

end

+local function parse_vr(expr, tp)

+ local t, r = match(expr, "^([sd])([0-9]+)$")

+ if t == tp then

+ r = tonumber(r)

+ if r <= 31 then

+ if t == "s" then return shr(r, 1), band(r, 1) end

+ return band(r, 15), shr(r, 4)

+ end

+ werror("bad register name `"..expr.."'")

+end

local function parse_reglist(reglist)

reglist = match(reglist, "^{%s*([^}]*)}$")

if not reglist then werror("register list expected") end

@@ -490,6 +582,21 @@ local function parse_reglist(reglist)

return rr

end

+local function parse_vrlist(reglist)

+ local ta, ra, tb, rb = match(reglist,

+ "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$")

+ ra, rb = tonumber(ra), tonumber(rb)

+ if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then

+ local nr = rb+1 - ra

+ if ta == "s" then

+ return shl(shr(ra,1),12)+shl(band(ra,1),22) + nr

+ else

+ return shl(band(ra,15),12)+shl(shr(ra,4),22) + nr*2 + 0x100

+ end

+ werror("register list expected")

+end

local function parse_imm(imm, bits, shift, scale, signed)

imm = match(imm, "^#(.*)$")

if not imm then werror("expected immediate operand") end

@@ -680,81 +787,132 @@ local function parse_load(params, nparams, n, op)

return op

end

+local function parse_vload(q)

+ local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$")

+ if reg then

+ local d = shl(parse_gpr(reg), 16)

+ if imm == "" then return d end

+ imm = match(imm, "^,%s*#(.*)$")

+ if imm then

+ local n = tonumber(imm)

+ if n then

+ if n >= -1020 and n <= 1020 and n%4 == 0 then

+ return d + (n >= 0 and n/4+0x00800000 or -n/4)

+ end

+ werror("out of range immediate `"..imm.."'")

+ else

+ waction("IMMV8", 32768 + 32*8, imm)

+ return d

+ end

+ else

+ if match(q, "^[<>=%-]") or match(q, "^extern%s+") then

+ local mode, n, s = parse_label(q, false)

+ waction("REL_"..mode, n + 0x2800, s, 1)

+ return 15 * 65536

+ end

+ local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$")

+ if reg and tailr ~= "" then

+ local d, tp = parse_gpr(reg)

+ if tp then

+ waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr))

+ return shl(d, 16)

+ end

+ werror("expected address operand")

+end

------------------------------------------------------------------------------

-- Handle opcodes defined with template strings.

-map_op[".template__"] = function(params, template, nparams)

- if not params then return sub(template, 9) end

+local function parse_template(params, template, nparams, pos)

local op = tonumber(sub(template, 1, 8), 16)

local n = 1

- -- Limit number of section buffer positions used by a single dasm_put().

- -- A single opcode needs a maximum of 3 positions.

- if secpos+3 > maxsecpos then wflush() end

- local pos = wpos()

+ local vr = "s"

-- Process each character.

for p in gmatch(sub(template, 9), ".") do

+ local q = params[n]

if p == "D" then

- op = op + shl(parse_gpr(params[n]), 12); n = n + 1

+ op = op + shl(parse_gpr(q), 12); n = n + 1

elseif p == "N" then

- op = op + shl(parse_gpr(params[n]), 16); n = n + 1

+ op = op + shl(parse_gpr(q), 16); n = n + 1

elseif p == "S" then

- op = op + shl(parse_gpr(params[n]), 8); n = n + 1

+ op = op + shl(parse_gpr(q), 8); n = n + 1

elseif p == "M" then

- op = op + parse_gpr(params[n]); n = n + 1

+ op = op + parse_gpr(q); n = n + 1

+ elseif p == "d" then

+ local r,h = parse_vr(q, vr); op = op+shl(r,12)+shl(h,22); n = n + 1

+ elseif p == "n" then

+ local r,h = parse_vr(q, vr); op = op+shl(r,16)+shl(h,7); n = n + 1

+ elseif p == "m" then

+ local r,h = parse_vr(q, vr); op = op+r+shl(h,5); n = n + 1

elseif p == "P" then

- local imm = match(params[n], "^#(.*)$")

+ local imm = match(q, "^#(.*)$")

if imm then

op = op + parse_imm12(imm) + 0x02000000

else

- op = op + parse_gpr(params[n])

+ op = op + parse_gpr(q)

end

n = n + 1

elseif p == "p" then

- op = op + parse_shift(params[n], true); n = n + 1

+ op = op + parse_shift(q, true); n = n + 1

elseif p == "L" then

op = parse_load(params, nparams, n, op)

+ elseif p == "l" then

+ op = op + parse_vload(q)

elseif p == "B" then

- local mode, n, s = parse_label(params[n], false)

+ local mode, n, s = parse_label(q, false)

waction("REL_"..mode, n, s, 1)

elseif p == "C" then -- blx gpr vs. blx label.

- local p = params[n]

- if match(p, "^([%w_]+):(r1?[0-9])$") or match(p, "^r(1?[0-9])$") then

- op = op + parse_gpr(p)

+ if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then

+ op = op + parse_gpr(q)

else

if op < 0xe0000000 then werror("unconditional instruction") end

- local mode, n, s = parse_label(p, false)

+ local mode, n, s = parse_label(q, false)

waction("REL_"..mode, n, s, 1)

op = 0xfa000000

end

- elseif p == "n" then

- local r, wb = match(params[n], "^([^!]*)(!?)$")

+ elseif p == "F" then

+ vr = "s"

+ elseif p == "G" then

+ vr = "d"

+ elseif p == "o" then

+ local r, wb = match(q, "^([^!]*)(!?)$")

op = op + shl(parse_gpr(r), 16) + (wb == "!" and 0x00200000 or 0)

n = n + 1

elseif p == "R" then

- op = op + parse_reglist(params[n]); n = n + 1

+ op = op + parse_reglist(q); n = n + 1

+ elseif p == "r" then

+ op = op + parse_vrlist(q); n = n + 1

elseif p == "W" then

- op = op + parse_imm16(params[n]); n = n + 1

+ op = op + parse_imm16(q); n = n + 1

elseif p == "v" then

- op = op + parse_imm(params[n], 5, 7, 0, false); n = n + 1

+ op = op + parse_imm(q, 5, 7, 0, false); n = n + 1

elseif p == "w" then

- local imm = match(params[n], "^#(.*)$")

+ local imm = match(q, "^#(.*)$")

if imm then

- op = op + parse_imm(params[n], 5, 7, 0, false); n = n + 1

+ op = op + parse_imm(q, 5, 7, 0, false); n = n + 1

else

- op = op + shl(parse_gpr(params[n]), 8) + 16

+ op = op + shl(parse_gpr(q), 8) + 16

end

elseif p == "X" then

- op = op + parse_imm(params[n], 5, 16, 0, false); n = n + 1

+ op = op + parse_imm(q, 5, 16, 0, false); n = n + 1

+ elseif p == "Y" then

+ local imm = tonumber(match(q, "^#(.*)$")); n = n + 1

+ if not imm or shr(imm, 8) ~= 0 then

+ werror("bad immediate operand")

+ end

+ op = op + shl(band(imm, 0xf0), 12) + band(imm, 0x0f)

elseif p == "K" then

- local imm = tonumber(match(params[n], "^#(.*)$")); n = n + 1

+ local imm = tonumber(match(q, "^#(.*)$")); n = n + 1

if not imm or shr(imm, 16) ~= 0 then

werror("bad immediate operand")

end

op = op + shl(band(imm, 0xfff0), 4) + band(imm, 0x000f)

elseif p == "T" then

- op = op + parse_imm(params[n], 24, 0, 0, false); n = n + 1

+ op = op + parse_imm(q, 24, 0, 0, false); n = n + 1

elseif p == "s" then

-- Ignored.

else

@@ -764,6 +922,27 @@ map_op[".template__"] = function(params, template, nparams)

wputpos(pos, op)

end

+map_op[".template__"] = function(params, template, nparams)

+ if not params then return sub(template, 9) end

+ -- Limit number of section buffer positions used by a single dasm_put().

+ -- A single opcode needs a maximum of 3 positions.

+ if secpos+3 > maxsecpos then wflush() end

+ local pos = wpos()

+ local apos, spos = #actargs, secpos

+ local ok, err

+ for t in gmatch(template, "[^|]+") do

+ ok, err = pcall(parse_template, params, t, nparams, pos)

+ if ok then return end

+ secpos = spos

+ actargs[apos+1] = nil

+ actargs[apos+2] = nil

+ actargs[apos+3] = nil

+ end

+ error(err, 0)

+end

------------------------------------------------------------------------------

-- Pseudo-opcode to mark the position where the action list is to be emitted.

@@ -923,10 +1102,10 @@ function _M.mergemaps(map_coreop, map_def)

setmetatable(map_op, { __index = function(t, k)

local v = map_coreop[k]

if v then return v end

- local cc = sub(k, -4, -3)

+ local k1, cc, k2 = match(k, "^(.-)(..)([._].*)$")

local cv = map_cond[cc]

if cv then

- local v = rawget(t, sub(k, 1, -5)..sub(k, -2))

+ local v = rawget(t, k1..k2)

if type(v) == "string" then return format("%x%s", cv, sub(v, 2)) end

end

end })

diff --git a/luajit2/src/Makefile b/luajit2/src/Makefile
index 6b4b31d0..1fdfb671 100644
--- a/luajit2/src/Makefile
+++ b/luajit2/src/Makefile

@@ -389,9 +389,15 @@ ifneq (,$(findstring LJ_DUALNUM 1,$(TARGET_TESTARCH)))

endif

ifneq (,$(findstring LJ_ARCH_HASFPU 1,$(TARGET_TESTARCH)))

DASM_AFLAGS+= -D FPU

+ TARGET_ARCH+= -DLJ_ARCH_HASFPU=1

+else

+ TARGET_ARCH+= -DLJ_ARCH_HASFPU=0

endif

ifeq (,$(findstring LJ_ABI_SOFTFP 1,$(TARGET_TESTARCH)))

- DASM_AFLAGS+= -D HF

+ DASM_AFLAGS+= -D HFABI

+ TARGET_ARCH+= -DLJ_ABI_SOFTFP=0

+else

+ TARGET_ARCH+= -DLJ_ABI_SOFTFP=1

endif

DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))

ifeq (Windows,$(TARGET_SYS))

diff --git a/luajit2/src/jit/dis_arm.lua b/luajit2/src/jit/dis_arm.lua
index 0fcd1bed..d37a9750 100644
--- a/luajit2/src/jit/dis_arm.lua
+++ b/luajit2/src/jit/dis_arm.lua

@@ -23,24 +23,118 @@ local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift

------------------------------------------------------------------------------

local map_loadc = {

- shift = 9, mask = 7,

- [5] = {

- shift = 0, mask = 0 -- NYI VFP load/store.

+ shift = 8, mask = 15,

+ [10] = {

+ shift = 20, mask = 1,

+ [0] = {

+ shift = 23, mask = 3,

+ [0] = "vmovFmDN", "vstmFNdr",

+ _ = {

+ shift = 21, mask = 1,

+ [0] = "vstrFdl",

+ { shift = 16, mask = 15, [13] = "vpushFdr", _ = "vstmdbFNdr", }

+ },

+ {

+ shift = 23, mask = 3,

+ [0] = "vmovFDNm",

+ { shift = 16, mask = 15, [13] = "vpopFdr", _ = "vldmFNdr", },

+ _ = {

+ shift = 21, mask = 1,

+ [0] = "vldrFdl", "vldmdbFNdr",

+ },

+ [11] = {

+ shift = 20, mask = 1,

+ [0] = {

+ shift = 23, mask = 3,

+ [0] = "vmovGmDN", "vstmGNdr",

+ _ = {

+ shift = 21, mask = 1,

+ [0] = "vstrGdl",

+ { shift = 16, mask = 15, [13] = "vpushGdr", _ = "vstmdbGNdr", }

+ },

+ {

+ shift = 23, mask = 3,

+ [0] = "vmovGDNm",

+ { shift = 16, mask = 15, [13] = "vpopGdr", _ = "vldmGNdr", },

+ _ = {

+ shift = 21, mask = 1,

+ [0] = "vldrGdl", "vldmdbGNdr",

+ },

_ = {

shift = 0, mask = 0 -- NYI ldc, mcrr, mrrc.

}

+local map_vfps = {

+ shift = 6, mask = 0x2c001,

+ [0] = "vmlaF.dnm", "vmlsF.dnm",

+ [0x04000] = "vnmlsF.dnm", [0x04001] = "vnmlaF.dnm",

+ [0x08000] = "vmulF.dnm", [0x08001] = "vnmulF.dnm",

+ [0x0c000] = "vaddF.dnm", [0x0c001] = "vsubF.dnm",

+ [0x20000] = "vdivF.dnm",

+ [0x24000] = "vfnmsF.dnm", [0x24001] = "vfnmaF.dnm",

+ [0x28000] = "vfmaF.dnm", [0x28001] = "vfmsF.dnm",

+ [0x2c000] = "vmovF.dY",

+ [0x2c001] = {

+ shift = 7, mask = 0x1e01,

+ [0] = "vmovF.dm", "vabsF.dm",

+ [0x0200] = "vnegF.dm", [0x0201] = "vsqrtF.dm",

+ [0x0800] = "vcmpF.dm", [0x0801] = "vcmpeF.dm",

+ [0x0a00] = "vcmpzF.d", [0x0a01] = "vcmpzeF.d",

+ [0x0e01] = "vcvtG.dF.m",

+ [0x1000] = "vcvt.f32.u32Fdm", [0x1001] = "vcvt.f32.s32Fdm",

+ [0x1800] = "vcvtr.u32F.dm", [0x1801] = "vcvt.u32F.dm",

+ [0x1a00] = "vcvtr.s32F.dm", [0x1a01] = "vcvt.s32F.dm",

+ },

+local map_vfpd = {

+ shift = 6, mask = 0x2c001,

+ [0] = "vmlaG.dnm", "vmlsG.dnm",

+ [0x04000] = "vnmlsG.dnm", [0x04001] = "vnmlaG.dnm",

+ [0x08000] = "vmulG.dnm", [0x08001] = "vnmulG.dnm",

+ [0x0c000] = "vaddG.dnm", [0x0c001] = "vsubG.dnm",

+ [0x20000] = "vdivG.dnm",

+ [0x24000] = "vfnmsG.dnm", [0x24001] = "vfnmaG.dnm",

+ [0x28000] = "vfmaG.dnm", [0x28001] = "vfmsG.dnm",

+ [0x2c000] = "vmovG.dY",

+ [0x2c001] = {

+ shift = 7, mask = 0x1e01,

+ [0] = "vmovG.dm", "vabsG.dm",

+ [0x0200] = "vnegG.dm", [0x0201] = "vsqrtG.dm",

+ [0x0800] = "vcmpG.dm", [0x0801] = "vcmpeG.dm",

+ [0x0a00] = "vcmpzG.d", [0x0a01] = "vcmpzeG.d",

+ [0x0e01] = "vcvtF.dG.m",

+ [0x1000] = "vcvt.f64.u32GdFm", [0x1001] = "vcvt.f64.s32GdFm",

+ [0x1800] = "vcvtr.u32FdG.m", [0x1801] = "vcvt.u32FdG.m",

+ [0x1a00] = "vcvtr.s32FdG.m", [0x1a01] = "vcvt.s32FdG.m",

+ },

local map_datac = {

shift = 24, mask = 1,

[0] = {

- shift = 9, mask = 7,

- [5] = {

- shift = 0, mask = 0 -- NYI VFP data.

+ shift = 4, mask = 1,

+ [0] = {

+ shift = 8, mask = 15,

+ [10] = map_vfps,

+ [11] = map_vfpd,

+ -- NYI cdp, mcr, mrc.

- _ = {

- shift = 0, mask = 0 -- NYI cdp, mcr, mrc.

+ {

+ shift = 8, mask = 15,

+ [10] = {

+ shift = 20, mask = 15,

+ [0] = "vmovFnD", "vmovFDn",

+ [14] = "vmsrD",

+ [15] = { shift = 12, mask = 15, [15] = "vmrs", _ = "vmrsD", },

+ },

"svcT",

@@ -390,6 +484,27 @@ local function fmtload(ctx, op, pos)

return x

end

+-- Format operand 2 of vector load/store opcodes. Returns "[base]" for a zero offset, else "[base, #ofs]".

+local function fmtvload(ctx, op, pos)

+ local base = map_gpr[band(rshift(op, 16), 15)] -- Base register name from bits 16-19 of op.

+ local ofs = band(op, 255)*4 -- Low 8 bits of op, scaled by 4.

+ if band(op, 0x00800000) == 0 then ofs = -ofs end -- Bit 23 clear: the offset is negated.

+ if base == "pc" then ctx.rel = ctx.addr + pos + 8 + ofs end -- Record the target address for pc-relative accesses.

+ if ofs == 0 then

+ return format("[%s]", base)

+ else

+ return format("[%s, #%d]", base, ofs)

+ end

+end

+local function fmtvr(op, vr, sh0, sh1) -- Format a vector register name; sh0/sh1 are the bit positions of its 4 bit field and its extra bit.

+ if vr == "s" then

+ return format("s%d", 2*band(rshift(op, sh0), 15)+band(rshift(op, sh1), 1)) -- s<n>: the extra bit is the lowest bit of the register number.

+ else

+ return format("d%d", band(rshift(op, sh0), 15)+band(rshift(op, sh1-4), 16)) -- d<n>: the extra bit contributes 16 (shifted so it lands on bit 4).

+ end

+end

-- Disassemble a single instruction.

local function disass_ins(ctx)

local pos = ctx.pos

@@ -398,6 +513,7 @@ local function disass_ins(ctx)

local operands = {}

local suffix = ""

local last, name, pat

+ local vr

ctx.op = op

ctx.rel = nil

@@ -414,6 +530,11 @@ local function disass_ins(ctx)

opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._

end

name, pat = match(opat, "^([a-z0-9]*)(.*)")

+ if sub(pat, 1, 1) == "." then

+ local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")

+ suffix = suffix..s2

+ pat = p2

+ end

for p in gmatch(pat, ".") do

local x = nil

@@ -425,6 +546,12 @@ local function disass_ins(ctx)

x = map_gpr[band(rshift(op, 8), 15)]

elseif p == "M" then

x = map_gpr[band(op, 15)]

+ elseif p == "d" then

+ x = fmtvr(op, vr, 12, 22)

+ elseif p == "n" then

+ x = fmtvr(op, vr, 16, 7)

+ elseif p == "m" then

+ x = fmtvr(op, vr, 0, 5)

elseif p == "P" then

if band(op, 0x02000000) ~= 0 then

x = ror(band(op, 255), 2*band(rshift(op, 8), 15))

@@ -447,12 +574,20 @@ local function disass_ins(ctx)

end

elseif p == "L" then

- x = fmtload(ctx, op, pos, false)

+ x = fmtload(ctx, op, pos)

+ elseif p == "l" then

+ x = fmtvload(ctx, op, pos)

elseif p == "B" then

local addr = ctx.addr + pos + 8 + arshift(lshift(op, 8), 6)

if cond == 15 then addr = addr + band(rshift(op, 23), 2) end

ctx.rel = addr

x = "0x"..tohex(addr)

+ elseif p == "F" then

+ vr = "s"

+ elseif p == "G" then

+ vr = "d"

+ elseif p == "." then

+ suffix = suffix..(vr == "s" and ".f32" or ".f64")

elseif p == "R" then

if band(op, 0x00200000) ~= 0 and #operands == 1 then

operands[1] = operands[1].."!"

@@ -462,6 +597,14 @@ local function disass_ins(ctx)

if band(rshift(op, i), 1) == 1 then t[#t+1] = map_gpr[i] end

end

x = "{"..concat(t, ", ").."}"

+ elseif p == "r" then

+ if band(op, 0x00200000) ~= 0 and #operands == 2 then

+ operands[1] = operands[1].."!"

+ end

+ local s = tonumber(sub(last, 2))

+ local n = band(op, 255)

+ if vr == "d" then n = rshift(n, 1) end

+ operands[#operands] = format("{%s-%s%d}", last, vr, s+n-1)

elseif p == "W" then

x = band(op, 0x0fff) + band(rshift(op, 4), 0xf000)

elseif p == "T" then

@@ -484,6 +627,8 @@ local function disass_ins(ctx)

x = band(rshift(op, 16), 31) + 1

elseif p == "X" then

x = band(rshift(op, 16), 31) - last + 1

+ elseif p == "Y" then

+ x = band(rshift(op, 12), 0xf0) + band(op, 0x0f)

elseif p == "K" then

x = "#0x"..tohex(band(rshift(op, 4), 0x0000fff0) + band(op, 15), 4)

elseif p == "s" then

@@ -531,7 +676,8 @@ end

-- Return register name for RID.

local function regname_(r)

- return map_gpr[r]

+ if r < 16 then return map_gpr[r] end -- IDs below 16 are looked up in map_gpr.

+ return "d"..(r-16) -- All other IDs are named "d<r-16>".

end

-- Public module functions.

diff --git a/luajit2/src/lib_ffi.c b/luajit2/src/lib_ffi.c
index 5cb9086e..24a6625c 100644
--- a/luajit2/src/lib_ffi.c
+++ b/luajit2/src/lib_ffi.c

@@ -112,10 +112,14 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)

const char *s;

err_index:

s = strdata(lj_ctype_repr(L, id, NULL));

- if (tvisstr(L->base+1))

+ if (tvisstr(L->base+1)) {

lj_err_callerv(L, LJ_ERR_FFI_BADMEMBER, s, strVdata(L->base+1));

- else

- lj_err_callerv(L, LJ_ERR_FFI_BADIDX, s);

+ } else {

+ const char *key = tviscdata(L->base+1) ?

+ strdata(lj_ctype_repr(L, cdataV(L->base+1)->ctypeid, NULL)) :

+ lj_typename(L->base+1);

+ lj_err_callerv(L, LJ_ERR_FFI_BADIDXW, s, key);

+ }

}

if (!tvisfunc(tv)) {

if (mm == MM_index) {

@@ -518,7 +522,7 @@ LJLIB_CF(ffi_cast) LJLIB_REC(ffi_new)

return 1;

}

-LJLIB_CF(ffi_typeof)

+LJLIB_CF(ffi_typeof) LJLIB_REC(.)

{

CTState *cts = ctype_cts(L);

CTypeID id = ffi_checkctype(L, cts, L->base+1);

@@ -529,7 +533,7 @@ LJLIB_CF(ffi_typeof)

return 1;

}

-LJLIB_CF(ffi_istype) LJLIB_REC(ffi_istype)

+LJLIB_CF(ffi_istype) LJLIB_REC(.)

{

CTState *cts = ctype_cts(L);

CTypeID id1 = ffi_checkctype(L, cts, NULL);

diff --git a/luajit2/src/lj_arch.h b/luajit2/src/lj_arch.h
index a3d51542..9ff3c2b6 100644
--- a/luajit2/src/lj_arch.h
+++ b/luajit2/src/lj_arch.h

@@ -121,7 +121,6 @@

#define LJ_ARCH_NAME "x86"

#define LJ_ARCH_BITS 32

#define LJ_ARCH_ENDIAN LUAJIT_LE

-#define LJ_ARCH_HASFPU 1

#if LJ_TARGET_WINDOWS || __CYGWIN__

#define LJ_ABI_WIN 1

#else

@@ -139,7 +138,6 @@

#define LJ_ARCH_NAME "x64"

#define LJ_ARCH_BITS 64

#define LJ_ARCH_ENDIAN LUAJIT_LE

-#define LJ_ARCH_HASFPU 1

#define LJ_ABI_WIN LJ_TARGET_WINDOWS

#define LJ_TARGET_X64 1

#define LJ_TARGET_X86ORX64 1

@@ -154,8 +152,12 @@

#define LJ_ARCH_NAME "arm"

#define LJ_ARCH_BITS 32

#define LJ_ARCH_ENDIAN LUAJIT_LE

+#if !defined(LJ_ARCH_HASFPU) && __SOFTFP__

#define LJ_ARCH_HASFPU 0

+#endif

+#if !defined(LJ_ABI_SOFTFP) && !__ARM_PCS_VFP

#define LJ_ABI_SOFTFP 1

+#endif

#define LJ_ABI_EABI 1

#define LJ_TARGET_ARM 1

#define LJ_TARGET_EHRETREG 0

@@ -184,7 +186,6 @@

#define LJ_ARCH_BITS 32

#endif

#define LJ_ARCH_ENDIAN LUAJIT_BE

-#define LJ_ARCH_HASFPU 1

#define LJ_TARGET_PPC 1

#define LJ_TARGET_EHRETREG 3

#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */

@@ -228,8 +229,9 @@

#define LJ_ARCH_NAME "ppcspe"

#define LJ_ARCH_BITS 32

#define LJ_ARCH_ENDIAN LUAJIT_BE

-#define LJ_ARCH_HASFPU 1

+#ifndef LJ_ABI_SOFTFP

#define LJ_ABI_SOFTFP 1

+#endif

#define LJ_ABI_EABI 1

#define LJ_TARGET_PPCSPE 1

#define LJ_TARGET_EHRETREG 3

@@ -251,7 +253,6 @@

#define LJ_ARCH_ENDIAN LUAJIT_BE

#endif

#define LJ_ARCH_BITS 32

-#define LJ_ARCH_HASFPU 1

#define LJ_TARGET_MIPS 1

#define LJ_TARGET_EHRETREG 4

#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */

@@ -301,9 +302,6 @@

#if defined(__ARMEB__)

#error "No support for big-endian ARM"

#endif

-#if defined(__ARM_PCS_VFP)

-#error "No support for ARM hard-float ABI (yet)"

-#endif

#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__

#error "No support for Cortex-M CPUs"

#endif

@@ -358,6 +356,12 @@

#define LJ_HASFFI 1

#endif

+#ifndef LJ_ARCH_HASFPU

+#define LJ_ARCH_HASFPU 1

+#endif

+#ifndef LJ_ABI_SOFTFP

+#define LJ_ABI_SOFTFP 0

+#endif

#define LJ_SOFTFP (!LJ_ARCH_HASFPU)

#if LJ_ARCH_ENDIAN == LUAJIT_BE

diff --git a/luajit2/src/lj_asm.c b/luajit2/src/lj_asm.c
index 2e6b1745..8d2cafb3 100644
--- a/luajit2/src/lj_asm.c
+++ b/luajit2/src/lj_asm.c

@@ -1610,6 +1610,7 @@ static void asm_setup_regsp(ASMState *as)

break;

/* fallthrough */

case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:

+ if (!LJ_SOFTFP && irt_isnum(ir->t)) break;

ir->prev = (uint16_t)REGSP_HINT((rload & 15));

rload = lj_ror(rload, 4);

continue;

@@ -1641,7 +1642,7 @@ static void asm_setup_regsp(ASMState *as)

}

break;

#endif

-#if LJ_NEED_FP64

+#if !LJ_SOFTFP && LJ_NEED_FP64

case IR_CONV:

if (irt_isfp((ir-1)->t)) {

ir->prev = REGSP_HINT(RID_FPRET);

diff --git a/luajit2/src/lj_asm_arm.h b/luajit2/src/lj_asm_arm.h
index 6a44e5ef..c380a6e8 100644
--- a/luajit2/src/lj_asm_arm.h
+++ b/luajit2/src/lj_asm_arm.h

@@ -48,6 +48,32 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow)

return r;

}

+#if !LJ_SOFTFP

+/* Allocate two source registers for three-operand instructions. Result packs both: left | (right << 8). */

+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)

+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);

+ Reg left = irl->r, right = irr->r;

+ if (ra_hasreg(left)) { /* Left operand already has a register. */

+ ra_noweak(as, left);

+ if (ra_noreg(right))

+ right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); /* Pick a register for right that differs from left. */

+ else

+ ra_noweak(as, right);

+ } else if (ra_hasreg(right)) { /* Only the right operand has a register. */

+ ra_noweak(as, right);

+ left = ra_allocref(as, ir->op1, rset_exclude(allow, right));

+ } else if (ra_hashint(right)) { /* Neither has a register, but right has a hint: allocate right first. */

+ right = ra_allocref(as, ir->op2, allow);

+ left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));

+ } else { /* Neither has a register: allocate left first. */

+ left = ra_allocref(as, ir->op1, allow);

+ right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));

+ }

+ return left | (right << 8); /* Callers split this with (x >> 8) and (x & 255). */

+#endif

/* -- Guard handling ------------------------------------------------------ */

/* Generate an exit stub group at the bottom of the reserved MCode memory. */

@@ -125,7 +151,8 @@ static int32_t asm_fuseabase(ASMState *as, IRRef ref)

}

/* Fuse array/hash/upvalue reference into register+offset operand. */

-static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)

+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,

+ int lim)

{

IRIns *ir = IR(ref);

if (ra_noreg(ir->r)) {

@@ -136,7 +163,7 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)

int32_t ofs = asm_fuseabase(as, tab);

IRRef refa = ofs ? tab : ir->op1;

ofs += 8*IR(ir->op2)->i;

- if (ofs > -4096 && ofs < 4096) {

+ if (ofs > -lim && ofs < lim) {

*ofsp = ofs;

return ra_alloc1(as, refa, allow);

}

@@ -145,7 +172,7 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)

} else if (ir->o == IR_HREFK) {

if (mayfuse(as, ref)) {

int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));

- if (ofs < 4096) {

+ if (ofs < lim) {

*ofsp = ofs;

return ra_alloc1(as, ir->op1, allow);

}

@@ -211,14 +238,16 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,

IRIns *ir = IR(ref);

Reg base;

if (ra_noreg(ir->r) && mayfuse(as, ref)) {

- int32_t lim = (ai & 0x04000000) ? 4096 : 256;

+ int32_t lim = (!LJ_SOFTFP && (ai & 0x08000000)) ? 1024 :

+ (ai & 0x04000000) ? 4096 : 256;

if (ir->o == IR_ADD) {

int32_t ofs2;

if (irref_isk(ir->op2) &&

- (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim) {

+ (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim &&

+ (!(!LJ_SOFTFP && (ai & 0x08000000)) || !(ofs2 & 3))) {

ofs = ofs2;

ref = ir->op1;

- } else if (ofs == 0) {

+ } else if (ofs == 0 && !(!LJ_SOFTFP && (ai & 0x08000000))) {

IRRef lref = ir->op1, rref = ir->op2;

Reg rn, rm;

if ((ai & 0x04000000)) {

@@ -238,7 +267,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,

emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm);

return;

}

- } else if (ir->o == IR_STRREF) {

+ } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {

lua_assert(ofs == 0);

ofs = (int32_t)sizeof(GCstr);

if (irref_isk(ir->op2)) {

@@ -268,12 +297,41 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,

}

base = ra_alloc1(as, ref, allow);

+#if !LJ_SOFTFP

+ if ((ai & 0x08000000))

+ emit_vlso(as, ai, rd, base, ofs);

+ else

+#endif

if ((ai & 0x04000000))

emit_lso(as, ai, rd, base, ofs);

else

emit_lsox(as, ai, rd, base, ofs);

}

+#if !LJ_SOFTFP

+/* Fuse to multiply-add/sub instruction. Returns 1 if a fused instruction was emitted, 0 otherwise. */

+static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)

+ IRRef lref = ir->op1, rref = ir->op2;

+ IRIns *irm;

+ if (lref != rref &&

+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && /* Case 1: left operand is a fusable MUL ... */

+ ra_noreg(irm->r)) || /* ... that has no register assigned. */

+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && /* Case 2: right operand is a fusable MUL. */

+ (rref = lref, ai = air, ra_noreg(irm->r))))) { /* Then the addend becomes lref and 'air' replaces 'ai'. */

+ Reg dest = ra_dest(as, ir, RSET_FPR);

+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); /* Addend, hinted towards dest. */

+ Reg right, left = ra_alloc2(as, irm,

+ rset_exclude(rset_exclude(RSET_FPR, dest), add)); /* MUL operands must avoid dest and add. */

+ right = (left >> 8); left &= 255; /* Unpack the register pair returned by ra_alloc2. */

+ emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15));

+ if (dest != add) emit_dm(as, ARMI_VMOV_D, (dest & 15), (add & 15)); /* NOTE(review): presumably copies the addend into dest for the accumulate -- confirm emit order. */

+ return 1;

+ }

+ return 0;

+#endif

/* -- Calls --------------------------------------------------------------- */

/* Generate a call to a C function. */

@@ -282,21 +340,69 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)

uint32_t n, nargs = CCI_NARGS(ci);

int32_t ofs = 0;

Reg gpr = REGARG_FIRSTGPR;

+#if !LJ_SOFTFP

+ Reg fpr = REGARG_FIRSTFPR, fprodd = 0;

+#endif

if ((void *)ci->func)

emit_call(as, (void *)ci->func);

for (n = 0; n < nargs; n++) { /* Setup args. */

IRRef ref = args[n];

IRIns *ir = IR(ref);

- if (gpr <= REGARG_LASTGPR) {

- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */

- if (ref) ra_leftov(as, gpr, ref);

- gpr++;

- } else {

- if (ref) {

- Reg r = ra_alloc1(as, ref, RSET_GPR);

- emit_spstore(as, ir, r, ofs);

+#if !LJ_SOFTFP

+ if (irt_isfp(ir->t)) {

+ Reg src;

+ if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {

+ if (irt_isnum(ir->t)) {

+ if (fpr <= REGARG_LASTFPR) {

+ ra_leftov(as, fpr, ref);

+ fpr++;

+ continue;

+ }

+ } else if (fprodd) { /* Ick. */

+ src = ra_alloc1(as, ref, RSET_FPR);

+ emit_dm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00400000);

+ fprodd = 0;

+ continue;

+ } else if (fpr <= REGARG_LASTFPR) {

+ ra_leftov(as, fpr, ref);

+ fprodd = fpr++;

+ continue;

+ }

+ src = ra_alloc1(as, ref, RSET_FPR);

+ fprodd = 0;

+ goto stackfp;

+ }

+ src = ra_alloc1(as, ref, RSET_FPR);

+ if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;

+ if (gpr <= REGARG_LASTGPR) {

+ lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */

+ if (irt_isnum(ir->t)) {

+ emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));

+ gpr += 2;

+ } else {

+ emit_dn(as, ARMI_VMOV_R_S, gpr, (src & 15));

+ gpr++;

+ }

+ } else {

+ stackfp:

+ if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;

+ emit_spstore(as, ir, src, ofs);

+ ofs += irt_isnum(ir->t) ? 8 : 4;

+ }

+ } else

+#endif

+ {

+ if (gpr <= REGARG_LASTGPR) {

+ lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */

+ if (ref) ra_leftov(as, gpr, ref);

+ gpr++;

+ } else {

+ if (ref) {

+ Reg r = ra_alloc1(as, ref, RSET_GPR);

+ emit_spstore(as, ir, r, ofs);

+ }

+ ofs += 4;

}

- ofs += 4;

}

@@ -313,10 +419,21 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)

ra_evictset(as, drop); /* Evictions must be performed first. */

if (ra_used(ir)) {

lua_assert(!irt_ispri(ir->t));

- if (hiop)

+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {

+ if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {

+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);

+ if (irt_isnum(ir->t))

+ emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest);

+ else

+ emit_dn(as, ARMI_VMOV_S_R, RID_RET, dest);

+ } else {

+ ra_destreg(as, ir, RID_FPRET);

+ }

+ } else if (hiop) {

ra_destpair(as, ir);

- else

+ } else {

ra_destreg(as, ir, RID_RET);

+ }

}

UNUSED(ci);

}

@@ -373,33 +490,125 @@ static void asm_retf(ASMState *as, IRIns *ir)

/* -- Type conversions ---------------------------------------------------- */

-static void asm_conv(ASMState *as, IRIns *ir)

+#if !LJ_SOFTFP

+static void asm_tointg(ASMState *as, IRIns *ir, Reg left) /* Guarded conversion of the FP value in 'left' to an integer result for 'ir'. */

+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); /* FP scratch register distinct from left. */

+ Reg dest = ra_dest(as, ir, RSET_GPR);

+ asm_guardcc(as, CC_NE); /* NOTE(review): presumably exits when the compare below is not equal -- confirm. */

+ emit_d(as, ARMI_VMRS, 0);

+ emit_dm(as, ARMI_VCMP_D, (tmp & 15), (left & 15)); /* Compares tmp against the original value in left. */

+ emit_dm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15)); /* Converts tmp in place. */

+ emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); /* Moves tmp to the GPR dest. */

+ emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15)); /* Converts left into tmp. */

+static void asm_tobit(ASMState *as, IRIns *ir) /* Emits a VADD_D of both operands into an FP scratch and moves it to a GPR. */

{

+ RegSet allow = RSET_FPR;

Reg dest = ra_dest(as, ir, RSET_GPR);

+ Reg left = ra_alloc1(as, ir->op1, allow);

+ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); /* rset_clear() also removes left from 'allow'. */

+ Reg tmp = ra_scratch(as, rset_clear(allow, right)); /* Scratch differs from both left and right. */

+ emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));

+ emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));

+#endif

+static void asm_conv(ASMState *as, IRIns *ir)

IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);

- /* FP conversions and 64 bit integer conversions are handled by SPLIT. */

- lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));

+#if !LJ_SOFTFP

+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);

+#endif

+ IRRef lref = ir->op1;

+ /* 64 bit integer conversions are handled by SPLIT. */

lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64));

- if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */

- Reg left = ra_alloc1(as, ir->op1, RSET_GPR);

- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));

- if ((as->flags & JIT_F_ARMV6)) {

- ARMIns ai = st == IRT_I8 ? ARMI_SXTB :

- st == IRT_U8 ? ARMI_UXTB :

- st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH;

- emit_dm(as, ai, dest, left);

- } else if (st == IRT_U8) {

- emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left);

+#if LJ_SOFTFP

+ /* FP conversions are handled by SPLIT. */

+ lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));

+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */

+#else

+ lua_assert(irt_type(ir->t) != st);

+ if (irt_isfp(ir->t)) {

+ Reg dest = ra_dest(as, ir, RSET_FPR);

+ if (stfp) { /* FP to FP conversion. */

+ emit_dm(as, st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32,

+ (dest & 15), (ra_alloc1(as, lref, RSET_FPR) & 15));

+ } else { /* Integer to FP conversion. */

+ Reg left = ra_alloc1(as, lref, RSET_GPR);

+ ARMIns ai = irt_isfloat(ir->t) ?

+ (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) :

+ (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32);

+ emit_dm(as, ai, (dest & 15), (dest & 15));

+ emit_dn(as, ARMI_VMOV_S_R, left, (dest & 15));

+ }

+ } else if (stfp) { /* FP to integer conversion. */

+ if (irt_isguard(ir->t)) {

+ /* Checked conversions are only supported from number to int. */

+ lua_assert(irt_isint(ir->t) && st == IRT_NUM);

+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));

} else {

- uint32_t shift = st == IRT_I8 ? 24 : 16;

- ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR;

- emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP);

- emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left);

+ Reg dest = ra_dest(as, ir, RSET_GPR);

+ Reg left = ra_alloc1(as, lref, RSET_FPR);

+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));

+ ARMIns ai;

+ emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));

+ ai = irt_isint(ir->t) ?

+ (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) :

+ (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32);

+ emit_dm(as, ai, (tmp & 15), (left & 15));

}

- } else { /* Handle 32/32 bit no-op (cast). */

- ra_leftov(as, dest, ir->op1); /* Do nothing, but may need to move regs. */

+ } else

+#endif

+ {

+ Reg dest = ra_dest(as, ir, RSET_GPR);

+ if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */

+ Reg left = ra_alloc1(as, lref, RSET_GPR);

+ lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));

+ if ((as->flags & JIT_F_ARMV6)) {

+ ARMIns ai = st == IRT_I8 ? ARMI_SXTB :

+ st == IRT_U8 ? ARMI_UXTB :

+ st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH;

+ emit_dm(as, ai, dest, left);

+ } else if (st == IRT_U8) {

+ emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left);

+ } else {

+ uint32_t shift = st == IRT_I8 ? 24 : 16;

+ ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR;

+ emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP);

+ emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left);

+ }

+ } else { /* Handle 32/32 bit no-op (cast). */

+ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */

+ }

+#if !LJ_SOFTFP && LJ_HASFFI

+static void asm_conv64(ASMState *as, IRIns *ir) /* Conversion implemented as a call to one of the fp64 helper functions. */

+ IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); /* Source type, read from the preceding instruction. */

+ IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); /* Destination type, read from the preceding instruction. */

+ IRCallID id;

+ CCallInfo ci;

+ IRRef args[2];

+ args[0] = (ir-1)->op1;

+ args[1] = ir->op1;

+ if (st == IRT_NUM || st == IRT_FLOAT) { /* FP source. */

+ id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); /* Call id is an offset from IRCALL_fp64_d2l. */

+ ir--; /* The result is then set up for the preceding instruction. */

+ } else {

+ id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); /* Call id is an offset from IRCALL_fp64_l2d. */

}

+ ci = lj_ir_callinfo[id]; /* Local copy, so the flags can be changed below. */

+#if !LJ_ABI_SOFTFP

+ ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */

+#endif

+ asm_setupresult(as, ir, &ci);

+ asm_gencall(as, &ci, args);

}

+#endif

static void asm_strto(ASMState *as, IRIns *ir)

{

@@ -409,6 +618,7 @@ static void asm_strto(ASMState *as, IRIns *ir)

int destused = ra_used(ir);

int32_t ofs = 0;

ra_evictset(as, RSET_SCRATCH);

+#if LJ_SOFTFP

if (destused) {

if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&

(ir->s & 1) == 0 && ir->s + 1 == (ir+1)->s) {

@@ -433,6 +643,25 @@ static void asm_strto(ASMState *as, IRIns *ir)

emit_lso(as, ARMI_LDR, rhi, RID_SP, 4);

emit_lso(as, ARMI_LDR, rlo, RID_SP, 0);

}

+#else

+ UNUSED(rhi);

+ if (destused) {

+ if (ra_hasspill(ir->s)) {

+ ofs = sps_scale(ir->s);

+ destused = 0;

+ if (ra_hasreg(ir->r)) {

+ ra_free(as, ir->r);

+ ra_modified(as, ir->r);

+ emit_spload(as, ir, ir->r, ofs);

+ }

+ } else {

+ rlo = ra_dest(as, ir, RSET_FPR);

+ }

+ asm_guardcc(as, CC_EQ);

+ if (destused)

+ emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0);

+#endif

emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); /* Test return status. */

args[0] = ir->op1; /* GCstr *str */

args[1] = ASMREF_TMP1; /* TValue *n */

@@ -448,9 +677,18 @@ static void asm_strto(ASMState *as, IRIns *ir)

static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)

{

IRIns *ir = IR(ref);

- if (irt_isnum(ir->t)) { /* Use the number constant itself as a TValue. */

- lua_assert(irref_isk(ref));

- ra_allockreg(as, i32ptr(ir_knum(ir)), dest);

+ if (irt_isnum(ir->t)) {

+ if (irref_isk(ref)) {

+ /* Use the number constant itself as a TValue. */

+ ra_allockreg(as, i32ptr(ir_knum(ir)), dest);

+ } else {

+#if LJ_SOFTFP

+ lua_assert(0);

+#else

+ /* Otherwise force a spill and use the spill slot. */

+ emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);

+#endif

+ }

} else {

/* Otherwise use [sp] and [sp+4] to hold the TValue. */

RegSet allow = rset_exclude(RSET_GPR, dest);

@@ -532,6 +770,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)

MCLabel l_end, l_loop;

rset_clear(allow, tab);

if (!irref_isk(refkey) || irt_isstr(kt)) {

+#if LJ_SOFTFP

key = ra_alloc1(as, refkey, allow);

rset_clear(allow, key);

if (irkey[1].o == IR_HIOP) {

@@ -545,6 +784,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)

rset_clear(allow, keynumhi);

khi = 0;

}

+#else

+ if (irt_isnum(kt)) {

+ key = ra_scratch(as, allow);

+ rset_clear(allow, key);

+ keyhi = keynumhi = ra_scratch(as, allow);

+ rset_clear(allow, keyhi);

+ khi = 0;

+ } else {

+ key = ra_alloc1(as, refkey, allow);

+ rset_clear(allow, key);

+ }

+#endif

} else if (irt_isnum(kt)) {

int32_t val = (int32_t)ir_knum(irkey)->u32.lo;

k = emit_isk12(ARMI_CMP, val);

@@ -630,6 +881,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)

emit_dnm(as, ARMI_EOR, tmp+1, tmp, key);

emit_dnm(as, ARMI_ORR|ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */

emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi);

+#if !LJ_SOFTFP

+ emit_dnm(as, ARMI_VMOV_RR_D, key, keynumhi,

+ (ra_alloc1(as, refkey, RSET_FPR) & 15));

+#endif

} else {

emit_dnm(as, ARMI_EOR, tmp+1, tmp, key);

emit_opk(as, ARMI_ADD, tmp, key, (int32_t)HASH_BIAS,

@@ -775,8 +1030,8 @@ static ARMIns asm_fxloadins(IRIns *ir)

case IRT_U8: return ARMI_LDRB;

case IRT_I16: return ARMI_LDRSH;

case IRT_U16: return ARMI_LDRH;

- case IRT_NUM: lua_assert(0);

- case IRT_FLOAT:

+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D;

+ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S;

default: return ARMI_LDR;

}

@@ -786,8 +1041,8 @@ static ARMIns asm_fxstoreins(IRIns *ir)

switch (irt_type(ir->t)) {

case IRT_I8: case IRT_U8: return ARMI_STRB;

case IRT_I16: case IRT_U16: return ARMI_STRH;

- case IRT_NUM: lua_assert(0);

- case IRT_FLOAT:

+ case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D;

+ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S;

default: return ARMI_STR;

}

@@ -829,7 +1084,8 @@ static void asm_fstore(ASMState *as, IRIns *ir)

static void asm_xload(ASMState *as, IRIns *ir)

{

- Reg dest = ra_dest(as, ir, RSET_GPR);

+ Reg dest = ra_dest(as, ir,

+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);

lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));

asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);

}

@@ -837,7 +1093,8 @@ static void asm_xload(ASMState *as, IRIns *ir)

static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)

{

if (ir->r != RID_SINK) {

- Reg src = ra_alloc1(as, ir->op2, RSET_GPR);

+ Reg src = ra_alloc1(as, ir->op2,

+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);

asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,

rset_exclude(RSET_GPR, src), ofs);

}

@@ -845,7 +1102,7 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)

static void asm_ahuvload(ASMState *as, IRIns *ir)

{

- int hiop = ((ir+1)->o == IR_HIOP);

+ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);

IRType t = hiop ? IRT_NUM : irt_type(ir->t);

Reg dest = RID_NONE, type = RID_NONE, idx;

RegSet allow = RSET_GPR;

@@ -855,11 +1112,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)

rset_clear(allow, type);

}

if (ra_used(ir)) {

- lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));

- dest = ra_dest(as, ir, allow);

+ lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||

+ irt_isint(ir->t) || irt_isaddr(ir->t));

+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);

rset_clear(allow, dest);

}

- idx = asm_fuseahuref(as, ir->op1, &ofs, allow);

+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow,

+ (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);

if (!hiop || type == RID_NONE) {

rset_clear(allow, idx);

if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&

@@ -872,7 +1131,14 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)

}

asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);

emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);

- if (ra_hasreg(dest)) emit_lso(as, ARMI_LDR, dest, idx, ofs);

+ if (ra_hasreg(dest)) {

+#if !LJ_SOFTFP

+ if (t == IRT_NUM)

+ emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs);

+ else

+#endif

+ emit_lso(as, ARMI_LDR, dest, idx, ofs);

+ }

emit_lso(as, ARMI_LDR, type, idx, ofs+4);

}

@@ -882,44 +1148,80 @@ static void asm_ahustore(ASMState *as, IRIns *ir)

RegSet allow = RSET_GPR;

Reg idx, src = RID_NONE, type = RID_NONE;

int32_t ofs = 0;

- int hiop = ((ir+1)->o == IR_HIOP);

- if (!irt_ispri(ir->t)) {

- src = ra_alloc1(as, ir->op2, allow);

- rset_clear(allow, src);

+#if !LJ_SOFTFP

+ if (irt_isnum(ir->t)) {

+ src = ra_alloc1(as, ir->op2, RSET_FPR);

+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024);

+ emit_vlso(as, ARMI_VSTR_D, src, idx, ofs);

+ } else

+#endif

+ {

+ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);

+ if (!irt_ispri(ir->t)) {

+ src = ra_alloc1(as, ir->op2, allow);

+ rset_clear(allow, src);

+ }

+ if (hiop)

+ type = ra_alloc1(as, (ir+1)->op2, allow);

+ else

+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);

+ idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096);

+ if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);

+ emit_lso(as, ARMI_STR, type, idx, ofs+4);

}

- if (hiop)

- type = ra_alloc1(as, (ir+1)->op2, allow);

- else

- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);

- idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type));

- if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);

- emit_lso(as, ARMI_STR, type, idx, ofs+4);

}

static void asm_sload(ASMState *as, IRIns *ir)

{

int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);

- int hiop = ((ir+1)->o == IR_HIOP);

+ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);

IRType t = hiop ? IRT_NUM : irt_type(ir->t);

Reg dest = RID_NONE, type = RID_NONE, base;

RegSet allow = RSET_GPR;

lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */

lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));

+#if LJ_SOFTFP

lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */

if (hiop && ra_used(ir+1)) {

type = ra_dest(as, ir+1, allow);

rset_clear(allow, type);

}

+#else

+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) {

+ dest = ra_scratch(as, RSET_FPR);

+ asm_tointg(as, ir, dest);

+ t = IRT_NUM; /* Continue with a regular number type check. */

+ } else

+#endif

if (ra_used(ir)) {

- lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));

- dest = ra_dest(as, ir, allow);

+ lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||

+ irt_isint(ir->t) || irt_isaddr(ir->t));

+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);

rset_clear(allow, dest);

+ base = ra_alloc1(as, REF_BASE, allow);

+ if ((ir->op2 & IRSLOAD_CONVERT)) {

+ if (t == IRT_INT) {

+ Reg tmp = ra_scratch(as, RSET_FPR);

+ emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));

+ emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15));

+ dest = tmp;

+ t = IRT_NUM; /* Check for original type. */

+ } else {

+ Reg tmp = ra_scratch(as, RSET_GPR);

+ emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15));

+ emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15));

+ dest = tmp;

+ t = IRT_INT; /* Check for original type. */

+ }

+ goto dotypecheck;

}

base = ra_alloc1(as, REF_BASE, allow);

+dotypecheck:

+ rset_clear(allow, base);

if ((ir->op2 & IRSLOAD_TYPECHECK)) {

if (ra_noreg(type)) {

- rset_clear(allow, base);

if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&

rset_test((as->freeset & allow), dest+1)) {

type = dest+1;

@@ -931,7 +1233,21 @@ static void asm_sload(ASMState *as, IRIns *ir)

asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);

emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);

}

- if (ra_hasreg(dest)) emit_lso(as, ARMI_LDR, dest, base, ofs);

+ if (ra_hasreg(dest)) {

+#if !LJ_SOFTFP

+ if (t == IRT_NUM) {

+ if (ofs < 1024) {

+ emit_vlso(as, ARMI_VLDR_D, dest, base, ofs);

+ } else {

+ if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4);

+ emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0);

+ emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow);

+ return;

+ }

+ } else

+#endif

+ emit_lso(as, ARMI_LDR, dest, base, ofs);

+ }

if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4);

}

@@ -1045,6 +1361,42 @@ static void asm_obar(ASMState *as, IRIns *ir)

/* -- Arithmetic and logic operations ------------------------------------- */

+#if !LJ_SOFTFP

+static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai) /* Emit the two-source FP instruction 'ai' for 'ir'. */

+ Reg dest = ra_dest(as, ir, RSET_FPR);

+ Reg right, left = ra_alloc2(as, ir, RSET_FPR); /* Both sources come back packed in one value. */

+ right = (left >> 8); left &= 255; /* Unpack: right from bits 8 and up, left from the low 8 bits. */

+ emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15));

+static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) /* Emit the one-source FP instruction 'ai' for 'ir'. */

+ Reg dest = ra_dest(as, ir, RSET_FPR);

+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); /* Source operand, hinted towards dest. */

+ emit_dm(as, ai, (dest & 15), (left & 15));

+static int asm_fpjoin_pow(ASMState *as, IRIns *ir) /* Returns 1 if the FPMATH LOG2 + MUL chain ending in 'ir' was replaced by one pow call, else 0. */

+ IRIns *irp = IR(ir->op1);

+ if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { /* Operand must be the directly preceding, otherwise unused MUL. */

+ IRIns *irpp = IR(irp->op1);

+ if (irpp == ir-2 && irpp->o == IR_FPMATH && /* Its left operand must be the FPMATH two instructions back ... */

+ irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { /* ... of kind LOG2 and otherwise unused. */

+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];

+ IRRef args[2];

+ args[0] = irpp->op1; /* Operand of the LOG2. */

+ args[1] = irp->op2; /* Right operand of the MUL. */

+ asm_setupresult(as, ir, ci);

+ asm_gencall(as, ci, args);

+ return 1;

+ }

+ return 0;

+#endif

static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)

{

IRIns *ir;

@@ -1082,6 +1434,16 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)

emit_dn(as, ai^m, dest, left);

}

+static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) /* asm_intop() wrapper that adds ARMI_S when as->flagmcp == as->mcp. */

+ if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */

+ as->flagmcp = NULL;

+ as->mcp++; /* Advances mcp by one instruction. */

+ ai |= ARMI_S; /* NOTE(review): presumably the flag-setting form replaces the dropped compare -- confirm. */

+ }

+ asm_intop(as, ir, ai);

static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)

{

if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */

@@ -1108,16 +1470,6 @@ static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)

}

-static void asm_arithop(ASMState *as, IRIns *ir, ARMIns ai)

- if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */

- as->flagmcp = NULL;

- as->mcp++;

- ai |= ARMI_S;

- }

- asm_intop(as, ir, ai);

static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)

{

Reg dest = ra_dest(as, ir, RSET_GPR);

@@ -1148,9 +1500,55 @@ static void asm_intmul(ASMState *as, IRIns *ir)

if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right);

}

-static void asm_intmod(ASMState *as, IRIns *ir)

+static void asm_add(ASMState *as, IRIns *ir) /* Handler used for IR_ADD/IR_ADDOV: FP add for number-typed results when FP support is compiled in, integer ADD otherwise. */

+#if !LJ_SOFTFP /* The FP path only exists when not building for soft-float. */

+ if (irt_isnum(ir->t)) {

+ if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D)) /* asm_fusemadd is defined outside this excerpt; it is offered VMLA_D for both of its instruction arguments. */

+ asm_fparith(as, ir, ARMI_VADD_D); /* A zero return falls back to a plain VADD_D. */

+ return;

+ }

+#endif

+ asm_intop_s(as, ir, ARMI_ADD); /* Integer case. */

+static void asm_sub(ASMState *as, IRIns *ir) /* Handler used for IR_SUB/IR_SUBOV: FP subtract for number-typed results when FP support is compiled in, integer SUB otherwise. */

+#if !LJ_SOFTFP /* The FP path only exists when not building for soft-float. */

+ if (irt_isnum(ir->t)) {

+ if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D)) /* asm_fusemadd is defined outside this excerpt; it is offered VNMLS_D and VMLS_D, in that argument order. */

+ asm_fparith(as, ir, ARMI_VSUB_D); /* A zero return falls back to a plain VSUB_D. */

+ return;

+ }

+#endif

+ asm_intop_s(as, ir, ARMI_SUB); /* Integer case. */

+static void asm_mul(ASMState *as, IRIns *ir) /* Handler used for IR_MUL/IR_MULOV: VMUL_D for number-typed results when FP support is compiled in, asm_intmul otherwise. */

+#if !LJ_SOFTFP /* The FP path only exists when not building for soft-float. */

+ if (irt_isnum(ir->t)) {

+ asm_fparith(as, ir, ARMI_VMUL_D);

+ return;

+ }

+#endif

+ asm_intmul(as, ir); /* Integer case. */

+static void asm_neg(ASMState *as, IRIns *ir) /* Handler used for IR_NEG: VNEG_D for number-typed results when FP support is compiled in, integer negate via RSB otherwise. */

+#if !LJ_SOFTFP /* The FP path only exists when not building for soft-float. */

+ if (irt_isnum(ir->t)) {

+ asm_fpunary(as, ir, ARMI_VNEG_D);

+ return;

+ }

+#endif

+ asm_intneg(as, ir, ARMI_RSB); /* Integer case. */

+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)

{

- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];

+ const CCallInfo *ci = &lj_ir_callinfo[id];

IRRef args[2];

args[0] = ir->op1;

args[1] = ir->op2;

@@ -1158,6 +1556,21 @@ static void asm_intmod(ASMState *as, IRIns *ir)

asm_gencall(as, ci, args);

}

+#if !LJ_SOFTFP

+static void asm_callround(ASMState *as, IRIns *ir, int id) /* Emit a call to one of the lj_vm_{floor,ceil,trunc}_hf helpers, selected by `id`. */

+ /* The modified regs must match with the *.dasc implementation. */

+ RegSet drop = RID2RSET(RID_D1)|RID2RSET(RID_D2)| /* Registers handed to ra_evictset below: D1, D2, ... */

+ RID2RSET(RID_R0)|RID2RSET(RID_R1); /* ... R0 and R1. */

+ ra_evictset(as, drop);

+ ra_destreg(as, ir, RID_FPRET); /* The result of ir is tied to RID_FPRET. */

+ emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_hf : /* IRFPM_FLOOR selects the floor helper, */

+ id == IRFPM_CEIL ? (void *)lj_vm_ceil_hf : /* IRFPM_CEIL the ceil helper, */

+ (void *)lj_vm_trunc_hf); /* and any other id the trunc helper. */

+ ra_leftov(as, RID_D0, ir->op1); /* Presumably places the argument op1 in D0 for the helper -- confirm against ra_leftov. */

+#endif

static void asm_bitswap(ASMState *as, IRIns *ir)

{

Reg dest = ra_dest(as, ir, RSET_GPR);

@@ -1216,7 +1629,8 @@ static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)

emit_nm(as, ARMI_CMP^kcmp, left, right);

}

-static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc)

+#if LJ_SOFTFP

+static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc)

{

const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];

RegSet drop = RSET_SCRATCH;

@@ -1239,6 +1653,30 @@ static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc)

for (r = RID_R0; r <= RID_R3; r++)

ra_leftov(as, r, args[r-RID_R0]);

}

+#else

+static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) /* FP min/max of the two operands of `ir` using VCMP_D, VMRS and up to two conditional VMOV_D; `cc` is the condition attached to the move of the right operand. */

+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); /* Destination register id, already reduced to 4 bits. */

+ Reg right, left = ra_alloc2(as, ir, RSET_FPR); /* Both operand registers, packed into one value. */

+ right = ((left >> 8) & 15); left &= 15; /* Unpack and reduce both ids to 4 bits. */

+ if (dest != left) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc^1), dest, left); /* Move of left under cc^1 (presumably the inverse condition); omitted when dest already is left. */

+ if (dest != right) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc), dest, right); /* Move of right under cc; omitted when dest already is right. */

+ emit_d(as, ARMI_VMRS, 0); /* NOTE(review): presumably transfers the FP compare flags for the conditional moves; emit_vlso in this patch writes code with *--as->mcp, so later emits likely execute earlier -- confirm. */

+ emit_dm(as, ARMI_VCMP_D, left, right); /* Compare of the two operands. */

+#endif

+static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) /* Dispatch min/max: asm_fpmin_max with condition fcc for number-typed results when FP support is compiled in, asm_intmin_max with cc otherwise. */

+#if LJ_SOFTFP

+ UNUSED(fcc); /* fcc has no consumer in soft-float builds. */

+#else

+ if (irt_isnum(ir->t))

+ asm_fpmin_max(as, ir, fcc);

+ else /* The statement after #endif becomes the else arm in this configuration. */

+#endif

+ asm_intmin_max(as, ir, cc);

/* -- Comparisons --------------------------------------------------------- */

@@ -1258,8 +1696,9 @@ static const uint8_t asm_compmap[IR_ABC+1] = {

/* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */

};

+#if LJ_SOFTFP

/* FP comparisons. */

-static void asm_fpcomp(ASMState *as, IRIns *ir)

+static void asm_sfpcomp(ASMState *as, IRIns *ir)

{

const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];

RegSet drop = RSET_SCRATCH;

@@ -1278,6 +1717,31 @@ static void asm_fpcomp(ASMState *as, IRIns *ir)

for (r = RID_R0; r <= RID_R3; r++)

ra_leftov(as, r, args[r-RID_R0]);

}

+#else

+/* FP comparisons. */

+static void asm_fpcomp(ASMState *as, IRIns *ir) /* Emit a guarded FP comparison for `ir` using VCMP_D (or VCMPZ_D against zero) followed by VMRS. */

+ Reg left, right;

+ ARMIns ai;

+ int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); /* Computed from bits 0, 2 and 3 of the opcode; when set, the two operands are exchanged below. */

+ if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { /* No swap, and op2 is a constant whose 64 bit pattern is all zero. */

+ left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15); /* Only op1 needs a register in this form. */

+ right = 0;

+ ai = ARMI_VCMPZ_D; /* Compare-with-zero variant. */

+ } else {

+ left = ra_alloc2(as, ir, RSET_FPR); /* Both operand registers, packed into one value. */

+ if (swp) {

+ right = (left & 15); left = ((left >> 8) & 15); /* Swapped unpack: low byte becomes right, next byte becomes left. */

+ } else {

+ right = ((left >> 8) & 15); left &= 15; /* Regular unpack. */

+ }

+ ai = ARMI_VCMP_D;

+ }

+ asm_guardcc(as, (asm_compmap[ir->o] >> 4)); /* Guard condition is taken from the high nibble of this opcode's asm_compmap entry. */

+ emit_d(as, ARMI_VMRS, 0);

+ emit_dm(as, ai, left, right);

+#endif

/* Integer comparisons. */

static void asm_intcomp(ASMState *as, IRIns *ir)

@@ -1370,21 +1834,32 @@ static void asm_int64comp(ASMState *as, IRIns *ir)

/* Hiword op of a split 64 bit op. Previous op must be the loword op. */

static void asm_hiop(ASMState *as, IRIns *ir)

{

+#if LJ_HASFFI || LJ_SOFTFP

/* HIOP is marked as a store because it needs its own DCE logic. */

int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */

if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;

if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */

as->curins--; /* Always skip the loword comparison. */

- if (irt_isint(ir->t))

- asm_int64comp(as, ir-1);

+#if LJ_SOFTFP

+ if (!irt_isint(ir->t))

+ asm_sfpcomp(as, ir-1);

else

- asm_fpcomp(as, ir-1);

+#endif

+ asm_int64comp(as, ir-1);

return;

+#if LJ_SOFTFP

} else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {

as->curins--; /* Always skip the loword min/max. */

if (uselo || usehi)

- asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);

+ asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);

+ return;

+#elif LJ_HASFFI

+ } else if ((ir-1)->o == IR_CONV) {

+ as->curins--; /* Always skip the CONV. */

+ if (usehi || uselo)

+ asm_conv64(as, ir);

return;

+#endif

} else if ((ir-1)->o == IR_XSTORE) {

if ((ir-1)->r != RID_SINK)

asm_xstore(as, ir, 4);

@@ -1409,23 +1884,30 @@ static void asm_hiop(ASMState *as, IRIns *ir)

asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);

break;

#endif

+#if LJ_SOFTFP

case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:

case IR_STRTO:

if (!uselo)

ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */

break;

+#endif

case IR_CALLN:

case IR_CALLS:

case IR_CALLXS:

if (!uselo)

ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */

break;

- case IR_ASTORE: case IR_HSTORE: case IR_USTORE:

- case IR_TOSTR: case IR_CNEWI:

+#if LJ_SOFTFP

+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:

+#endif

+ case IR_CNEWI:

/* Nothing to do here. Handled by lo op itself. */

break;

default: lua_assert(0); break;

}

+#else

+ UNUSED(as); UNUSED(ir); lua_assert(0);

+#endif

}

/* -- Stack handling ------------------------------------------------------ */

@@ -1485,6 +1967,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)

if ((sn & SNAP_NORESTORE))

continue;

if (irt_isnum(ir->t)) {

+#if LJ_SOFTFP

RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);

Reg tmp;

lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */

@@ -1494,6 +1977,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)

if (rset_test(as->freeset, tmp+1)) odd = RID2RSET(tmp+1);

tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd);

emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4);

+#else

+ Reg src = ra_alloc1(as, ref, RSET_FPR);

+ emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);

+#endif

} else {

RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);

Reg type;

@@ -1506,8 +1993,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)

if ((sn & (SNAP_CONT|SNAP_FRAME))) {

if (s == 0) continue; /* Do not overwrite link to previous frame. */

type = ra_allock(as, (int32_t)(*flinks--), odd);

+#if LJ_SOFTFP

} else if ((sn & SNAP_SOFTFPNUM)) {

type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));

+#endif

} else {

type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);

}

@@ -1648,7 +2137,8 @@ static void asm_ir(ASMState *as, IRIns *ir)

/* Miscellaneous ops. */

case IR_LOOP: asm_loop(as); break;

case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;

- case IR_USE: ra_alloc1(as, ir->op1, RSET_GPR); break;

+ case IR_USE:

+ ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;

case IR_PHI: asm_phi(as, ir); break;

case IR_HIOP: asm_hiop(as, ir); break;

case IR_GCSTEP: asm_gcstep(as, ir); break;

@@ -1664,6 +2154,9 @@ static void asm_ir(ASMState *as, IRIns *ir)

case IR_LT: case IR_GE: case IR_LE: case IR_GT:

case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:

case IR_ABC:

+#if !LJ_SOFTFP

+ if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }

+#endif

asm_intcomp(as, ir);

break;

@@ -1684,20 +2177,38 @@ static void asm_ir(ASMState *as, IRIns *ir)

case IR_BROL: lua_assert(0); break;

/* Arithmetic ops. */

- case IR_ADD: case IR_ADDOV: asm_arithop(as, ir, ARMI_ADD); break;

- case IR_SUB: case IR_SUBOV: asm_arithop(as, ir, ARMI_SUB); break;

- case IR_MUL: case IR_MULOV: asm_intmul(as, ir); break;

- case IR_MOD: asm_intmod(as, ir); break;

- case IR_NEG: asm_intneg(as, ir, ARMI_RSB); break;

- case IR_MIN: asm_intmin_max(as, ir, CC_GT); break;

- case IR_MAX: asm_intmin_max(as, ir, CC_LT); break;

- case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:

- case IR_DIV: case IR_POW: case IR_ABS: case IR_TOBIT:

+ case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;

+ case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;

+ case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;

+ case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;

+ case IR_NEG: asm_neg(as, ir); break;

+#if LJ_SOFTFP

+ case IR_DIV: case IR_POW: case IR_ABS:

+ case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:

lua_assert(0); /* Unused for LJ_SOFTFP. */

break;

+#else

+ case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;

+ case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;

+ case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;

+ case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;

+ case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;

+ case IR_FPMATH:

+ if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))

+ break;

+ if (ir->op2 <= IRFPM_TRUNC)

+ asm_callround(as, ir, ir->op2);

+ else if (ir->op2 == IRFPM_SQRT)

+ asm_fpunary(as, ir, ARMI_VSQRT_D);

+ else

+ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);

+ break;

+ case IR_TOBIT: asm_tobit(as, ir); break;

+#endif

+ case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;

+ case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;

/* Memory references. */

case IR_AREF: asm_aref(as, ir); break;

@@ -1754,15 +2265,29 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)

{

IRRef args[CCI_NARGS_MAX];

uint32_t i, nargs = (int)CCI_NARGS(ci);

- int nslots = 0, ngpr = REGARG_NUMGPR;

+ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;

asm_collectargs(as, ir, ci, args);

- for (i = 0; i < nargs; i++)

- if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) {

- ngpr &= ~1;

- if (ngpr > 0) ngpr -= 2; else nslots += 2;

+ for (i = 0; i < nargs; i++) {

+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {

+ if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {

+ if (irt_isnum(IR(args[i])->t)) {

+ if (nfpr > 0) nfpr--;

+ else fprodd = 0, nslots = (nslots + 3) & ~1;

+ } else {

+ if (fprodd) fprodd--;

+ else if (nfpr > 0) fprodd = 1, nfpr--;

+ else nslots++;

+ }

+ } else if (irt_isnum(IR(args[i])->t)) {

+ ngpr &= ~1;

+ if (ngpr > 0) ngpr -= 2; else nslots += 2;

+ } else {

+ if (ngpr > 0) ngpr--; else nslots++;

+ }

} else {

if (ngpr > 0) ngpr--; else nslots++;

}

+ }

if (nslots > as->evenspill) /* Leave room for args in stack slots. */

as->evenspill = nslots;

return REGSP_HINT(RID_RET);

diff --git a/luajit2/src/lj_ccall.c b/luajit2/src/lj_ccall.c
index c3eb25f6..71331f39 100644
--- a/luajit2/src/lj_ccall.c
+++ b/luajit2/src/lj_ccall.c

@@ -168,6 +168,8 @@

#elif LJ_TARGET_ARM

/* -- ARM calling conventions --------------------------------------------- */

+#if LJ_ABI_SOFTFP

#define CCALL_HANDLE_STRUCTRET \

/* Return structs of size <= 4 in a GPR. */ \

cc->retref = !(sz <= 4); \

@@ -186,13 +188,70 @@

#define CCALL_HANDLE_COMPLEXARG \

/* Pass complex by value in 2 or 4 GPRs. */

-/* ARM has a softfp ABI. */

+#define CCALL_HANDLE_REGARG_FP1

+#define CCALL_HANDLE_REGARG_FP2

+#else

+#define CCALL_HANDLE_STRUCTRET \

+ cc->retref = !ccall_classify_struct(cts, ctr, ct); \

+ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;

+#define CCALL_HANDLE_STRUCTRET2 \

+ if (ccall_classify_struct(cts, ctr, ct) > 1) sp = (uint8_t *)&cc->fpr[0]; \

+ memcpy(dp, sp, ctr->size);

+#define CCALL_HANDLE_COMPLEXRET \

+ if (!(ct->info & CTF_VARARG)) cc->retref = 0; /* Return complex in FPRs. */

+#define CCALL_HANDLE_COMPLEXRET2 \

+ if (!(ct->info & CTF_VARARG)) memcpy(dp, &cc->fpr[0], ctr->size);

+#define CCALL_HANDLE_STRUCTARG \

+ isfp = (ccall_classify_struct(cts, d, ct) > 1);

+ /* Pass all structs by value in registers and/or on the stack. */

+#define CCALL_HANDLE_COMPLEXARG \

+ isfp = 1; /* Pass complex by value in FPRs or on stack. */

+#define CCALL_HANDLE_REGARG_FP1 /* Opens the FP-register argument path; ends inside an open else block that CCALL_HANDLE_REGARG_FP2 closes. Relies on isfp, ct, d, n, sz, nfpr, fprodd, dp and a `done` label at the expansion site. */ \

+ if (isfp && !(ct->info & CTF_VARARG)) { /* FP registers are only used for non-vararg callees. */ \

+ if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { /* Alignment above pointer alignment: occupy whole fpr[] elements. */ \

+ if (nfpr + (n >> 1) <= CCALL_NARG_FPR) { \

+ dp = &cc->fpr[nfpr]; \

+ nfpr += (n >> 1); /* n >> 1 elements are consumed. */ \

+ goto done; \

+ } \

+ } else { \

+ if (sz > 1 && fprodd != nfpr) fprodd = 0; \

+ if (fprodd) { /* A nonzero fprodd is one more than the index of an element whose f[1] half is used next. */ \

+ if (2*nfpr+n <= 2*CCALL_NARG_FPR+1) { \

+ dp = (void *)&cc->fpr[fprodd-1].f[1]; /* Start at the second float of that element. */ \

+ nfpr += (n >> 1); \

+ if ((n & 1)) fprodd = 0; else fprodd = nfpr-1; \

+ goto done; \

+ } \

+ } else { \

+ if (2*nfpr+n <= 2*CCALL_NARG_FPR) { \

+ dp = (void *)&cc->fpr[nfpr]; \

+ nfpr += (n >> 1); \

+ if ((n & 1)) fprodd = ++nfpr; else fprodd = 0; /* Odd n: one more element is taken and remembered in fprodd. */ \

+ goto done; \

+ } \

+ fprodd = 0; /* No reordering after the first FP value is on stack. */ \

+ } else {

+#define CCALL_HANDLE_REGARG_FP2 }

+#endif

#define CCALL_HANDLE_REGARG \

+ CCALL_HANDLE_REGARG_FP1 \

if ((d->info & CTF_ALIGN) > CTALIGN_PTR) { \

if (ngpr < maxgpr) \

ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \

- else \

- nsp = (nsp + 1u) & ~1u; /* Align argument on stack. */ \

} \

if (ngpr < maxgpr) { \

dp = &cc->gpr[ngpr]; \

@@ -204,7 +263,10 @@

ngpr += n; \

} \

goto done; \

- }

+ } CCALL_HANDLE_REGARG_FP2

+#define CCALL_HANDLE_RET \

+ if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];

#elif LJ_TARGET_PPC

/* -- PPC calling conventions --------------------------------------------- */

@@ -453,6 +515,49 @@ static void ccall_struct_ret(CCallState *cc, int *rcl, uint8_t *dp, CTSize sz)

}

#endif

+/* -- ARM hard-float ABI struct classification ---------------------------- */

+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP

+/* Classify a struct based on its fields. */

+static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) /* Returns r + (n << 8), with r being 4 or 8 and n at most 4, when every member is FP, complex or a nested aggregate classified the same way; otherwise returns 1 if the size is <= 4 and 0 if not. */

+ CTSize sz = ct->size; /* Saved now because ct is reassigned while walking the members. */

+ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); /* r: OR of element sizes, n: element count, isu: nonzero for a union. */

+ if ((ctf->info & CTF_VARARG)) goto noth; /* With a vararg function type the FP classification is never returned. */

+ while (ct->sib) { /* Walk the sibling chain. */

+ ct = ctype_get(cts, ct->sib);

+ if (ctype_isfield(ct->info)) {

+ CType *sct = ctype_rawchild(cts, ct);

+ if (ctype_isfp(sct->info)) {

+ r |= sct->size; /* Sizes are OR-ed, so a mix of 4 and 8 fails the r == 4 || r == 8 test below. */

+ if (!isu) n++; else if (n == 0) n = 1; /* Non-union: count every element; union: n is only raised to at least 1. */

+ } else if (ctype_iscomplex(sct->info)) {

+ r |= (sct->size >> 1); /* A complex contributes half its size as element size ... */

+ if (!isu) n += 2; else if (n < 2) n = 2; /* ... and counts as two elements. */

+ } else {

+ goto noth; /* Any other field type ends the classification. */

+ }

+ } else if (ctype_isbitfield(ct->info)) {

+ goto noth; /* A bitfield ends the classification as well. */

+ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {

+ CType *sct = ctype_child(cts, ct);

+ if (sct->size > 0) {

+ unsigned int s = ccall_classify_struct(cts, sct, ctf); /* Classify the nested type recursively. */

+ if (s <= 1) goto noth; /* Results 0 and 1 are the non-FP outcomes. */

+ r |= (s & 255); /* Low byte of the nested result: its element size. */

+ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); /* Upper bits: its element count, added (non-union) or used as a lower bound for n (union). */

+ }

+ if ((r == 4 || r == 8) && n <= 4) /* Accept one uniform element size and at most four elements. */

+ return r + (n << 8);

+noth: /* Not a homogeneous float/double aggregate. */

+ return (sz <= 4); /* Return structs of size <= 4 in a GPR. */

+#endif

/* -- Common C call handling ---------------------------------------------- */

/* Infer the destination CTypeID for a vararg argument. */

@@ -494,6 +599,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,

MSize maxgpr, ngpr = 0, nsp = 0, narg;

#if CCALL_NARG_FPR

MSize nfpr = 0;

+#if LJ_TARGET_ARM

+ MSize fprodd = 0;

+#endif

#endif

/* Clear unused regs to get some determinism in case of misdeclaration. */

diff --git a/luajit2/src/lj_ccall.h b/luajit2/src/lj_ccall.h
index 5985c4a9..26198a02 100644
--- a/luajit2/src/lj_ccall.h
+++ b/luajit2/src/lj_ccall.h

@@ -36,6 +36,7 @@

#endif

#define CCALL_SPS_FREE 1

+#define CCALL_ALIGN_CALLSTATE 16

typedef LJ_ALIGN(16) union FPRArg {

double d[2];

@@ -51,12 +52,21 @@ typedef intptr_t GPRArg;

#elif LJ_TARGET_ARM

#define CCALL_NARG_GPR 4

-#define CCALL_NARG_FPR 0

#define CCALL_NRET_GPR 2 /* For softfp double. */

+#if LJ_ABI_SOFTFP

+#define CCALL_NARG_FPR 0

#define CCALL_NRET_FPR 0

+#else

+#define CCALL_NARG_FPR 8

+#define CCALL_NRET_FPR 4

+#endif

#define CCALL_SPS_FREE 0

typedef intptr_t GPRArg;

+typedef union FPRArg {

+ double d;

+ float f[2];

+} FPRArg;

#elif LJ_TARGET_PPC

@@ -108,6 +118,9 @@ typedef union FPRArg {

#ifndef CCALL_ALIGN_STACKARG

#define CCALL_ALIGN_STACKARG 1

#endif

+#ifndef CCALL_ALIGN_CALLSTATE

+#define CCALL_ALIGN_CALLSTATE 8

+#endif

#define CCALL_NUM_GPR \

(CCALL_NARG_GPR > CCALL_NRET_GPR ? CCALL_NARG_GPR : CCALL_NRET_GPR)

@@ -122,7 +135,7 @@ LJ_STATIC_ASSERT(CCALL_NUM_FPR <= CCALL_MAX_FPR);

/* -- C call state -------------------------------------------------------- */

-typedef struct CCallState {

+typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {

void (*func)(void); /* Pointer to called function. */

uint32_t spadj; /* Stack pointer adjustment. */

uint8_t nsp; /* Number of stack slots. */

@@ -135,10 +148,10 @@ typedef struct CCallState {

#elif LJ_TARGET_PPC

uint8_t nfpr; /* Number of arguments in FPRs. */

#endif

-#if CCALL_NUM_FPR

#if LJ_32

int32_t align1;

#endif

+#if CCALL_NUM_FPR

FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */

#endif

GPRArg gpr[CCALL_NUM_GPR]; /* Arguments/results in GPRs. */

diff --git a/luajit2/src/lj_ccallback.c b/luajit2/src/lj_ccallback.c
index a9567bc5..430643ee 100644
--- a/luajit2/src/lj_ccallback.c
+++ b/luajit2/src/lj_ccallback.c

@@ -310,22 +310,53 @@ void lj_ccallback_mcode_free(CTState *cts)

#elif LJ_TARGET_ARM

+#if LJ_ABI_SOFTFP

+#define CALLBACK_HANDLE_REGARG_FP1 UNUSED(isfp);

+#define CALLBACK_HANDLE_REGARG_FP2

+#else

+#define CALLBACK_HANDLE_REGARG_FP1 /* Opens the FP-register path for callback arguments; ends inside an open else block that CALLBACK_HANDLE_REGARG_FP2 closes. Relies on isfp, n, nfpr, fprodd, sp and a `done` label at the expansion site. */ \

+ if (isfp) { \

+ if (n == 1) { /* Single-slot FP value. */ \

+ if (fprodd) { \

+ sp = &cts->cb.fpr[fprodd-1]; /* NOTE(review): fprodd was set to nfpr right after nfpr++, so this is the same element address the previous single-slot value received; CCALL_HANDLE_REGARG_FP1 uses .f[1] in the matching spot -- confirm which half is intended. */ \

+ fprodd = 0; \

+ goto done; \

+ } else if (nfpr + 1 <= CCALL_NARG_FPR) { \

+ sp = &cts->cb.fpr[nfpr++]; \

+ fprodd = nfpr; /* One more than the index of the element just handed out. */ \

+ goto done; \

+ } \

+ } else { /* Any other n takes one whole fpr[] element. */ \

+ if (nfpr + 1 <= CCALL_NARG_FPR) { \

+ sp = &cts->cb.fpr[nfpr++]; \

+ goto done; \

+ } \

+ fprodd = 0; /* No reordering after the first FP value is on stack. */ \

+ } else {

+#define CALLBACK_HANDLE_REGARG_FP2 }

+#endif

#define CALLBACK_HANDLE_REGARG \

- UNUSED(isfp); \

+ CALLBACK_HANDLE_REGARG_FP1 \

if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \

if (ngpr + n <= maxgpr) { \

sp = &cts->cb.gpr[ngpr]; \

ngpr += n; \

goto done; \

- }

+ } CALLBACK_HANDLE_REGARG_FP2

#elif LJ_TARGET_PPC

#define CALLBACK_HANDLE_REGARG \

if (isfp) { \

if (nfpr + 1 <= CCALL_NARG_FPR) { \

- sp = &cts->cb.fpr[nfpr]; \

- nfpr += 1; \

+ sp = &cts->cb.fpr[nfpr++]; \

cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \

goto done; \

} \

@@ -382,6 +413,9 @@ static void callback_conv_args(CTState *cts, lua_State *L)

MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;

#if CCALL_NARG_FPR

MSize nfpr = 0;

+#if LJ_TARGET_ARM

+ MSize fprodd = 0;

+#endif

#endif

if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {

diff --git a/luajit2/src/lj_crecord.c b/luajit2/src/lj_crecord.c
index 2a475035..30d315d5 100644
--- a/luajit2/src/lj_crecord.c
+++ b/luajit2/src/lj_crecord.c

@@ -422,6 +422,7 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)

if (ctype_isref(s->info)) {

svisnz = *(void **)svisnz;

s = ctype_rawchild(cts, s);

+ if (ctype_isenum(s->info)) s = ctype_child(cts, s);

t = crec_ct2irt(cts, s);

} else {

goto doconv;

@@ -431,6 +432,7 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)

lj_needsplit(J);

goto doconv;

} else if (t == IRT_INT || t == IRT_U32) {

+ if (ctype_isenum(s->info)) s = ctype_child(cts, s);

sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_INT);

goto doconv;

} else {

@@ -691,14 +693,15 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)

setintV(&tv, 0);

if (!gcref(df->name)) continue; /* Ignore unnamed fields. */

dc = ctype_rawchild(cts, df); /* Field type. */

- if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)))

+ if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) ||

+ ctype_isenum(dc->info)))

lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init aggregates. */

if (J->base[i]) {

sp = J->base[i];

sval = &rd->argv[i];

i++;

} else {

- sp = ctype_isnum(dc->info) ? lj_ir_kint(J, 0) : TREF_NIL;

+ sp = ctype_isptr(dc->info) ? TREF_NIL : lj_ir_kint(J, 0);

}

dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,

lj_ir_kintp(J, df->size + sizeof(GCcdata)));

@@ -1313,6 +1316,18 @@ void LJ_FASTCALL recff_ffi_fill(jit_State *J, RecordFFData *rd)

} /* else: interpreter will throw. */

}

+void LJ_FASTCALL recff_ffi_typeof(jit_State *J, RecordFFData *rd) /* Presumably the trace-recorder hook for ffi.typeof (inferred from the name); only a cdata first argument is recorded. */

+ if (tref_iscdata(J->base[0])) {

+ TRef trid = lj_ir_kint(J, argv2ctype(J, J->base[0], &rd->argv[0])); /* Integer constant holding the id returned by argv2ctype. */

+ J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), /* Slot 0 is replaced by an IR_CNEWI of type IRT_CDATA ... */

+ lj_ir_kint(J, CTID_CTYPEID), trid); /* ... whose operands are CTID_CTYPEID and the id constant. */

+ } else {

+ setfuncV(J->L, &J->errinfo, J->fn); /* Store the current function in J->errinfo for the error below. */

+ lj_trace_err_info(J, LJ_TRERR_NYIFFU); /* Any other argument raises LJ_TRERR_NYIFFU. */

+ }

void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd)

{

argv2ctype(J, J->base[0], &rd->argv[0]);

diff --git a/luajit2/src/lj_crecord.h b/luajit2/src/lj_crecord.h
index 0f93e145..c2a3758d 100644
--- a/luajit2/src/lj_crecord.h
+++ b/luajit2/src/lj_crecord.h

@@ -20,6 +20,7 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_errno(jit_State *J, RecordFFData *rd);

LJ_FUNC void LJ_FASTCALL recff_ffi_string(jit_State *J, RecordFFData *rd);

LJ_FUNC void LJ_FASTCALL recff_ffi_copy(jit_State *J, RecordFFData *rd);

LJ_FUNC void LJ_FASTCALL recff_ffi_fill(jit_State *J, RecordFFData *rd);

+LJ_FUNC void LJ_FASTCALL recff_ffi_typeof(jit_State *J, RecordFFData *rd);

LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);

LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);

LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);

@@ -33,6 +34,7 @@ LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);

#define recff_ffi_string recff_nyi

#define recff_ffi_copy recff_nyi

#define recff_ffi_fill recff_nyi

+#define recff_ffi_typeof recff_nyi

#define recff_ffi_istype recff_nyi

#define recff_ffi_abi recff_nyi

#endif

diff --git a/luajit2/src/lj_ctype.c b/luajit2/src/lj_ctype.c
index c9aace16..80594ab5 100644
--- a/luajit2/src/lj_ctype.c
+++ b/luajit2/src/lj_ctype.c

@@ -581,6 +581,7 @@ CTState *lj_ctype_init(lua_State *L)

CTInfo info = lj_ctype_typeinfo[id];

ct->size = (CTSize)((int32_t)(info << 16) >> 26);

ct->info = info & 0xffff03ffu;

+ ct->sib = 0;

if (ctype_type(info) == CT_KW || ctype_istypedef(info)) {

size_t len = strlen(name);

GCstr *str = lj_str_new(L, name, len);

@@ -589,6 +590,7 @@ CTState *lj_ctype_init(lua_State *L)

lj_ctype_addname(cts, ct, id);

} else {

setgcrefnull(ct->name);

+ ct->next = 0;

if (!ctype_isenum(info)) ctype_addtype(cts, ct, id);

}

diff --git a/luajit2/src/lj_ctype.h b/luajit2/src/lj_ctype.h
index 7953654f..7c3b667c 100644
--- a/luajit2/src/lj_ctype.h
+++ b/luajit2/src/lj_ctype.h

@@ -155,7 +155,7 @@ typedef struct CType {

#define CCALL_MAX_GPR 8

#define CCALL_MAX_FPR 8

-typedef LJ_ALIGN(8) union FPRCBArg { double d; float f; } FPRCBArg;

+typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg;

/* C callback state. Defined here, to avoid dragging in lj_ccall.h. */

diff --git a/luajit2/src/lj_emit_arm.h b/luajit2/src/lj_emit_arm.h
index 27de6852..79ca2db7 100644
--- a/luajit2/src/lj_emit_arm.h
+++ b/luajit2/src/lj_emit_arm.h

@@ -103,6 +103,15 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)

*--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs;

}

+#if !LJ_SOFTFP

+static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) /* Write one load/store instruction `ai` for register rd with base rn and byte offset ofs. */

+ lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); /* Offset must be a multiple of 4 in the range -1020..1020. */

+ if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; /* Only the magnitude is encoded; ARMI_LS_U is set for non-negative offsets. */

+ *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); /* Offset is stored divided by 4; rd is masked to 4 bits; mcp is decremented before the store. */

+#endif

/* -- Emit loads/stores --------------------------------------------------- */

/* Prefer spills of BASE/L. */

@@ -208,6 +217,28 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)

(i & 4095));

}

+#if !LJ_SOFTFP

+/* Load a number constant into an FPR. */

+static void emit_loadn(ASMState *as, Reg r, cTValue *tv) /* Materialize the constant *tv in FPR r: a single VMOVI_D when the value qualifies, otherwise a VLDR_D from the constant's address. */

+ int32_t i;

+ if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { /* The immediate form is only tried with JIT_F_VFPV3 set and a zero low word. */

+ uint32_t hi = tv->u32.hi;

+ uint32_t b = ((hi >> 22) & 0x1ff); /* Bits 22..30 of the high word. */

+ if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) { /* Low 16 bits of hi must be clear and b must be 0x100 or 0x0ff. */

+ *--as->mcp = ARMI_VMOVI_D | ARMF_D(r & 15) | /* Build the instruction word from three pieces of the high word: */

+ ((tv->u32.hi >> 12) & 0x00080000) | /* bit 31 of hi goes to bit 19, */

+ ((tv->u32.hi >> 4) & 0x00070000) | /* bits 20..22 of hi go to bits 16..18, */

+ ((tv->u32.hi >> 16) & 0x0000000f); /* bits 16..19 of hi go to bits 0..3. */

+ return;

+ }

+ i = i32ptr(tv); /* Presumably the address of tv as a 32 bit integer -- confirm against i32ptr. */

+ emit_vlso(as, ARMI_VLDR_D, r, /* Fallback: load the value through a base register plus offset. */

+ ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020)); /* Base constant is i with bits 2..9 cleared; those bits become the load offset. */

+#endif

/* Get/set global_State fields. */

#define emit_getgl(as, r, field) \

emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field)

@@ -256,7 +287,15 @@ static void emit_call(ASMState *as, void *target)

/* Generic move between two regs. */

static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)

{

+#if LJ_SOFTFP

lua_assert(!irt_isnum(ir->t)); UNUSED(ir);

+#else

+ if (dst >= RID_MAX_GPR) {

+ emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,

+ (dst & 15), (src & 15));

+ return;

+ }

+#endif

if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */

MCode ins = *as->mcp, swp = (src^dst);

if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) {

@@ -272,15 +311,27 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)

/* Generic load of register from stack slot. */

static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)

{

+#if LJ_SOFTFP

lua_assert(!irt_isnum(ir->t)); UNUSED(ir);

- emit_lso(as, ARMI_LDR, r, RID_SP, ofs);

+#else

+ if (r >= RID_MAX_GPR)

+ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs);

+ else

+#endif

+ emit_lso(as, ARMI_LDR, r, RID_SP, ofs);

}

/* Generic store of register to stack slot. */

static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)

{

+#if LJ_SOFTFP

lua_assert(!irt_isnum(ir->t)); UNUSED(ir);

- emit_lso(as, ARMI_STR, r, RID_SP, ofs);

+#else

+ if (r >= RID_MAX_GPR)

+ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs);

+ else

+#endif

+ emit_lso(as, ARMI_STR, r, RID_SP, ofs);

}

/* Emit an arithmetic/logic operation with a constant operand. */

diff --git a/luajit2/src/lj_err.c b/luajit2/src/lj_err.c
index 3ee8cf43..60d8fe12 100644
--- a/luajit2/src/lj_err.c
+++ b/luajit2/src/lj_err.c

@@ -185,7 +185,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)

/* -- External frame unwinding -------------------------------------------- */

-#if defined(__GNUC__) && !LJ_NO_UNWIND

+#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_TARGET_WINDOWS

** We have to use our own definitions instead of the mandatory (!) unwind.h,

diff --git a/luajit2/src/lj_errmsg.h b/luajit2/src/lj_errmsg.h
index 2d677cca..d59d76a2 100644
--- a/luajit2/src/lj_errmsg.h
+++ b/luajit2/src/lj_errmsg.h

@@ -160,6 +160,7 @@ ERRDEF(FFI_BADCALL, LUA_QS " is not callable")

ERRDEF(FFI_NUMARG, "wrong number of arguments for function call")

ERRDEF(FFI_BADMEMBER, LUA_QS " has no member named " LUA_QS)

ERRDEF(FFI_BADIDX, LUA_QS " cannot be indexed")

+ERRDEF(FFI_BADIDXW, LUA_QS " cannot be indexed with " LUA_QS)

ERRDEF(FFI_WRCONST, "attempt to write to constant location")

ERRDEF(FFI_NODECL, "missing declaration for symbol " LUA_QS)

ERRDEF(FFI_BADCBACK, "bad callback")

diff --git a/luajit2/src/lj_frame.h b/luajit2/src/lj_frame.h
index b8429c2a..b8af2349 100644
--- a/luajit2/src/lj_frame.h
+++ b/luajit2/src/lj_frame.h

@@ -97,7 +97,11 @@ enum {

#define CFRAME_OFS_L 12

#define CFRAME_OFS_PC 8

#define CFRAME_OFS_MULTRES 4

+#if LJ_ARCH_HASFPU

+#define CFRAME_SIZE 128

+#else

#define CFRAME_SIZE 64

+#endif

#define CFRAME_SHIFT_MULTRES 3

#elif LJ_TARGET_PPC

#if LJ_ARCH_PPC64

diff --git a/luajit2/src/lj_ircall.h b/luajit2/src/lj_ircall.h
index 7f08bc4a..8f481106 100644
--- a/luajit2/src/lj_ircall.h
+++ b/luajit2/src/lj_ircall.h

@@ -66,7 +66,7 @@ typedef struct CCallInfo {

#define IRCALLCOND_SOFTFP_FFI(x) NULL

#endif

-#define LJ_NEED_FP64 (LJ_TARGET_PPC || LJ_TARGET_MIPS)

+#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)

#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)

#define IRCALLCOND_FP64_FFI(x) x

@@ -242,7 +242,7 @@ extern uint32_t softfp_f2ui(float a);

#endif

-#if LJ_HASFFI && LJ_NEED_FP64

+#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP)

#ifdef __GNUC__

#define fp64_l2d __floatdidf

#define fp64_ul2d __floatundidf

diff --git a/luajit2/src/lj_opt_sink.c b/luajit2/src/lj_opt_sink.c
index 28291d77..5f38b647 100644
--- a/luajit2/src/lj_opt_sink.c
+++ b/luajit2/src/lj_opt_sink.c

@@ -79,7 +79,7 @@ static void sink_mark_ins(jit_State *J)

case IR_BASE:

return; /* Finished. */

case IR_CALLL: /* IRCALL_lj_tab_len */

- case IR_ALOAD: case IR_HLOAD: case IR_XLOAD:

+ case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR:

irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */

break;

case IR_FLOAD:

diff --git a/luajit2/src/lj_snap.c b/luajit2/src/lj_snap.c
index 0cf12243..dd123692 100644
--- a/luajit2/src/lj_snap.c
+++ b/luajit2/src/lj_snap.c

@@ -464,8 +464,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)

J->baseslot = s+1;

}

if (pass23) {

- IRIns *irlast = &T->ir[(snap+1)->ref];

- lua_assert(J->exitno+1 < T->nsnap);

+ IRIns *irlast = &T->ir[snap->ref];

pass23 = 0;

/* Emit dependent PVALs. */

for (n = 0; n < nent; n++) {

diff --git a/luajit2/src/lj_target_arm.h b/luajit2/src/lj_target_arm.h
index a24fc819..ee920f0b 100644
--- a/luajit2/src/lj_target_arm.h
+++ b/luajit2/src/lj_target_arm.h

@@ -14,7 +14,9 @@

#if LJ_SOFTFP

#define FPRDEF(_)

#else

-#error "NYI: hard-float support for ARM"

+#define FPRDEF(_) \

+ _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \

+ _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15)

#endif

#define VRIDDEF(_)

@@ -30,7 +32,11 @@ enum {

RID_RET = RID_R0,

RID_RETLO = RID_R0,

RID_RETHI = RID_R1,

+#if LJ_SOFTFP

RID_FPRET = RID_R0,

+#else

+ RID_FPRET = RID_D0,

+#endif

/* These definitions must match with the *.dasc file(s): */

RID_BASE = RID_R9, /* Interpreter BASE. */

@@ -45,7 +51,7 @@ enum {

#if LJ_SOFTFP

RID_MAX_FPR = RID_MIN_FPR,

#else

-#error "NYI: VFP support for ARM"

+ RID_MAX_FPR = RID_D15+1,

#endif

RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,

RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR

@@ -66,10 +72,10 @@ enum {

RID2RSET(RID_R9)|RID2RSET(RID_R11))

#if LJ_SOFTFP

#define RSET_FPR 0

-#define RSET_ALL RSET_GPR

#else

-#error "NYI: VFP support for ARM"

+#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))

#endif

+#define RSET_ALL (RSET_GPR|RSET_FPR)

#define RSET_INIT RSET_ALL

/* ABI-specific register sets. lr is an implicit scratch register. */

@@ -82,12 +88,21 @@ enum {

#if LJ_SOFTFP

#define RSET_SCRATCH_FPR 0

#else

-#error "NYI: VFP support for ARM"

+#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))

#endif

#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)

#define REGARG_FIRSTGPR RID_R0

#define REGARG_LASTGPR RID_R3

#define REGARG_NUMGPR 4

+#if LJ_ABI_SOFTFP

+#define REGARG_FIRSTFPR 0

+#define REGARG_LASTFPR 0

+#define REGARG_NUMFPR 0

+#else

+#define REGARG_FIRSTFPR RID_D0

+#define REGARG_LASTFPR RID_D7

+#define REGARG_NUMFPR 8

+#endif

/* -- Spill slots --------------------------------------------------------- */

@@ -196,6 +211,53 @@ typedef enum ARMIns {

/* ARMv6T2 */

ARMI_MOVW = 0xe3000000,

ARMI_MOVT = 0xe3400000,

+ /* VFP */

+ ARMI_VMOV_D = 0xeeb00b40,

+ ARMI_VMOV_S = 0xeeb00a40,

+ ARMI_VMOVI_D = 0xeeb00b00,

+ ARMI_VMOV_R_S = 0xee100a10,

+ ARMI_VMOV_S_R = 0xee000a10,

+ ARMI_VMOV_RR_D = 0xec500b10,

+ ARMI_VMOV_D_RR = 0xec400b10,

+ ARMI_VADD_D = 0xee300b00,

+ ARMI_VSUB_D = 0xee300b40,

+ ARMI_VMUL_D = 0xee200b00,

+ ARMI_VMLA_D = 0xee000b00,

+ ARMI_VMLS_D = 0xee000b40,

+ ARMI_VNMLS_D = 0xee100b00,

+ ARMI_VDIV_D = 0xee800b00,

+ ARMI_VABS_D = 0xeeb00bc0,

+ ARMI_VNEG_D = 0xeeb10b40,

+ ARMI_VSQRT_D = 0xeeb10bc0,

+ ARMI_VCMP_D = 0xeeb40b40,

+ ARMI_VCMPZ_D = 0xeeb50b40,

+ ARMI_VMRS = 0xeef1fa10,

+ ARMI_VCVT_S32_F32 = 0xeebd0ac0,

+ ARMI_VCVT_S32_F64 = 0xeebd0bc0,

+ ARMI_VCVT_U32_F32 = 0xeebc0ac0,

+ ARMI_VCVT_U32_F64 = 0xeebc0bc0,

+ ARMI_VCVTR_S32_F32 = 0xeebd0a40,

+ ARMI_VCVTR_S32_F64 = 0xeebd0b40,

+ ARMI_VCVTR_U32_F32 = 0xeebc0a40,

+ ARMI_VCVTR_U32_F64 = 0xeebc0b40,

+ ARMI_VCVT_F32_S32 = 0xeeb80ac0,

+ ARMI_VCVT_F64_S32 = 0xeeb80bc0,

+ ARMI_VCVT_F32_U32 = 0xeeb80a40,

+ ARMI_VCVT_F64_U32 = 0xeeb80b40,

+ ARMI_VCVT_F32_F64 = 0xeeb70bc0,

+ ARMI_VCVT_F64_F32 = 0xeeb70ac0,

+ ARMI_VLDR_S = 0xed100a00,

+ ARMI_VLDR_D = 0xed100b00,

+ ARMI_VSTR_S = 0xed000a00,

+ ARMI_VSTR_D = 0xed000b00,

} ARMIns;

typedef enum ARMShift {

diff --git a/luajit2/src/lj_vm.h b/luajit2/src/lj_vm.h
index a13a8ce0..4f9a10b8 100644
--- a/luajit2/src/lj_vm.h
+++ b/luajit2/src/lj_vm.h

@@ -55,6 +55,10 @@ LJ_ASMF void lj_vm_exit_interp(void);

#else

LJ_ASMF double lj_vm_floor(double);

LJ_ASMF double lj_vm_ceil(double);

+#if LJ_TARGET_ARM

+LJ_ASMF double lj_vm_floor_hf(double);

+LJ_ASMF double lj_vm_ceil_hf(double);

+#endif

#endif

#if LJ_HASJIT

@@ -71,6 +75,9 @@ LJ_ASMF void lj_vm_powi_sse(void);

#define lj_vm_trunc trunc

#else

LJ_ASMF double lj_vm_trunc(double);

+#if LJ_TARGET_ARM

+LJ_ASMF double lj_vm_trunc_hf(double);

+#endif

#endif

LJ_ASMF double lj_vm_powi(double, int32_t);

#ifdef LUAJIT_NO_LOG2

diff --git a/luajit2/src/vm_arm.dasc b/luajit2/src/vm_arm.dasc
index 8ddce49e..355a53e6 100644
--- a/luajit2/src/vm_arm.dasc
+++ b/luajit2/src/vm_arm.dasc

@@ -46,6 +46,7 @@

|.define CRET2, r1

|// Stack layout while in interpreter. Must match with lj_frame.h.

+|.define SAVE_R4, [sp, #28]

|.define CFRAME_SPACE, #28

|.define SAVE_ERRF, [sp, #24]

|.define SAVE_NRES, [sp, #20]

@@ -60,6 +61,20 @@

|.define TMPD, [sp]

|.define TMPDp, sp

+|.if FPU

+|.macro saveregs

+| push {r5, r6, r7, r8, r9, r10, r11, lr}

+| vpush {d8-d15}

+| sub sp, sp, CFRAME_SPACE+4

+| str r4, SAVE_R4

+|.endmacro

+|.macro restoreregs_ret

+| ldr r4, SAVE_R4

+| add sp, sp, CFRAME_SPACE+4

+| vpop {d8-d15}

+| pop {r5, r6, r7, r8, r9, r10, r11, pc}

+|.endmacro

+|.else

|.macro saveregs

| push {r4, r5, r6, r7, r8, r9, r10, r11, lr}

| sub sp, sp, CFRAME_SPACE

@@ -68,6 +83,7 @@

| add sp, sp, CFRAME_SPACE

| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}

|.endmacro

+|.endif

|// Type definitions. Some of these are only used for documentation.

|.type L, lua_State, LREG

@@ -875,6 +891,29 @@ static void build_subroutines(BuildCtx *ctx)

| bhs ->fff_fallback

|.endmacro

+ |.macro .ffunc_d, name

+ | .ffunc name

+ | ldr CARG2, [BASE, #4]

+ | cmp NARGS8:RC, #8

+ | vldr d0, [BASE]

+ | blo ->fff_fallback

+ | checktp CARG2, LJ_TISNUM

+ | bhs ->fff_fallback

+ |.endmacro

+ |

+ |.macro .ffunc_dd, name

+ | .ffunc name

+ | ldr CARG2, [BASE, #4]

+ | ldr CARG4, [BASE, #12]

+ | cmp NARGS8:RC, #16

+ | vldr d0, [BASE]

+ | vldr d1, [BASE, #8]

+ | blo ->fff_fallback

+ | checktp CARG2, LJ_TISNUM

+ | cmnlo CARG4, #-LJ_TISNUM

+ | bhs ->fff_fallback

+ |.endmacro

+ |

|// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.

|.macro ffgccheck

| ldr CARG1, [DISPATCH, #DISPATCH_GL(gc.total)]

@@ -1327,8 +1366,14 @@ static void build_subroutines(BuildCtx *ctx)

| movmi CARG1, #0x80000000

| bmi <1

|4:

+ |.if HFABI

+ | vmov d0, CARG1, CARG2

+ | bl ->vm_..func.._hf

+ | b ->fff_resd

+ |.else

| bl ->vm_..func

| b ->fff_restv

+ |.endif

|.endmacro

| math_round floor

@@ -1381,22 +1426,48 @@ static void build_subroutines(BuildCtx *ctx)

| b <5

|.macro math_extern, func

+ |.if HFABI

+ | .ffunc_d math_ .. func

+ |.else

| .ffunc_n math_ .. func

+ |.endif

| .IOS mov RA, BASE

| bl extern func

| .IOS mov BASE, RA

+ |.if HFABI

+ | b ->fff_resd

+ |.else

| b ->fff_restv

+ |.endif

|.endmacro

|.macro math_extern2, func

+ |.if HFABI

+ | .ffunc_dd math_ .. func

+ |.else

| .ffunc_nn math_ .. func

+ |.endif

| .IOS mov RA, BASE

| bl extern func

| .IOS mov BASE, RA

+ |.if HFABI

+ | b ->fff_resd

+ |.else

| b ->fff_restv

+ |.endif

|.endmacro

+ |.if FPU

+ | .ffunc_d math_sqrt

+ | vsqrt.f64 d0, d0

+ |->fff_resd:

+ | ldr PC, [BASE, FRAME_PC]

+ | vstr d0, [BASE, #-8]

+ | b ->fff_res1

+ |.else

| math_extern sqrt

+ |.endif

+ |

| math_extern log

| math_extern log10

| math_extern exp

@@ -1414,11 +1485,34 @@ static void build_subroutines(BuildCtx *ctx)

| math_extern2 fmod

|->ff_math_deg:

- |.ffunc_n math_rad

+ |.if FPU

+ | .ffunc_d math_rad

+ | vldr d1, CFUNC:CARG3->upvalue[0]

+ | vmul.f64 d0, d0, d1

+ | b ->fff_resd

+ |.else

+ | .ffunc_n math_rad

| ldrd CARG34, CFUNC:CARG3->upvalue[0]

| bl extern __aeabi_dmul

| b ->fff_restv

+ |.endif

+ |.if HFABI

+ | .ffunc math_ldexp

+ | ldr CARG4, [BASE, #4]

+ | ldrd CARG12, [BASE, #8]

+ | cmp NARGS8:RC, #16

+ | blo ->fff_fallback

+ | vldr d0, [BASE]

+ | checktp CARG4, LJ_TISNUM

+ | bhs ->fff_fallback

+ | checktp CARG2, LJ_TISNUM

+ | bne ->fff_fallback

+ | .IOS mov RA, BASE

+ | bl extern ldexp // (double x, int exp)

+ | .IOS mov BASE, RA

+ | b ->fff_resd

+ |.else

|.ffunc_2 math_ldexp

| checktp CARG2, LJ_TISNUM

| bhs ->fff_fallback

@@ -1428,7 +1522,22 @@ static void build_subroutines(BuildCtx *ctx)

| bl extern ldexp // (double x, int exp)

| .IOS mov BASE, RA

| b ->fff_restv

+ |.endif

+ |.if HFABI

+ |.ffunc_d math_frexp

+ | mov CARG1, sp

+ | .IOS mov RA, BASE

+ | bl extern frexp

+ | .IOS mov BASE, RA

+ | ldr CARG3, [sp]

+ | mvn CARG4, #~LJ_TISNUM

+ | ldr PC, [BASE, FRAME_PC]

+ | vstr d0, [BASE, #-8]

+ | mov RC, #(2+1)*8

+ | strd CARG34, [BASE]

+ | b ->fff_res

+ |.else

|.ffunc_n math_frexp

| mov CARG3, sp

| .IOS mov RA, BASE

@@ -1441,7 +1550,19 @@ static void build_subroutines(BuildCtx *ctx)

| mov RC, #(2+1)*8

| strd CARG34, [BASE]

| b ->fff_res

+ |.endif

+ |.if HFABI

+ |.ffunc_d math_modf

+ | sub CARG1, BASE, #8

+ | ldr PC, [BASE, FRAME_PC]

+ | .IOS mov RA, BASE

+ | bl extern modf

+ | .IOS mov BASE, RA

+ | mov RC, #(2+1)*8

+ | vstr d0, [BASE]

+ | b ->fff_res

+ |.else

|.ffunc_n math_modf

| sub CARG3, BASE, #8

| ldr PC, [BASE, FRAME_PC]

@@ -1451,8 +1572,56 @@ static void build_subroutines(BuildCtx *ctx)

| mov RC, #(2+1)*8

| strd CARG12, [BASE]

| b ->fff_res

+ |.endif

|.macro math_minmax, name, cond, fcond

+ |.if FPU

+ | .ffunc_1 name

+ | add RB, BASE, RC

+ | checktp CARG2, LJ_TISNUM

+ | add RA, BASE, #8

+ | bne >4

+ |1: // Handle integers.

+ | ldrd CARG34, [RA]

+ | cmp RA, RB

+ | bhs ->fff_restv

+ | checktp CARG4, LJ_TISNUM

+ | bne >3

+ | cmp CARG1, CARG3

+ | add RA, RA, #8

+ | mov..cond CARG1, CARG3

+ | b <1

+ |3: // Convert intermediate result to number and continue below.

+ | vmov s4, CARG1

+ | bhi ->fff_fallback

+ | vldr d1, [RA]

+ | vcvt.f64.s32 d0, s4

+ | b >6

+ |

+ |4:

+ | vldr d0, [BASE]

+ | bhi ->fff_fallback

+ |5: // Handle numbers.

+ | ldrd CARG34, [RA]

+ | vldr d1, [RA]

+ | cmp RA, RB

+ | bhs ->fff_resd

+ | checktp CARG4, LJ_TISNUM

+ | bhs >7

+ |6:

+ | vcmp.f64 d0, d1

+ | vmrs

+ | add RA, RA, #8

+ | vmov..fcond.f64 d0, d1

+ | b <5

+ |7: // Convert integer to number and continue above.

+ | vmov s4, CARG3

+ | bhi ->fff_fallback

+ | vcvt.f64.s32 d1, s4

+ | b <6

+ |

+ |.else

+ |

| .ffunc_1 name

| checktp CARG2, LJ_TISNUM

| mov RA, #8

@@ -1467,9 +1636,8 @@ static void build_subroutines(BuildCtx *ctx)

| add RA, RA, #8

| mov..cond CARG1, CARG3

| b <1

- |3:

+ |3: // Convert intermediate result to number and continue below.

| bhi ->fff_fallback

- | // Convert intermediate result to number and continue below.

| bl extern __aeabi_i2d

| ldrd CARG34, [BASE, RA]

| b >6

@@ -1495,6 +1663,7 @@ static void build_subroutines(BuildCtx *ctx)

| bl extern __aeabi_i2d

| ldrd CARG34, TMPD

| b <6

+ |.endif

|.endmacro

| math_minmax math_min, gt, hi

@@ -1959,6 +2128,9 @@ static void build_subroutines(BuildCtx *ctx)

| ldr CARG2, [CARG1, #-4]! // Get exit instruction.

| str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC.

| str CARG1, [sp, #60]

+ |.if FPU

+ | vpush {d0-d15}

+ |.endif

| lsl CARG2, CARG2, #8

| add CARG1, CARG1, CARG2, asr #6

| ldr CARG2, [lr, #4] // Load exit stub group offset.

@@ -2025,8 +2197,53 @@ static void build_subroutines(BuildCtx *ctx)

|// FP value rounding. Called from JIT code.

|//

|// double lj_vm_floor/ceil/trunc(double x);

- |.macro vm_round, func

- |->vm_ .. func:

+ |.macro vm_round, func, hf

+ |.if FPU

+ |.if hf == 0

+ | vmov d0, CARG1, CARG2

+ | vldr d2, <8 // 2^52

+ |.else

+ | vldr d2, <8 // 2^52

+ | vmov CARG1, CARG2, d0

+ |.endif

+ | vabs.f64 d1, d0

+ | vcmp.f64 d1, d2 // |x| >= 2^52 or NaN?

+ | vmrs

+ |.if "func" == "trunc"

+ | vadd.f64 d0, d1, d2

+ | bxpl lr // Return argument unchanged.

+ | vsub.f64 d0, d0, d2 // (|x| + 2^52) - 2^52

+ | vldr d2, <9 // +1.0

+ | vcmp.f64 d1, d0 // |x| < result: subtract +1.0

+ | vmrs

+ | vsubmi.f64 d0, d1, d2

+ | cmp CARG2, #0

+ | vnegmi.f64 d0, d0 // Merge sign bit back in.

+ |.else

+ | vadd.f64 d1, d1, d2

+ | bxpl lr // Return argument unchanged.

+ | cmp CARG2, #0

+ | vsub.f64 d1, d1, d2 // (|x| + 2^52) - 2^52

+ | vldr d2, <9 // +1.0

+ | vnegmi.f64 d1, d1 // Merge sign bit back in.

+ |.if "func" == "floor"

+ | vcmp.f64 d0, d1 // x < result: subtract +1.0.

+ | vmrs

+ | vsubmi.f64 d0, d1, d2

+ |.else

+ | vcmp.f64 d1, d0 // x > result: add +1.0.

+ | vmrs

+ | vaddmi.f64 d0, d1, d2

+ |.endif

+ | vmovpl.f64 d0, d1

+ |.endif

+ |.if hf == 0

+ | vmov CARG1, CARG2, d0

+ |.endif

+ | bx lr

+ |

+ |.else

+ |

| lsl CARG3, CARG2, #1

| adds RB, CARG3, #0x00200000

| bpl >2 // |x| < 1?

@@ -2069,15 +2286,40 @@ static void build_subroutines(BuildCtx *ctx)

| ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0)

| orrne CARG2, CARG2, CARG4

| bx lr

+ |.endif

|.endmacro

+ |.if FPU

+ |.align 8

|9:

- | .long 0x3ff00000 // hiword(1.0)

- | vm_round floor

- | vm_round ceil

+ | .long 0, 0x3ff00000 // +1.0

+ |8:

+ | .long 0, 0x43300000 // 2^52

+ |.else

+ |9:

+ | .long 0x3ff00000 // hiword(+1.0)

+ |.endif

+ |

+ |->vm_floor:

+ |.if not HFABI

+ | vm_round floor, 0

+ |.endif

+ |->vm_floor_hf:

+ |.if FPU

+ | vm_round floor, 1

+ |.endif

+ |

+ |->vm_ceil:

+ |.if not HFABI

+ | vm_round ceil, 0

+ |.endif

+ |->vm_ceil_hf:

+ |.if FPU

+ | vm_round ceil, 1

+ |.endif

|->vm_trunc:

- |.if JIT

+ |.if JIT and not HFABI

| lsl CARG3, CARG2, #1

| adds RB, CARG3, #0x00200000

| andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0.

@@ -2093,8 +2335,23 @@ static void build_subroutines(BuildCtx *ctx)

| bx lr

|.endif

+ |->vm_trunc_hf:

+ |.if JIT and FPU

+ | vm_round trunc, 1

+ |.endif

+ |

| // double lj_vm_mod(double dividend, double divisor);

|->vm_mod:

+ |.if FPU

+ | // Special calling convention. Also, RC (r11) is not preserved.

+ | vdiv.f64 d0, d6, d7

+ | mov RC, lr

+ | bl ->vm_floor_hf

+ | vmul.f64 d0, d0, d7

+ | mov lr, RC

+ | vsub.f64 d6, d6, d0

+ | bx lr

+ |.else

| push {r0, r1, r2, r3, r4, lr}

| bl extern __aeabi_ddiv

| bl ->vm_floor

@@ -2105,6 +2362,7 @@ static void build_subroutines(BuildCtx *ctx)

| bl extern __aeabi_dadd

| add sp, sp, #20

| pop {pc}

+ |.endif

| // int lj_vm_modi(int dividend, int divisor);

|->vm_modi:

@@ -2156,8 +2414,19 @@ static void build_subroutines(BuildCtx *ctx)

|.type CTSTATE, CTState, PC

| ldr CTSTATE, GL:r12->ctype_state

| add DISPATCH, r12, #GG_G2DISP

- | strd CARG12, CTSTATE->cb.gpr[0]

+ |.if FPU

+ | str r4, SAVE_R4

+ | add r4, sp, CFRAME_SPACE+4+8*8

+ | vstmdb r4!, {d8-d15}

+ |.endif

+ |.if HFABI

+ | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8])

+ |.endif

| strd CARG34, CTSTATE->cb.gpr[2]

+ | strd CARG12, CTSTATE->cb.gpr[0]

+ |.if HFABI

+ | vstmdb r12!, {d0-d7}

+ |.endif

| ldr CARG4, [sp]

| add CARG3, sp, #CFRAME_SIZE

| mov CARG1, CTSTATE

@@ -2190,6 +2459,9 @@ static void build_subroutines(BuildCtx *ctx)

| mov CARG2, RA

| bl extern lj_ccallback_leave // (CTState *cts, TValue *o)

| ldrd CARG12, CTSTATE->cb.gpr[0]

+ |.if HFABI

+ | vldr d0, CTSTATE->cb.fpr[0]

+ |.endif

| b ->vm_leave_unw

|.endif

@@ -2202,9 +2474,15 @@ static void build_subroutines(BuildCtx *ctx)

| ldr CARG1, CCSTATE:CARG1->spadj

| ldrb CARG2, CCSTATE->nsp

| add CARG3, CCSTATE, #offsetof(CCallState, stack)

+ |.if HFABI

+ | add RB, CCSTATE, #offsetof(CCallState, fpr[0])

+ |.endif

| mov r11, sp

| sub sp, sp, CARG1 // Readjust stack.

| subs CARG2, CARG2, #1

+ |.if HFABI

+ | vldm RB, {d0-d7}

+ |.endif

| ldr RB, CCSTATE->func

| bmi >2

|1: // Copy stack slots.

@@ -2213,14 +2491,17 @@ static void build_subroutines(BuildCtx *ctx)

| subs CARG2, CARG2, #1

| bpl <1

|2:

- | ldr CARG1, CCSTATE->gpr[0]

- | ldr CARG2, CCSTATE->gpr[1]

- | ldr CARG3, CCSTATE->gpr[2]

- | ldr CARG4, CCSTATE->gpr[3]

+ | ldrd CARG12, CCSTATE->gpr[0]

+ | ldrd CARG34, CCSTATE->gpr[2]

| blx RB

| mov sp, r11

- | str CRET1, CCSTATE->gpr[0]

- | str CRET2, CCSTATE->gpr[1]

+ |.if HFABI

+ | add r12, CCSTATE, #offsetof(CCallState, fpr[4])

+ |.endif

+ | strd CRET1, CCSTATE->gpr[0]

+ |.if HFABI

+ | vstmdb r12!, {d0-d3}

+ |.endif

| pop {CCSTATE, r5, r11, pc}

|.endif

|// Note: vm_ffi_call must be the last function in this object file!

@@ -2266,6 +2547,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

| ins_next

|3: // CARG12 is not an integer.

+ |.if FPU

+ | vldr d0, [RA]

+ | bhi ->vmeta_comp

+ | // d0 is a number.

+ | checktp CARG4, LJ_TISNUM

+ | vldr d1, [RC]

+ | blo >5

+ | // d0 is a number, CARG3 is an integer.

+ | vmov s4, CARG3

+ | vcvt.f64.s32 d1, s4

+ | b >5

+ |4: // CARG1 is an integer, CARG34 is not an integer.

+ | vldr d1, [RC]

+ | bhi ->vmeta_comp

+ | // CARG1 is an integer, d1 is a number.

+ | vmov s4, CARG1

+ | vcvt.f64.s32 d0, s4

+ |5: // d0 and d1 are numbers.

+ | vcmp.f64 d0, d1

+ | vmrs

+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.

+ if (op == BC_ISLT) {

+ | sublo PC, RB, #0x20000

+ } else if (op == BC_ISGE) {

+ | subhs PC, RB, #0x20000

+ } else if (op == BC_ISLE) {

+ | subls PC, RB, #0x20000

+ } else {

+ | subhi PC, RB, #0x20000

+ }

+ | b <1

+ |.else

| bhi ->vmeta_comp

| // CARG12 is a number.

| checktp CARG4, LJ_TISNUM

@@ -2282,7 +2595,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

| b >5

|4: // CARG1 is an integer, CARG34 is not an integer.

| bhi ->vmeta_comp

- | // CARG1 is an integer, CARG34 is a number

+ | // CARG1 is an integer, CARG34 is a number.

| mov RA, RB // Save RB.

| bl extern __aeabi_i2d

| ldrd CARG34, [RC] // Restore second operand.

@@ -2299,6 +2612,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

| subhi PC, RA, #0x20000

}

| b <1

+ |.endif

break;

case BC_ISEQV: case BC_ISNEV:

@@ -2439,6 +2753,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

}

| bhi <2

|.endif

+ |.if FPU

+ | checktp CARG4, LJ_TISNUM

+ | vmov s4, CARG3

+ | vldr d0, [RA]

+ | vldrlo d1, [RC]

+ | vcvths.f64.s32 d1, s4

+ | b >5

+ |4: // CARG1 is an integer, d1 is a number.

+ | vmov s4, CARG1

+ | vldr d1, [RC]

+ | vcvt.f64.s32 d0, s4

+ |5: // d0 and d1 are numbers.

+ | vcmp.f64 d0, d1

+ | vmrs

+ if (vk) {

+ | subeq PC, RB, #0x20000

+ } else {

+ | subne PC, RB, #0x20000

+ }

+ | b <2

+ |.else

| // CARG12 is a number.

| checktp CARG4, LJ_TISNUM

| movlo RA, RB // Save RB.

@@ -2458,6 +2793,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

| subne PC, RA, #0x20000

}

| b <2

+ |.endif

|.if FFI

|7:

@@ -2617,20 +2953,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);

||switch (vk) {

||case 0:

+ | .if FPU

+ | ldrd CARG12, [RB, BASE]!

+ | ldrd CARG34, [RC, KBASE]!

+ | .else

| ldrd CARG12, [BASE, RB]

| ldrd CARG34, [KBASE, RC]

+ | .endif

|| break;

||case 1:

+ | .if FPU

+ | ldrd CARG34, [RB, BASE]!

+ | ldrd CARG12, [RC, KBASE]!

+ | .else

| ldrd CARG34, [BASE, RB]

| ldrd CARG12, [KBASE, RC]

+ | .endif

|| break;

||default:

+ | .if FPU

+ | ldrd CARG12, [RB, BASE]!

+ | ldrd CARG34, [RC, BASE]!

+ | .else

| ldrd CARG12, [BASE, RB]

| ldrd CARG34, [BASE, RC]

+ | .endif

|| break;

||}

|.endmacro

+ |.macro ins_arithpre_fpu, reg1, reg2

+ |.if FPU

+ ||if (vk == 1) {

+ | vldr reg2, [RB]

+ | vldr reg1, [RC]

+ ||} else {

+ | vldr reg1, [RB]

+ | vldr reg2, [RC]

+ ||}

+ |.endif

+ |.endmacro

+ |

+ |.macro ins_arithpost_fpu, reg

+ | ins_next1

+ | add RA, BASE, RA

+ | ins_next2

+ | vstr reg, [RA]

+ | ins_next3

+ |.endmacro

+ |

|.macro ins_arithfallback, ins

||switch (vk) {

||case 0:

@@ -2645,9 +3016,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

||}

|.endmacro

- |.macro ins_arithdn, intins, fpcall

+ |.macro ins_arithdn, intins, fpins, fpcall

| ins_arithpre

- |.if "intins" ~= "vm_modi"

+ |.if "intins" ~= "vm_modi" and not FPU

| ins_next1

|.endif

| ins_arithcheck_int >5

@@ -2665,57 +3036,74 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

| ins_arithfallback bvs

|.endif

|4:

- |.if "intins" == "vm_modi"

+ |.if "intins" == "vm_modi" or FPU

| ins_next1

|.endif

| ins_next2

| strd CARG12, [BASE, RA]

| ins_next3

|5: // FP variant.

+ | ins_arithpre_fpu d6, d7

| ins_arithfallback ins_arithcheck_num

+ |.if FPU

|.if "intins" == "vm_modi"

| bl fpcall

|.else

+ | fpins d6, d6, d7

+ |.endif

+ | ins_arithpost_fpu d6

+ |.else

| bl fpcall

- | ins_next1

+ |.if "intins" ~= "vm_modi"

+ | ins_next1

|.endif

| b <4

+ |.endif

|.endmacro

- |.macro ins_arithfp, fpcall

+ |.macro ins_arithfp, fpins, fpcall

| ins_arithpre

+ |.if "fpins" ~= "extern" or HFABI

+ | ins_arithpre_fpu d0, d1

+ |.endif

| ins_arithfallback ins_arithcheck_num

- |.if "fpcall" == "extern pow"

+ |.if "fpins" == "extern"

| .IOS mov RC, BASE

| bl fpcall

| .IOS mov BASE, RC

+ |.elif FPU

+ | fpins d0, d0, d1

|.else

| bl fpcall

|.endif

+ |.if ("fpins" ~= "extern" or HFABI) and FPU

+ | ins_arithpost_fpu d0

+ |.else

| ins_next1

| ins_next2

| strd CARG12, [BASE, RA]

| ins_next3

+ |.endif

|.endmacro

case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:

- | ins_arithdn adds, extern __aeabi_dadd

+ | ins_arithdn adds, vadd.f64, extern __aeabi_dadd

break;

case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:

- | ins_arithdn subs, extern __aeabi_dsub

+ | ins_arithdn subs, vsub.f64, extern __aeabi_dsub

break;

case BC_MULVN: case BC_MULNV: case BC_MULVV:

- | ins_arithdn smull, extern __aeabi_dmul

+ | ins_arithdn smull, vmul.f64, extern __aeabi_dmul

break;

case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:

- | ins_arithfp extern __aeabi_ddiv

+ | ins_arithfp vdiv.f64, extern __aeabi_ddiv

break;

case BC_MODVN: case BC_MODNV: case BC_MODVV:

- | ins_arithdn vm_modi, ->vm_mod

+ | ins_arithdn vm_modi, vm_mod, ->vm_mod

break;

case BC_POW:

| // NYI: (partial) integer arithmetic.

- | ins_arithfp extern pow

+ | ins_arithfp extern, extern pow

break;

case BC_CAT:

@@ -3775,20 +4163,46 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

| cmnlo CARG4, #-LJ_TISNUM

| cmnlo RB, #-LJ_TISNUM

| bhs ->vmeta_for

+ |.if FPU

+ | vldr d0, FOR_IDX

+ | vldr d1, FOR_STOP

+ | cmp RB, #0

+ | vstr d0, FOR_EXT

+ |.else

| cmp RB, #0

- | strd CARG12, FOR_IDX

| strd CARG12, FOR_EXT

| blt >8

+ |.endif

} else {

+ |.if FPU

+ | vldr d0, FOR_IDX

+ | vldr d2, FOR_STEP

+ | vldr d1, FOR_STOP

+ | cmp CARG4, #0

+ | vadd.f64 d0, d0, d2

+ |.else

| cmp CARG4, #0

| blt >8

| bl extern __aeabi_dadd

| strd CARG12, FOR_IDX

| ldrd CARG34, FOR_STOP

| strd CARG12, FOR_EXT

+ |.endif

}

|6:

+ |.if FPU

+ | vcmpge.f64 d0, d1

+ | vcmplt.f64 d1, d0

+ | vmrs

+ |.else

| bl extern __aeabi_cdcmple

+ |.endif

+ if (vk) {

+ |.if FPU

+ | vstr d0, FOR_IDX

+ | vstr d0, FOR_EXT

+ |.endif

+ }

if (op == BC_FORI) {

| subhi PC, RC, #0x20000

} else if (op == BC_JFORI) {

@@ -3804,6 +4218,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

| ins_next2

| b <3

+ |.if not FPU

|8: // Invert check for negative step.

if (vk) {

| bl extern __aeabi_dadd

@@ -3814,6 +4229,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)

| mov CARG4, CARG2

| ldrd CARG12, FOR_STOP

| b <6

+ |.endif

break;

case BC_ITERL:

@@ -4048,8 +4464,14 @@ static void emit_asm_debug(BuildCtx *ctx)

"\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */

"\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */

fcofs, CFRAME_SIZE);

- for (i = 11; i >= 4; i--) /* offset r4-r11 */

+ for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */

fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i));

+#if LJ_ARCH_HASFPU

+ for (i = 15; i >= 8; i--) /* offset d8-d15 */

+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n",

+ 64+2*i, 10+2*(15-i));

+ fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */

+#endif

fprintf(ctx->fp,

"\t.align 2\n"

".LEFDE0:\n\n");

diff --git a/luajit2/src/vm_x86.dasc b/luajit2/src/vm_x86.dasc
index 44199606..c455795e 100644
--- a/luajit2/src/vm_x86.dasc
+++ b/luajit2/src/vm_x86.dasc

@@ -6201,51 +6201,6 @@ static void emit_asm_debug(BuildCtx *ctx)

".LEFDE3:\n\n", (int)ctx->codesz - fcofs);

#endif

break;

- case BUILD_coffasm:

- fprintf(ctx->fp, "\t.section .eh_frame,\"dr\"\n");

- fprintf(ctx->fp,

- "\t.def %slj_err_unwind_dwarf; .scl 2; .type 32; .endef\n",

- LJ_32 ? "_" : "");

- fprintf(ctx->fp,

- "Lframe1:\n"

- "\t.long LECIE1-LSCIE1\n"

- "LSCIE1:\n"

- "\t.long 0\n"

- "\t.byte 0x1\n"

- "\t.string \"zP\"\n"

- "\t.uleb128 0x1\n"

- "\t.sleb128 -" SZPTR "\n"

- "\t.byte " REG_RA "\n"

- "\t.uleb128 5\n" /* augmentation length */

- "\t.byte 0x00\n" /* absptr */

- "\t.long %slj_err_unwind_dwarf\n"

- "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"

- "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"

- "\t.align " SZPTR "\n"

- "LECIE1:\n\n", LJ_32 ? "_" : "");

- fprintf(ctx->fp,

- "LSFDE1:\n"

- "\t.long LEFDE1-LASFDE1\n"

- "LASFDE1:\n"

- "\t.long LASFDE1-Lframe1\n"

- "\t.long %slj_vm_asm_begin\n"

- "\t.long %d\n"

- "\t.uleb128 0\n" /* augmentation length */

- "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */

-#if LJ_64

- "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */

- "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */

- "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */

- "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */

-#else

- "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */

- "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */

- "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */

- "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */

-#endif

- "\t.align " SZPTR "\n"

- "LEFDE1:\n\n", LJ_32 ? "_" : "", (int)ctx->codesz, CFRAME_SIZE);

- break;

/* Mental note: never let Apple design an assembler.

** Or a linker. Or a plastic case. But I digress.

generated by cgit v1.2.3 (git 2.39.1) at 2025年09月13日 16:49:33 +0000