LuaJIT vs. PyPy JIT

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]

Subject: LuaJIT vs. PyPy JIT
From: François Perrad <francois.perrad@...>
Date: 2010年2月10日 10:05:08 +0100

After reading this post
http://indefinitestudies.org/2010/02/08/creating-a-toy-virtual-machine-with-pypy/
and the referenced paper
http://codespeak.net/pypy/extradoc/talk/Ficooolps2009/bolz-tracing-jit.pdf
.
To compare LuaJIT 2 with the PyPy JIT, I ported the same toy
interpreter to Lua (toy.lua),
running the same application, which computes the square of the accumulator.
I also wrote a pure Python version without any PyPy stuff (toy.py)
and a C version (toy.c).
I obtain the following results :
the compilation of target-toy-native needs 75s (size ~ 120kb).
the compilation of target-toy-jit needs 698s (size ~ 2Gb).
time python target-toy.py 1000000
	real 41.777s
time target-toy-native 1000000
	real 1.668s
time target-toy-jit 1000000
	real 0.622s
time python toy.py 1000000
	real 7.860s
time lua toy.lua 1000000
	real 3.853s
time luajit toy.lua 1000000
	real 0.355s
time ./toy 1000000
	real 0.145s
	real 0.068s (when compiled with gcc -O2)
These tests were run on Ubuntu 9.10 (x86) with:
- gcc 4.4.1 (Ubuntu package)
- lua 5.1.4 (Ubuntu package)
- luajit 2 head
- python 2.6.4 (Ubuntu package)
- pypy trunk
Lua is faster than Python, and LuaJIT is faster than PyPy/C output and
PyPy/JIT output.
In order to speed up toy.lua, Mike Pall proposed replacing all those
pseudo-constants with real constants (see toy-mp.lua).
François

import os, sys
import autopath
import py
# these are the opcodes for the interpreted language
# (semantics as implemented by interpret(): `a` is the accumulator, `regs`
# the register list, and the operand is the bytecode entry after the opcode)
JUMP_IF_A = 1  # if a is non-zero, continue at the operand address
MOV_A_R = 2  # regs[operand] = a
MOV_R_A = 3  # a = regs[operand]
ADD_R_TO_A = 4  # a += regs[operand]
DECR_A = 5  # a -= 1 (takes no operand)
RETURN_A = 6  # stop interpreting and return a (takes no operand)
from pypy.rlib.jit import JitDriver
# Driver object used by the jit_merge_point / can_enter_jit hints in
# interpret(). 'pc' and 'bytecode' are declared green, 'a' and 'regs' red.
# NOTE(review): in PyPy's terminology greens identify the position in the
# interpreted program and reds are the remaining loop state -- confirm
# against the PyPy JIT documentation for the version in use.
tlrjitdriver = JitDriver(greens = ['pc', 'bytecode'],
 reds = ['a', 'regs'])
# the main interpreter loop
def interpret(bytecode, a):
    """Run ``bytecode`` on the toy register machine and return the accumulator.

    ``bytecode`` is a flat list of integers: an opcode optionally followed
    by a single operand.  ``a`` is the initial accumulator value.  The
    machine owns 256 registers, all starting at zero.

    The loop ends when a RETURN_A opcode is executed; its result is the
    current accumulator.  Opcodes that match none of the known constants
    are skipped, exactly as in the original code.
    """
    regs = [0] * 256
    pc = 0
    while True:
        # Hint evaluated at the top of every interpreted instruction.
        tlrjitdriver.jit_merge_point(bytecode=bytecode, pc=pc, a=a, regs=regs)
        opcode = bytecode[pc]
        pc += 1
        if opcode == JUMP_IF_A:
            target = bytecode[pc]
            pc += 1
            if a:
                # Only backward jumps (target before the current pc) are
                # reported to the driver; forward jumps are taken silently.
                if target < pc:
                    tlrjitdriver.can_enter_jit(bytecode=bytecode, pc=target, a=a, regs=regs)
                pc = target
        elif opcode == MOV_A_R:
            n = bytecode[pc]
            pc += 1
            regs[n] = a
        elif opcode == MOV_R_A:
            n = bytecode[pc]
            pc += 1
            a = regs[n]
        elif opcode == ADD_R_TO_A:
            n = bytecode[pc]
            pc += 1
            a += regs[n]
        elif opcode == DECR_A:
            a -= 1
        elif opcode == RETURN_A:
            return a
# __________ Entry point __________
def entry_point(argv):
    """Interpret the built-in squaring program on ``int(argv[1])``.

    Prints the interpreter's result and returns 0.  When no argument was
    supplied, prints a usage line and returns 1 instead of failing with an
    IndexError.
    """
    if len(argv) < 2:
        print("usage: %s N" % argv[0])
        return 1
    # the program we want to interpret
    # it computes the square of its argument
    bytecode = [
        MOV_A_R, 0,  # i = a
        MOV_A_R, 1,  # copy of 'a'
        # 4:
        MOV_R_A, 0,  # i--
        DECR_A,
        MOV_A_R, 0,
        MOV_R_A, 2,  # res += a
        ADD_R_TO_A, 1,
        MOV_A_R, 2,
        MOV_R_A, 0,  # if i!=0: goto 4
        JUMP_IF_A, 4,
        MOV_R_A, 2,
        RETURN_A,
    ]
    result = interpret(bytecode, int(argv[1]))
    # Parenthesised single-argument form prints the same under Python 2.
    print(result)
    return 0
def jitpolicy(driver):
    """Return a default-constructed ``JitPolicy``; ``driver`` is not used."""
    # Imported lazily, inside the function, as in the original code.
    from pypy.jit.metainterp.policy import JitPolicy
    policy = JitPolicy()
    return policy
# _____ Define and setup target ___
def target(*args):
    """Return the pair ``(entry_point, None)``; all arguments are ignored."""
    return (entry_point, None)
# main function, if this script is called from the command line
# (the integer returned by entry_point is discarded here)
if __name__ == '__main__':
 entry_point(sys.argv)

import sys
# these are the opcodes for the interpreted language
JUMP_IF_A = 1    # if a is non-zero, continue at the operand address
MOV_A_R = 2      # regs[operand] = a
MOV_R_A = 3      # a = regs[operand]
ADD_R_TO_A = 4   # a += regs[operand]
DECR_A = 5       # a -= 1 (no operand)
RETURN_A = 6     # stop and return a (no operand)


# the main interpreter loop
def interpret(bytecode, a):
    """Run ``bytecode`` on the toy register machine and return the accumulator.

    ``bytecode`` is a flat sequence of integers: an opcode optionally
    followed by one operand.  ``a`` is the initial accumulator value.  The
    machine owns 256 registers, all starting at zero.

    Returns the accumulator at the point a RETURN_A opcode is executed.

    Raises:
        ValueError: if an opcode is not one of the six known constants
            (previously such values were silently skipped).
        IndexError: if execution runs past the end of ``bytecode`` or a
            register operand is out of range for the register list.
    """
    regs = [0] * 256
    pc = 0
    while True:
        opcode = bytecode[pc]
        pc += 1
        if opcode == JUMP_IF_A:
            target = bytecode[pc]
            pc += 1
            if a:
                pc = target
        elif opcode == MOV_A_R:
            n = bytecode[pc]
            pc += 1
            regs[n] = a
        elif opcode == MOV_R_A:
            n = bytecode[pc]
            pc += 1
            a = regs[n]
        elif opcode == ADD_R_TO_A:
            n = bytecode[pc]
            pc += 1
            a += regs[n]
        elif opcode == DECR_A:
            a -= 1
        elif opcode == RETURN_A:
            return a
        else:
            # pc was already advanced past the opcode, hence the -1.
            raise ValueError("unknown opcode %r at pc=%d" % (opcode, pc - 1))
# __________ Entry point __________
def entry_point(argv):
    """Interpret the built-in squaring program on ``int(argv[1])`` and print it.

    Raises:
        SystemExit: with a usage message when no argument was supplied
            (previously a bare IndexError).
        ValueError: if ``argv[1]`` is not an integer literal.
    """
    if len(argv) < 2:
        raise SystemExit("usage: %s N" % argv[0])
    # the program we want to interpret
    # it computes the square of its argument
    bytecode = [
        MOV_A_R, 0,  # i = a
        MOV_A_R, 1,  # copy of 'a'
        # 4:
        MOV_R_A, 0,  # i--
        DECR_A,
        MOV_A_R, 0,
        MOV_R_A, 2,  # res += a
        ADD_R_TO_A, 1,
        MOV_A_R, 2,
        MOV_R_A, 0,  # if i!=0: goto 4
        JUMP_IF_A, 4,
        MOV_R_A, 2,
        RETURN_A,
    ]
    result = interpret(bytecode, int(argv[1]))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(result)
# main function, if this script is called from the command line
# (guarded so that importing the module does not start the interpreter)
if __name__ == '__main__':
    entry_point(sys.argv)

-- Opcodes of the interpreted language (operand = next bytecode entry).
local JUMP_IF_A, MOV_A_R, MOV_R_A, ADD_R_TO_A, DECR_A, RETURN_A = 1, 2, 3, 4, 5, 6

-- Main interpreter loop.
-- Executes `bytecode` (a 1-based array of opcodes and operands) starting with
-- accumulator `a`, and returns the accumulator when RETURN_A is executed.
-- The machine has 256 registers (indices 1..256), all initially 0.
-- Values that match no opcode are skipped.
local function interpret (bytecode, a)
    -- Same contents as a literal with 256 zeros.
    local regs = {}
    for slot = 1, 256 do
        regs[slot] = 0
    end

    local pc = 1
    while true do
        local op = bytecode[pc]
        pc = pc + 1
        if op == JUMP_IF_A then
            local dest = bytecode[pc]
            if a == 0 then
                -- not taken: just step over the operand
                pc = pc + 1
            else
                pc = dest
            end
        elseif op == MOV_A_R then
            local r = bytecode[pc]
            pc = pc + 1
            regs[r] = a
        elseif op == MOV_R_A then
            local r = bytecode[pc]
            pc = pc + 1
            a = regs[r]
        elseif op == ADD_R_TO_A then
            local r = bytecode[pc]
            pc = pc + 1
            a = a + regs[r]
        elseif op == DECR_A then
            a = a - 1
        elseif op == RETURN_A then
            return a
        end
    end
end
-- __________ Entry point __________
-- Interprets the built-in squaring program on tonumber(argv[1]) and prints
-- the result. Raises an error with a usage message when argv[1] is missing
-- or is not a number.
local function entry_point(argv)
    local n = tonumber(argv[1])
    if n == nil then
        error("usage: lua toy.lua N", 0)
    end
    -- the program we want to interpret
    -- it computes the square of its argument
    -- (declared local: the original assigned to a global by accident)
    local bytecode = {
        MOV_A_R, 1, -- i = a
        MOV_A_R, 2, -- copy of 'a'
        -- 5:
        MOV_R_A, 1, -- i--
        DECR_A,
        MOV_A_R, 1,
        MOV_R_A, 3, -- res += a
        ADD_R_TO_A, 2,
        MOV_A_R, 3,
        MOV_R_A, 1, -- if i!=0: goto 5
        JUMP_IF_A, 5,
        MOV_R_A, 3,
        RETURN_A
    }
    local result = interpret(bytecode, n)
    print(result)
end
-- main function
-- Runs unconditionally when this chunk is executed. `arg` is presumably the
-- command-line argument table set up by the standalone interpreter;
-- entry_point reads arg[1] as the input number.
entry_point(arg)

-- Main interpreter loop (opcodes are compared against string constants).
-- Executes `bytecode` (a 1-based array of opcode strings and numeric
-- operands) starting with accumulator `a`, and returns the accumulator when
-- 'RETURN_A' is executed. The machine has 256 registers (indices 1..256),
-- all initially 0. Values that match no opcode are skipped.
local function interpret (bytecode, a)
    -- Same contents as a literal with 256 zeros.
    local regs = {}
    for slot = 1, 256 do
        regs[slot] = 0
    end

    local pc = 1
    while true do
        local op = bytecode[pc]
        pc = pc + 1
        if op == 'JUMP_IF_A' then
            local dest = bytecode[pc]
            if a == 0 then
                -- not taken: just step over the operand
                pc = pc + 1
            else
                pc = dest
            end
        elseif op == 'MOV_A_R' then
            local r = bytecode[pc]
            pc = pc + 1
            regs[r] = a
        elseif op == 'MOV_R_A' then
            local r = bytecode[pc]
            pc = pc + 1
            a = regs[r]
        elseif op == 'ADD_R_TO_A' then
            local r = bytecode[pc]
            pc = pc + 1
            a = a + regs[r]
        elseif op == 'DECR_A' then
            a = a - 1
        elseif op == 'RETURN_A' then
            return a
        end
    end
end
-- __________ Entry point __________
-- Interprets the built-in squaring program on tonumber(argv[1]) and prints
-- the result. Raises an error with a usage message when argv[1] is missing
-- or is not a number.
local function entry_point(argv)
    local n = tonumber(argv[1])
    if n == nil then
        error("usage: lua toy-mp.lua N", 0)
    end
    -- the program we want to interpret
    -- it computes the square of its argument
    -- (declared local: the original assigned to a global by accident)
    local bytecode = {
        'MOV_A_R', 1, -- i = a
        'MOV_A_R', 2, -- copy of 'a'
        -- 5:
        'MOV_R_A', 1, -- i--
        'DECR_A',
        'MOV_A_R', 1,
        'MOV_R_A', 3, -- res += a
        'ADD_R_TO_A', 2,
        'MOV_A_R', 3,
        'MOV_R_A', 1, -- if i!=0: goto 5
        'JUMP_IF_A', 5,
        'MOV_R_A', 3,
        'RETURN_A'
    }
    local result = interpret(bytecode, n)
    print(result)
end
-- main function
-- Runs unconditionally when this chunk is executed (there is no
-- "called from the command line" guard here). `arg` is presumably the
-- command-line argument table set up by the standalone interpreter;
-- entry_point reads arg[1] as the input number.
entry_point(arg)

#include <stdio.h>
#include <stdlib.h>
// these are the opcodes for the interpreted language
#define JUMP_IF_A 1   // if a != 0, continue at the operand address
#define MOV_A_R 2     // regs[operand] = a
#define MOV_R_A 3     // a = regs[operand]
#define ADD_R_TO_A 4  // a += regs[operand]
#define DECR_A 5      // a -= 1 (no operand)
#define RETURN_A 6    // stop and return a (no operand)
typedef unsigned char opcode_t;
typedef long long register__t;

// the main interpreter loop
//
// Executes `bytecode` (opcodes, each optionally followed by one operand
// byte) with initial accumulator `a` and returns the accumulator when
// RETURN_A is executed. The machine has 256 registers, all starting at 0.
// An unknown opcode prints a diagnostic to stderr and terminates the
// process with EXIT_FAILURE.
static register__t interpret(const opcode_t bytecode[], register__t a)
{
    // Zero-initialised: the squaring program reads register 2 before it
    // ever writes it, so leaving this array uninitialised is undefined
    // behaviour and can produce a wrong result.
    register__t regs[256] = {0};
    unsigned pc = 0;

    for (;;) {
        opcode_t opcode = bytecode[pc];
        pc += 1;
        switch (opcode) {
        case JUMP_IF_A: {
            unsigned target = bytecode[pc];
            pc += 1;
            if (a)
                pc = target;
            break;
        }
        case MOV_A_R: {
            opcode_t n = bytecode[pc];
            pc += 1;
            regs[n] = a;
            break;
        }
        case MOV_R_A: {
            opcode_t n = bytecode[pc];
            pc += 1;
            a = regs[n];
            break;
        }
        case ADD_R_TO_A: {
            opcode_t n = bytecode[pc];
            pc += 1;
            a += regs[n];
            break;
        }
        case DECR_A: {
            a -= 1;
            break;
        }
        case RETURN_A: {
            return a;
        }
        default: {
            // Without this the loop kept fetching and could walk off the
            // end of the bytecode array. pc was already advanced, hence -1.
            fprintf(stderr, "unknown opcode %u at pc=%u\n",
                    (unsigned)opcode, pc - 1);
            exit(EXIT_FAILURE);
        }
        }
    }
}
// __________ Entry point __________
//
// Interprets the built-in squaring program on the integer given in argv[1]
// and prints the result followed by a newline. If no argument is present,
// prints a usage line to stderr and exits with EXIT_FAILURE.
static void entry_point(char *argv[])
{
    // the program we want to interpret
    // it computes the square of its argument
    static const opcode_t bytecode[] = {
        MOV_A_R, 0, // i = a
        MOV_A_R, 1, // copy of 'a'
        // 4:
        MOV_R_A, 0, // i--
        DECR_A,
        MOV_A_R, 0,
        MOV_R_A, 2, // res += a
        ADD_R_TO_A, 1,
        MOV_A_R, 2,
        MOV_R_A, 0, // if i!=0: goto 4
        JUMP_IF_A, 4,
        MOV_R_A, 2,
        RETURN_A
    };

    // argv is NULL-terminated, so a missing argument shows up as NULL;
    // passing that to a string-to-number routine would crash.
    if (argv[0] == NULL || argv[1] == NULL) {
        fprintf(stderr, "usage: %s N\n", argv[0] ? argv[0] : "toy");
        exit(EXIT_FAILURE);
    }

    // strtoll keeps the full long long range of register__t, where atoi
    // was limited to int.
    register__t result = interpret(bytecode, strtoll(argv[1], NULL, 10));
    printf("%lld\n", result);
}
// Program entry: forwards the argument vector to entry_point and reports
// success to the environment.
int main(int argc, char* argv[])
{
    (void)argc; // only argv is consumed
    entry_point(argv);
    return EXIT_SUCCESS;
}

Prev by Date: Possibility of generational garbage collection
Next by Date: Re: require cascade
Previous by thread: Possibility of generational garbage collection
Next by thread: [ANN] New version of Token Filter Patch (Alpha!)
Index(es):
- Date
- Thread