Slow Erlang code to find Pythagorean triplets

Question 1

Why does this Erlang code take about one minute on my machine?

On Pascal something like this took less then second on my machine.

What I can do to speed it up?

-module(slow).
-export([slow/0]).
pyth(N) ->
 [{A, B, C} ||
 A <- lists:seq(1, N - 2),
 B <- lists:seq(A + 1, N - 1),
 C <- lists:seq(B + 1, N),
 A + B + C =< N,
 A * A + B * B == C * C].
slow() -> io:format("~p\n", [pyth(1000)]).

Question 2

i think it is possible to generate triplets in linear time, just think how to limit lists

Question 3

Erlang runs on a VM, so you will get better performance on such a pure arithmetic computation by compiling it into native code using HIPE.

From the command line:

erlc +native slow.erl

Or from the Erlang shell:

1> hipe:c(slow).

Or:

1> c(slow, [native]).

Question 4

Thank you very much. Now it runs for 6 seconds. But: 1. How can I precisely mark down needed time? 2. How to run it much faster, like in notVM languages?

Question 5

For such a small piece of code, structured around one nested list comprehension I am not sure what kind of profiling to suggest. I can't think of any reasonable optimization that would make this algorithm run faster, either.

Question 6

The problem in this code is the lists:seq/2 calls which generates lists in memory which you then traverse. Since you have 3 such generations, you end up with a cartesian product of generating lists. In effect, the Garbage collector has a lot of work to do then.

A much faster approach is to write functions which does the traversal by tail-calling each other. This way, you should be able to run in no time. It won't be Pascal-fast, but it will answer in a couple of milliseconds.

Question 7

Would you mind providing a code example of what you are talking about?

Question 8

After reading the answers from Stavros and Jesper, I decided to make some test of various implementation of this function. The results are shown at the end of the code, measured by the method: timer:tc(test_pyth,testx,[N]). I was surprised to see that the method 4 is slightly faster then the 3, and I was even more surprised to see that if I remove all the when guards it becomes a little slower.

The version from John is, in my opinion, a good compromise between code efficiency and coding effort (writing, reading, maintenance).

Re-reading the algorithm, I realize that I forgot to test the case A + B + C =< N (a differnet problem tha the one I intent to solve...), so I have tested much longer lists. the initial code can be optimized like this:

test2(M) ->
 [{A,B,C} || A <- lists:seq(1,M-2), B<- lists:seq(A+1,max(M-A-1,A)), C <- lists:seq(B+1,max(M-A-B,B)), A*A+B*B == C*C].

[edit] I add 2 tests compliant with john algorithm

-module(test_pyth).
-export([test1/1,test2/1,test3/1,test4/1,test2b/1,test4b/1,test/0]).
test1(M)->
 Test = fun(i,i,i,i) -> [];(A,B,C,R) when A*A+B*B == C*C -> [{A,B,C}|R]; (_,_,_,R)->R end,
 L = fun(Max) ->
 A1 = 1,
 A2 = Max-2, 
 L1 = fun L1(A,R) when A > A2 -> R; 
 L1(A,R) ->
 B2 = Max-1,
 B1 = A+1, 
 L2 = fun L2(B,Rb) when B > B2 -> Rb; 
 L2(B,Rb) ->
 C2 = Max,
 C1 = B+1, 
 L3 = fun L3(C,Rc) when C > C2 -> Rc;
 L3(C,Rc) -> L3(C+1,Test(A,B,C,Rc)) 
 end, 
 L2(B+1,L3(C1,Rb)) 
 end, 
 L1(A+1,L2(B1,R)) 
 end, 
 L1(A1,[]) 
 end,
 L(M).
test2(M) ->
 [{A,B,C} || A <- lists:seq(1,M-2), B<- lists:seq(A+1,M-1), C <- lists:seq(B+1,M), A*A+B*B == C*C].
test2b(M) ->
 [{A,B,C} || A <- lists:seq(1,M-2), B<- lists:seq(A+1,max(M-A-1,A)), C <- lists:seq(B+1,max(M-A-B,B)), A*A+B*B == C*C].
test3(M) -> loop(1,M-2,[]).
loop(A,A2,R) when A > A2 -> R;
loop(A,A2,R) -> loop(A+1,A2,loop(A,A+1,A2+1,R)).
loop(_A,B,B2,R) when B > B2 -> R;
loop(A,B,B2,R)-> loop(A,B+1,B2,loop(A,B,B+1,B2+1,R)).
loop(_A,_B,C,C2,R) when C > C2 -> R;
loop(A,B,C,C2,R) when A*A+B*B == C*C -> loop(A,B,C+1,C2,[{A,B,C}|R]);
loop(A,B,C,C2,R) -> loop(A,B,C+1,C2,R).
test4(M) -> loop4(1,M-2,[]).
loop4(A,A2,R) when A > A2 -> R;
loop4(A,A2,R) -> loop4(A+1,A2,loop4(A,A+1,A2+1,R)).
loop4(_A,B,B2,R) when B > B2 -> R;
loop4(A,B,B2,R) -> loop4(A,B+1,B2,loop4(A,B,B+1,B2+1,R)).
loop4(_A,_B,C,C2,R) when C > C2 -> R;
loop4(A,B,C,C2,R) -> 
 case A*A+B*B -C*C of 
 0 -> loop4(A,B,C+1,C2,[{A,B,C}|R]);
 _ ->loop4(A,B,C+1,C2,R)
 end.
test4b(M) -> loop4b(1,M-2,[],M).
loop4b(A,A2,R,_M) when A > A2 -> R;
loop4b(A,A2,R,M) -> loop4b(A+1,A2,loop4b(A,A+1,M-A-A,R,M),M).
loop4b(_A,B,B2,R,_M) when B > B2 -> R;
loop4b(A,B,B2,R,M) -> loop4b(A,B+1,B2,loop4b(A,B,B+1,M-A-B,R,M),M).
loop4b(_A,_B,C,C2,R,_M) when C > C2 -> R;
loop4b(A,B,C,C2,R,M) -> 
 case A*A+B*B -C*C of 
 0 -> loop4b(A,B,C+1,C2,[{A,B,C}|R],M);
 _ ->loop4b(A,B,C+1,C2,R,M)
 end.
run_all(V) ->
 L = [{_T1,R},{_T2,R},{_T3,R},{_T4,R},{_T5,R1},{_T6,R1}] = [run(T,V) || T <- [test1,test2,test3,test4,test2b,test4b]],
 [X || {X,_R} <- L].
run(T,V) ->
 {T1,V1} = timer:tc(?MODULE,T,[V]),
 {T1/1000000,lists:sort(V1)}.
test() ->
 T500 = run_all(500),
 T1000 = run_all(1000),
 io:format("%% testx(500) -> 1: ~5.2f, 2: ~5.2f, 3: ~5.2f, 4: ~5.2f, 2b: ~5.2f, 4b: ~5.2f~n", T500),
 io:format("%% testx(1000) -> 1: ~5.2f, 2: ~5.2f, 3: ~5.2f, 4: ~5.2f, 2b: ~5.2f, 4b: ~5.2f~n", T1000).
%% non native compilation
%% testx(500) -> 1: 3.63, 2: 2.19, 3: 2.07, 4: 2.01, 2b: 0.37, 4b: 0.33
%% testx(1000) -> 1: 29.61, 2: 17.64, 3: 16.55, 4: 16.00, 2b: 2.97, 4b: 2.66
%% native compilation
%% testx(500) -> 1: 1.03, 2: 0.64, 3: 0.35, 4: 0.23, 2b: 0.11, 4b: 0.03
%% testx(1000) -> 1: 9.49, 2: 5.47, 3: 2.68, 4: 1.71, 2b: 0.91, 4b: 0.27

aronisstav aronisstav 2361 silver badge6 bronze badges · Accepted Answer · 2013-05-09 18:18:51Z

3

\$\begingroup\$

Erlang runs on a VM, so you will get better performance on such a pure arithmetic computation by compiling it into native code using HIPE.

From the command line:

erlc +native slow.erl

Or from the Erlang shell:

1> hipe:c(slow).

Or:

1> c(slow, [native]).

Share

edited May 9, 2013 at 19:49

answered May 9, 2013 at 18:18

aronisstav's user avatar

aronisstav aronisstav

2361 silver badge6 bronze badges

\$\endgroup\$

2

\$\begingroup\$ Thank you very much. Now it runs for 6 seconds. But: 1. How can I precisely mark down needed time? 2. How to run it much faster, like in notVM languages? \$\endgroup\$

John Smith
– John Smith

2013年05月09日 18:51:14 +00:00
Commented May 9, 2013 at 18:51
\$\begingroup\$ For such a small piece of code, structured around one nested list comprehension I am not sure what kind of profiling to suggest. I can't think of any reasonable optimization that would make this algorithm run faster, either. \$\endgroup\$

aronisstav
– aronisstav

2013年05月09日 19:59:22 +00:00
Commented May 9, 2013 at 19:59

Add a comment |

Stack Exchange Network

Slow Erlang code to find Pythagorean triplets

3 Answers 3

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Slow Erlang code to find Pythagorean triplets

3 Answers 3

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions