We use some essential cookies to make our website work.

We use optional cookies, as detailed in our cookie policy, to remember your settings and understand how you use our website.

233 posts
ejolson
Posts: 13865
Joined: Tue Mar 18, 2014 11:47 am

Re: Advent of Code 2024

Sat May 03, 2025 4:41 am

ejolson wrote:
Wed Apr 30, 2025 3:32 pm
ejolson wrote:
Tue Apr 29, 2025 8:22 pm
Shy has been shyly meowing that the code should run three times faster. Scratchy claims that's not enough. Fortunately, the kittens are litter mates so never get into a fight.
Scratchy moved the allocation for mem outside the rungates routine and obtained.

Code: Select all

$ ./go24 # Pi 4B 1500 MHz
Advent of Code 2024 Day 24 Crossed Wires (GOMAXPROCS=4)
Part 1 The z wires output 61495910098126
Part 2 Swap wires css,cwt,gdd,jmv,pqt,z05,z09,z37
Total execution time 4.693544958 seconds.
This saves another half second of runtime.
I ported the changes tested in Go back to Chapel and obtained

Code: Select all

$ ./day24 -nl1 # Pi 4B 1500 MHz
Advent of Code 2024 Day 24 Crossed Wires (CHPL_RT_NUM_THREADS_PER_LOCALE=4)
Part 1 The z wires output 61495910098126
Part 2 Swap wires css,cwt,gdd,jmv,pqt,z05,z09,z37
Total execution time 5.37652 seconds.
Given the high-performance goal of Chapel I'm surprised that the performance is slower than Go. I wonder if the relative performance would be the same for a Pi 5 or whether Chapel would be faster on a Pi 5 compared to Go.

The latest solution to day 24 in Chapel is

Code: Select all

/* Advent of Code 2024 Day 24 Crossed Wires
 Written 2025 by Eric Olson
 Parallel version.
 Increase esize if this produces wrong answers. Increate ksize
 if it fails to produce any answers.
*/
use IO,Time,List,Sort,Random;
type byte=uint(8);
const esize=20;
const ksize=10;
var xstart=0,ystart=0,zstart=0;
var xbits=0,ybits=0,zbits=0;
var myrnd=new randomStream(int);
var ncpus=here.maxTaskPar;
proc check(ref i:int,ref s:bytes,const p:bytes):bool {
 if s.size<i+p.size {
 return false;
 }
 for k in 0..<p.size {
 if s[k+i]!=p[k] {
 return false;
 }
 }
 i+=p.size;
 return true;
}
proc scanint(ref i:int,ref s:bytes):int {
 const d0=b"0"[0],d9=b"9"[0],minus=b"-"[0];
 var r=0,iold=i,m=-1;
 while i<s.size {
 var d=s[i];
 if iold==i&&d==minus {
 m=1;
 } else if d>=d0 && d<=d9 {
 r=r*10-(d-d0);
 } else {
 break;
 }
 i+=1;
 }
 return m*r;
}
proc wire2num(ref stab:[]bytes,ref w:bytes):int {
 var ai=0,bi=stab.size,cold=-1;
 while true {
 var ci=(bi+ai)/2;
 if ci==cold {
 break;
 }
 if stab[ci]<w {
 ai=ci;
 } else if stab[ci]>w {
 bi=ci;
 } else {
 return ci;
 }
 cold=ci;
 }
 return -1;
}
proc setinput(ref x:int,ref s:bytes) {
 var w=s[0..2];
 var iold,i=1;
 iold=i;
 var j=scanint(i,s);
 if iold==i {
 writeln("Can't find input bitnumber in ",s);
 exit(1);
 }
 if !check(i,s,b": ") {
 writeln("Missing : delimiter in ",s);
 exit(1);
 }
 iold=i;
 var b=scanint(i,s);
 if iold==i {
 writeln("Can't find input bit value in ",s);
 exit(1);
 }
 if b<0||b>1 {
 writeln("Bit value out of range in ",s);
 exit(1);
 }
 var p=(1<<j);
 if s[0]==b"x"[0] {
 if xbits<=j {
 xbits=j+1;
 }
 }
 if s[0]==b"y"[0] {
 if ybits<=j {
 ybits=j+1;
 }
 }
 if b==1 {
 x|=p;
 } else {
 x&=~p;
 }
}
type optype=proc(x:bool,y:bool):bool;
proc g_AND(x:bool,y:bool):bool {
 return x&&y;
}
proc g_OR(x:bool,y:bool):bool {
 return x||y;
}
proc g_XOR(x:bool,y:bool):bool {
 return x!=y;
}
proc g_NUL(x:bool,y:bool):bool {
 return false;
}
const g_OP:[0..2]optype=[g_AND,g_OR,g_XOR];
const s_OP:[0..2]bytes=[b"AND",b"OR",b"XOR"];
record gspec {
 var x1,x2,y:int;
 var f1,f2:bool;
 var op:optype=g_NUL;
}
proc getgate(ref stab:[]bytes,ref s:bytes):gspec {
 var v=s.split();
 if v.size!=5 {
 writeln("Gate ",s," doesn't have 4 fields!");
 exit(1);
 }
 var r:gspec;
 r.x1=wire2num(stab,v[0]);
 for i in s_OP.domain {
 if v[1]==s_OP[i] {
 r.op=g_OP[i];
 }
 }
 if r.op==g_NUL {
 writeln("Gate ",s," unrecognizable operation!");
 exit(1);
 }
 r.x2=wire2num(stab,v[2]);
 if v[3]!=b"->" {
 writeln("Gate ",s," missing -> assignment!");
 exit(1);
 }
 r.y=wire2num(stab,v[4]);
 return r;
}
proc mkorder(ref stab:[]bytes,ref gates:[]gspec):[]int {
 var order:[0..<gates.size]int;
 var op=0,oq=0;
 proc setfn(ref fn:bool,c:byte){
 if c==b"x"[0]||c==b"y"[0] {
 fn=false;
 } else {
 fn=true;
 }
 }
 for i in gates.domain {
 setfn(gates[i].f1,stab[gates[i].x1][0]);
 setfn(gates[i].f2,stab[gates[i].x2][0]);
 if !gates[i].f1&&!gates[i].f2 {
 order[oq]=i;
 oq+=1;
 }
 }
 var leaf:[0..<stab.size]list(int);
 for i in gates.domain {
 ref r=gates[i];
 leaf[r.x1].pushBack(i);
 leaf[r.x2].pushBack(i);
 }
 while op<oq {
 var i=order[op];
 ref r=gates[i];
 if leaf[r.y].size==0 {
 op+=1;
 } else {
 for j in leaf[r.y] {
 if gates[j].x1==r.y {
 gates[j].f1=false;
 if !gates[j].f2 {
 order[oq]=j;
 oq+=1;
 }
 } else if gates[j].x2==r.y {
 gates[j].f2=false;
 if !gates[j].f1 {
 order[oq]=j;
 oq+=1;
 }
 }
 }
 op+=1;
 }
 }
 return order;
}
proc rungates(ref order:[]int,ref mem:[]bool,
 ref gates:[]gspec,xp:int,yp:int):int {
 var x=xp,y=yp;
 for j in 0..<xbits {
 mem[xstart+j]=(x&1)==1;
 x>>=1;
 }
 for j in 0..<ybits {
 mem[ystart+j]=(y&1)==1;
 y>>=1;
 }
 for i in order.domain {
 ref r=gates[order[i]];
 var x1=mem[r.x1];
 var x2=mem[r.x2];
 var y=r.op(x1,x2);
 mem[r.y]=y;
 }
 var p=1;
 var p1=0;
 for j in 0..<zbits {
 if mem[zstart+j] {
 p1+=p;
 }
 p*=2;
 }
 return p1;
}
proc part1(ref stab:[]bytes,ref gates:[]gspec,
 ref inputs:list(bytes)):int {
 var x=0,y=0;
 for s in inputs {
 if s[0]==b"x"[0] {
 setinput(x,s);
 } else if s[0]==b"y"[0] {
 setinput(y,s);
 } else {
 writeln("Unknown register input in ",s);
 exit(1);
 }
 }
 ref order=mkorder(stab,gates);
 var mem:[0..<stab.size]bool;
 return rungates(order,mem,gates,x,y);
}
proc loss(ref stab:[]bytes,ref gates:[]gspec,
 ref ensemble:[](int,int)):int {
 ref order=mkorder(stab,gates);
 var mem:[0..<stab.size]bool;
 var r=0;
 for i in ensemble.domain {
 var x=ensemble[i](0);
 var y=ensemble[i](1);
 var z=rungates(order,mem,gates,x,y);
 var e=abs(x+y-z);
 r+=e;
 x=y;
 }
 return r;
}
record lspec {
 var l:int;
 var i,j:int;
}
proc tryswap(ref stab:[]bytes,ref gates:[]gspec,
 ref ensemble:[](int,int),d:int):list(int) {
 var losses:[0..<gates.size*(gates.size-1)/2]lspec;
 var ij:[0..ncpus](int,int);
 ij[0]=(1,0);
 for n in 1..ncpus {
 var (i,j)=ij[n-1];
 var na=i*(i-1)/2+j;
 var nb=n*losses.size/ncpus;
 var delta=nb-na;
 while true {
 if i-j>delta {
 j+=delta;
 ij[n]=(i,j);
 break;
 }
 delta-=i-j;
 i+=1; j=0;
 }
 }
 record rspec {
 var i,j:int;
 }
 var rfound:sync rspec;
 var rfzero=new rspec(0,0);
 rfound.writeEF(rfzero);
 coforall n in 1..ncpus {
 var gatesn=gates;
 var (i,j)=ij[n-1],(ib,jb)=ij[n];
 var k=i*(i-1)/2+j;
 while true {
 gatesn[i].y<=>gatesn[j].y;
 var ts=loss(stab,gatesn,ensemble);
 gatesn[i].y<=>gatesn[j].y;
 if ts==0 {
 rfound.writeFF(new rspec(i,j));
 } else {
 losses[k].i=i;
 losses[k].j=j;
 losses[k].l=ts;
 k+=1;
 }
 j+=1;
 if j>=i {
 j=0; i+=1;
 }
 if (i==ib&&j==jb)||rfound.readFF()!=rfzero {
 break;
 }
 }
 }
 var rf=rfound.readFF();
 if rf!=rfzero {
 var r:list(int);
 r.pushBack(rf.i);
 r.pushBack(rf.j);
 return r;
 }
 var klen=losses.size;
 sort(losses[0..<klen]);
 if klen>ksize {
 klen=ksize;
 }
 if d==1 {
 var r:list(int);
 return r;
 }
 for k in 0..<klen {
 var i=losses[k].i;
 var j=losses[k].j;
 gates[i].y<=>gates[j].y;
 var r=tryswap(stab,gates,ensemble,d-1);
 gates[i].y<=>gates[j].y;
 if r.size>0 {
 r.pushBack(i);
 r.pushBack(j);
 return r;
 }
 }
 var r:list(int);
 return r;
}
proc mkensemble():[](int,int) {
 var ensemble:[0..<esize](int,int);
 var xmask=1<<xbits-1;
 var ymask=1<<ybits-1;
 for i in ensemble.domain {
 var x=myrnd.next()&xmask;
 var y=myrnd.next()&ymask;
 ensemble[i]=(x,y);
 }
 return ensemble;
}
proc part2(ref stab:[]bytes,ref gates:[]gspec):bytes {
 ref ensemble=mkensemble();
 var r=tryswap(stab,gates,ensemble,4);
 var wll:[0..<r.size]bytes;
 for i in wll.domain {
 wll[i]=stab[gates[r[i]].y];
 }
 sort(wll);
 var p2:bytes;
 for i in wll.domain {
 if i>0 {
 p2+=b","+wll[i];
 } else {
 p2=wll[i];
 }
 }
 return p2;
}
proc dowork(){
 var io=open("day24.txt",ioMode.r);
 var fp=io.reader(locking=false);
 var s:bytes;
 var inputs:list(bytes);
 while fp.readLine(s,stripNewline=true) {
 if s.size==0 {
 break;
 }
 inputs.pushBack(s);
 }
 var assign:list(bytes);
 while fp.readLine(s,stripNewline=true) {
 assign.pushBack(s);
 }
 var slen=inputs.size+assign.size;
 var stab:[0..<slen]bytes;
 var i=0;
 for s in inputs {
 stab[i]=s[0..2];
 i+=1;
 }
 for s in assign {
 var v=s.split();
 stab[i]=v[4];
 i+=1;
 }
 sort(stab);
 var x00=b"x00";
 xstart=wire2num(stab,x00);
 var y00=b"y00";
 ystart=wire2num(stab,y00);
 var z00=b"z00";
 zstart=wire2num(stab,z00);
 zbits=stab.size-zstart;
 var gates:[0..<assign.size]gspec;
 i=0;
 for s in assign {
 gates[i]=getgate(stab,s);
 i+=1;
 }
 var p1=part1(stab,gates,inputs);
 var p2=part2(stab,gates);
 writeln("Part 1 The z wires output ",p1);
 writeln("Part 2 Swap wires ",p2);
}
proc main(){
 var t:stopwatch;
 t.start();
 writeln("Advent of Code 2024 Day 24 Crossed Wires (CHPL_RT_NUM_THREADS_PER_LOCALE=",ncpus,")\n");
 dowork();
 t.stop();
 writeln("\nTotal execution time ",t.elapsed()," seconds.");
}
Although the kittens may have other ideas, that finishes this Advent of Code for me. Woohoo! Half the year is left before Advent 2025 begins.

In summary, I found Chapel tedious because it took too long to compile and because some of the features seemed only half implemented. On the other hand, being able pass the domain for an array around as a first class object was nice as was the fact that support for parallel programming is built in. I did not try the GPU features, but CPU threads worked well.

Chapel supports slices, but bounds checking did not appear by default. This could be due to the high-performance computing audience.

One thing that strikes me is that semicolons were never a problem when I only wrote programs in C and Pascal, but after switching between C, Julia, Go and Chapel I forget semicolons where they belong and add them where they don't. I think the languages are different but too similar.
Last edited by ejolson on Tue May 27, 2025 12:14 am, edited 1 time in total.

ejolson
Posts: 13865
Joined: Tue Mar 18, 2014 11:47 am

Re: Advent of Code 2024

Wed May 07, 2025 5:18 pm

ejolson wrote:
Sat May 03, 2025 4:41 am
Although the kittens may have other ideas, that finishes this Advent of Code for me.
Looking through the forum there've been threads for Advent of Code starting in 2020.

https://forums.raspberrypi.com/viewtopic.php?t=293084

https://forums.raspberrypi.com/viewtopic.php?t=325236

https://forums.raspberrypi.com/viewtopic.php?t=343515

https://forums.raspberrypi.com/viewtopic.php?t=360011

and this one

https://forums.raspberrypi.com/viewtopic.php?t=380295

For the first three years I switched programming languages every week. The most memorable was a week in Scratch. The last two years I focused on one language that might be useful: In 2023 that was Julia and 2024 Chapel.

Julia and Chapel are both languages designed for numerics and high-performance computing on modern hardware. One could make a case it was useful to learn them better. At the same time, the three black kittens claim to learn more by taking one puzzle and solving it multiple times using different languages and hardware.

To that end I have challenged Scratchy, Purr and Shy to create a parallel solution for day 24 that runs on the super cheap cluster.

https://forums.raspberrypi.com/viewtopic.php?t=199994

The kittens are difficult to motivate so this may not work out, but I'm hoping.

ejolson
Posts: 13865
Joined: Tue Mar 18, 2014 11:47 am

Re: Advent of Code 2024

Thu May 15, 2025 4:49 am

ejolson wrote:
Wed May 07, 2025 5:18 pm
The kittens are difficult to motivate so this may not work out, but I'm hoping.
It looks like there has been some progress with the super cheap cluster. In order to keep the MPI parallel programming examples together, the program for solving the day 24 puzzle will be described at

viewtopic.php?p=2315193#p2315193

ejolson
Posts: 13865
Joined: Tue Mar 18, 2014 11:47 am

Re: Advent of Code 2024

Sun May 25, 2025 6:13 am

ejolson wrote:
Thu May 15, 2025 4:49 am
ejolson wrote:
Wed May 07, 2025 5:18 pm
The kittens are difficult to motivate so this may not work out, but I'm hoping.
It looks like there has been some progress with the super cheap cluster. In order to keep the MPI parallel programming examples together, the program for solving the day 24 puzzle will be described at

viewtopic.php?p=2315193#p2315193
The finished MPI code is at

viewtopic.php?p=2316257#p2316257

Since it's also possible to run MPI code on a multi-core SMP machine, I installed MPICH on the Pi 4.

The result

Code: Select all

$ mpirun -n 4 ./mpi24 # Pi 4B 1500 MHz
Advent of Code 2024 Day 24 Crossed Wires (mpi_size=4)
Part 1 The z wires output 61495910098126
Part 2 Swap wires css,cwt,gdd,jmv,pqt,z05,z09,z37
Total execution time 1.23296 seconds.
indicates a factor

4.69 / 1.23 = 3.81

fold increase in performance over the parallel Go code and even more compared to Chapel.

The algorithm is basically the same. I was a little more careful allocating memory on the stack in C and represented the leaf list for the topological sort as a n x 3 array rather than an array of slices. Maybe this last change accounts for most of the improvement. It's difficult to tell without testing.

ejolson
Posts: 13865
Joined: Tue Mar 18, 2014 11:47 am

Re: Advent of Code 2024

Sun Jun 01, 2025 5:05 am

ejolson wrote:
Sun May 25, 2025 6:13 am
I was a little more careful allocating memory on the stack in C and represented the leaf list for the topological sort as a n x 3 array rather than an array of slices. Maybe this last change accounts for most of the improvement. It's difficult to tell without testing.
Shy got curious and changed the leaf list in the Go program to be like the C code. This saved about a second. The new runtime is 3.22 seconds. That's

3.22 / 1.23 = 2.62

times slower than the C code. Shy looked shyly at the ground.

Purr stretched, yawned and meowed something about fish. At this I instrumented the code and found 240879 heap allocations are made during a run but no fish.

Unfortunately, there is no explicit way to allocate an array sized at runtime from the stack in Go and all the slices appear to escape to the heap. Maybe that's where the fish went as well.

According to Scratchy, one of the design goals for the Go programming language was to make any type of performance optimisation difficult. I think because optimised code is harder to understand.

The other goal was to hide C-style preprocessing directives as magic comments with no syntax checking. I'm not certain about that.

Somehow writing in Go reminds me of attempts in the 80's to write well-performing Basic.

ejolson
Posts: 13865
Joined: Tue Mar 18, 2014 11:47 am

Re: Advent of Code 2024

Fri Jun 20, 2025 4:49 am

ejolson wrote:
Sun May 25, 2025 6:13 am
indicates a factor

4.69 / 1.23 = 3.81

fold increase in performance over the parallel Go code and even more compared to Chapel.
There is a new version of Chapel released

https://www.hpcwire.com/2025/06/19/chap ... -speedups/

Among other things the speed of the compiler has been improved. That's good, because compilation speed was particularly problematic on a Pi.

ejolson
Posts: 13865
Joined: Tue Mar 18, 2014 11:47 am

Re: Advent of Code 2024

Tue Sep 30, 2025 8:56 pm

It's only October but I'm thinking about software and hardware to avoid the panic that happened last year.

viewtopic.php?p=2273355#p2273355

One idea for an artistic constraint to increase the aesthetic quality of the puzzle solutions is to do a week in Kotlin on a Raspberry Pi 500+ using the AI assistant in IntelliJ IDE. Purr suggests finishing the whole season with an AI assistant. That might be too limiting for me.

Another idea is to install a Sun 3 emulator and do a week using SunOS. Shy wants Hercules and a week in Fortran G. That might be fun.

I wonder if the first week could be done using the new Commodore C64.

ejolson
Posts: 13865
Joined: Tue Mar 18, 2014 11:47 am

Re: Advent of Code 2024

Wed Nov 12, 2025 5:05 am

ejolson wrote:
Tue Sep 30, 2025 8:56 pm
Another idea is to install a Sun 3 emulator and do a week using SunOS. Shy wants Hercules and a week in Fortran G. That might be fun.
From what I read
Eric Wastl wrote: It takes a ton of my free time every year to run Advent of Code, and building the puzzles accounts for the majority of that time. After keeping a consistent schedule for ten years(!), I needed a change. The puzzles still start on December 1st so that the day numbers make sense (Day 1 = Dec 1), and puzzles come out every day (ending mid-December).
https://adventofcode.com/

Twelve days of Advent means there won't be two full weeks, so I've cancelled Shy's plan of Sun the first week IBM the second. Fortunately, Shy does not look disappointed. With only half the puzzles I'm looking forward to finishing on time this year!

Purr purred, with only half as many puzzles each will be twice as difficult. At this Scratchy hissed, the puzzles seem harder, but only because AI is making people stupider. Fortunately cats are not affected. I'm not so sure about that.

233 posts

Return to "Teaching and learning resources"

AltStyle によって変換されたページ (->オリジナル) /