Re: Advent of Code 2024
I ported the changes tested in Go back to Chapel and obtainedejolson wrote: ↑Wed Apr 30, 2025 3:32 pmScratchy moved the allocation for mem outside the rungates routine and obtained.This saves another half second of runtime.Code: Select all
$ ./go24 # Pi 4B 1500 MHz Advent of Code 2024 Day 24 Crossed Wires (GOMAXPROCS=4) Part 1 The z wires output 61495910098126 Part 2 Swap wires css,cwt,gdd,jmv,pqt,z05,z09,z37 Total execution time 4.693544958 seconds.
Code: Select all
$ ./day24 -nl1 # Pi 4B 1500 MHz
Advent of Code 2024 Day 24 Crossed Wires (CHPL_RT_NUM_THREADS_PER_LOCALE=4)
Part 1 The z wires output 61495910098126
Part 2 Swap wires css,cwt,gdd,jmv,pqt,z05,z09,z37
Total execution time 5.37652 seconds.
The latest solution to day 24 in Chapel is
Code: Select all
/* Advent of Code 2024 Day 24 Crossed Wires
Written 2025 by Eric Olson
Parallel version.
Increase esize if this produces wrong answers. Increate ksize
if it fails to produce any answers.
*/
use IO,Time,List,Sort,Random;
type byte=uint(8);
const esize=20;
const ksize=10;
var xstart=0,ystart=0,zstart=0;
var xbits=0,ybits=0,zbits=0;
var myrnd=new randomStream(int);
var ncpus=here.maxTaskPar;
proc check(ref i:int,ref s:bytes,const p:bytes):bool {
if s.size<i+p.size {
return false;
}
for k in 0..<p.size {
if s[k+i]!=p[k] {
return false;
}
}
i+=p.size;
return true;
}
proc scanint(ref i:int,ref s:bytes):int {
const d0=b"0"[0],d9=b"9"[0],minus=b"-"[0];
var r=0,iold=i,m=-1;
while i<s.size {
var d=s[i];
if iold==i&&d==minus {
m=1;
} else if d>=d0 && d<=d9 {
r=r*10-(d-d0);
} else {
break;
}
i+=1;
}
return m*r;
}
proc wire2num(ref stab:[]bytes,ref w:bytes):int {
var ai=0,bi=stab.size,cold=-1;
while true {
var ci=(bi+ai)/2;
if ci==cold {
break;
}
if stab[ci]<w {
ai=ci;
} else if stab[ci]>w {
bi=ci;
} else {
return ci;
}
cold=ci;
}
return -1;
}
proc setinput(ref x:int,ref s:bytes) {
var w=s[0..2];
var iold,i=1;
iold=i;
var j=scanint(i,s);
if iold==i {
writeln("Can't find input bitnumber in ",s);
exit(1);
}
if !check(i,s,b": ") {
writeln("Missing : delimiter in ",s);
exit(1);
}
iold=i;
var b=scanint(i,s);
if iold==i {
writeln("Can't find input bit value in ",s);
exit(1);
}
if b<0||b>1 {
writeln("Bit value out of range in ",s);
exit(1);
}
var p=(1<<j);
if s[0]==b"x"[0] {
if xbits<=j {
xbits=j+1;
}
}
if s[0]==b"y"[0] {
if ybits<=j {
ybits=j+1;
}
}
if b==1 {
x|=p;
} else {
x&=~p;
}
}
type optype=proc(x:bool,y:bool):bool;
proc g_AND(x:bool,y:bool):bool {
return x&&y;
}
proc g_OR(x:bool,y:bool):bool {
return x||y;
}
proc g_XOR(x:bool,y:bool):bool {
return x!=y;
}
proc g_NUL(x:bool,y:bool):bool {
return false;
}
const g_OP:[0..2]optype=[g_AND,g_OR,g_XOR];
const s_OP:[0..2]bytes=[b"AND",b"OR",b"XOR"];
record gspec {
var x1,x2,y:int;
var f1,f2:bool;
var op:optype=g_NUL;
}
proc getgate(ref stab:[]bytes,ref s:bytes):gspec {
var v=s.split();
if v.size!=5 {
writeln("Gate ",s," doesn't have 4 fields!");
exit(1);
}
var r:gspec;
r.x1=wire2num(stab,v[0]);
for i in s_OP.domain {
if v[1]==s_OP[i] {
r.op=g_OP[i];
}
}
if r.op==g_NUL {
writeln("Gate ",s," unrecognizable operation!");
exit(1);
}
r.x2=wire2num(stab,v[2]);
if v[3]!=b"->" {
writeln("Gate ",s," missing -> assignment!");
exit(1);
}
r.y=wire2num(stab,v[4]);
return r;
}
proc mkorder(ref stab:[]bytes,ref gates:[]gspec):[]int {
var order:[0..<gates.size]int;
var op=0,oq=0;
proc setfn(ref fn:bool,c:byte){
if c==b"x"[0]||c==b"y"[0] {
fn=false;
} else {
fn=true;
}
}
for i in gates.domain {
setfn(gates[i].f1,stab[gates[i].x1][0]);
setfn(gates[i].f2,stab[gates[i].x2][0]);
if !gates[i].f1&&!gates[i].f2 {
order[oq]=i;
oq+=1;
}
}
var leaf:[0..<stab.size]list(int);
for i in gates.domain {
ref r=gates[i];
leaf[r.x1].pushBack(i);
leaf[r.x2].pushBack(i);
}
while op<oq {
var i=order[op];
ref r=gates[i];
if leaf[r.y].size==0 {
op+=1;
} else {
for j in leaf[r.y] {
if gates[j].x1==r.y {
gates[j].f1=false;
if !gates[j].f2 {
order[oq]=j;
oq+=1;
}
} else if gates[j].x2==r.y {
gates[j].f2=false;
if !gates[j].f1 {
order[oq]=j;
oq+=1;
}
}
}
op+=1;
}
}
return order;
}
proc rungates(ref order:[]int,ref mem:[]bool,
ref gates:[]gspec,xp:int,yp:int):int {
var x=xp,y=yp;
for j in 0..<xbits {
mem[xstart+j]=(x&1)==1;
x>>=1;
}
for j in 0..<ybits {
mem[ystart+j]=(y&1)==1;
y>>=1;
}
for i in order.domain {
ref r=gates[order[i]];
var x1=mem[r.x1];
var x2=mem[r.x2];
var y=r.op(x1,x2);
mem[r.y]=y;
}
var p=1;
var p1=0;
for j in 0..<zbits {
if mem[zstart+j] {
p1+=p;
}
p*=2;
}
return p1;
}
proc part1(ref stab:[]bytes,ref gates:[]gspec,
ref inputs:list(bytes)):int {
var x=0,y=0;
for s in inputs {
if s[0]==b"x"[0] {
setinput(x,s);
} else if s[0]==b"y"[0] {
setinput(y,s);
} else {
writeln("Unknown register input in ",s);
exit(1);
}
}
ref order=mkorder(stab,gates);
var mem:[0..<stab.size]bool;
return rungates(order,mem,gates,x,y);
}
proc loss(ref stab:[]bytes,ref gates:[]gspec,
ref ensemble:[](int,int)):int {
ref order=mkorder(stab,gates);
var mem:[0..<stab.size]bool;
var r=0;
for i in ensemble.domain {
var x=ensemble[i](0);
var y=ensemble[i](1);
var z=rungates(order,mem,gates,x,y);
var e=abs(x+y-z);
r+=e;
x=y;
}
return r;
}
record lspec {
var l:int;
var i,j:int;
}
proc tryswap(ref stab:[]bytes,ref gates:[]gspec,
ref ensemble:[](int,int),d:int):list(int) {
var losses:[0..<gates.size*(gates.size-1)/2]lspec;
var ij:[0..ncpus](int,int);
ij[0]=(1,0);
for n in 1..ncpus {
var (i,j)=ij[n-1];
var na=i*(i-1)/2+j;
var nb=n*losses.size/ncpus;
var delta=nb-na;
while true {
if i-j>delta {
j+=delta;
ij[n]=(i,j);
break;
}
delta-=i-j;
i+=1; j=0;
}
}
record rspec {
var i,j:int;
}
var rfound:sync rspec;
var rfzero=new rspec(0,0);
rfound.writeEF(rfzero);
coforall n in 1..ncpus {
var gatesn=gates;
var (i,j)=ij[n-1],(ib,jb)=ij[n];
var k=i*(i-1)/2+j;
while true {
gatesn[i].y<=>gatesn[j].y;
var ts=loss(stab,gatesn,ensemble);
gatesn[i].y<=>gatesn[j].y;
if ts==0 {
rfound.writeFF(new rspec(i,j));
} else {
losses[k].i=i;
losses[k].j=j;
losses[k].l=ts;
k+=1;
}
j+=1;
if j>=i {
j=0; i+=1;
}
if (i==ib&&j==jb)||rfound.readFF()!=rfzero {
break;
}
}
}
var rf=rfound.readFF();
if rf!=rfzero {
var r:list(int);
r.pushBack(rf.i);
r.pushBack(rf.j);
return r;
}
var klen=losses.size;
sort(losses[0..<klen]);
if klen>ksize {
klen=ksize;
}
if d==1 {
var r:list(int);
return r;
}
for k in 0..<klen {
var i=losses[k].i;
var j=losses[k].j;
gates[i].y<=>gates[j].y;
var r=tryswap(stab,gates,ensemble,d-1);
gates[i].y<=>gates[j].y;
if r.size>0 {
r.pushBack(i);
r.pushBack(j);
return r;
}
}
var r:list(int);
return r;
}
proc mkensemble():[](int,int) {
var ensemble:[0..<esize](int,int);
var xmask=1<<xbits-1;
var ymask=1<<ybits-1;
for i in ensemble.domain {
var x=myrnd.next()&xmask;
var y=myrnd.next()&ymask;
ensemble[i]=(x,y);
}
return ensemble;
}
proc part2(ref stab:[]bytes,ref gates:[]gspec):bytes {
ref ensemble=mkensemble();
var r=tryswap(stab,gates,ensemble,4);
var wll:[0..<r.size]bytes;
for i in wll.domain {
wll[i]=stab[gates[r[i]].y];
}
sort(wll);
var p2:bytes;
for i in wll.domain {
if i>0 {
p2+=b","+wll[i];
} else {
p2=wll[i];
}
}
return p2;
}
proc dowork(){
var io=open("day24.txt",ioMode.r);
var fp=io.reader(locking=false);
var s:bytes;
var inputs:list(bytes);
while fp.readLine(s,stripNewline=true) {
if s.size==0 {
break;
}
inputs.pushBack(s);
}
var assign:list(bytes);
while fp.readLine(s,stripNewline=true) {
assign.pushBack(s);
}
var slen=inputs.size+assign.size;
var stab:[0..<slen]bytes;
var i=0;
for s in inputs {
stab[i]=s[0..2];
i+=1;
}
for s in assign {
var v=s.split();
stab[i]=v[4];
i+=1;
}
sort(stab);
var x00=b"x00";
xstart=wire2num(stab,x00);
var y00=b"y00";
ystart=wire2num(stab,y00);
var z00=b"z00";
zstart=wire2num(stab,z00);
zbits=stab.size-zstart;
var gates:[0..<assign.size]gspec;
i=0;
for s in assign {
gates[i]=getgate(stab,s);
i+=1;
}
var p1=part1(stab,gates,inputs);
var p2=part2(stab,gates);
writeln("Part 1 The z wires output ",p1);
writeln("Part 2 Swap wires ",p2);
}
proc main(){
var t:stopwatch;
t.start();
writeln("Advent of Code 2024 Day 24 Crossed Wires (CHPL_RT_NUM_THREADS_PER_LOCALE=",ncpus,")\n");
dowork();
t.stop();
writeln("\nTotal execution time ",t.elapsed()," seconds.");
}
In summary, I found Chapel tedious because it took too long to compile and because some of the features seemed only half implemented. On the other hand, being able pass the domain for an array around as a first class object was nice as was the fact that support for parallel programming is built in. I did not try the GPU features, but CPU threads worked well.
Chapel supports slices, but bounds checking did not appear by default. This could be due to the high-performance computing audience.
One thing that strikes me is that semicolons were never a problem when I only wrote programs in C and Pascal, but after switching between C, Julia, Go and Chapel I forget semicolons where they belong and add them where they don't. I think the languages are different but too similar.
Last edited by ejolson on Tue May 27, 2025 12:14 am, edited 1 time in total.
Re: Advent of Code 2024
Looking through the forum there've been threads for Advent of Code starting in 2020.
https://forums.raspberrypi.com/viewtopic.php?t=293084
https://forums.raspberrypi.com/viewtopic.php?t=325236
https://forums.raspberrypi.com/viewtopic.php?t=343515
https://forums.raspberrypi.com/viewtopic.php?t=360011
and this one
https://forums.raspberrypi.com/viewtopic.php?t=380295
For the first three years I switched programming languages every week. The most memorable was a week in Scratch. The last two years I focused on one language that might be useful: In 2023 that was Julia and 2024 Chapel.
Julia and Chapel are both languages designed for numerics and high-performance computing on modern hardware. One could make a case it was useful to learn them better. At the same time, the three black kittens claim to learn more by taking one puzzle and solving it multiple times using different languages and hardware.
To that end I have challenged Scratchy, Purr and Shy to create a parallel solution for day 24 that runs on the super cheap cluster.
https://forums.raspberrypi.com/viewtopic.php?t=199994
The kittens are difficult to motivate so this may not work out, but I'm hoping.
Re: Advent of Code 2024
It looks like there has been some progress with the super cheap cluster. In order to keep the MPI parallel programming examples together, the program for solving the day 24 puzzle will be described at
viewtopic.php?p=2315193#p2315193
Re: Advent of Code 2024
The finished MPI code is atejolson wrote: ↑Thu May 15, 2025 4:49 amIt looks like there has been some progress with the super cheap cluster. In order to keep the MPI parallel programming examples together, the program for solving the day 24 puzzle will be described at
viewtopic.php?p=2315193#p2315193
viewtopic.php?p=2316257#p2316257
Since it's also possible to run MPI code on a multi-core SMP machine, I installed MPICH on the Pi 4.
The result
Code: Select all
$ mpirun -n 4 ./mpi24 # Pi 4B 1500 MHz
Advent of Code 2024 Day 24 Crossed Wires (mpi_size=4)
Part 1 The z wires output 61495910098126
Part 2 Swap wires css,cwt,gdd,jmv,pqt,z05,z09,z37
Total execution time 1.23296 seconds.
4.69 / 1.23 = 3.81
fold increase in performance over the parallel Go code and even more compared to Chapel.
The algorithm is basically the same. I was a little more careful allocating memory on the stack in C and represented the leaf list for the topological sort as a n x 3 array rather than an array of slices. Maybe this last change accounts for most of the improvement. It's difficult to tell without testing.
Re: Advent of Code 2024
Shy got curious and changed the leaf list in the Go program to be like the C code. This saved about a second. The new runtime is 3.22 seconds. That'sejolson wrote: ↑Sun May 25, 2025 6:13 amI was a little more careful allocating memory on the stack in C and represented the leaf list for the topological sort as a n x 3 array rather than an array of slices. Maybe this last change accounts for most of the improvement. It's difficult to tell without testing.
3.22 / 1.23 = 2.62
times slower than the C code. Shy looked shyly at the ground.
Purr stretched, yawned and meowed something about fish. At this I instrumented the code and found 240879 heap allocations are made during a run but no fish.
Unfortunately, there is no explicit way to allocate an array sized at runtime from the stack in Go and all the slices appear to escape to the heap. Maybe that's where the fish went as well.
According to Scratchy, one of the design goals for the Go programming language was to make any type of performance optimisation difficult. I think because optimised code is harder to understand.
The other goal was to hide C-style preprocessing directives as magic comments with no syntax checking. I'm not certain about that.
Somehow writing in Go reminds me of attempts in the 80's to write well-performing Basic.
Re: Advent of Code 2024
There is a new version of Chapel released
https://www.hpcwire.com/2025/06/19/chap ... -speedups/
Among other things the speed of the compiler has been improved. That's good, because compilation speed was particularly problematic on a Pi.
Re: Advent of Code 2024
It's only October but I'm thinking about software and hardware to avoid the panic that happened last year.
viewtopic.php?p=2273355#p2273355
One idea for an artistic constraint to increase the aesthetic quality of the puzzle solutions is to do a week in Kotlin on a Raspberry Pi 500+ using the AI assistant in IntelliJ IDE. Purr suggests finishing the whole season with an AI assistant. That might be too limiting for me.
Another idea is to install a Sun 3 emulator and do a week using SunOS. Shy wants Hercules and a week in Fortran G. That might be fun.
I wonder if the first week could be done using the new Commodore C64.
viewtopic.php?p=2273355#p2273355
One idea for an artistic constraint to increase the aesthetic quality of the puzzle solutions is to do a week in Kotlin on a Raspberry Pi 500+ using the AI assistant in IntelliJ IDE. Purr suggests finishing the whole season with an AI assistant. That might be too limiting for me.
Another idea is to install a Sun 3 emulator and do a week using SunOS. Shy wants Hercules and a week in Fortran G. That might be fun.
I wonder if the first week could be done using the new Commodore C64.
Re: Advent of Code 2024
From what I read
https://adventofcode.com/Eric Wastl wrote: It takes a ton of my free time every year to run Advent of Code, and building the puzzles accounts for the majority of that time. After keeping a consistent schedule for ten years(!), I needed a change. The puzzles still start on December 1st so that the day numbers make sense (Day 1 = Dec 1), and puzzles come out every day (ending mid-December).
Twelve days of Advent means there won't be two full weeks, so I've cancelled Shy's plan of Sun the first week IBM the second. Fortunately, Shy does not look disappointed. With only half the puzzles I'm looking forward to finishing on time this year!
Purr purred, with only half as many puzzles each will be twice as difficult. At this Scratchy hissed, the puzzles seem harder, but only because AI is making people stupider. Fortunately cats are not affected. I'm not so sure about that.
Return to "Teaching and learning resources"
Jump to
- Community
- General discussion
- Announcements
- Other languages
- Deutsch
- Español
- Français
- Italiano
- Nederlands
- 日本語
- Polski
- Português
- Русский
- Türkçe
- User groups and events
- Raspberry Pi Official Magazine
- Using the Raspberry Pi
- Beginners
- Troubleshooting
- Advanced users
- Assistive technology and accessibility
- Education
- Picademy
- Teaching and learning resources
- Staffroom, classroom and projects
- Astro Pi
- Mathematica
- High Altitude Balloon
- Weather station
- Programming
- C/C++
- Java
- Python
- Scratch
- Other programming languages
- Windows 10 for IoT
- Wolfram Language
- Bare metal, Assembly language
- Graphics programming
- OpenGLES
- OpenVG
- OpenMAX
- General programming discussion
- Projects
- Networking and servers
- Automation, sensing and robotics
- Graphics, sound and multimedia
- Other projects
- Media centres
- Gaming
- AIY Projects
- Hardware and peripherals
- Camera board
- Compute Module
- Official Display
- HATs and other add-ons
- Device Tree
- Interfacing (DSI, CSI, I2C, etc.)
- Keyboard computers (400, 500, 500+)
- Raspberry Pi Pico
- General
- SDK
- MicroPython
- Other RP2040 boards
- Zephyr
- Rust
- AI Accelerator
- AI Camera - IMX500
- Hailo
- Software
- Raspberry Pi OS
- Raspberry Pi Connect
- Raspberry Pi Desktop for PC and Mac
- Beta testing
- Other
- Android
- Debian
- FreeBSD
- Gentoo
- Linux Kernel
- NetBSD
- openSUSE
- Plan 9
- Puppy
- Arch
- Pidora / Fedora
- RISCOS
- Ubuntu
- Ye Olde Pi Shoppe
- For sale
- Wanted
- Off topic
- Off topic discussion