2
\$\begingroup\$

This is a follow up of the question I posted previously here.

I made some changes, based in the answer by MikeMB, making use of asynchronous calls (with std::async and std::future), instead of std::thread as before.

monte_carlo_pi.cpp:

#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <cstdint>
#include <thread>
#include <future>
#include <random>
#include <vector>
#include <chrono>
#include "big_wheel.hpp"
typedef struct
{
 double x, y, z;
} point_t;
static const double PI = 3.1415926535897932384626433832795;
static const std::uint32_t prng_max_value = big_wheel_engine<>::max();
double approximate_pi(std::uint32_t pc,std::uint32_t tp)
{
 return 4.0*(double(pc)/double(tp));
}
double pi_error(double x)
{
 return 100.0*((x-PI)/PI);
}
std::uint32_t generate_points(std::uint32_t nppt)
{
 // PRNGs are initialized and seeded within different threads,
 // instead of using a shared one
 std::random_device rd;
 std::uint32_t seed[16] = {0x0};
 for (int i=0;i<16;i++)
 seed[i] = rd();
 big_wheel_engine<> rgen(
 seed[ 0],seed[ 1],seed[ 2],seed[ 3],
 seed[ 4],seed[ 5],seed[ 6],seed[ 7],
 seed[ 8],seed[ 9],seed[10],seed[11],
 seed[12],seed[13],seed[14],seed[15]
 );
 std::uint32_t pcnt = 0;
 point_t rpt;
 for (std::uint32_t i=0;i<nppt;i++)
 {
 rpt.x = double(rgen())/static_cast<double>(prng_max_value);
 rpt.y = double(rgen())/static_cast<double>(prng_max_value);
 rpt.z = (rpt.x*rpt.x)+(rpt.y*rpt.y);
 if (rpt.z <= 1.0) pcnt++;
 }
 return pcnt;
}
int main(int argc,char **argv)
{
 // Check number of cores
 std::size_t num_cores = std::thread::hardware_concurrency() ? std::thread::hardware_concurrency() : 1;
 // Future set
 std::vector<std::future<std::uint32_t>> future_set;
 const std::uint32_t num_points = 3221225472;
 const std::uint32_t num_points_per_thread = num_points/num_cores;
 for (std::size_t i=0;i<num_cores;i++)
 future_set.push_back(
 std::async(
 std::launch::async,
 generate_points,
 num_points_per_thread
 )
 );
 // Print info
 std::cout << "Number of points: " << num_points << "\n";
 std::cout << "Number of cores: " << num_cores << "\n";
 std::cout << " ==> Number of points per thread: " << num_points_per_thread << "\n\n";
 std::cout << "Generating random points...\n";
 std::chrono::high_resolution_clock::time_point tpa = std::chrono::high_resolution_clock::now();
 // Number of points inside circle
 std::uint32_t pcnt = 0;
 // Sum points inside circle from all threads
 for (auto &ftr : future_set)
 pcnt += ftr.get();
 std::chrono::high_resolution_clock::time_point tpb = std::chrono::high_resolution_clock::now();
 std::chrono::duration<double> tm = std::chrono::duration_cast<std::chrono::duration<double>>(tpb-tpa);
 // Duration
 double dur = tm.count();
 // Approximate pi and calculate error
 const double gen_pi = approximate_pi(pcnt,num_points);
 const double pi_err = pi_error(gen_pi);
 std::cout << "\nResults:\n";
 std::cout << " Number of points within circle: " << pcnt << "\n";
 std::cout << " Pi approximation: " << std::setprecision(32) << gen_pi << "\n";
 std::cout << " Error: " << std::setprecision(10) << pi_err << "%\n";
 std::cout << "Time elapsed: " << std::setprecision(10) << dur << " seconds\n";
}

big_wheel.hpp:

#ifndef BIG_WHEEL_HPP
#define BIG_WHEEL_HPP
#include <iostream>
#include <cstdlib>
#include <cstdint>
#include <climits>
#include <limits>
template <
 std::uint32_t m = 0x3655accc,
 std::size_t rs1 = 11,std::size_t rs2 = 20,std::size_t rs3 = 14,std::size_t rs4 = 18,
 std::size_t rm1 = 16,std::size_t rm2 = 16,
 std::size_t rf = 7
>
class big_wheel_engine
{
 public:
 typedef std::uint32_t result_type;
 static constexpr std::size_t state_size = 4096;
 static constexpr std::uint32_t mix_mask = m;
 static constexpr std::size_t rot_sel1 = rs1;
 static constexpr std::size_t rot_sel2 = rs2;
 static constexpr std::size_t rot_sel3 = rs3;
 static constexpr std::size_t rot_sel4 = rs4;
 static constexpr std::size_t rot_mix1 = rm1;
 static constexpr std::size_t rot_mix2 = rm2;
 static constexpr std::size_t rot_final = rf;
 big_wheel_engine(
 const result_type& s01 = 0x0,const result_type& s02 = 0x0,const result_type& s03 = 0x0,const result_type& s04 = 0x0,
 const result_type& s05 = 0x0,const result_type& s06 = 0x0,const result_type& s07 = 0x0,const result_type& s08 = 0x0,
 const result_type& s09 = 0x0,const result_type& s10 = 0x0,const result_type& s11 = 0x0,const result_type& s12 = 0x0,
 const result_type& s13 = 0x0,const result_type& s14 = 0x0,const result_type& s15 = 0x0,const result_type& s16 = 0x0)
 {
 // Inverse mask
 m_i = ~m;
 seed(s01,s02,s03,s04,
 s05,s06,s07,s08,
 s09,s10,s11,s12,
 s13,s14,s15,s16);
 }
 big_wheel_engine(const big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>& x)
 {
 for (std::size_t i=0;i<state_size;i++)
 state[i] = x.state[i];
 counter = x.counter;
 }
 big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>& operator=(const big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>& x)
 {
 for (std::size_t i=0;i<state_size;i++)
 state[i] = x.state[i];
 counter = x.counter;
 return *this;
 }
 void seed(
 const result_type& s01 = 0x0,const result_type& s02 = 0x0,const result_type& s03 = 0x0,const result_type& s04 = 0x0,
 const result_type& s05 = 0x0,const result_type& s06 = 0x0,const result_type& s07 = 0x0,const result_type& s08 = 0x0,
 const result_type& s09 = 0x0,const result_type& s10 = 0x0,const result_type& s11 = 0x0,const result_type& s12 = 0x0,
 const result_type& s13 = 0x0,const result_type& s14 = 0x0,const result_type& s15 = 0x0,const result_type& s16 = 0x0)
 {
 counter = 0;
 // Based on decimal part of sqrt(61/19)
 result_type off1[16] = {
 0xcab305ab,0x22132737,0xbda768dd,0x2438d3ab,
 0x00f4ca5a,0xd4681d61,0xc6420bc7,0x2828bdba,
 0x64806a32,0x2fa2391f,0xbc61cbca,0xcc6eb7a0,
 0xfa01a2cd,0xb766ce25,0x5499507a,0x19c839a0
 };
 // Based on decimal part of sqrt(73/29)
 result_type off2[16] = {
 0x962a374b,0x0fc75efe,0x86dfdeff,0xddb14a03,
 0xf61361c6,0xc908fde9,0x27df3b7d,0x04ae7492,
 0x5ee38142,0xfb21ee51,0x56fae74d,0x4f688f90,
 0x7889f34c,0x44e4aa33,0x547e3778,0xb6201d7b
 };
 // Based on decimal part of sqrt(79/31)
 result_type off3[16] = {
 0x98ab7f56,0x38ca9f10,0xa2f3d406,0x322c4113,
 0xb8d42dd6,0xe6490633,0x4c5bdcf7,0x454c3122,
 0x7b22e315,0x1a205e44,0xc6d1b31b,0x06eb7dea,
 0x875f1cf1,0xbe9bec2a,0x0350aa26,0x8b3d5aed
 };
 result_type sset[16] = {
 s01,s02,s03,s04,
 s05,s06,s07,s08,
 s09,s10,s11,s12,
 s13,s14,s15,s16
 };
 result_type st_a[16] = {0x0};
 result_type st_b[256] = {0x0};
 result_type st_c[4096] = {0x0};
 result_type rn_1[4096] = {0x0};
 result_type rn_2[4096] = {0x0};
 result_type rn_3[4096] = {0x0};
 result_type rn_4[4096] = {0x0};
 // Expansion
 for (std::size_t i=0;i<16;i++)
 {
 st_a[i] = sset[i]+off1[i];
 st_a[i] = smix(st_a[i],sbox_i1);
 }
 for (std::size_t i=0;i<16;i++)
 {
 for (std::size_t j=0;j<16;j++)
 {
 st_b[16*i+j] = st_a[i]+off2[j];
 st_b[16*i+j] = smix(st_b[16*i+j],sbox_i2);
 }
 }
 for (std::size_t i=0;i<256;i++)
 {
 for (std::size_t j=0;j<16;j++)
 {
 st_c[16*i+j] = st_b[i]+off3[j];
 st_c[16*i+j] = smix(st_c[16*i+j],sbox_i3);
 }
 }
 // Mixing
 for (std::size_t i=0;i<4096;i++)
 {
 rn_1[i] = (st_c[(i+ 0)%4096] << 13) | (st_c[(i+ 4)%4096] >> 19);
 rn_1[i] = smix(rn_1[i],sbox_i4);
 }
 for (std::size_t i=0;i<4096;i++)
 {
 rn_2[i] = (rn_1[(i+10)%4096] << 11) | (rn_1[(i+18)%4096] >> 21);
 rn_2[i] = smix(rn_2[i],sbox_i4);
 }
 for (std::size_t i=0;i<4096;i++)
 {
 rn_3[i] = (rn_2[(i+28)%4096] << 17) | (rn_2[(i+40)%4096] >> 15);
 rn_3[i] = smix(rn_3[i],sbox_i4);
 }
 for (std::size_t i=0;i<4096;i++)
 {
 rn_4[i] = (rn_3[(i+54)%4096] << 10) | (rn_3[(i+70)%4096] >> 22);
 rn_4[i] = smix(rn_4[i],sbox_i4);
 }
 for (std::size_t i=0;i<4096;i++)
 {
 state[i] = (rn_4[(i+88)%4096] << 16) | (rn_4[(i+108)%4096] >> 16);
 state[i] = smix(state[i],sbox_i5);
 }
 }
 // Get next random number
 result_type operator()()
 {
 // Temporal variables p[1..9] and tmp_xx are declared here
 // Elements from state to mix
 const std::size_t p1 = (counter+ 0)%4096;
 const std::size_t p2 = (counter+ 8)%4096;
 const std::size_t p3 = (counter+ 18)%4096;
 const std::size_t p4 = (counter+ 30)%4096;
 const std::size_t p5 = (counter+ 44)%4096;
 const std::size_t p6 = (counter+ 60)%4096;
 const std::size_t p7 = (counter+ 78)%4096;
 const std::size_t p8 = (counter+ 98)%4096;
 const std::size_t p9 = (counter+120)%4096;
 // Start mixing
 const result_type tmp_11 = (state[p1] << rs1) | (state[p2] >> (32-rs1));
 const result_type tmp_12 = (state[p3] << rs2) | (state[p4] >> (32-rs2));
 const result_type tmp_13 = (state[p5] << rs3) | (state[p6] >> (32-rs3));
 const result_type tmp_14 = (state[p7] << rs4) | (state[p8] >> (32-rs4));
 const result_type tmp_21 = ((tmp_11 << rm1) | (tmp_12 >> (32-rm1)));
 const result_type tmp_22 = ((tmp_13 << rm2) | (tmp_14 >> (32-rm2)));
 const result_type tmp_3 = (tmp_21 & m) | (tmp_22 & m_i);
 const result_type tmp_4 = rot_r(state[p9] ^ tmp_3,rf);
 state[p9] = smix(tmp_4,sbox_r);
 // For jumping to next elements
 counter++;
 return state[p9];
 }
 // Discard z elements
 void discard(unsigned long long z)
 {
 for (unsigned long long i=0;i<z;i++)
 (*this)();
 }
 // Minimum and maximum
 static constexpr result_type min()
 {
 return std::numeric_limits<result_type>::min();
 }
 static constexpr result_type max()
 {
 return std::numeric_limits<result_type>::max();
 }
 // Comparision
 friend bool operator==(
 const big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>& lhs,
 const big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>& rhs)
 {
 if (lhs.counter != rhs.counter)
 return false;
 for (std::size_t i=0;i<state_size;i++)
 {
 if (lhs.state[i] != rhs.state[i])
 return false;
 else
 continue;
 }
 return true;
 }
 friend bool operator!=(
 const big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>& lhs,
 const big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>& rhs)
 {
 return !(lhs == rhs);
 }
 // Stream ops
 template <class CharT,class Traits>
 friend std::basic_ostream<CharT,Traits>& operator<<(
 std::basic_ostream<CharT,Traits>& os,
 const big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>& bw)
 {
 os << bw.counter;
 for (std::size_t i=0;i<bw.state_size;i++)
 os << ' ' << bw.state[i];
 return os;
 }
 template <class CharT,class Traits>
 friend std::basic_istream<CharT,Traits>& operator>>(
 std::basic_istream<CharT,Traits>& is,
 const big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>& bw)
 {
 is >> bw.counter >> std::ws;
 for (std::size_t i=0;i<bw.state_size;i++)
 is >> bw.state[i] >> std::ws;
 return is;
 }
 private:
 // State
 result_type state[4096];
 // Reverse mask
 result_type m_i;
 // Counter (for picking elements from state)
 std::size_t counter;
 // S-boxes:
 // - sbox_i{1,2,3,4,5}: For initialization
 // -- i{1,2,3}: Seed expansion
 // -- i{4,5}: Mixing
 // - sbox_r: For generating the next random number
 static std::uint32_t sbox_i1[256];
 static std::uint32_t sbox_i2[256];
 static std::uint32_t sbox_i3[256];
 static std::uint32_t sbox_i4[256];
 static std::uint32_t sbox_i5[256];
 static std::uint32_t sbox_r[256];
 // Left/right bit rotation
 template <typename D> D rot_l(D x,unsigned r) { return ((x << r) | (x >> (CHAR_BIT*sizeof(D)-r))); }
 template <typename D> D rot_r(D x,unsigned r) { return ((x >> r) | (x << (CHAR_BIT*sizeof(D)-r))); }
 // Mix x with an S-box sb
 result_type smix(result_type x,std::uint32_t *sb)
 {
 return result_type(
 (sb[(x >> 24)&0xff] << 24)|
 (sb[(x >> 16)&0xff] << 16)|
 (sb[(x >> 8)&0xff] << 8)|
 (sb[(x >> 0)&0xff] << 0)
 );
 }
};
// The S-boxes (randomly generated)
template <
 std::uint32_t m,
 std::size_t rs1,std::size_t rs2,std::size_t rs3,std::size_t rs4,
 std::size_t rm1,std::size_t rm2,
 std::size_t rf
>
std::uint32_t big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>::sbox_i1[256] = {
 0x36,0x09,0x2d,0x7f,0x28,0x8a,0x51,0x81,0x2b,0x69,0x90,0x58,0xaf,0x21,0xda,0x71,
 0x8d,0x4f,0xb9,0xa4,0x91,0xbc,0xde,0x3f,0xad,0x6e,0xbd,0x5a,0xbb,0x92,0x1a,0x5d,
 0x49,0x6a,0x94,0x0a,0xaa,0x30,0xf3,0x73,0x37,0xfa,0xcf,0x7e,0x76,0xdf,0xb1,0x56,
 0xd5,0x78,0x12,0x08,0xe3,0xf1,0x07,0x9d,0x98,0xc5,0xe6,0xcd,0x34,0xf8,0xfd,0xd0,
 0x2f,0x84,0x66,0x4b,0x3c,0x4e,0xc6,0x63,0x99,0x57,0x60,0x54,0x97,0x9f,0x39,0xf0,
 0x59,0xa1,0xcb,0x18,0xed,0x0b,0xd1,0xdb,0x44,0x1e,0x0f,0x72,0x2c,0x5b,0x27,0xc9,
 0x55,0x1c,0x41,0xfc,0x11,0x50,0x4d,0x9b,0x88,0xea,0xd6,0xc8,0x45,0xa2,0xcc,0x8c,
 0xa6,0x61,0xb5,0x01,0xb4,0x6d,0x93,0xc3,0xab,0xdd,0x9e,0x6c,0x38,0xe7,0x47,0xe9,
 0x70,0x31,0xe8,0x03,0x52,0x75,0xe0,0xb6,0x53,0x40,0x46,0x0e,0x3a,0x5e,0x79,0x5c,
 0xc2,0x7c,0x7a,0x6f,0xe1,0xd4,0xb8,0x85,0xee,0x22,0xb0,0x80,0x42,0xf6,0xec,0x24,
 0xe2,0x9a,0x14,0x8f,0xac,0x15,0x10,0x82,0xa3,0x20,0x3b,0xf2,0x68,0x4c,0xd3,0x9c,
 0xca,0x48,0xfe,0x00,0x04,0xc0,0x0c,0xba,0xb7,0xc7,0x3e,0xff,0xd8,0xfb,0x6b,0x65,
 0x26,0x17,0x8b,0xf9,0x02,0xb2,0xe4,0xbf,0x8e,0xc1,0x62,0xd7,0x1d,0x77,0x2e,0x96,
 0x25,0x33,0x5f,0xef,0x35,0x43,0xf5,0xf7,0xa0,0x64,0x05,0xc4,0x1b,0xd2,0x0d,0x16,
 0xdc,0x86,0x7d,0xeb,0x87,0xa5,0x83,0xf4,0x23,0xd9,0x4a,0x95,0x1f,0x7b,0x74,0xa9,
 0x2a,0x13,0x06,0x29,0xae,0xce,0x89,0xa7,0x3d,0x19,0x67,0xa8,0x32,0xb3,0xbe,0xe5
};
template <
 std::uint32_t m,
 std::size_t rs1,std::size_t rs2,std::size_t rs3,std::size_t rs4,
 std::size_t rm1,std::size_t rm2,
 std::size_t rf
>
std::uint32_t big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>::sbox_i2[256] = {
 0x2d,0xa9,0x38,0x42,0x24,0xd1,0x55,0x8a,0x3c,0x22,0x84,0x7a,0xd6,0xbc,0x2a,0xb7,
 0x99,0xd7,0xe6,0x2f,0x9e,0x1f,0x1e,0x0a,0xd0,0x97,0x45,0xe9,0x90,0x8f,0xe7,0x35,
 0x40,0x6e,0x9a,0xeb,0x28,0x75,0x03,0x5a,0x29,0x34,0x6b,0x17,0xaa,0x4a,0x6d,0x00,
 0x36,0xd8,0x71,0x9b,0xf1,0x25,0x5b,0x46,0x12,0x69,0x9d,0xb9,0x66,0xf7,0x74,0x95,
 0x61,0x48,0x1a,0x23,0xcb,0x5c,0xcd,0x70,0x18,0xb3,0x76,0x0e,0x37,0xc8,0x3a,0x92,
 0x59,0xdd,0x26,0xaf,0xb0,0x8e,0x44,0xce,0xe3,0xcf,0x4f,0x0d,0x4c,0x19,0x49,0xca,
 0x07,0xc7,0xd2,0x72,0x7b,0x3e,0xc5,0xc1,0xc2,0x91,0x86,0x4b,0x57,0xe0,0x02,0xb6,
 0xf4,0x7e,0x39,0xd9,0x20,0x65,0xac,0x87,0xb8,0x33,0xe5,0xb2,0xa1,0x96,0x64,0xa6,
 0x2b,0x01,0x85,0x50,0xed,0x27,0xff,0x62,0x30,0x80,0xc9,0xa4,0x31,0xde,0x21,0x7c,
 0xdc,0xf6,0xae,0x52,0x8c,0x51,0x32,0xbf,0x0c,0xa8,0xb4,0xe2,0xe8,0x94,0x14,0x43,
 0xd3,0xdf,0x58,0x77,0x15,0x0b,0xad,0xab,0x7f,0xbd,0xbb,0xf5,0xf9,0x1c,0x5d,0x04,
 0xdb,0x54,0xa3,0x47,0x2e,0xfd,0x67,0x53,0x60,0xee,0x6a,0xa2,0xd4,0xfe,0x8b,0x82,
 0x4e,0xc6,0xc4,0x98,0xb5,0xbe,0x06,0x3d,0x13,0xf8,0xe1,0x3b,0x81,0xa7,0x79,0x8d,
 0xf2,0xb1,0xd5,0x2c,0xc3,0x1d,0x1b,0x73,0x9f,0x6f,0xba,0xcc,0xfc,0xea,0x09,0xa0,
 0xec,0x10,0xc0,0x4d,0x11,0x0f,0x88,0x08,0x5f,0x56,0xf0,0x5e,0x63,0xfa,0x05,0x78,
 0x3f,0x7d,0x41,0xfb,0x68,0xa5,0x89,0xf3,0xef,0x83,0x6c,0x16,0xe4,0xda,0x9c,0x93
};
template <
 std::uint32_t m,
 std::size_t rs1,std::size_t rs2,std::size_t rs3,std::size_t rs4,
 std::size_t rm1,std::size_t rm2,
 std::size_t rf
>
std::uint32_t big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>::sbox_i3[256] = {
 0x06,0x38,0x28,0x6a,0x51,0xb1,0x0a,0x55,0xf9,0xa3,0x73,0x11,0xed,0x3a,0x43,0x83,
 0x54,0x74,0x89,0xe0,0x85,0x10,0xcc,0x6f,0x64,0x2b,0x71,0xc1,0xa4,0x5b,0x42,0x92,
 0xc8,0x99,0x4a,0x8f,0xda,0xbc,0xb7,0x5c,0x81,0x16,0x52,0x50,0x82,0x61,0x3f,0xf5,
 0x1b,0x12,0x90,0x98,0x02,0x05,0x8d,0xa6,0x5a,0xbf,0xe9,0xa7,0x24,0x56,0xfd,0x40,
 0x33,0xe6,0x17,0x72,0xf8,0x18,0xd1,0xac,0xa2,0xfe,0x0f,0x88,0x39,0x09,0x6c,0x0e,
 0x67,0x7c,0x84,0xeb,0x8a,0xba,0x63,0x03,0x75,0xcd,0x93,0x97,0xab,0x6d,0x76,0xdc,
 0xad,0x53,0xb9,0x8e,0xd2,0x4b,0xf2,0xa5,0x4f,0x77,0x7b,0xd7,0x91,0x9f,0x20,0x70,
 0xcb,0xc3,0xb4,0x9c,0xa9,0xd8,0x9d,0xb5,0xca,0x7a,0x1a,0xc4,0x0d,0x19,0xc6,0x30,
 0xf0,0x5d,0x65,0x79,0x7e,0x2c,0x31,0x13,0x9e,0xbd,0x44,0xbe,0x57,0xb2,0xc5,0x80,
 0xe3,0x27,0xd0,0xec,0x48,0x4e,0xae,0x86,0x8c,0xf4,0x3b,0xfb,0xb3,0xa8,0x69,0xb8,
 0x1f,0x0b,0xcf,0x4c,0x45,0xaa,0xdf,0xf7,0xe8,0x68,0xef,0x58,0x34,0x7d,0xbb,0xf6,
 0x2e,0x22,0xdb,0x35,0xd6,0x6b,0x1d,0x5e,0xc0,0xa1,0x3d,0xfa,0xb0,0xff,0x60,0x2a,
 0x41,0x5f,0xc7,0xd5,0x4d,0x1e,0x2f,0x01,0xea,0x49,0xdd,0x26,0xb6,0x78,0xde,0x25,
 0x36,0xf1,0x0c,0xe1,0x04,0x32,0x14,0x3e,0x87,0x6e,0x96,0xe5,0xce,0x95,0xe2,0xaf,
 0xf3,0xa0,0x46,0x37,0x15,0x1c,0x00,0x2d,0x7f,0xe4,0x62,0xee,0x3c,0x08,0x47,0x9b,
 0x66,0xfc,0x21,0xd3,0x94,0x59,0x8b,0x07,0xd4,0x29,0xc9,0x23,0xc2,0x9a,0xe7,0xd9
};
template <
 std::uint32_t m,
 std::size_t rs1,std::size_t rs2,std::size_t rs3,std::size_t rs4,
 std::size_t rm1,std::size_t rm2,
 std::size_t rf
>
std::uint32_t big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>::sbox_i4[256] = {
 0x1a,0x6b,0xfd,0xd0,0x0d,0x11,0x50,0x9e,0x1f,0xd3,0x95,0xca,0xbc,0x0c,0x80,0x29,
 0x47,0xa4,0x3f,0x3c,0x2c,0x76,0x3a,0x91,0x60,0x8d,0x39,0x9b,0x7e,0x6d,0x1b,0x12,
 0x7f,0xf3,0xab,0xb6,0x63,0x7b,0x53,0x31,0xd6,0x4d,0xbb,0xcb,0x56,0xda,0x15,0xbe,
 0x67,0xfb,0xa9,0xdb,0x69,0x97,0x42,0xf1,0x5c,0xc5,0xb3,0x34,0xa3,0x6f,0x51,0x4f,
 0xc2,0x70,0xf8,0x52,0x92,0x81,0xd1,0x0f,0xba,0x3d,0x4c,0xb0,0x48,0xff,0xd2,0x7d,
 0xaf,0x1c,0x65,0xe8,0x4b,0x5f,0x94,0xcc,0x84,0x09,0x64,0x2e,0xb1,0x77,0x96,0xea,
 0x79,0x24,0xb8,0x0e,0x90,0x33,0xf9,0xb2,0x57,0x28,0xe9,0x04,0x03,0xa1,0x17,0xe7,
 0x82,0xaa,0x19,0x83,0x43,0x9a,0xfe,0xfc,0xc6,0xef,0x1e,0xa6,0x5b,0xbd,0xdd,0x08,
 0xb9,0x16,0x9d,0xf2,0x20,0x78,0x06,0xae,0xf0,0xc3,0x59,0x45,0xb7,0xcd,0xa8,0x05,
 0xeb,0x26,0x02,0xf6,0x6e,0x8a,0x14,0x98,0x0b,0x25,0xdc,0x7c,0xc1,0x6a,0x86,0x71,
 0xed,0x68,0xec,0x85,0x35,0x87,0x18,0x93,0x46,0x23,0xd8,0x6c,0xbf,0x61,0x4a,0xac,
 0xcf,0xe3,0x1d,0xb5,0x5e,0xa0,0x9c,0xf7,0x55,0x73,0x2a,0xad,0x00,0xc8,0x75,0xf5,
 0xb4,0x8f,0xc7,0x36,0xe5,0x41,0xde,0xee,0xa7,0x9f,0xce,0xd5,0x21,0x8e,0xa5,0x32,
 0x2b,0x54,0x99,0x7a,0xe0,0x58,0xfa,0x30,0x5d,0x44,0x8b,0x8c,0x4e,0x72,0x66,0xc4,
 0x40,0x2f,0xe6,0x5a,0x2d,0x37,0x01,0x27,0xa2,0xe1,0x22,0x10,0xd7,0x3e,0x89,0x74,
 0xc0,0x38,0x88,0xd9,0x62,0x49,0xdf,0x3b,0xd4,0x0a,0x13,0xf4,0xe4,0x07,0xe2,0xc9
};
template <
 std::uint32_t m,
 std::size_t rs1,std::size_t rs2,std::size_t rs3,std::size_t rs4,
 std::size_t rm1,std::size_t rm2,
 std::size_t rf
>
std::uint32_t big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>::sbox_i5[256] = {
 0x28,0x21,0xde,0xd7,0x8f,0x86,0xba,0xb3,0xeb,0xe2,0x7c,0x75,0x2d,0x24,0x18,0x11,
 0x49,0x40,0x46,0x4f,0x17,0x1e,0x22,0x2b,0x73,0x7a,0xe4,0xed,0xb5,0xbc,0x80,0x89,
 0xd1,0xd8,0x4b,0x42,0x1a,0x13,0x2f,0x26,0x7e,0x77,0xe9,0xe0,0xb8,0xb1,0x8d,0x84,
 0xdc,0xd5,0xd3,0xda,0x82,0x8b,0xb7,0xbe,0xe6,0xef,0x71,0x78,0x20,0x29,0x15,0x1c,
 0x44,0x4d,0x98,0x91,0xc9,0xc0,0xfc,0xf5,0xad,0xa4,0x3a,0x33,0x6b,0x62,0x5e,0x57,
 0x0f,0x06,0x00,0x09,0x51,0x58,0x64,0x6d,0x35,0x3c,0xa2,0xab,0xf3,0xfa,0xc6,0xcf,
 0x97,0x9e,0x0d,0x04,0x5c,0x55,0x69,0x60,0x38,0x31,0xaf,0xa6,0xfe,0xf7,0xcb,0xc2,
 0x9a,0x93,0x95,0x9c,0xc4,0xcd,0xf1,0xf8,0xa0,0xa9,0x37,0x3e,0x66,0x6f,0x53,0x5a,
 0x02,0x0b,0xf4,0xfd,0xa5,0xac,0x90,0x99,0xc1,0xc8,0x56,0x5f,0x07,0x0e,0x32,0x3b,
 0x63,0x6a,0x6c,0x65,0x3d,0x34,0x08,0x01,0x59,0x50,0xce,0xc7,0x9f,0x96,0xaa,0xa3,
 0xfb,0xf2,0x61,0x68,0x30,0x39,0x05,0x0c,0x54,0x5d,0xc3,0xca,0x92,0x9b,0xa7,0xae,
 0xf6,0xff,0xf9,0xf0,0xa8,0xa1,0x9d,0x94,0xcc,0xc5,0x5b,0x52,0x0a,0x03,0x3f,0x36,
 0x6e,0x67,0xb2,0xbb,0xe3,0xea,0xd6,0xdf,0x87,0x8e,0x10,0x19,0x41,0x48,0x74,0x7d,
 0x25,0x2c,0x2a,0x23,0x7b,0x72,0x4e,0x47,0x1f,0x16,0x88,0x81,0xd9,0xd0,0xec,0xe5,
 0xbd,0xb4,0x27,0x2e,0x76,0x7f,0x43,0x4a,0x12,0x1b,0x85,0x8c,0xd4,0xdd,0xe1,0xe8,
 0xb0,0xb9,0xbf,0xb6,0xee,0xe7,0xdb,0xd2,0x8a,0x83,0x1d,0x14,0x4c,0x45,0x79,0x70
};
template <
 std::uint32_t m,
 std::size_t rs1,std::size_t rs2,std::size_t rs3,std::size_t rs4,
 std::size_t rm1,std::size_t rm2,
 std::size_t rf
>
std::uint32_t big_wheel_engine<m,rs1,rs2,rs3,rs4,rm1,rm2,rf>::sbox_r[256] = {
 0xe9,0xe0,0xdc,0xd5,0x8d,0x84,0x1a,0x13,0x4b,0x42,0x7e,0x77,0x2f,0x26,0x20,0x29,
 0x71,0x78,0x44,0x4d,0x15,0x1c,0x82,0x8b,0xd3,0xda,0xe6,0xef,0xb7,0xbe,0x6b,0x62,
 0x3a,0x33,0x0f,0x06,0x5e,0x57,0xc9,0xc0,0x98,0x91,0xad,0xa4,0xfc,0xf5,0xf3,0xfa,
 0xa2,0xab,0x97,0x9e,0xc6,0xcf,0x51,0x58,0x00,0x09,0x35,0x3c,0x64,0x6d,0xfe,0xf7,
 0xaf,0xa6,0x9a,0x93,0xcb,0xc2,0x5c,0x55,0x0d,0x04,0x38,0x31,0x69,0x60,0x66,0x6f,
 0x37,0x3e,0x02,0x0b,0x53,0x5a,0xc4,0xcd,0x95,0x9c,0xa0,0xa9,0xf1,0xf8,0x07,0x0e,
 0x56,0x5f,0x63,0x6a,0x32,0x3b,0xa5,0xac,0xf4,0xfd,0xc1,0xc8,0x90,0x99,0x9f,0x96,
 0xce,0xc7,0xfb,0xf2,0xaa,0xa3,0x3d,0x34,0x6c,0x65,0x59,0x50,0x08,0x01,0x92,0x9b,
 0xc3,0xca,0xf6,0xff,0xa7,0xae,0x30,0x39,0x61,0x68,0x54,0x5d,0x05,0x0c,0x0a,0x03,
 0x5b,0x52,0x6e,0x67,0x3f,0x36,0xa8,0xa1,0xf9,0xf0,0xcc,0xc5,0x9d,0x94,0x41,0x48,
 0x10,0x19,0x25,0x2c,0x74,0x7d,0xe3,0xea,0xb2,0xbb,0x87,0x8e,0xd6,0xdf,0xd9,0xd0,
 0x88,0x81,0xbd,0xb4,0xec,0xe5,0x7b,0x72,0x2a,0x23,0x1f,0x16,0x4e,0x47,0xd4,0xdd,
 0x85,0x8c,0xb0,0xb9,0xe1,0xe8,0x76,0x7f,0x27,0x2e,0x12,0x1b,0x43,0x4a,0x4c,0x45,
 0x1d,0x14,0x28,0x21,0x79,0x70,0xee,0xe7,0xbf,0xb6,0x8a,0x83,0xdb,0xd2,0x2d,0x24,
 0x7c,0x75,0x49,0x40,0x18,0x11,0x8f,0x86,0xde,0xd7,0xeb,0xe2,0xba,0xb3,0xb5,0xbc,
 0xe4,0xed,0xd1,0xd8,0x80,0x89,0x17,0x1e,0x46,0x4f,0x73,0x7a,0x22,0x2b,0xb8,0xb1
};
#endif

Any improvements and suggestions, especially about the use of multithreading and asynchronous calling, are welcome.

asked Dec 29, 2016 at 6:32
\$\endgroup\$

0

Know someone who can answer? Share a link to this question via email, Twitter, or Facebook.

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.