I'm worried about my code performance. The code I've written is an implementation of SHA-256 using pseudocode from Wikipedia. It works fine, but I want to speed it up. With a buffer of 4096 bytes, I only get 40 Mb/s, whereas commercial software gets 120Mb/s. I've degugged the code, and it seems that the problem is in the loop: it computes the SHA-256 slowly.
Can anybody help me or offer advice?
void sha256_file(HWND hDlg, TCHAR *filename, unsigned int *sha256)
{
unsigned long long lenBits, lenBytes, lenPadding, ctBuffer, index, j, i, nBlocks, chunk, pr, temp;
unsigned int w[64], h0, h1, h2, h3, h4, h5, h6, h7, ch, z, ct , memR;
unsigned int s0, s1, a, b, c, d, e, f, g, h, maj, t1, t2;
unsigned char *padding;
TCHAR szPr[20];
HWND hPb;
HANDLE hf, hp;
LARGE_INTEGER fSize;
DWORD bytesRead;
//Create the file
hf = CreateFile(filename, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
if(hf == NULL)
{
MessageBox(hDlg, TEXT("Can't create file!"), TEXT("SHA - 256 Calc"), MB_OK);
exit(1);
}
if(GetLastError() == ERROR_FILE_NOT_FOUND)
{
MessageBox(hDlg, TEXT("File not found!"), TEXT("SHA - 256 Calc"), MB_OK | MB_ICONEXCLAMATION);
exit(1);
}
hp = HeapCreate(HEAP_GENERATE_EXCEPTIONS, 0, 0);
if(hp == NULL)
{
MessageBox(hDlg, TEXT("Can't create heap!"), TEXT("SHA - 256 Calc"), MB_OK);
exit(1);
}
//Get Process bar handle
hPb = GetDlgItem(hDlg, IDC_PROGRESS1);
//Set range 0-100
SendMessage(hPb, PBM_SETRANGE, 0, MAKELPARAM(0, 100));
SendMessage(hPb, PBM_SETSTEP, (WPARAM) 1, 0);
SendMessage(hPb, PBM_SETPOS, (WPARAM) 0, 0);
//Get the size of file
GetFileSizeEx(hf, &fSize);
//Get 64 bit size
lenBytes = (unsigned long long)fSize.QuadPart;
//len message in bits
lenBits = lenBytes << 3;
//Blocks of message
nBlocks = ((lenBytes + 8) >> 6) + 1;
lenPadding = lenBytes + 1;
//Calculate padding len msg MOD 512 == 448
while((lenPadding & 0x3f) != 56) lenPadding++;
lenPadding -= lenBytes;
memR = (unsigned int) lenPadding;
memR += 8;
//Allocate memory for padding
padding = (unsigned char *) HeapAlloc(hp, HEAP_ZERO_MEMORY, memR);
if(padding == NULL)
{
MessageBox(hDlg, TEXT("Can't create heap!"), TEXT("SHA - 256 Calc"), MB_OK);
exit(1);
}
//Append 1 bit to message
padding[0] = 0x80;
//Append 64 bit len of message big endian
for(i = (memR - 1); i >= (memR - 8); i--)
{
padding[i] = (unsigned char) lenBits;
lenBits >>= 8;
}
//Hash vars
h0 = H0;
h1 = H1;
h2 = H2;
h3 = H3;
h4 = H4;
h5 = H5;
h6 = H6;
h7 = H7;
index = 0;
ctBuffer = 0;
temp = 0;
pr = 0;
ct = 0;
//Read the first chunk before enter in main loop
ReadFile(hf, buffer, BUFFER_SIZE, &bytesRead, NULL);
for(chunk = 0; chunk < nBlocks ; chunk++)
{
//Handle messages
DoEvents();
//Pregress bar stuff
temp = pr;
pr = ((chunk + 1) * 100 / nBlocks);
if(pr != temp)
{
StringCbPrintf(szPr, sizeof(szPr), TEXT("%d %%"), pr);
SetDlgItemText(hDlg, IDC_STATIC_PR, szPr);
SendMessage(hPb, PBM_STEPIT, 0, 0);
}
//Break message in 32 bit chunks
j = 0;
z = 0;
for(i = index; i < (index + 64); i++, j++, ctBuffer++)
{
if(ctBuffer < lenBytes)
{
w[j >> 2] = (unsigned int)(w[j >> 2] << 8) | buffer[i];
}
else
{
w[j >> 2] = (unsigned int)(w[j >> 2] << 8) | padding[ctBuffer - lenBytes];
z++;
}
}
index += 64;
//Extend the sixteen 32-bit words into sixty-four 32-bit words:
for (i = 16 ; i < 64; i++)
{
s0 = (RROT(w[i-15], 7)) ^ (RROT(w[i-15],18)) ^ (w[i-15] >> 3);
s1 = (RROT(w[i-2], 17)) ^ (RROT(w[i-2],19)) ^ (w[i-2] >> 10);
w[i] = w[i - 16] + s0 + w[i - 7] + s1;
}
a = h0;
b = h1;
c = h2;
d = h3;
e = h4;
f = h5;
g = h6;
h = h7;
//SHA 256 calculations
for(i = 0 ; i < 64 ; i++)
{
s0 = (RROT(a,2)) ^ (RROT(a,13)) ^ (RROT(a, 22));
maj = (a & b) ^ (a & c) ^ (b & c);
t2 = s0 + maj;
s1 = (RROT(e,6)) ^ (RROT(e, 11)) ^ (RROT(e,25));
ch = (e & f) ^ ((~e) & g);
t1 = h + s1 + ch + k[i] + w[i];
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
//Add results to current hash values
h0 += a;
h1 += b;
h2 += c;
h3 += d;
h4 += e;
h5 += f;
h6 += g;
h7 += h;
//Update buffer for read next chunk of file
if(index == BUFFER_SIZE)
{
ReadFile(hf, buffer, BUFFER_SIZE, &bytesRead, NULL);
index = 0;
}
}
sha256[0] = h0;
sha256[1] = h1;
sha256[2] = h2;
sha256[3] = h3;
sha256[4] = h4;
sha256[5] = h5;
sha256[6] = h6;
sha256[7] = h7;
CloseHandle(hf);
if(!HeapFree(hp, 0, padding))MessageBox(hDlg, TEXT("Can't free memory!"), TEXT("SHA - 256 Calc"), MB_OK);
}
void ParseDigest(TCHAR *szMsg, unsigned int len ,unsigned int *digest)
{
StringCbPrintf(szMsg, len, TEXT("%02x%02x%02x%02x%02x%02x%02x%02x"), digest[0],
digest[1],
digest[2],
digest[3],
digest[4],
digest[5],
digest[6],
digest[7]);
}
I'm going to change my code by computing SHA-256 using OpenSSL style with three functions because it's cleaner:
SHA_CTX ctx; // <-- Struct wih Hash vars for this context
unsigned char disgest[len_of_sha_disgest]
SHA_Init(&ctx); // Init Hash Vars
//This function computes "The SHA Main loop" and it will be inside file read loop
while( not EOF)SHA_Update(ctx,buffer, len);
//This function calculates the padding and creates the digest
SHA_Final(ctx, digest)
4 Answers 4
As a really quick comment, you can do :
for(i = index, index+=64; i < index; i++, j++, ctBuffer++) { (...) }
instead of
for(i = index; i < (index + 64); i++, j++, ctBuffer++) { (...) } index += 64;
to make the code very slightly shorter and faster.
Also, to improve readability, please define your variables in the smallest possible scope and as you initialise them (if possible as a
const
object), such as :for (unsigned long long chunk = 0; chunk < nBlocks ; chunk++)
or
const HANDLE hf = CreateFile(filename, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
Suggestions off the top of my head:
- Disable debugging switches and enable full optimization (assuming you haven't already).
- Enable profiling to find bottleneck.
- Make sure BUFFER_SIZE is sufficiently large for desired throughput, say at least 1 MiB. Personally, I would go with 16 MiB.
For helping detect bitrot, I once wrote a file-checksumming utility in Perl which runs at disk speed (80 MB/sec external FireWire in my case) using Perl's SHA library, and I found that the block size I used for file read operations made a huge huge huge difference to overall throughput.
I think your problem may be SendMessage
. It doesn't return until the message gets processed. So may be waiting on something UI related at that point. I'd try commenting out all of the UI stuff and seeing if the speed improves.
Your edit makes your code look like C not C++.
Any style where you have init() doStuff() fin() is a case for where a RAII could be good:
MartinSha ctx(stdandard_stream);
std::cout << ctx.c_str() << "\n";
// Alternative constructor for internal buffers.
std::stringstream streambuffer;
streambuffer.rdbuf()->pubsetbuf(buffer,bufferSize);
MartinSha ctx(streambuffer);
std::cout << ctx.c_str() << "\n";
Your first attempt did everything in one function.
You should have split that up. There is UI code/file reading code/sh256 code all intermixed into the same function. A better technique is to separate these out into different things.
Explore related questions
See similar questions with these tags.