11
\$\begingroup\$

I'm worried about my code performance. The code I've written is an implementation of SHA-256 using pseudocode from Wikipedia. It works fine, but I want to speed it up. With a buffer of 4096 bytes, I only get 40 Mb/s, whereas commercial software gets 120Mb/s. I've degugged the code, and it seems that the problem is in the loop: it computes the SHA-256 slowly.

Can anybody help me or offer advice?

void sha256_file(HWND hDlg, TCHAR *filename, unsigned int *sha256)
{
 unsigned long long lenBits, lenBytes, lenPadding, ctBuffer, index, j, i, nBlocks, chunk, pr, temp;
 unsigned int w[64], h0, h1, h2, h3, h4, h5, h6, h7, ch, z, ct , memR;
 unsigned int s0, s1, a, b, c, d, e, f, g, h, maj, t1, t2;
 unsigned char *padding;
 TCHAR szPr[20];
 HWND hPb;
 HANDLE hf, hp;
 LARGE_INTEGER fSize;
 DWORD bytesRead;
 //Create the file
 hf = CreateFile(filename, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
 if(hf == NULL)
 {
 MessageBox(hDlg, TEXT("Can't create file!"), TEXT("SHA - 256 Calc"), MB_OK);
 exit(1);
 }
 if(GetLastError() == ERROR_FILE_NOT_FOUND)
 {
 MessageBox(hDlg, TEXT("File not found!"), TEXT("SHA - 256 Calc"), MB_OK | MB_ICONEXCLAMATION);
 exit(1);
 }
 hp = HeapCreate(HEAP_GENERATE_EXCEPTIONS, 0, 0);
 if(hp == NULL)
 {
 MessageBox(hDlg, TEXT("Can't create heap!"), TEXT("SHA - 256 Calc"), MB_OK);
 exit(1);
 }
 //Get Process bar handle
 hPb = GetDlgItem(hDlg, IDC_PROGRESS1);
 //Set range 0-100
 SendMessage(hPb, PBM_SETRANGE, 0, MAKELPARAM(0, 100));
 SendMessage(hPb, PBM_SETSTEP, (WPARAM) 1, 0);
 SendMessage(hPb, PBM_SETPOS, (WPARAM) 0, 0);
 //Get the size of file
 GetFileSizeEx(hf, &fSize);
 //Get 64 bit size
 lenBytes = (unsigned long long)fSize.QuadPart;
 //len message in bits
 lenBits = lenBytes << 3;
 //Blocks of message
 nBlocks = ((lenBytes + 8) >> 6) + 1;
 lenPadding = lenBytes + 1;
 //Calculate padding len msg MOD 512 == 448
 while((lenPadding & 0x3f) != 56) lenPadding++;
 lenPadding -= lenBytes;
 memR = (unsigned int) lenPadding;
 memR += 8;
 //Allocate memory for padding
 padding = (unsigned char *) HeapAlloc(hp, HEAP_ZERO_MEMORY, memR);
 if(padding == NULL)
 {
 MessageBox(hDlg, TEXT("Can't create heap!"), TEXT("SHA - 256 Calc"), MB_OK);
 exit(1);
 }
 //Append 1 bit to message
 padding[0] = 0x80;
 //Append 64 bit len of message big endian
 for(i = (memR - 1); i >= (memR - 8); i--)
 {
 padding[i] = (unsigned char) lenBits;
 lenBits >>= 8;
 }
 //Hash vars
 h0 = H0;
 h1 = H1;
 h2 = H2;
 h3 = H3;
 h4 = H4;
 h5 = H5;
 h6 = H6;
 h7 = H7;
 index = 0;
 ctBuffer = 0;
 temp = 0;
 pr = 0;
 ct = 0; 
 //Read the first chunk before enter in main loop
 ReadFile(hf, buffer, BUFFER_SIZE, &bytesRead, NULL);
 for(chunk = 0; chunk < nBlocks ; chunk++)
 {
 //Handle messages
 DoEvents();
 //Pregress bar stuff
 temp = pr;
 pr = ((chunk + 1) * 100 / nBlocks);
 if(pr != temp)
 {
 StringCbPrintf(szPr, sizeof(szPr), TEXT("%d %%"), pr);
 SetDlgItemText(hDlg, IDC_STATIC_PR, szPr);
 SendMessage(hPb, PBM_STEPIT, 0, 0);
 }
 //Break message in 32 bit chunks
 j = 0;
 z = 0;
 for(i = index; i < (index + 64); i++, j++, ctBuffer++)
 {
 if(ctBuffer < lenBytes)
 {
 w[j >> 2] = (unsigned int)(w[j >> 2] << 8) | buffer[i];
 }
 else
 {
 w[j >> 2] = (unsigned int)(w[j >> 2] << 8) | padding[ctBuffer - lenBytes];
 z++;
 }
 }
 index += 64;
 //Extend the sixteen 32-bit words into sixty-four 32-bit words:
 for (i = 16 ; i < 64; i++)
 {
 s0 = (RROT(w[i-15], 7)) ^ (RROT(w[i-15],18)) ^ (w[i-15] >> 3);
 s1 = (RROT(w[i-2], 17)) ^ (RROT(w[i-2],19)) ^ (w[i-2] >> 10);
 w[i] = w[i - 16] + s0 + w[i - 7] + s1;
 }
 a = h0;
 b = h1;
 c = h2;
 d = h3;
 e = h4;
 f = h5;
 g = h6;
 h = h7;
 //SHA 256 calculations
 for(i = 0 ; i < 64 ; i++)
 {
 s0 = (RROT(a,2)) ^ (RROT(a,13)) ^ (RROT(a, 22));
 maj = (a & b) ^ (a & c) ^ (b & c);
 t2 = s0 + maj;
 s1 = (RROT(e,6)) ^ (RROT(e, 11)) ^ (RROT(e,25));
 ch = (e & f) ^ ((~e) & g);
 t1 = h + s1 + ch + k[i] + w[i];
 h = g;
 g = f;
 f = e;
 e = d + t1;
 d = c;
 c = b;
 b = a;
 a = t1 + t2;
 }
 //Add results to current hash values
 h0 += a;
 h1 += b;
 h2 += c;
 h3 += d;
 h4 += e;
 h5 += f;
 h6 += g;
 h7 += h;
 //Update buffer for read next chunk of file
 if(index == BUFFER_SIZE)
 {
 ReadFile(hf, buffer, BUFFER_SIZE, &bytesRead, NULL);
 index = 0;
 }
 }
 sha256[0] = h0;
 sha256[1] = h1;
 sha256[2] = h2;
 sha256[3] = h3;
 sha256[4] = h4;
 sha256[5] = h5;
 sha256[6] = h6;
 sha256[7] = h7;
 CloseHandle(hf);
 if(!HeapFree(hp, 0, padding))MessageBox(hDlg, TEXT("Can't free memory!"), TEXT("SHA - 256 Calc"), MB_OK);
}
void ParseDigest(TCHAR *szMsg, unsigned int len ,unsigned int *digest)
{
 StringCbPrintf(szMsg, len, TEXT("%02x%02x%02x%02x%02x%02x%02x%02x"), digest[0], 
 digest[1],
 digest[2],
 digest[3],
 digest[4],
 digest[5],
 digest[6],
 digest[7]);
}

I'm going to change my code by computing SHA-256 using OpenSSL style with three functions because it's cleaner:

SHA_CTX ctx; // <-- Struct wih Hash vars for this context
unsigned char disgest[len_of_sha_disgest]
SHA_Init(&ctx); // Init Hash Vars
//This function computes "The SHA Main loop" and it will be inside file read loop
while( not EOF)SHA_Update(ctx,buffer, len);
//This function calculates the padding and creates the digest
SHA_Final(ctx, digest) 
Jamal
35.2k13 gold badges134 silver badges238 bronze badges
asked Jan 4, 2012 at 13:56
\$\endgroup\$

4 Answers 4

5
\$\begingroup\$
  • As a really quick comment, you can do :

    for(i = index, index+=64; i < index; i++, j++, ctBuffer++) { (...) }
    

    instead of

    for(i = index; i < (index + 64); i++, j++, ctBuffer++) { (...) } index += 64;
    

    to make the code very slightly shorter and faster.

  • Also, to improve readability, please define your variables in the smallest possible scope and as you initialise them (if possible as a const object), such as :

    for (unsigned long long chunk = 0; chunk < nBlocks ; chunk++)
    

    or

    const HANDLE hf = CreateFile(filename, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
    
Jamal
35.2k13 gold badges134 silver badges238 bronze badges
answered Jan 4, 2012 at 15:56
\$\endgroup\$
0
5
\$\begingroup\$

Suggestions off the top of my head:

  • Disable debugging switches and enable full optimization (assuming you haven't already).
  • Enable profiling to find bottleneck.
  • Make sure BUFFER_SIZE is sufficiently large for desired throughput, say at least 1 MiB. Personally, I would go with 16 MiB.

For helping detect bitrot, I once wrote a file-checksumming utility in Perl which runs at disk speed (80 MB/sec external FireWire in my case) using Perl's SHA library, and I found that the block size I used for file read operations made a huge huge huge difference to overall throughput.

answered Jan 4, 2012 at 23:59
\$\endgroup\$
4
\$\begingroup\$

I think your problem may be SendMessage. It doesn't return until the message gets processed. So may be waiting on something UI related at that point. I'd try commenting out all of the UI stuff and seeing if the speed improves.

answered Jan 5, 2012 at 0:25
\$\endgroup\$
2
\$\begingroup\$

Your edit makes your code look like C not C++.

Any style where you have init() doStuff() fin() is a case for where a RAII could be good:

 MartinSha ctx(stdandard_stream);
 std::cout << ctx.c_str() << "\n";
 // Alternative constructor for internal buffers.
 std::stringstream streambuffer;
 streambuffer.rdbuf()->pubsetbuf(buffer,bufferSize);
 MartinSha ctx(streambuffer);
 std::cout << ctx.c_str() << "\n";

Your first attempt did everything in one function.
You should have split that up. There is UI code/file reading code/sh256 code all intermixed into the same function. A better technique is to separate these out into different things.

answered Jan 5, 2012 at 18:58
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.