Reading from a stream but reserving N bytes at the end of stream

Question 1

What I'd like to do is read data from a stream (like ifstream::read), but reserve N = 4 bytes from being returned. The last 4 bytes of the stream are not part of the data itself, but are metadata.

In the general case, the stream is not an ifstream so I won't be able to seek, putback, or determine the size of the stream ahead of time. Here I'm just using it as an example.

My approach is to keep a reserve buffer of 4 bytes to ensure I never accidentally return the last four bytes of the stream. However, the logic turned out to be pretty hard to manage... I'm hoping there's a cleaner way to accomplish what I want.

I'd like to ask for a code review in terms readability (is there a better approach?) and/or performance (is there anything I'm doing that is suboptimal?)

#include <iostream>
#include <array>
#include <vector>
#include <fstream>
#include <cstring>
#define RESERVE_SIZE 4
static std::array<char, RESERVE_SIZE> reserve = {0,0,0,0}; // buffer for holding bytes at end of stream
static bool first_read = true; // true if first read and reserve buffer is empty
// helper function to print output
void data_printer(std::string msg, char * bd, size_t bytes_read) {
 std::cout << msg << bytes_read << ": ";
 for(size_t i=0; i < bytes_read; i++) std::cout << (int)bd[i] << " ";
 std::cout << std::endl;
}
// helper function to read and return # of bytes read
size_t read_count(std::ifstream & myFile, char * dst, size_t length) {
 myFile.read(dst, length);
 return myFile.gcount();
}
// dst -- output buffer, has at least "length" bytes
// exact -- if true, expect "length" bytes read from myFile and copied to dst; throw error if less than that
// return value is the number of bytes read from my File and copied to dst
size_t read_reserve(std::ifstream & myFile, char * dst, size_t length, bool exact=false) {
 if(first_read) {
 myFile.read(reserve.data(), RESERVE_SIZE);
 first_read = false;
 }
 if(exact) {
 if(length >= RESERVE_SIZE) {
 std::memcpy(dst, reserve.data(), RESERVE_SIZE);
 myFile.read(dst + RESERVE_SIZE, length - RESERVE_SIZE);
 myFile.read(reserve.data(), RESERVE_SIZE);
 if(myFile.gcount() != RESERVE_SIZE) throw std::runtime_error("not enough data in file :(");
 return length;
 } else { // RESERVE_SIZE > length
 std::memcpy(dst, reserve.data(), length);
 // since some of the reserve buffer was consumed, shift the unconsumed bytes to beginning of array
 // then read from file to fill up reserve buffer
 std::memmove(reserve.data(), reserve.data() + length, RESERVE_SIZE - length);
 myFile.read(reserve.data() + RESERVE_SIZE - length, length);
 if(myFile.gcount() != length) throw std::runtime_error("not enough data in file :(");
 return length;
 }
 } else { // !exact -- we can't assume that "length" bytes are left in myFile; there could even be zero bytes left
 if(length >= RESERVE_SIZE) {
 // use "dst" as a temporary buffer, since it's already allocated
 // it is not a good idea to allocate a temp buffer of size length, as length can be large
 std::memcpy(dst, reserve.data(), RESERVE_SIZE);
 size_t n_read = read_count(myFile, dst + RESERVE_SIZE, length - RESERVE_SIZE);
 size_t n_bufferable = n_read + RESERVE_SIZE;
 if(n_bufferable < length) {
 std::memcpy(reserve.data(), dst + n_bufferable - RESERVE_SIZE, RESERVE_SIZE);
 return n_bufferable - RESERVE_SIZE;
 } else {
 std::array<char, RESERVE_SIZE> temp_buffer = {0,0,0,0};
 size_t temp_size = read_count(myFile, temp_buffer.data(), RESERVE_SIZE);
 std::memcpy(reserve.data(), dst + n_bufferable - (RESERVE_SIZE - temp_size), RESERVE_SIZE - temp_size);
 std::memcpy(reserve.data() + RESERVE_SIZE - temp_size, temp_buffer.data(), temp_size);
 return n_bufferable - (RESERVE_SIZE - temp_size);
 }
 } else { // length < RESERVE_SIZE
 std::vector<char> temp_buffer(length, '0円');
 size_t return_value = read_count(myFile, temp_buffer.data(), length);
 // n_bufferable is at most RESERVE_SIZE*2 - 1 = 7
 std::memcpy(dst, reserve.data(), return_value);
 std::memmove(reserve.data(), reserve.data() + return_value, RESERVE_SIZE - return_value);
 std::memcpy(reserve.data() + (RESERVE_SIZE - return_value), temp_buffer.data(), return_value);
 return return_value;
 }
 }
}
// example function usage
int main() {
 std::ofstream outfile("/tmp/temp.bin", std::ios::out | std::ios::binary);
 std::array<char, 30> mydata = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29};
 outfile.write(mydata.data(), 30);
 outfile.close();
 std::cout << "test1" << std::endl;
 std::ifstream infile("/tmp/temp.bin", std::ios::in | std::ios::binary);
 std::vector<char> buffer(100, '0円');
 size_t bytes_read;
 char * bd = buffer.data();
 bytes_read = read_reserve(infile, bd, 3, true); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 3, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 5, true); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 5, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 100, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 infile.close();
 std::cout << "test2" << std::endl;
 first_read = true;
 std::ifstream infile2("/tmp/temp.bin", std::ios::in | std::ios::binary);
 bytes_read = read_reserve(infile2, bd, 28, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile2, bd, 3, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 infile2.close();
}
// g++ -std=c++11 -O3 read_reserve.cpp -o test

Question 2

In general, the code looks nice. It is structured and well-formatted.

It is probably a bad idea to have more than 80 characters in each line, because this may happen:

line wrapping

Also, put the body of a control statement on a separate line. For example, instead of

for(size_t i=0; i < bytes_read; i++) std::cout << (int)bd[i] << " ";

do

for (size_t i=0; i < bytes_read; i++)
 std::cout << (int)bd[i] << " ";

(more on this loop later.)

General design

It seems that you are using a translation unit as a "module" and use global static variables for implementation. Requiring the user to set first_read manually feels wrong. What you are developing is a class that wraps the functionalities.

Also, your read_reserve function is way too complex. A rule of thumb is that if a function is longer than about 10 lines, it is doing too much. In your case, the exact parameter should be a separate function:

class Reader {
public:
 Reader(std::istream& i)
 :is{i}
 {
 init();
 }
 std::streamsize read (char* dst, std::streamsize length);
 std::streamsize read_exact(char* dst, std::streamsize length);
 // for the test
 void print_reserve(std::ostream& os);
private:
 void init();
 std::streamsize read_count(char* dst, std::streamsize length);
 std::istream& is;
 static constexpr std::size_t reserve_size = 4;
 std::array<char, reserve_size> reserve{};
};
void Reader::init()
{
 read_count(reserve.data(), reserve_size);
}

This way, the user does not need to bother with first_read. They simply create a Reader object when they want to read something. The implementation also doesn't have to check for first_read each time.

Code

Now let's go through the code and figure out some possible improvements.

#include <iostream>
#include <array>
#include <vector>
#include <fstream>
#include <cstring>

It is considered good practice to sort the #includes in alphabetical order, so that you can easily figure out whether a particular header is included. Like:

#include <array>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>

In particular, you forgot #include <cstddef> for std::size_t.

#define RESERVE_SIZE 4
static std::array<char, RESERVE_SIZE> reserve = {0,0,0,0}; // buffer for holding bytes at end of stream
static bool first_read = true; // true if first read and reserve buffer is empty

In C++, don't use #define for constants. Use constexpr instead.

constexpr std::size_t reserve_size = 4;

The initialization of reserve can be a simple {} instead of = {0,0,0,0}. In my opinion, that's clearer.

// helper function to print output
void data_printer(std::string msg, char * bd, size_t bytes_read) {
 std::cout << msg << bytes_read << ": ";
 for(size_t i=0; i < bytes_read; i++) std::cout << (int)bd[i] << " ";
 std::cout << std::endl;
}

Don't take msg by value. Take by const reference instead. Also, I don't see why bd is a non-const pointer. size_t should be std::size_t. Prefer ++i over i++. In C++, avoid C-style casts. Use '\n' instead of std::endl when flushing is not needed. Something along the lines of:

// helper function to print output
void data_printer(const std::string& msg, const char * bd, std::size_t bytes_read)
{
 std::cout << msg << bytes_read << ": ";
 for (std::size_t i = 0; i < bytes_read; ++i)
 std::cout << static_cast<int>(bd[i]) << ' ';
 std::cout << '\n';
}

I would put a space after the for keyword, but that is a matter of taste, I guess.

Also, the STL algorithms can be used here: (needs #include <algorithm> and #include <iterator>)

// helper function to print output
void data_printer(const std::string& msg, const char* bd, std::size_t bytes_read)
{
 std::cout << msg << bytes_read << ": ";
 std::copy_n(bd, bytes_read, std::ostream_iterator<int>{std::cout, " "});
 std::cout << '\n';
}

In fact, I feel that msg doesn't fit in here well. And it would be nice if you take a std::ostream& parameter instead of always outputting to std::cout. Also, this function may deserve an inline. I may write the function like this:

void print_data(std::ostream& os, const char* data, std::size_t cnt)
{
 std::copy_n(bd, bytes_read, std::ostream_iterator<int>{std::cout, " "});
}

and let the user handle the message and/or newline. It would be even nicer if we write an I/O manipulator so that we can use it like

std::cout << "bytes read: " << print_data(bd, bytes_read) << '\n';

The implementation is left as an exercise to the reader. (oops)

// helper function to read and return # of bytes read
size_t read_count(std::ifstream & myFile, char * dst, size_t length) {
 myFile.read(dst, length);
 return myFile.gcount();
}

Nice little function. Also a good candidate for inline. myFile can be any std::istream, not just std::ifstream. It may be better to use std::streamsize instead of std::size_t here:

// helper function to read and return # of bytes read
std::streamsize read_count(std::istream& myFile, char* dst, std::streamsize length)
{
 myFile.read(dst, length);
 return myFile.gcount();
}

Also, stream operations can be chained, but I'm not sure whether return myFile.read(dst, length).gcount() is more readable.

Initialization code:

myFile.read(reserve.data(), RESERVE_SIZE);

As I wrote above, since you wrote read_count, why not use it?

Non-exact reading code, part one:

if(length >= RESERVE_SIZE) {
 std::memcpy(dst, reserve.data(), RESERVE_SIZE);
 myFile.read(dst + RESERVE_SIZE, length - RESERVE_SIZE);
 myFile.read(reserve.data(), RESERVE_SIZE);
 if(myFile.gcount() != RESERVE_SIZE) throw std::runtime_error("not enough data in file :(");
 return length;
}

Here, the logic is actually simpler than it seems to be: emit the cached data, read the remaining characters and emit immediately, and then read and cache specified amount of data. It would be nice if you place a comment explaining this. Also, std::copy is easier to work with than std::memcpy, and it increments the pointer for you. (std::copy automatically calls std::memcpy for trivial types, so there is no performance quality.) And since you have written read_count, use it. I am starting to feel that the if statement deserves its own function:

void read_ensure(char* dest, std::streampos size)
{
 if (read_count(dest, size) != size)
 throw std::runtime{"not enough data in file :("};
}

then you can use it to make the code more readable:

if (length >= reserve_size) {
 dst = std::copy(reserve.begin(), reserve.end(), dst);
 read_count(dst, length - reserve_size);
 read_ensure(reserve.data(), reserve_size);
}

The return statement is common to both branches, so don't repeat it.

Non-exact reading code, part two:

else { // RESERVE_SIZE > length
 std::memcpy(dst, reserve.data(), length);
 // since some of the reserve buffer was consumed, shift the unconsumed bytes to beginning of array
 // then read from file to fill up reserve buffer
 std::memmove(reserve.data(), reserve.data() + length, RESERVE_SIZE - length);
 myFile.read(reserve.data() + RESERVE_SIZE - length, length);
 if(myFile.gcount() != length) throw std::runtime_error("not enough data in file :(");
 return length;
}

We can use std::copy here since we know the direction: (std::copy is likely to call std::memmove internally)

else {
 std::copy(reserve.begin(), reserve.end(), dst);
 std::copy(reserve.begin() + length, reserve.end(), reserve.begin());
 read_ensure(reserve, length);
}

Exact reading code, part one:

if(length >= RESERVE_SIZE) {
 // use "dst" as a temporary buffer, since it's already allocated
 // it is not a good idea to allocate a temp buffer of size length, as length can be large
 std::memcpy(dst, reserve.data(), RESERVE_SIZE);
 size_t n_read = read_count(myFile, dst + RESERVE_SIZE, length - RESERVE_SIZE);
 size_t n_bufferable = n_read + RESERVE_SIZE;
 if(n_bufferable < length) {
 std::memcpy(reserve.data(), dst + n_bufferable - RESERVE_SIZE, RESERVE_SIZE);
 return n_bufferable - RESERVE_SIZE;
 } else {
 std::array<char, RESERVE_SIZE> temp_buffer = {0,0,0,0};
 size_t temp_size = read_count(myFile, temp_buffer.data(), RESERVE_SIZE);
 std::memcpy(reserve.data(), dst + n_bufferable - (RESERVE_SIZE - temp_size), RESERVE_SIZE - temp_size);
 std::memcpy(reserve.data() + RESERVE_SIZE - temp_size, temp_buffer.data(), temp_size);
 return n_bufferable - (RESERVE_SIZE - temp_size);
 }
}

You are actually leaking the reserved bytes. You rely on the user to not read them, but are users always careful? It may be beneficial to std::fill the remaining bytes to zero, but that depends on your case.

Exact reading code, part two:

else { // length < RESERVE_SIZE
 std::vector<char> temp_buffer(length, '0円');
 size_t return_value = read_count(myFile, temp_buffer.data(), length);
 // n_bufferable is at most RESERVE_SIZE*2 - 1 = 7
 std::memcpy(dst, reserve.data(), return_value);
 std::memmove(reserve.data(), reserve.data() + return_value, RESERVE_SIZE - return_value);
 std::memcpy(reserve.data() + (RESERVE_SIZE - return_value), temp_buffer.data(), return_value);
 return return_value;
}

Well, using std::vector seems a little bit strange here. reserve_size is a small number, and since you are using it in other places, so why not use std::array?

// example function usage
int main() {
 std::ofstream outfile("/tmp/temp.bin", std::ios::out | std::ios::binary);
 std::array<char, 30> mydata = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29};
 outfile.write(mydata.data(), 30);
 outfile.close();
 std::cout << "test1" << std::endl;
 std::ifstream infile("/tmp/temp.bin", std::ios::in | std::ios::binary);
 std::vector<char> buffer(100, '0円');
 size_t bytes_read;
 char * bd = buffer.data();
 bytes_read = read_reserve(infile, bd, 3, true); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 3, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 5, true); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 5, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 100, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 infile.close();
 std::cout << "test2" << std::endl;
 first_read = true;
 std::ifstream infile2("/tmp/temp.bin", std::ios::in | std::ios::binary);
 bytes_read = read_reserve(infile2, bd, 28, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile2, bd, 3, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 infile2.close();
}

out is implied for std::ofstream, and in is implied for std::ifstream, so omit them. Initializing mydata like that is no fun; std::iota is better. Also, it may be better to use scopes instead of manually calling close. The reading tests are a lot of duplicate code and may deserve a helper function. Also, the three parts can probably be extracted into their own functions. This is just a test, so it doesn't really matter too much, though.

Question 3

Thank you! This was more than I could have hoped for! :)

L. F. L. F. 9,6952 gold badges27 silver badges69 bronze badges · Accepted Answer · 2019-08-05 12:05:45Z

In general, the code looks nice. It is structured and well-formatted.

It is probably a bad idea to have more than 80 characters in each line, because this may happen:

line wrapping

Also, put the body of a control statement on a separate line. For example, instead of

for(size_t i=0; i < bytes_read; i++) std::cout << (int)bd[i] << " ";

do

for (size_t i=0; i < bytes_read; i++)
 std::cout << (int)bd[i] << " ";

(more on this loop later.)

General design

It seems that you are using a translation unit as a "module" and use global static variables for implementation. Requiring the user to set first_read manually feels wrong. What you are developing is a class that wraps the functionalities.

Also, your read_reserve function is way too complex. A rule of thumb is that if a function is longer than about 10 lines, it is doing too much. In your case, the exact parameter should be a separate function:

class Reader {
public:
 Reader(std::istream& i)
 :is{i}
 {
 init();
 }
 std::streamsize read (char* dst, std::streamsize length);
 std::streamsize read_exact(char* dst, std::streamsize length);
 // for the test
 void print_reserve(std::ostream& os);
private:
 void init();
 std::streamsize read_count(char* dst, std::streamsize length);
 std::istream& is;
 static constexpr std::size_t reserve_size = 4;
 std::array<char, reserve_size> reserve{};
};
void Reader::init()
{
 read_count(reserve.data(), reserve_size);
}

This way, the user does not need to bother with first_read. They simply create a Reader object when they want to read something. The implementation also doesn't have to check for first_read each time.

Code

Now let's go through the code and figure out some possible improvements.

#include <iostream>
#include <array>
#include <vector>
#include <fstream>
#include <cstring>

It is considered good practice to sort the #includes in alphabetical order, so that you can easily figure out whether a particular header is included. Like:

#include <array>
#include <cstring>
#include <fstream>
#include <iostream>
#include <vector>

In particular, you forgot #include <cstddef> for std::size_t.

#define RESERVE_SIZE 4
static std::array<char, RESERVE_SIZE> reserve = {0,0,0,0}; // buffer for holding bytes at end of stream
static bool first_read = true; // true if first read and reserve buffer is empty

In C++, don't use #define for constants. Use constexpr instead.

constexpr std::size_t reserve_size = 4;

The initialization of reserve can be a simple {} instead of = {0,0,0,0}. In my opinion, that's clearer.

// helper function to print output
void data_printer(std::string msg, char * bd, size_t bytes_read) {
 std::cout << msg << bytes_read << ": ";
 for(size_t i=0; i < bytes_read; i++) std::cout << (int)bd[i] << " ";
 std::cout << std::endl;
}

Don't take msg by value. Take by const reference instead. Also, I don't see why bd is a non-const pointer. size_t should be std::size_t. Prefer ++i over i++. In C++, avoid C-style casts. Use '\n' instead of std::endl when flushing is not needed. Something along the lines of:

// helper function to print output
void data_printer(const std::string& msg, const char * bd, std::size_t bytes_read)
{
 std::cout << msg << bytes_read << ": ";
 for (std::size_t i = 0; i < bytes_read; ++i)
 std::cout << static_cast<int>(bd[i]) << ' ';
 std::cout << '\n';
}

I would put a space after the for keyword, but that is a matter of taste, I guess.

Also, the STL algorithms can be used here: (needs #include <algorithm> and #include <iterator>)

// helper function to print output
void data_printer(const std::string& msg, const char* bd, std::size_t bytes_read)
{
 std::cout << msg << bytes_read << ": ";
 std::copy_n(bd, bytes_read, std::ostream_iterator<int>{std::cout, " "});
 std::cout << '\n';
}

In fact, I feel that msg doesn't fit in here well. And it would be nice if you take a std::ostream& parameter instead of always outputting to std::cout. Also, this function may deserve an inline. I may write the function like this:

void print_data(std::ostream& os, const char* data, std::size_t cnt)
{
 std::copy_n(bd, bytes_read, std::ostream_iterator<int>{std::cout, " "});
}

and let the user handle the message and/or newline. It would be even nicer if we write an I/O manipulator so that we can use it like

std::cout << "bytes read: " << print_data(bd, bytes_read) << '\n';

The implementation is left as an exercise to the reader. (oops)

// helper function to read and return # of bytes read
size_t read_count(std::ifstream & myFile, char * dst, size_t length) {
 myFile.read(dst, length);
 return myFile.gcount();
}

Nice little function. Also a good candidate for inline. myFile can be any std::istream, not just std::ifstream. It may be better to use std::streamsize instead of std::size_t here:

// helper function to read and return # of bytes read
std::streamsize read_count(std::istream& myFile, char* dst, std::streamsize length)
{
 myFile.read(dst, length);
 return myFile.gcount();
}

Also, stream operations can be chained, but I'm not sure whether return myFile.read(dst, length).gcount() is more readable.

Initialization code:

myFile.read(reserve.data(), RESERVE_SIZE);

As I wrote above, since you wrote read_count, why not use it?

Non-exact reading code, part one:

if(length >= RESERVE_SIZE) {
 std::memcpy(dst, reserve.data(), RESERVE_SIZE);
 myFile.read(dst + RESERVE_SIZE, length - RESERVE_SIZE);
 myFile.read(reserve.data(), RESERVE_SIZE);
 if(myFile.gcount() != RESERVE_SIZE) throw std::runtime_error("not enough data in file :(");
 return length;
}

Here, the logic is actually simpler than it seems to be: emit the cached data, read the remaining characters and emit immediately, and then read and cache specified amount of data. It would be nice if you place a comment explaining this. Also, std::copy is easier to work with than std::memcpy, and it increments the pointer for you. (std::copy automatically calls std::memcpy for trivial types, so there is no performance quality.) And since you have written read_count, use it. I am starting to feel that the if statement deserves its own function:

void read_ensure(char* dest, std::streampos size)
{
 if (read_count(dest, size) != size)
 throw std::runtime{"not enough data in file :("};
}

then you can use it to make the code more readable:

if (length >= reserve_size) {
 dst = std::copy(reserve.begin(), reserve.end(), dst);
 read_count(dst, length - reserve_size);
 read_ensure(reserve.data(), reserve_size);
}

The return statement is common to both branches, so don't repeat it.

Non-exact reading code, part two:

else { // RESERVE_SIZE > length
 std::memcpy(dst, reserve.data(), length);
 // since some of the reserve buffer was consumed, shift the unconsumed bytes to beginning of array
 // then read from file to fill up reserve buffer
 std::memmove(reserve.data(), reserve.data() + length, RESERVE_SIZE - length);
 myFile.read(reserve.data() + RESERVE_SIZE - length, length);
 if(myFile.gcount() != length) throw std::runtime_error("not enough data in file :(");
 return length;
}

We can use std::copy here since we know the direction: (std::copy is likely to call std::memmove internally)

else {
 std::copy(reserve.begin(), reserve.end(), dst);
 std::copy(reserve.begin() + length, reserve.end(), reserve.begin());
 read_ensure(reserve, length);
}

Exact reading code, part one:

if(length >= RESERVE_SIZE) {
 // use "dst" as a temporary buffer, since it's already allocated
 // it is not a good idea to allocate a temp buffer of size length, as length can be large
 std::memcpy(dst, reserve.data(), RESERVE_SIZE);
 size_t n_read = read_count(myFile, dst + RESERVE_SIZE, length - RESERVE_SIZE);
 size_t n_bufferable = n_read + RESERVE_SIZE;
 if(n_bufferable < length) {
 std::memcpy(reserve.data(), dst + n_bufferable - RESERVE_SIZE, RESERVE_SIZE);
 return n_bufferable - RESERVE_SIZE;
 } else {
 std::array<char, RESERVE_SIZE> temp_buffer = {0,0,0,0};
 size_t temp_size = read_count(myFile, temp_buffer.data(), RESERVE_SIZE);
 std::memcpy(reserve.data(), dst + n_bufferable - (RESERVE_SIZE - temp_size), RESERVE_SIZE - temp_size);
 std::memcpy(reserve.data() + RESERVE_SIZE - temp_size, temp_buffer.data(), temp_size);
 return n_bufferable - (RESERVE_SIZE - temp_size);
 }
}

You are actually leaking the reserved bytes. You rely on the user to not read them, but are users always careful? It may be beneficial to std::fill the remaining bytes to zero, but that depends on your case.

Exact reading code, part two:

else { // length < RESERVE_SIZE
 std::vector<char> temp_buffer(length, '0円');
 size_t return_value = read_count(myFile, temp_buffer.data(), length);
 // n_bufferable is at most RESERVE_SIZE*2 - 1 = 7
 std::memcpy(dst, reserve.data(), return_value);
 std::memmove(reserve.data(), reserve.data() + return_value, RESERVE_SIZE - return_value);
 std::memcpy(reserve.data() + (RESERVE_SIZE - return_value), temp_buffer.data(), return_value);
 return return_value;
}

Well, using std::vector seems a little bit strange here. reserve_size is a small number, and since you are using it in other places, so why not use std::array?

// example function usage
int main() {
 std::ofstream outfile("/tmp/temp.bin", std::ios::out | std::ios::binary);
 std::array<char, 30> mydata = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29};
 outfile.write(mydata.data(), 30);
 outfile.close();
 std::cout << "test1" << std::endl;
 std::ifstream infile("/tmp/temp.bin", std::ios::in | std::ios::binary);
 std::vector<char> buffer(100, '0円');
 size_t bytes_read;
 char * bd = buffer.data();
 bytes_read = read_reserve(infile, bd, 3, true); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 3, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 5, true); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 5, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile, bd, 100, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 infile.close();
 std::cout << "test2" << std::endl;
 first_read = true;
 std::ifstream infile2("/tmp/temp.bin", std::ios::in | std::ios::binary);
 bytes_read = read_reserve(infile2, bd, 28, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 bytes_read = read_reserve(infile2, bd, 3, false); data_printer("bytes read: ", bd, bytes_read); data_printer("reserve: ", reserve.data(), RESERVE_SIZE);
 infile2.close();
}

out is implied for std::ofstream, and in is implied for std::ifstream, so omit them. Initializing mydata like that is no fun; std::iota is better. Also, it may be better to use scopes instead of manually calling close. The reading tests are a lot of duplicate code and may deserve a helper function. Also, the three parts can probably be extracted into their own functions. This is just a test, so it doesn't really matter too much, though.

\$\begingroup\$ Thank you! This was more than I could have hoped for! :) \$\endgroup\$

thc
– thc

2019年08月05日 17:48:48 +00:00
Commented Aug 5, 2019 at 17:48

Stack Exchange Network

Reading from a stream but reserving N bytes at the end of stream

1 Answer 1

General design

Code

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Reading from a stream but reserving N bytes at the end of stream

1 Answer 1

General design

Code

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions