Intro
This post is the continuation of Get histogram of bytes in any set of files in C++14 - take II. This time, I have incorporated almost all suggestions in the answers.
Code
My newest trial looks like this:
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <ios>
#include <istream>
#include <iostream>
#include <sstream>
#include <cstdio>
#include <string>
#include <vector>
static constexpr size_t screenWidth = 80;
static constexpr size_t linePreambleWidth = 11;
using Histogram = std::vector<std::size_t>;
class ByteHistogram {
static const size_t histogramCapacity = 0x100;
Histogram histogram_;
public:
ByteHistogram() {
histogram_.resize(histogramCapacity, 0);
}
void insert(unsigned char ch) {
++histogram_[ch];
}
Histogram::const_iterator cbegin() const {
return histogram_.cbegin();
}
Histogram::const_iterator cend() const {
return histogram_.cend();
}
Histogram::iterator begin() {
return histogram_.begin();
}
Histogram::iterator end() {
return histogram_.end();
}
size_t size() const {
return histogram_.size();
}
size_t getMaximumCount() const {
return *std::max_element(histogram_.cbegin(),
histogram_.cend());
}
std::istream& insert(std::istream& is)
{
std::istream::int_type c;
while ((c = is.get()) != std::istream::traits_type::eof()) {
insert(static_cast<unsigned char>(c));
}
return is;
}
auto const operator[](std::size_t i) const {
return histogram_[i];
}
};
static size_t computeCounterStringLength(const size_t maximumCount) {
return std::snprintf(nullptr, 0, "%zu", maximumCount);
}
static std::size_t computeBarLength(const size_t count,
const size_t maximumCount,
const size_t maximumCountLength) {
auto const availableSpace =
static_cast<double>(screenWidth - linePreambleWidth
- maximumCountLength);
return static_cast<size_t>(availableSpace * count / maximumCount);
}
std::ostream& operator<<(std::ostream& os,
ByteHistogram& hist) {
const size_t maximumCount = hist.getMaximumCount();
const size_t countStringLength = computeCounterStringLength(maximumCount);
const size_t maximumBarLength = screenWidth - linePreambleWidth
- countStringLength;
std::size_t i = 0;
for (auto const& count : hist) {
const double ratio = static_cast<double>(count) /
static_cast<double>(maximumCount);
const std::size_t barLength =
static_cast<std::size_t>(ratio * maximumBarLength);
const unsigned char ch = static_cast<char>(i);
std::cout << "0x"
<< std::hex
<< std::setfill('0')
<< std::setw(2)
<< i
<< " ["
<< (std::isprint(ch) ? (char) ch : '?')
<< "]: "
<< std::dec
<< std::setfill(' ')
<< std::setw(countStringLength)
<< count
<< ' '
<< std::setfill('*')
<< std::setw(barLength)
<< '\n';
i++;
}
return os;
}
int main(int argc, char* argv[]) {
ByteHistogram hist;
if (argc == 1) {
hist.insert(std::cin);
if (std::cin.bad()) {
std::cerr << "Reading from standard input failed.\n";
return EXIT_FAILURE;
}
} else {
for (int i = 1; i < argc; ++i) {
std::ifstream stream{ argv[i], std::ios::binary };
hist.insert(stream);
if (stream.bad()) {
std::cerr << "Reading from \"" << argv[i] << "\" failed.\n";
return EXIT_FAILURE;
}
}
}
std::cout << hist;
return EXIT_SUCCESS;
}
Typical output
0x00 [?]: 519 ****************
0x01 [?]: 513 ****************
0x02 [?]: 436 *************
0x03 [?]: 520 ****************
0x04 [?]: 449 **************
0x05 [?]: 516 ****************
0x06 [?]: 495 ***************
0x07 [?]: 479 ***************
0x08 [?]: 479 ***************
0x09 [?]: 499 ***************
0x0a [?]: 1007 ********************************
0x0b [?]: 486 ***************
0x0c [?]: 481 ***************
0x0d [?]: 499 ***************
0x0e [?]: 529 ****************
0x0f [?]: 518 ****************
0x10 [?]: 461 **************
0x11 [?]: 476 **************
0x12 [?]: 454 **************
0x13 [?]: 498 ***************
0x14 [?]: 493 ***************
0x15 [?]: 468 **************
0x16 [?]: 502 ***************
0x17 [?]: 522 ****************
0x18 [?]: 511 ****************
0x19 [?]: 500 ***************
0x1a [?]: 493 ***************
0x1b [?]: 517 ****************
0x1c [?]: 508 ****************
0x1d [?]: 473 **************
0x1e [?]: 499 ***************
0x1f [?]: 525 ****************
0x20 [ ]: 1941 ****************************************************************
0x21 [!]: 502 ***************
0x22 ["]: 443 *************
0x23 [#]: 490 ***************
0x24 [$]: 496 ***************
0x25 [%]: 508 ****************
0x26 [&]: 443 *************
0x27 [']: 473 **************
0x28 [(]: 492 ***************
0x29 [)]: 496 ***************
0x2a [*]: 446 *************
0x2b [+]: 475 **************
0x2c [,]: 510 ****************
0x2d [-]: 518 ****************
0x2e [.]: 533 ****************
0x2f [/]: 886 ****************************
0x30 [0]: 1361 ********************************************
0x31 [1]: 635 ********************
0x32 [2]: 681 *********************
0x33 [3]: 680 *********************
0x34 [4]: 661 *********************
0x35 [5]: 759 ************************
0x36 [6]: 693 **********************
0x37 [7]: 670 *********************
0x38 [8]: 598 *******************
0x39 [9]: 584 ******************
0x3a [:]: 563 *****************
0x3b [;]: 524 ****************
0x3c [<]: 641 ********************
0x3d [=]: 543 *****************
0x3e [>]: 619 *******************
0x3f [?]: 516 ****************
0x40 [@]: 483 ***************
0x41 [A]: 572 ******************
0x42 [B]: 538 *****************
0x43 [C]: 574 ******************
0x44 [D]: 561 *****************
0x45 [E]: 452 **************
0x46 [F]: 628 ********************
0x47 [G]: 513 ****************
0x48 [H]: 512 ****************
0x49 [I]: 572 ******************
0x4a [J]: 448 **************
0x4b [K]: 488 ***************
0x4c [L]: 446 *************
0x4d [M]: 500 ***************
0x4e [N]: 505 ***************
0x4f [O]: 525 ****************
0x50 [P]: 495 ***************
0x51 [Q]: 466 **************
0x52 [R]: 549 *****************
0x53 [S]: 536 ****************
0x54 [T]: 539 *****************
0x55 [U]: 504 ***************
0x56 [V]: 493 ***************
0x57 [W]: 493 ***************
0x58 [X]: 474 **************
0x59 [Y]: 508 ****************
0x5a [Z]: 525 ****************
0x5b [[]: 493 ***************
0x5c [\]: 499 ***************
0x5d []]: 527 ****************
0x5e [^]: 464 **************
0x5f [_]: 566 *****************
0x60 [`]: 505 ***************
0x61 [a]: 640 ********************
0x62 [b]: 629 ********************
0x63 [c]: 577 ******************
0x64 [d]: 694 **********************
0x65 [e]: 940 ******************************
0x66 [f]: 540 *****************
0x67 [g]: 561 *****************
0x68 [h]: 553 *****************
0x69 [i]: 640 ********************
0x6a [j]: 635 ********************
0x6b [k]: 481 ***************
0x6c [l]: 619 *******************
0x6d [m]: 594 ******************
0x6e [n]: 800 *************************
0x6f [o]: 792 *************************
0x70 [p]: 603 *******************
0x71 [q]: 518 ****************
0x72 [r]: 628 ********************
0x73 [s]: 658 *********************
0x74 [t]: 844 ***************************
0x75 [u]: 521 ****************
0x76 [v]: 484 ***************
0x77 [w]: 512 ****************
0x78 [x]: 561 *****************
0x79 [y]: 557 *****************
0x7a [z]: 503 ***************
0x7b [{]: 568 ******************
0x7c [|]: 536 ****************
0x7d [}]: 537 ****************
0x7e [~]: 554 *****************
0x7f [?]: 541 *****************
0x80 [?]: 465 **************
0x81 [?]: 512 ****************
0x82 [?]: 475 **************
0x83 [?]: 468 **************
0x84 [?]: 505 ***************
0x85 [?]: 483 ***************
0x86 [?]: 509 ****************
0x87 [?]: 501 ***************
0x88 [?]: 463 **************
0x89 [?]: 461 **************
0x8a [?]: 496 ***************
0x8b [?]: 501 ***************
0x8c [?]: 507 ***************
0x8d [?]: 467 **************
0x8e [?]: 475 **************
0x8f [?]: 544 *****************
0x90 [?]: 505 ***************
0x91 [?]: 512 ****************
0x92 [?]: 490 ***************
0x93 [?]: 482 ***************
0x94 [?]: 428 *************
0x95 [?]: 511 ****************
0x96 [?]: 489 ***************
0x97 [?]: 518 ****************
0x98 [?]: 485 ***************
0x99 [?]: 464 **************
0x9a [?]: 522 ****************
0x9b [?]: 528 ****************
0x9c [?]: 493 ***************
0x9d [?]: 516 ****************
0x9e [?]: 558 *****************
0x9f [?]: 531 ****************
0xa0 [?]: 529 ****************
0xa1 [?]: 550 *****************
0xa2 [?]: 474 **************
0xa3 [?]: 534 ****************
0xa4 [?]: 497 ***************
0xa5 [?]: 487 ***************
0xa6 [?]: 487 ***************
0xa7 [?]: 491 ***************
0xa8 [?]: 495 ***************
0xa9 [?]: 483 ***************
0xaa [?]: 498 ***************
0xab [?]: 451 **************
0xac [?]: 474 **************
0xad [?]: 480 ***************
0xae [?]: 490 ***************
0xaf [?]: 522 ****************
0xb0 [?]: 493 ***************
0xb1 [?]: 507 ***************
0xb2 [?]: 492 ***************
0xb3 [?]: 504 ***************
0xb4 [?]: 528 ****************
0xb5 [?]: 516 ****************
0xb6 [?]: 499 ***************
0xb7 [?]: 519 ****************
0xb8 [?]: 451 **************
0xb9 [?]: 507 ***************
0xba [?]: 517 ****************
0xbb [?]: 533 ****************
0xbc [?]: 483 ***************
0xbd [?]: 517 ****************
0xbe [?]: 527 ****************
0xbf [?]: 538 *****************
0xc0 [?]: 510 ****************
0xc1 [?]: 464 **************
0xc2 [?]: 490 ***************
0xc3 [?]: 492 ***************
0xc4 [?]: 449 **************
0xc5 [?]: 505 ***************
0xc6 [?]: 512 ****************
0xc7 [?]: 529 ****************
0xc8 [?]: 472 **************
0xc9 [?]: 493 ***************
0xca [?]: 510 ****************
0xcb [?]: 490 ***************
0xcc [?]: 482 ***************
0xcd [?]: 512 ****************
0xce [?]: 521 ****************
0xcf [?]: 497 ***************
0xd0 [?]: 498 ***************
0xd1 [?]: 524 ****************
0xd2 [?]: 482 ***************
0xd3 [?]: 545 *****************
0xd4 [?]: 459 **************
0xd5 [?]: 506 ***************
0xd6 [?]: 459 **************
0xd7 [?]: 527 ****************
0xd8 [?]: 485 ***************
0xd9 [?]: 523 ****************
0xda [?]: 515 ****************
0xdb [?]: 482 ***************
0xdc [?]: 468 **************
0xdd [?]: 557 *****************
0xde [?]: 532 ****************
0xdf [?]: 542 *****************
0xe0 [?]: 513 ****************
0xe1 [?]: 543 *****************
0xe2 [?]: 494 ***************
0xe3 [?]: 549 *****************
0xe4 [?]: 493 ***************
0xe5 [?]: 464 **************
0xe6 [?]: 505 ***************
0xe7 [?]: 545 *****************
0xe8 [?]: 539 *****************
0xe9 [?]: 540 *****************
0xea [?]: 513 ****************
0xeb [?]: 537 ****************
0xec [?]: 514 ****************
0xed [?]: 506 ***************
0xee [?]: 496 ***************
0xef [?]: 556 *****************
0xf0 [?]: 552 *****************
0xf1 [?]: 545 *****************
0xf2 [?]: 520 ****************
0xf3 [?]: 532 ****************
0xf4 [?]: 566 *****************
0xf5 [?]: 509 ****************
0xf6 [?]: 497 ***************
0xf7 [?]: 533 ****************
0xf8 [?]: 547 *****************
0xf9 [?]: 539 *****************
0xfa [?]: 518 ****************
0xfb [?]: 524 ****************
0xfc [?]: 560 *****************
0xfd [?]: 527 ****************
0xfe [?]: 482 ***************
0xff [?]: 496 ***************
Critique request
Is there anything left to improve? Please tell me anything that comes to mind.
2 Answers 2
It's got all my suggestions incorporated (apart from a late edit to recommend std::uint_fast64_t
from <cstdint>
instead of std::size_t
for the counts), so not much I can add now.
Just a handful of tiny observations:
Includes are almost in sorted order - except for
<cstdio>
.We use
std::isprint()
, but failed to include<cctype>
.size_t
assumed in global namespace - should bestd::size_t
.It's not clear why the
Histogram
class needs to be visible outside ofByteHistogram
.We should construct
histogram_
just once, in the initialiser list, rather than default-constructing and then resizing:public: ByteHistogram() : histogram_(histogramCapacity) { }
I think we should be providing (only)
const
versions ofbegin()
andend()
- unless you really want to allow modification of the internals from outside (I don't recommend that). A member function toclear()
the container may be worth considering.This code might print one fewer star than you expect:
std::cout << std::setfill('*') << std::setw(barLength) << '\n';
I don't see anywhere that the length of the
\n
is accounted for when computingbarLength
.Eliminate all compiler warnings - I'm getting a few for lossy arithmetic conversions and for the unused function
computeBarLength()
.
Making more use of C++20
Like the previous versions, this is valid C++14 code. But this one is tagged C++20, so let's have a look at what newer features we might take advantage of, now we're no longer supporting ten-year-old compilers.
These are all light suggestions rather than recommendations - the code being replaced is not awful, and the suggestion isn't always unequivocally better.
Finding the maximum value can be done with a simpler function call. Instead of
return *std::max_element(histogram_.cbegin(), histogram_.cend());
we can write
return std::ranges::max(histogram_);
We might choose to add a template overload of insert()
that accepts any range type (such as strings, string-views, other containers, etc):
#include <concepts>
#include <ranges>
template<std::ranges::input_range R>
void insert(const R& input)
requires std::convertible_to<std::ranges::range_value_t<R>, unsigned char>
{
for (unsigned char c: input) {
insert(c);
}
}
We're using two constraints here. The first is secreted away in template<std::ranges::input_range R>
, meaning that R
can be any type that satisfies the std::ranges::input_range
concept. The second is more explicit (written with requires
) and says that this template exists only if the value type of the range can be converted to unsigned char
.
You might be tempted to use this function to implement the stream input (by creating a std::views::istream<unsigned char>
from the stream to make a range), but that won't quite work, because the range takes formatted input from the stream and we want unformatted. Disappointingly, there's no std::views::istreambuf
to parallel std::istreambuf_iterator
, so if you want such a view, you'll have to make your own.
With a range adapter, we can change this for
loop that iterates over the arguments into a range-based loop:
for (int i = 1; i < argc; ++i) { std::ifstream stream{ argv[i], std::ios::binary }; ⋮
Sadly, the need to convert argc
to std::size_t
for the count argument obfuscates it a bit:
#include <ranges>
#include <span>
std::span const args{argv, static_cast<std::size_t>(argc)};
for (auto filename: args | std::views::drop(1)) {
std::ifstream stream{filename, std::ios::binary};
⋮
I think it's a bit more readable if we use the span constructor that takes two iterators (pointers, in this case):
for (auto filename: std::span{argv+1, argv+argc}) {
Here, I was able to adjust the first pointer to skip the program name as a simpler alternative to using the "drop" view.
Anticipating C++23
If and when we're able to move to C++23, the new formatted printing facility could be used for our error reporting. We can replace
std::cerr << "Reading from \"" << filename << "\" failed.\n";
with
std::println(std::cerr, R"(Reading from "{}" failed.)", filename);
(I've used a raw string here so that we don't need backslashes; that's available as far back as C++11, but often overlooked).
ByteHistogram
class. You'll need to spilt your source into separate translation units first. \$\endgroup\$.get()
in a loop, and performs the same as before.) BTW, I tested this with blitiri.com.ar/p/libfiu/doc/posix.html and it does correctly stop on file read errors and report them, so apparently error also returnseof
from.get()
. At least glibc / libstdc++ does. \$\endgroup\$