This is a follow-up question for An Updated Multi-dimensional Image Data Structure with Variadic Template Functions in C++, histogram Template Function Implementation for Image in C++, Histogram of Image using std::map in C++, histogram_normalized and histogram_with_bins Template Functions Implementation for Image in C++, normalize_histogram Template Function Implementation for Image in C++, otsu_threshold Template Function Implementation for Image in C++ and Histogram Class Implementation for Image in C++. Considering the comments from G. Sliepen, I implemented the Histogram
class using std::variant
in this post.
Why use
std::variant
?For the case of
ElementT
isstd::uint8_t
orstd::uint16_t
, usingstd::vector
would be much more efficient. However, for other case whichElementT
could befloat
ordouble
, it's impossible to use its value as the index ofstd::vector
object. Therefore,std::map
is used for those cases.
The experimental implementation
Histogram
class implementationnamespace TinyDIP { template<class ElementT, class CountT = std::size_t> class Histogram { private: std::variant<std::vector<CountT>, std::map<ElementT, CountT>> histogram; public: // Histogram constructor // Explicitly initialize based on ElementT type Histogram() { if constexpr ((std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { histogram.template emplace<std::vector<CountT>>(std::numeric_limits<ElementT>::max() + 1, 0); } else { histogram.template emplace<std::map<ElementT, CountT>>(); } } Histogram(const std::map<ElementT, CountT>& input) { histogram = input; } Histogram(const std::vector<CountT>& input) { histogram = input; } // Histogram constructor Histogram(const Image<ElementT>& input) { if constexpr ((std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { histogram.template emplace<std::vector<CountT>>(std::numeric_limits<ElementT>::max() + 1, 0); } else { histogram.template emplace<std::map<ElementT, CountT>>(); } auto image_data = input.getImageData(); for (std::size_t i = 0; i < image_data.size(); ++i) { addCount(image_data[i]); } } // getCount member function implementation constexpr auto getCount(const ElementT& input) const { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { return std::get<std::vector<CountT>>(histogram).at(input); } else { if (auto search = std::get<std::map<ElementT, CountT>>(histogram).find(input); search != std::get<std::map<ElementT, CountT>>(histogram).end()) { return search->second; } else { return CountT{ 0 }; } } } // getCountSum member function with execution policy template<class ExecutionPolicy> requires(std::is_execution_policy_v<std::remove_cvref_t<ExecutionPolicy>>) constexpr auto getCountSum(ExecutionPolicy&& execution_policy) const { CountT output{}; if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { auto get_result = std::get<std::vector<CountT>>(histogram); return std::reduce( std::forward<ExecutionPolicy>(execution_policy), std::ranges::cbegin(get_result), std::ranges::cend(get_result), output); } else { auto get_result = std::get<std::map<ElementT, CountT>>(histogram); for (const auto& [key, value] : get_result) { output += value; } return output; } } // getCountSum member function without execution policy constexpr auto getCountSum() const { return getCountSum(std::execution::seq); } // addCount member function constexpr Histogram& addCount(const ElementT& input) { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { auto get_result = std::get<std::vector<CountT>>(histogram); ++get_result[input]; histogram = get_result; return *this; } else { auto get_result = std::get<std::map<ElementT, CountT>>(histogram); ++get_result[input]; histogram = get_result; return *this; } } // normalize Template Function Implementation with Execution Policy template<class ExecutionPolicy, class ProbabilityType = double> requires(std::is_execution_policy_v<std::remove_cvref_t<ExecutionPolicy>>) constexpr auto normalize(ExecutionPolicy&& execution_policy) { auto count_sum = static_cast<ProbabilityType>(getCountSum(std::forward<ExecutionPolicy>(execution_policy))); if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { std::vector<ProbabilityType> output(std::numeric_limits<ElementT>::max() + 1); const auto& get_result = std::get<std::vector<CountT>>(histogram); for (std::size_t i = 0; i < get_result.size(); ++i) { output[i] = static_cast<ProbabilityType>(get_result[i]) / count_sum; } return Histogram<ElementT, ProbabilityType>{output}; } else { std::map<ElementT, ProbabilityType> output; auto get_result = std::get<std::map<ElementT, CountT>>(histogram); for (const auto& [key, value] : get_result) { output.emplace(key, static_cast<ProbabilityType>(value) / count_sum); } return Histogram<ElementT, ProbabilityType>{output}; } } // normalize Template Function Implementation without Execution Policy template<class ProbabilityType = double> constexpr auto normalize() { return normalize<const std::execution::sequenced_policy, ProbabilityType>(std::move(std::execution::seq)); } constexpr auto size() const { if constexpr ( std::same_as<ElementT, std::uint8_t> || std::same_as<ElementT, std::uint16_t>) { auto get_result = std::get<std::vector<CountT>>(histogram); return get_result.size(); } else { auto get_result = std::get<std::map<ElementT, CountT>>(histogram); return get_result.size(); } } // to_probabilities_vector member function with execution policy template<class ExecutionPolicy, class FloatingType = double> requires((std::is_execution_policy_v<std::remove_cvref_t<ExecutionPolicy>>) and (std::floating_point<FloatingType>)) constexpr std::vector<FloatingType> to_probabilities_vector(ExecutionPolicy&& execution_policy) const { std::vector<FloatingType> probabilities; if constexpr ( std::same_as<ElementT, std::uint8_t> || std::same_as<ElementT, std::uint16_t>) { probabilities.resize(std::numeric_limits<ElementT>::max() + 1); std::size_t total_count = getCountSum(std::forward<ExecutionPolicy>(execution_policy)); for (std::size_t i = 0; i < probabilities.size(); ++i) { probabilities[i] = static_cast<FloatingType>(getCount(static_cast<ElementT>(i))) / static_cast<FloatingType>(total_count); } } else { std::vector<std::pair<ElementT, double>> probability_vector(size()); auto total_count = getCountSum(std::forward<ExecutionPolicy>(execution_policy)); for (const auto& [key, value] : *this) { probability_vector.emplace_back( { key, static_cast<FloatingType>(value) / static_cast<FloatingType>(total_count) }); } std::sort(std::forward<ExecutionPolicy>(execution_policy), probability_vector.begin(), probability_vector.end(), [&](const auto& a, const auto& b) { return a.first < b.first; }); probabilities.resize(probability_vector.back().first + 1, 0.0); for (const auto& pair : probability_vector) { probabilities[pair.first] = pair.second; } } return probabilities; } // to_probabilities_vector member function without execution policy template<class FloatingType = double> requires(std::floating_point<FloatingType>) constexpr std::vector<FloatingType> to_probabilities_vector() const { return to_probabilities_vector(std::execution::seq); } auto cbegin() const { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { auto get_result = std::get<std::vector<CountT>>(histogram); return get_result.cbegin(); } else { auto get_result = std::get<std::map<ElementT, CountT>>(histogram); return get_result.cbegin(); } } auto cend() const { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { auto get_result = std::get<std::vector<CountT>>(histogram); return get_result.cend(); } else { auto get_result = std::get<std::map<ElementT, CountT>>(histogram); return get_result.cend(); } } auto begin() const { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { auto get_result = std::get<std::vector<CountT>>(histogram); return get_result.cbegin(); } else { auto get_result = std::get<std::map<ElementT, CountT>>(histogram); return get_result.cbegin(); } } auto end() const { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { auto get_result = std::get<std::vector<CountT>>(histogram); return get_result.cend(); } else { auto get_result = std::get<std::map<ElementT, CountT>>(histogram); return get_result.cend(); } } auto begin() { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { auto get_result = std::get<std::vector<CountT>>(histogram); return get_result.begin(); } else { auto get_result = std::get<std::map<ElementT, CountT>>(histogram); return get_result.begin(); } } auto end() { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { auto get_result = std::get<std::vector<CountT>>(histogram); return get_result.end(); } else { auto get_result = std::get<std::map<ElementT, CountT>>(histogram); return get_result.end(); } } // += operator to add two Histograms Histogram& operator+=(const Histogram& other) const { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { auto& get_result = std::get<std::vector<CountT>>(histogram); const auto& get_result_other = std::get<const std::vector<CountT>>(other.histogram); if (get_result.size() != get_result_other.size()) { throw std::runtime_error("Size mismatched for vector-based histograms!"); } for (std::size_t i = 0; i < get_result.size(); i++) { get_result[i] += get_result_other[i]; } histogram = get_result; } else { auto& get_result = std::get<std::map<ElementT, CountT>>(histogram); const auto get_result_other = std::get<const std::map<ElementT, CountT>>(other.histogram); for (const auto& [key, value] : get_result_other) { get_result[key] += value; } histogram = get_result; } return *this; } // operator+ (using operator+=) Histogram operator+(const Histogram& other) const { Histogram result = *this; result += other; return result; } // -= operator to subtract two Histograms Histogram& operator-=(const Histogram& other) const { if constexpr ( (std::same_as<ElementT, std::uint8_t>) || (std::same_as<ElementT, std::uint16_t>)) { auto& get_result = std::get<std::vector<CountT>>(histogram); const auto& get_result_other = std::get<const std::vector<CountT>>(other.histogram); if (get_result.size() != get_result_other.size()) { throw std::runtime_error("Size mismatched!"); } std::vector<CountT> result_data = get_result; for (std::size_t i = 0; i < result_data.size(); i++) { if (result_data[i] >= get_result_other[i]) { result_data[i] -= get_result_other[i]; } else { result_data[i] = 0; } } return Histogram<ElementT, CountT>{ result_data }; } else { auto& get_result = std::get<std::map<ElementT, CountT>>(histogram); const auto& get_result_other = std::get<const std::map<ElementT, CountT>>(other.histogram); std::map<ElementT, CountT> result_data = get_result; for (const auto& [key, value] : get_result_other) { if (result_data.contains(key)) { if (result_data.at(key) >= value) { result_data[key] -= value; } else { result_data[key] = 0; } } } return Histogram<ElementT, CountT>{ result_data }; } } // operator- (using operator-=) Histogram operator-(const Histogram& other) const { Histogram result = *this; result -= other; return result; } // operator[] to access and modify counts CountT& operator[](const ElementT& key) { if constexpr ( (std::same_as<ElementT, std::uint8_t>) || (std::same_as<ElementT, std::uint16_t>)) { auto& get_result = std::get<std::vector<CountT>>(histogram); if (static_cast<std::size_t>(key) >= get_result.size()) { std::cerr << "key = " << +key << '\n'; throw std::out_of_range("Index out of range"); } return get_result[static_cast<std::size_t>(key)]; } else { return std::get<std::map<ElementT, CountT>>(histogram)[key]; } } // const operator[] Implementation const CountT& operator[](const ElementT& key) const { if constexpr ( (std::same_as<ElementT, std::uint8_t>) or (std::same_as<ElementT, std::uint16_t>)) { const auto& get_result = std::get<std::vector<CountT>>(histogram); if (static_cast<std::size_t>(key) >= get_result.size()) { static const CountT zero_count = 0; return zero_count; } return get_result[static_cast<std::size_t>(key)]; } else { const auto& get_result = std::get<std::map<ElementT, CountT>>(histogram); if (auto it = get_result.find(key); it != get_result.end()) { return it->second; } else { static const CountT zero_count = 0; return zero_count; } } } }; }
The usage of Histogram
class:
namespace TinyDIP
{
// otsu_threshold template function implementation (with Execution Policy)
// mimic https://github.com/DIPlib/diplib/blob/004755a94fa25a1da79928fd59728abe177bf304/src/histogram/threshold_algorithms.h#L30
template <class ExPo, std::ranges::input_range RangeT, class FloatingType = double>
requires(std::is_execution_policy_v<std::remove_cvref_t<ExPo>>)
constexpr static auto otsu_threshold(
ExPo&& execution_policy,
const RangeT& histogram)
{
FloatingType w1{};
FloatingType w2{};
FloatingType m1{};
FloatingType m2{};
for (std::size_t ii = 0; ii < histogram.size(); ++ii)
{
w2 += static_cast<FloatingType>(histogram[ii]);
m2 += static_cast<FloatingType>(histogram[ii]) * static_cast<FloatingType>(ii);
}
FloatingType ssMax = -1e6;
std::size_t maxInd{};
for (std::size_t ii = 0; ii < histogram.size(); ++ii)
{
FloatingType tmp = static_cast<FloatingType>(histogram[ii]);
w1 += tmp;
w2 -= tmp;
tmp *= static_cast<FloatingType>(ii);
m1 += tmp;
m2 -= tmp;
FloatingType c1 = m1 / w1;
FloatingType c2 = m2 / w2;
FloatingType c = c1 - c2;
FloatingType ss = w1 * w2 * c * c;
if (ss > ssMax)
{
ssMax = ss;
maxInd = ii;
}
}
return (ssMax == -1e6) ? histogram.size() : maxInd;
}
// otsu_threshold template function implementation
template <std::ranges::input_range RangeT>
constexpr static auto otsu_threshold(const RangeT& histogram)
{
return otsu_threshold(std::execution::seq, histogram);
}
// otsu_threshold template function implementation
template <class ExPo, class ElementT>
requires(std::is_execution_policy_v<std::remove_cvref_t<ExPo>>)
constexpr static auto otsu_threshold(
ExPo&& execution_policy,
const Image<ElementT>& input)
{
return otsu_threshold(
std::forward<ExPo>(execution_policy),
Histogram<ElementT>(input).to_probabilities_vector(
std::forward<ExPo>(execution_policy)
)
);
}
// otsu_threshold template function implementation
template<class ElementT>
constexpr static auto otsu_threshold(
const Image<ElementT>& input)
{
return otsu_threshold(std::execution::seq, Histogram<ElementT>(input).to_probabilities_vector());
}
}
The main function:
/* Developed by Jimmy Hu */
#include <chrono>
#include <filesystem>
#include <ostream>
#include "../base_types.h"
#include "../basic_functions.h"
#include "../histogram.h"
#include "../image.h"
#include "../image_io.h"
#include "../image_operations.h"
#include "../timer.h"
template<class ExPo, class ElementT>
requires (std::is_execution_policy_v<std::remove_cvref_t<ExPo>>)
constexpr static auto otsuThresholdTest(
ExPo&& execution_policy,
const TinyDIP::Image<ElementT>& input,
std::ostream& os = std::cout
)
{
auto hsv_image = TinyDIP::rgb2hsv(std::forward<ExPo>(execution_policy), input);
TinyDIP::Timer timer1;
auto unit8_image = TinyDIP::im2uint8(TinyDIP::getVplane(hsv_image));
return TinyDIP::apply_threshold_openmp(
unit8_image,
static_cast<TinyDIP::GrayScale>(
TinyDIP::otsu_threshold(
std::forward<ExPo>(execution_policy),
unit8_image
)
)
);
}
int main(int argc, char* argv[])
{
std::string image_filename = "../InputImages/1.bmp"; // Default file path
if (argc == 2) // User has specified input file
{
image_filename = std::string(argv[1]);
}
if (!std::filesystem::is_regular_file(image_filename))
{
throw std::runtime_error("Error: File not found!");
}
TinyDIP::Timer timer1;
auto image_input = TinyDIP::bmp_read(image_filename.c_str(), true);
image_input = TinyDIP::copyResizeBicubic(image_input, 3 * image_input.getWidth(), 3 * image_input.getHeight());
auto image_output = otsuThresholdTest(std::execution::par, image_input);
TinyDIP::bmp_write("test_output", TinyDIP::constructRGB(image_output, image_output, image_output));
return EXIT_SUCCESS;
}
The output of the test code above:
Width of the input image: 512
Height of the input image: 512
Size of the input image(Byte): 786432
Computation finished at Wed Apr 2 12:45:20 2025
elapsed time: 0.3790096 seconds.
Computation finished at Wed Apr 2 12:45:20 2025
elapsed time: 1.4712973 seconds.
All suggestions are welcome.
The summary information:
Which question it is a follow-up to?
An Updated Multi-dimensional Image Data Structure with Variadic Template Functions in C++,
histogram Template Function Implementation for Image in C++,
Histogram of Image using std::map in C++,
histogram_normalized and histogram_with_bins Template Functions Implementation for Image in C++,
normalize_histogram Template Function Implementation for Image in C++,
otsu_threshold Template Function Implementation for Image in C++ and
What changes has been made in the code since last question?
I implemented
Histogram
template class usingstd::variant
in this post.Why a new review is being asked for?
Please review the implementation of
Histogram
template class and its tests.
1 Answer 1
Repeating test on template parameter
Almost all of the members have a test
if constexpr ((std::same_as<ElementT, std::uint8_t>) or
(std::same_as<ElementT, std::uint16_t>))
Consider adding a member that hold this
static constexpr bool is_dense = (std::same_as<ElementT, std::uint8_t>) or
(std::same_as<ElementT, std::uint16_t>);
Which then simplifies the other members to
if constexpr (is_dense)
Histogram Invariants
The default constructor (and Image
constructor) setup an invariant that the rest of the class relies on. However you have two constructors that allow users to totally bypass that:
Histogram(const std::map<ElementT, CountT>& input);
Histogram(const std::vector<CountT>& input);
You can partially fix that by constraining those constructors, and ensuring the vector is properly sized.
Histogram(const std::map<ElementT, CountT>& input) requires !is_dense;
Histogram(std::vector<CountT> input) requires is_dense
{
input.resize(std::numeric_limits<ElementT>::max() + 1, 0);
histogram = std::move(input);
}
std::visit
A number of your members do the same action in each branch of the if constexpr
. You can re-write these with std::visit
and a generic lambda.
constexpr Histogram& addCount(const ElementT& input)
{
return std::visit([&](auto& data)
{
++data[input];
return *this;
}, histogram);
}
Unintentional copies
For the remaining members, often you are copying your data. std::get
returns a reference, hold on to that.
template<class ExecutionPolicy>
requires(std::is_execution_policy_v<std::remove_cvref_t<ExecutionPolicy>>)
constexpr auto getCountSum(ExecutionPolicy&& execution_policy) const
{
CountT output{};
if constexpr (is_dense)
{
const auto& data = std::get<std::vector<CountT>>(histogram);
return std::reduce(
std::forward<ExecutionPolicy>(execution_policy),
std::ranges::cbegin(data),
std::ranges::cend(data),
output);
}
else
{
const auto& data = std::get<std::map<ElementT, CountT>>(histogram) | std::views::values;
return std::reduce(
std::forward<ExecutionPolicy>(execution_policy),
std::ranges::cbegin(data),
std::ranges::cend(data),
output);
}
}
to_probabilities_vector
This just doesn't work with floating point ElementT
, and is potentially huge for wide integral types.
Iterators
All of your iterators dangle, and they don't provide a consistent view on the data. Dangling is fixed by using references to the underlying data. You should decide what the iterators should be.
Just the counts
auto cbegin() const
{
if constexpr (is_dense)
{
return std::cbegin(std::get<std::vector<CountT>>(histogram));
}
else
{
return std::cbegin(std::get<std::map<ElementT, CountT>>(histogram) | std::views::values);
}
}
The elements and the counts
auto cbegin() const
{
if constexpr (is_dense)
{
return std::cbegin(std::get<std::vector<CountT>>(histogram) | std::views::enumerate);
}
else
{
return std::cbegin(std::get<std::map<ElementT, CountT>>(histogram));
}
}
Exceptions in accessors
These should never be reached. The constructors all set size of the vector to allow all possible elements.
Subtraction
I don't think it is meaningful to try to subtract two histograms. Your subtraction isn't the inverse of your addition, because you disallow negative counts. Using + and - will confuse people. Either drop -, or use different names.
-
\$\begingroup\$ I would say both operators (or all four, I guess) are an abuse of operator overloading. The idea of using histograms in equations is nonsense, as is the concept of "adding" histograms. I suppose histograms’ data can in some cases be "combined" or "merged"... though not added. But histograms themselves? Certainly not. \$\endgroup\$indi– indi2025年08月15日 20:28:14 +00:00Commented Aug 15 at 20:28
-
\$\begingroup\$ @indi I can see an argument for +, in the same sense as string concatenation might use + \$\endgroup\$Caleth– Caleth2025年08月16日 14:47:38 +00:00Commented Aug 16 at 14:47
-
\$\begingroup\$ It wouldn’t be a good argument; "adding histograms" doesn’t really make sense. What would you expect to get? A histogram is a chart; is two histograms "added" a double chart of the two charts side-by-side? (📊+📊=📊📊?) No? You’d expect the two datasets to be merged (note: merged, not "added")? And that’s "addition" because the bin data values are added? Except that’s wrong; the bin data values are only added in one specific case: when the bins of the two "operand" histograms are identical. Otherwise, the merge is done differently... not by addition, but by some kind of split-then-add. \$\endgroup\$indi– indi2025年08月16日 21:28:19 +00:00Commented Aug 16 at 21:28
-
\$\begingroup\$ The "adding" case where you can just add the bin values is really just a special-case optimization of the general case... which is clearly not "addition". Nor is it concatenation (the other universally understood meaning of "+"). If any mathematical operator would apply, it would be "⋃". But even that would be sketchy. When it’s this much of a stretch to justify an operator (even if it weren’t wrong), that’s a bad case for operator overloading. \$\endgroup\$indi– indi2025年08月16日 21:28:37 +00:00Commented Aug 16 at 21:28
-
1\$\begingroup\$ @indi Adding histograms is a common concept in the field of image processing. If a histogram is a summary representation of a set, then the sum of two histograms is the histogram of the set addition of the two sets they represent. Histogram subtraction would correspond to the set subtraction. There is no confusion to be had by overloading plus and minus for these operations. \$\endgroup\$Cris Luengo– Cris Luengo2025年08月17日 12:39:01 +00:00Commented Aug 17 at 12:39