I am writing a generic C++14 multi dimensional array for computational science purpose.
The "features" so far for the class tries to:
- store multi-dimensional grid values into a flatten array
- provide grid values access and flat index access from multi dim coordinates with \$O(1)\$ average complexity (worst case is \$O(N)\$)
- provide grid values access and coordinates access from flat index with \$O(1)\$ complexity
- provide iterators so that grid behaves like a STL container
- statically-sized arrays, efficient storage, similar to C-style array thanks to metaprogramming
The key point in this implementation is to provide easy flat index Grid[idx]
conversion to/from multi-dim grid Grid[{x,y,z}]
. For instance in 2D, we get {x,y}
from {idx % N, idx / N}
for instance where N
is the total size of the grid. The aim is to make this generic.
#include <iostream>
#include <array>
#include <vector>
#include <unordered_map>
#include <type_traits>
#include <algorithm> // just for std::generate in main()
// std::unordered_map needs std::hash specialization for std::array
namespace std {
template<typename T, size_t N>
struct hash<array<T, N> > {
using argument_type = array<T, N> ;
using result_type = size_t;
result_type operator()(const argument_type& a) const {
hash<T> hasher;
result_type h = 0;
for (result_type i = 0; i < N; ++i) {
h = h * 31 + hasher(a[i]);
}
return h;
}
};
}
// pretty-print for std::array
template<class T, size_t N>
std::ostream& operator<<(std::ostream& os, const std::array<T, N>& arr) {
os << "{";
for (auto && el : arr) { os << el << ";"; }
return os << "\b}";
}
// meta functions
template<typename T>
constexpr T meta_prod(T x) { return x; }
template<typename T, typename... Ts>
constexpr T meta_prod(T x, Ts... xs) { return x * meta_prod(xs...); }
template<typename T, typename E>
constexpr T meta_pow(T base, E expo) { return (expo != 0 ) ? base * meta_pow(base, expo-1) : 1; }
// Compute the total number of elements 2x2x2 for two usage
// for Grid<3, float, 2, 2, 2> (specify all size dimensions)
template<size_t DIM, size_t... NDIM> constexpr
std::enable_if_t<sizeof...(NDIM) != 1, size_t>
num_vertices() { return meta_prod(NDIM...); }
// for Grid<3, float, 2> (specify one size dimension and consider the same size for other dimensions)
template<size_t DIM, size_t... NDIM> constexpr
std::enable_if_t<sizeof...(NDIM) == 1, size_t>
num_vertices() { return meta_pow(NDIM...,DIM); }
template<size_t DIM, typename T, size_t... NDIM>
class MultiGrid {
public:
static_assert(sizeof...(NDIM) == 1 or sizeof...(NDIM) == DIM,
"Variadic template arguments in Multigrid do not match dimension size !");
using ArrayValues = std::array<T,num_vertices<DIM,NDIM...>()>;
using ArrayCoord = std::array<size_t,DIM>;
using MapIndexToCoord = std::array<ArrayCoord,num_vertices<DIM,NDIM...>()>;
using MapCoordToIndex = std::unordered_map<ArrayCoord,size_t>;
using value_type = typename ArrayValues::value_type; // T
using reference = typename ArrayValues::reference; // T&
using const_reference = typename ArrayValues::const_reference; // const T&
using size_type = typename ArrayValues::size_type; // size_t
using iterator = typename ArrayValues::iterator; // random access iterator
using const_iterator = typename ArrayValues::const_iterator;
MultiGrid() : MultiGrid(ArrayValues{}) {} // default constructor use delegating constructor
MultiGrid(const ArrayValues& values)
: map_idx_to_coord_(fill_map_idx_to_coord())
, map_coord_to_idx_(fill_map_coord_to_idx())
, values_(values)
{}
iterator begin() { return values_.begin(); }
const_iterator begin() const { return values_.begin(); }
const_iterator cbegin() const { return values_.cbegin(); }
iterator end() { return values_.end(); }
const_iterator end() const { return values_.end(); }
const_iterator cend() const { return values_.cend(); }
reference operator[] (size_type idx) { return values_[idx]; };
const_reference operator[] (size_type idx) const { return values_[idx]; };
reference operator[] (const ArrayCoord& coord) {
return values_[map_coord_to_idx_.at(coord)];
};
const_reference operator[] (const ArrayCoord& coord) const {
return const_cast<reference>(static_cast<const MultiGrid&>(*this)[coord]);
};
auto get_coord_from_index(size_type idx) const {
return map_idx_to_coord_.at(idx);
}
auto get_index_from_coord(const ArrayCoord& coord) const {
return map_coord_to_idx_.at(coord);
}
private:
auto fill_map_idx_to_coord() const {
MapIndexToCoord coord;
std::array<size_t,DIM> size_per_dim{{NDIM...}};
if (sizeof...(NDIM) == 1) { size_per_dim.fill(size_per_dim[0]); }
for (size_t j = 0; j < num_vertices<DIM,NDIM...>(); j ++) {
size_t a = j, b = num_vertices<DIM,NDIM...>(), r = 0;
for(size_t i = 0; i <= DIM - 2; i ++) {
b /= size_per_dim[DIM - i - 1];
coord[j][DIM-i-1] = a / b;
r = a % b;
a = r;
}
coord[j][0] = r;
}
return coord;
}
auto fill_map_coord_to_idx() const {
MapCoordToIndex mapping(num_vertices<DIM,NDIM...>());
for(size_t i = 0; i < num_vertices<DIM,NDIM...>(); i ++) {
mapping.emplace(map_idx_to_coord_[i],i); // reuse the previous mapping
}
return mapping;
}
friend auto &operator<<(std::ostream &os, const MultiGrid& that) {
os << "Values : {";
for (auto&& v : that.values_) { os << v << ";"; }
os << "\b}\nMapping index to coord :\n";
static size_t count{0};
for (auto&& m : that.map_idx_to_coord_) { os << count ++ << ":" << m << "\t"; }
os << "\nMapping coord to index :\n";
for (auto && m : that.map_coord_to_idx_) { os << m.first << "->" << m.second << "\t"; }
return os << "\n";
}
private:
MapIndexToCoord map_idx_to_coord_; // O(1) access flat index -> dim coordinates
MapCoordToIndex map_coord_to_idx_; // O(1) average acess dim coordinates -> flat index (worst case : O(N))
ArrayValues values_; // same behaviour as declaring `float values_[meta_prod(NDIM)];`
};
int main() {
// Create a 4D grid with 3x2x3x5 vertices
MultiGrid<4,float,3,2,3,5> grid;
// grid behaves like a STL container and we can fill values with std::generate
std::generate(grid.begin(), grid.end(), []() {static float n{0.0f}; return n+=0.5f;} );
std::cout << grid << std::endl;
// get coordinates from index
std::cout << "get_coord_from_index(43) = " << grid.get_coord_from_index(43) << std::endl;
// and vice versa
std::cout << "get_index_from_coord({{2,0,2,3}}) = " << grid.get_index_from_coord({{2,0,2,3}}) << std::endl;
// print value at specific coordinates
std::cout << "Grid[{{2,0,2,3}}] = " << grid[{{2,0,2,3}}] << std::endl;
// print value at specific index
std::cout << "Grid[42] = " << grid[42] << "\n\n";
MultiGrid<2, float, 2> little_grid;
std::cout << little_grid << std::endl;
}
Live update with Barry insights
This is an ongoing work, I have an other (bad) implementation which considers a more "realistic" grid where we store the meshes lower/upper bounds, the strides, and the real (float) coordinates. Hence this implementation intends to start with something nicer.
Therefore, I am interested about your advice (especially for optimizing things). I guess there are some parts which can be coded simpler (e.g. I am not satisfied with the hideous if (sizeof...(NDIM) == 1) { size_per_dim.fill(size_per_dim[0]); }
).
1 Answer 1
I think the major problem with this solution is Unnecessary Redundancy. Let's start with the signature:
template<size_t DIM, typename T, size_t... NDIM>
class MultiGrid;
Why do you both need to specify the number of dimensions and the dimensions? You have to assert that the two line up, so why not just imply the #?
template <typename T, size_t... DIMS>
class MultiGrid {
static constexpr size_t NUM_DIMS = sizeof...(DIMS);
static_assert(NUM_DIMS > 0, "because what are you doing?");
};
The idea that MultiGrid<3, float, 2>
should be a 2x2x2 is a little too magical. If people want a 2x2x2 grid, they can write 2 three times. If they want a 2x2x2x....2 grid, they can write a metafunction themselves.
But that's just a minor quibble compared to... HOLY SIZE, BATMAN!
std::cout << sizeof(MultiGrid<5,int,2,2,2,2,2>) << std::endl;
// prints 1464
Uh, what now? I have a 5-dimension array that consists of 32 integers. That should be 128 bytes. You're using more than 10x the amount of memory necessary! And that underestimates the size since you have this huge unordered_map
which is all on the heap. Why? Because in addition to storing your array (values_
), you're storing completely unnecessary information in map_idx_to_coord_
and map_coord_to_idx_
.
Why do I say unnecessary? Let's consider your access by ArrayCoord
:
reference operator[] (const ArrayCoord& coord) {
return values_[map_coord_to_idx_.at(coord)];
}
Do we really need the map lookup there? Can we just mathematically determine the right idx
from coord
? Of course we can:
namespace detail {
template <typename Iter, size_t D0>
size_t flatten(Iter first, Iter, std::index_sequence<D0> ) {
return *first;
}
template <typename Iter, size_t D0, size_t... DIMS>
size_t flatten(Iter first, Iter last, std::index_sequence<D0, DIMS...> ) {
return *first * meta_prod(DIMS...) +
flatten(std::next(first), last, std::index_sequence<DIMS...>{} );
}
}
reference operator[] (const ArrayCoord& coord) {
return values_[details::flatten(
coord.begin(), coord.end(), std::index_sequence<DIMS...>{})];
}
That will compute your index from an array of coordinates on the fly. It's still constant time and is guaranteed faster than your unordered_map
solution anyway (which still has to walk the whole array at least twice - once to do the hash and again to do equality comparison). And will use zero extra space, so everything else will perform better too.
Drop the extra members. When you're writing containers, lean is king. This class should have exactly one member: values_
.
-
\$\begingroup\$ Wow thank you so much @Barry, this is a really nice review. New code is now available here, I have followed your points and it works like a charm ! May I ask you if it is possible to also get rid of the
map_idx_to_coord_
and get a compile time solution somehow (I am not that experienced in TMP... and sizeof is 1408 now) ? What are the extra members I should drop ? How can I propose helper metafunctions to make the "magical"MultiGrid<3, float, 2>
possible again ? Thank you again for your great enlightments ! \$\endgroup\$coincoin– coincoin2015年07月17日 21:56:22 +00:00Commented Jul 17, 2015 at 21:56
auto
(ii) also providecbegin
/cend
, (iii) no need to move here:std::move(fill_map_idx_to_coord())
, (iv) replace!std::is_same<std::integral_constant<size_t, sizeof...(NDIM)>,std::integral_constant<size_t, 1>>::value
simply bysizeof...(NDIM) != 1
, (v) I'd drop the uglyconst_cast
stuff and just writevalues_[idx]
again ... better code duplication than three times as much and hardly-understandeable code. \$\endgroup\$iterator begin()
you writeauto begin()
,auto fill_map_idx_to_coord() const
instead ofMapIndexToCoord fill_map_idx_to_coord() const
and so on ... you tagged C++14 so you have available this feature, it's called return type deduction. (ii) yes, leave them ... and edit whatever you want ;-) \$\endgroup\$