Generic multi dimension grid / array class in C++

Question 1

I am writing a generic C++14 multi dimensional array for computational science purpose.

The "features" so far for the class tries to:

store multi-dimensional grid values into a flatten array
provide grid values access and flat index access from multi dim coordinates with \$O(1)\$ average complexity (worst case is \$O(N)\$)
provide grid values access and coordinates access from flat index with \$O(1)\$ complexity
provide iterators so that grid behaves like a STL container
statically-sized arrays, efficient storage, similar to C-style array thanks to metaprogramming

The key point in this implementation is to provide easy flat index Grid[idx] conversion to/from multi-dim grid Grid[{x,y,z}]. For instance in 2D, we get {x,y} from {idx % N, idx / N} for instance where N is the total size of the grid. The aim is to make this generic.

#include <iostream>
#include <array>
#include <vector>
#include <unordered_map>
#include <type_traits>
#include <algorithm> // just for std::generate in main()
// std::unordered_map needs std::hash specialization for std::array 
namespace std {
 template<typename T, size_t N>
 struct hash<array<T, N> > {
 using argument_type = array<T, N> ;
 using result_type = size_t;
 result_type operator()(const argument_type& a) const {
 hash<T> hasher;
 result_type h = 0;
 for (result_type i = 0; i < N; ++i) {
 h = h * 31 + hasher(a[i]);
 }
 return h;
 }
 };
}
// pretty-print for std::array
template<class T, size_t N> 
std::ostream& operator<<(std::ostream& os, const std::array<T, N>& arr) {
 os << "{";
 for (auto && el : arr) { os << el << ";"; }
 return os << "\b}";
}
// meta functions
template<typename T>
constexpr T meta_prod(T x) { return x; }
template<typename T, typename... Ts>
constexpr T meta_prod(T x, Ts... xs) { return x * meta_prod(xs...); }
template<typename T, typename E>
constexpr T meta_pow(T base, E expo) { return (expo != 0 ) ? base * meta_pow(base, expo-1) : 1; }
// Compute the total number of elements 2x2x2 for two usage 
// for Grid<3, float, 2, 2, 2> (specify all size dimensions)
template<size_t DIM, size_t... NDIM> constexpr
std::enable_if_t<sizeof...(NDIM) != 1, size_t>
num_vertices() { return meta_prod(NDIM...); }
// for Grid<3, float, 2> (specify one size dimension and consider the same size for other dimensions) 
template<size_t DIM, size_t... NDIM> constexpr
std::enable_if_t<sizeof...(NDIM) == 1, size_t>
num_vertices() { return meta_pow(NDIM...,DIM); }
template<size_t DIM, typename T, size_t... NDIM>
class MultiGrid {
public:
 static_assert(sizeof...(NDIM) == 1 or sizeof...(NDIM) == DIM, 
 "Variadic template arguments in Multigrid do not match dimension size !");
 using ArrayValues = std::array<T,num_vertices<DIM,NDIM...>()>;
 using ArrayCoord = std::array<size_t,DIM>;
 using MapIndexToCoord = std::array<ArrayCoord,num_vertices<DIM,NDIM...>()>;
 using MapCoordToIndex = std::unordered_map<ArrayCoord,size_t>;
 using value_type = typename ArrayValues::value_type; // T
 using reference = typename ArrayValues::reference; // T&
 using const_reference = typename ArrayValues::const_reference; // const T&
 using size_type = typename ArrayValues::size_type; // size_t
 using iterator = typename ArrayValues::iterator; // random access iterator
 using const_iterator = typename ArrayValues::const_iterator;
 MultiGrid() : MultiGrid(ArrayValues{}) {} // default constructor use delegating constructor
 MultiGrid(const ArrayValues& values) 
 : map_idx_to_coord_(fill_map_idx_to_coord())
 , map_coord_to_idx_(fill_map_coord_to_idx())
 , values_(values)
 {}
 iterator begin() { return values_.begin(); }
 const_iterator begin() const { return values_.begin(); }
 const_iterator cbegin() const { return values_.cbegin(); }
 iterator end() { return values_.end(); }
 const_iterator end() const { return values_.end(); }
 const_iterator cend() const { return values_.cend(); }
 reference operator[] (size_type idx) { return values_[idx]; };
 const_reference operator[] (size_type idx) const { return values_[idx]; };
 reference operator[] (const ArrayCoord& coord) { 
 return values_[map_coord_to_idx_.at(coord)]; 
 };
 const_reference operator[] (const ArrayCoord& coord) const { 
 return const_cast<reference>(static_cast<const MultiGrid&>(*this)[coord]); 
 };
 auto get_coord_from_index(size_type idx) const {
 return map_idx_to_coord_.at(idx);
 }
 auto get_index_from_coord(const ArrayCoord& coord) const {
 return map_coord_to_idx_.at(coord);
 }
private:
 auto fill_map_idx_to_coord() const {
 MapIndexToCoord coord;
 std::array<size_t,DIM> size_per_dim{{NDIM...}};
 if (sizeof...(NDIM) == 1) { size_per_dim.fill(size_per_dim[0]); }
 for (size_t j = 0; j < num_vertices<DIM,NDIM...>(); j ++) {
 size_t a = j, b = num_vertices<DIM,NDIM...>(), r = 0;
 for(size_t i = 0; i <= DIM - 2; i ++) {
 b /= size_per_dim[DIM - i - 1];
 coord[j][DIM-i-1] = a / b;
 r = a % b;
 a = r;
 }
 coord[j][0] = r;
 }
 return coord;
 }
 auto fill_map_coord_to_idx() const {
 MapCoordToIndex mapping(num_vertices<DIM,NDIM...>());
 for(size_t i = 0; i < num_vertices<DIM,NDIM...>(); i ++) {
 mapping.emplace(map_idx_to_coord_[i],i); // reuse the previous mapping
 }
 return mapping;
 }
 friend auto &operator<<(std::ostream &os, const MultiGrid& that) {
 os << "Values : {";
 for (auto&& v : that.values_) { os << v << ";"; }
 os << "\b}\nMapping index to coord :\n";
 static size_t count{0};
 for (auto&& m : that.map_idx_to_coord_) { os << count ++ << ":" << m << "\t"; }
 os << "\nMapping coord to index :\n";
 for (auto && m : that.map_coord_to_idx_) { os << m.first << "->" << m.second << "\t"; }
 return os << "\n";
 }
private:
 MapIndexToCoord map_idx_to_coord_; // O(1) access flat index -> dim coordinates
 MapCoordToIndex map_coord_to_idx_; // O(1) average acess dim coordinates -> flat index (worst case : O(N))
 ArrayValues values_; // same behaviour as declaring `float values_[meta_prod(NDIM)];`
};
int main() {
 // Create a 4D grid with 3x2x3x5 vertices
 MultiGrid<4,float,3,2,3,5> grid;
 // grid behaves like a STL container and we can fill values with std::generate
 std::generate(grid.begin(), grid.end(), []() {static float n{0.0f}; return n+=0.5f;} );
 std::cout << grid << std::endl;
 // get coordinates from index
 std::cout << "get_coord_from_index(43) = " << grid.get_coord_from_index(43) << std::endl;
 // and vice versa
 std::cout << "get_index_from_coord({{2,0,2,3}}) = " << grid.get_index_from_coord({{2,0,2,3}}) << std::endl;
 // print value at specific coordinates
 std::cout << "Grid[{{2,0,2,3}}] = " << grid[{{2,0,2,3}}] << std::endl;
 // print value at specific index
 std::cout << "Grid[42] = " << grid[42] << "\n\n";
 MultiGrid<2, float, 2> little_grid;
 std::cout << little_grid << std::endl;
}

Live on Coliru

Live update with Barry insights

This is an ongoing work, I have an other (bad) implementation which considers a more "realistic" grid where we store the meshes lower/upper bounds, the strides, and the real (float) coordinates. Hence this implementation intends to start with something nicer.

Therefore, I am interested about your advice (especially for optimizing things). I guess there are some parts which can be coded simpler (e.g. I am not satisfied with the hideous if (sizeof...(NDIM) == 1) { size_per_dim.fill(size_per_dim[0]); }).

Question 2

I didn't get anything you do here, but a central question is, whether you really want the dimensions to be compile-time constants ... as particularly in a scientific context these could be varied in each run?

Question 3

Yes making dimensions compile time constants is revelant in my context since for many runs it is not the grid geometry that change but rather the parameters (such as velocity map, boundaries...). I would be glad to provide more detailed explanations since I am supposed to make that "understandable" for computational scientists. I will try to edit my post.

Question 4

other minor suggestions: (i) use auto (ii) also provide cbegin/cend, (iii) no need to move here: std::move(fill_map_idx_to_coord()), (iv) replace !std::is_same<std::integral_constant<size_t, sizeof...(NDIM)>,std::integral_constant<size_t, 1>>::value simply by sizeof...(NDIM) != 1, (v) I'd drop the ugly const_cast stuff and just write values_[idx] again ... better code duplication than three times as much and hardly-understandeable code.

Question 5

Thank you ! (i) where would you use it ? (ii) gotcha should I let the 2 begin and end version then ? (iii) ok wasn't sure about that (iv) ill try that (v) agree with you in this context :). Is it ok if I edit my posted code with your improvements ?

Question 6

You're welcome. (i) basically in front of every method: so instead of iterator begin() you write auto begin(), auto fill_map_idx_to_coord() const instead of MapIndexToCoord fill_map_idx_to_coord() const and so on ... you tagged C++14 so you have available this feature, it's called return type deduction. (ii) yes, leave them ... and edit whatever you want ;-)

Question 7

I think the major problem with this solution is Unnecessary Redundancy. Let's start with the signature:

template<size_t DIM, typename T, size_t... NDIM>
class MultiGrid;

Why do you both need to specify the number of dimensions and the dimensions? You have to assert that the two line up, so why not just imply the #?

template <typename T, size_t... DIMS>
class MultiGrid {
 static constexpr size_t NUM_DIMS = sizeof...(DIMS);
 static_assert(NUM_DIMS > 0, "because what are you doing?");
};

The idea that MultiGrid<3, float, 2> should be a 2x2x2 is a little too magical. If people want a 2x2x2 grid, they can write 2 three times. If they want a 2x2x2x....2 grid, they can write a metafunction themselves.

But that's just a minor quibble compared to... HOLY SIZE, BATMAN!

std::cout << sizeof(MultiGrid<5,int,2,2,2,2,2>) << std::endl;
// prints 1464

Uh, what now? I have a 5-dimension array that consists of 32 integers. That should be 128 bytes. You're using more than 10x the amount of memory necessary! And that underestimates the size since you have this huge unordered_map which is all on the heap. Why? Because in addition to storing your array (values_), you're storing completely unnecessary information in map_idx_to_coord_ and map_coord_to_idx_.

Why do I say unnecessary? Let's consider your access by ArrayCoord:

reference operator[] (const ArrayCoord& coord) { 
 return values_[map_coord_to_idx_.at(coord)]; 
}

Do we really need the map lookup there? Can we just mathematically determine the right idx from coord? Of course we can:

namespace detail {
 template <typename Iter, size_t D0>
 size_t flatten(Iter first, Iter, std::index_sequence<D0> ) {
 return *first;
 }
 template <typename Iter, size_t D0, size_t... DIMS>
 size_t flatten(Iter first, Iter last, std::index_sequence<D0, DIMS...> ) {
 return *first * meta_prod(DIMS...) + 
 flatten(std::next(first), last, std::index_sequence<DIMS...>{} );
 }
} 
reference operator[] (const ArrayCoord& coord) { 
 return values_[details::flatten(
 coord.begin(), coord.end(), std::index_sequence<DIMS...>{})];
}

That will compute your index from an array of coordinates on the fly. It's still constant time and is guaranteed faster than your unordered_map solution anyway (which still has to walk the whole array at least twice - once to do the hash and again to do equality comparison). And will use zero extra space, so everything else will perform better too.

Drop the extra members. When you're writing containers, lean is king. This class should have exactly one member: values_.

Question 8

Wow thank you so much @Barry, this is a really nice review. New code is now available here, I have followed your points and it works like a charm ! May I ask you if it is possible to also get rid of the map_idx_to_coord_ and get a compile time solution somehow (I am not that experienced in TMP... and sizeof is 1408 now) ? What are the extra members I should drop ? How can I propose helper metafunctions to make the "magical" MultiGrid<3, float, 2> possible again ? Thank you again for your great enlightments !

Barry BarryBarry 18.5k1 gold badge40 silver badges92 bronze badges · Accepted Answer · 2015-07-17 21:01:02Z

I think the major problem with this solution is Unnecessary Redundancy. Let's start with the signature:

template<size_t DIM, typename T, size_t... NDIM>
class MultiGrid;

Why do you both need to specify the number of dimensions and the dimensions? You have to assert that the two line up, so why not just imply the #?

template <typename T, size_t... DIMS>
class MultiGrid {
 static constexpr size_t NUM_DIMS = sizeof...(DIMS);
 static_assert(NUM_DIMS > 0, "because what are you doing?");
};

The idea that MultiGrid<3, float, 2> should be a 2x2x2 is a little too magical. If people want a 2x2x2 grid, they can write 2 three times. If they want a 2x2x2x....2 grid, they can write a metafunction themselves.

But that's just a minor quibble compared to... HOLY SIZE, BATMAN!

std::cout << sizeof(MultiGrid<5,int,2,2,2,2,2>) << std::endl;
// prints 1464

Uh, what now? I have a 5-dimension array that consists of 32 integers. That should be 128 bytes. You're using more than 10x the amount of memory necessary! And that underestimates the size since you have this huge unordered_map which is all on the heap. Why? Because in addition to storing your array (values_), you're storing completely unnecessary information in map_idx_to_coord_ and map_coord_to_idx_.

Why do I say unnecessary? Let's consider your access by ArrayCoord:

reference operator[] (const ArrayCoord& coord) { 
 return values_[map_coord_to_idx_.at(coord)]; 
}

Do we really need the map lookup there? Can we just mathematically determine the right idx from coord? Of course we can:

namespace detail {
 template <typename Iter, size_t D0>
 size_t flatten(Iter first, Iter, std::index_sequence<D0> ) {
 return *first;
 }
 template <typename Iter, size_t D0, size_t... DIMS>
 size_t flatten(Iter first, Iter last, std::index_sequence<D0, DIMS...> ) {
 return *first * meta_prod(DIMS...) + 
 flatten(std::next(first), last, std::index_sequence<DIMS...>{} );
 }
} 
reference operator[] (const ArrayCoord& coord) { 
 return values_[details::flatten(
 coord.begin(), coord.end(), std::index_sequence<DIMS...>{})];
}

That will compute your index from an array of coordinates on the fly. It's still constant time and is guaranteed faster than your unordered_map solution anyway (which still has to walk the whole array at least twice - once to do the hash and again to do equality comparison). And will use zero extra space, so everything else will perform better too.

Drop the extra members. When you're writing containers, lean is king. This class should have exactly one member: values_.

Wow thank you so much @Barry, this is a really nice review. New code is now available here, I have followed your points and it works like a charm ! May I ask you if it is possible to also get rid of the map_idx_to_coord_ and get a compile time solution somehow (I am not that experienced in TMP... and sizeof is 1408 now) ? What are the extra members I should drop ? How can I propose helper metafunctions to make the "magical" MultiGrid<3, float, 2> possible again ? Thank you again for your great enlightments !

Stack Exchange Network

Generic multi dimension grid / array class in C++

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Generic multi dimension grid / array class in C++

1 Answer 1

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions