About the Reasoning
I wrote a flat_multimap because it was slightly more efficient to read/write from disk (contiguous memory block) and because it would keep the logarithmic efficiency from a tree (using binary search). I tried to keep to the std::multimap
standard, but there were several operations from std::vector
which I couldn't help but implement (capacity
, reserve
, resize
). I figured these would make sense to implement because they made sense to have in a sized and continuous container. There were also some methods from std::multimap
that I didn't really think made sense to implement (emplace(Args&&...)
, insert(node_type&&)
) because for emplace you can't really construct the element in place, you're going to have to move it around to sort it, and for insert, I wasn't really sure how I would implement it in tandem with extract.
The Code
#include <functional>
#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>
template <typename K, typename T, typename C = std::less<K>,
class A = std::allocator<std::pair<K, T>>>
requires std::predicate<C, K, K>
class flat_multimap {
public:
typedef std::pair<K, T> value_type;
typedef T mapped_type;
typedef K key_type;
typedef C key_compare;
typedef std::vector<value_type, A>::size_type size_type;
typedef std::vector<value_type, A>::difference_type difference_type;
typedef std::vector<value_type, A>::iterator iterator;
typedef std::vector<value_type, A>::const_iterator const_iterator;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef std::allocator_traits<A>::pointer pointer;
typedef std::allocator_traits<A>::const_pointer const_pointer;
typedef std::reverse_iterator<iterator> reverse_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
typedef std::vector<value_type, A>::allocator_type allocator_type;
private:
std::vector<value_type, A> buffer;
C comparator;
public:
constexpr flat_multimap() : buffer(), comparator() {
};
explicit constexpr flat_multimap(const A& allocator)
: buffer(allocator), comparator() {
};
explicit constexpr flat_multimap(const C& comparator,
const A& allocator = A())
: buffer(allocator), comparator(comparator) {
};
constexpr flat_multimap(const flat_multimap& other) noexcept
: buffer(other.buffer), comparator(other.comparator) {
}
constexpr flat_multimap(flat_multimap&& other) noexcept
: buffer(std::move(other.buffer)),
comparator(std::move(other.comparator)) {
}
constexpr flat_multimap& operator=(const flat_multimap& other) noexcept {
return *this = flat_multimap(other);
}
constexpr flat_multimap& operator=(flat_multimap&& other) noexcept {
std::swap(buffer, other.buffer);
std::swap(comparator, other.comparator);
return *this;
}
constexpr ~flat_multimap() {
}
constexpr allocator_type get_allocator() const noexcept {
return buffer.get_allocator();
}
constexpr size_type size() const noexcept {
return buffer.size();
}
constexpr size_type max_size() const noexcept {
return buffer.max_size();
}
constexpr size_type capacity() const noexcept {
return buffer.capacity();
}
constexpr void resize(size_type count) {
buffer.resize(count);
}
constexpr void reserve(size_type new_cap) {
buffer.reserve(new_cap);
}
constexpr void shrink_to_fit() {
buffer.shrink_to_fit();
}
constexpr bool empty() const noexcept {
return buffer.empty();
}
constexpr void clear() noexcept {
buffer.clear();
}
constexpr pointer data() noexcept {
return buffer.data();
}
constexpr const_pointer data() const noexcept {
return buffer.data();
}
constexpr iterator insert(const value_type& value) {
return buffer.insert(
std::upper_bound(
cbegin(), cend(), value,
[](const value_type& left, const value_type& right) {
return C()(left.first, right.first);
}),
value);
}
constexpr iterator insert(value_type&& value) {
const_iterator destination = std::upper_bound(
cbegin(), cend(), value,
[](const value_type& left, const value_type& right) {
return C()(left.first, right.first);
});
return buffer.insert(destination, std::move(value));
}
constexpr size_type erase(const key_type& key) {
const auto [first, last] = equal_range(key);
size_type dist = std::distance(first, last);
buffer.erase(first, last);
return dist;
}
constexpr size_type count(const key_type& key) const {
const auto [first, last] = equal_range(key);
return std::distance(first, last);
}
constexpr bool contains(const key_type& key) const {
const_iterator first = lower_bound(key);
return (!(first == buffer.cend()) and !(comparator(key, first->first)));
}
constexpr std::pair<iterator, iterator> equal_range(const key_type& key) {
return std::make_pair(lower_bound(key), upper_bound(key));
}
constexpr std::pair<const_iterator, const_iterator> equal_range(
const key_type& key) const {
return std::make_pair(lower_bound(key), upper_bound(key));
}
constexpr iterator lower_bound(const key_type& key) {
return std::lower_bound(begin(), end(), key,
[this](const value_type& left, const key_type& right) {
return comparator(left.first, right);
});
}
constexpr const_iterator lower_bound(const key_type& key) const {
return std::lower_bound(cbegin(), cend(), key,
[this](const value_type& left, const key_type& right) {
return comparator(left.first, right);
});
}
constexpr iterator upper_bound(const key_type& key) {
return std::upper_bound(
begin(), end(), key,
[this](const key_type& left, const value_type& right) {
return comparator(left, right.first);
});
}
constexpr const_iterator upper_bound(const key_type& key) const {
return std::upper_bound(
cbegin(), cend(), key,
[this](const key_type& left, const value_type& right) {
return comparator(left, right.first);
});
}
constexpr void merge(flat_multimap<K, T, C, A>& source) {
std::vector<value_type, A> vec(size() + source.size(),
buffer.get_allocator());
std::swap(buffer, vec);
std::merge(vec.cbegin(), vec.cend(), source.buffer.cbegin(),
source.buffer.cend(), buffer.begin(),
[this](const value_type& left, const value_type& right) {
return comparator(left.first, right.first);
});
source.clear();
}
constexpr void merge(flat_multimap<K, T, C, A>&& source) {
std::vector<value_type, A> vec(size() + source.size(),
buffer.get_allocator());
std::swap(buffer, vec);
std::merge(vec.cbegin(), vec.cend(), source.buffer.cbegin(),
source.buffer.cend(), buffer.begin(),
[this](const value_type& left, const value_type& right) {
return comparator(left.first, right.first);
});
source.clear();
}
constexpr iterator begin() noexcept {
return buffer.begin();
}
constexpr const_iterator begin() const noexcept {
return buffer.begin();
}
constexpr const_iterator cbegin() const noexcept {
return buffer.cbegin();
}
constexpr iterator end() noexcept {
return buffer.end();
}
constexpr const_iterator end() const noexcept {
return buffer.end();
}
constexpr const_iterator cend() const noexcept {
return buffer.cend();
}
constexpr reverse_iterator rbegin() noexcept {
return buffer.rbegin();
}
constexpr const_reverse_iterator rbegin() const noexcept {
return buffer.rbegin();
}
constexpr const_reverse_iterator crbegin() const noexcept {
return buffer.crbegin();
}
constexpr reverse_iterator rend() noexcept {
return buffer.rend();
}
constexpr const_reverse_iterator rend() const noexcept {
return buffer.rend();
}
constexpr const_reverse_iterator crend() const noexcept {
return buffer.crend();
}
};
Concerns & Questions
- In a lot of places, I have to compare a key of an entry to an entry, in which case I call the comparator by accessing the key of the entry. Could there be a more efficient way to do this?
- An extension of the above point, could I implement
emplace(Args&&...)
to construct the key, and use something likestd::upper_bound
to call emplace on the vector buffer with the full set of arguments? - Do I need to implement the Rule of Five here, because I'm not actually managing any memory myself?
- Should I implement the remainder of the
std::multimap
constructors?
Curiosities
- I noticed while implementing merge that
std::inplace_merge
isn't constexpr, so I ended up usingstd::merge
instead. Does anyone know why it isn't constexpr?
2 Answers 2
Try to match std::flat_multimap
's interface
You already went quite far to make it look like a similar container from the standard library, but since we already know std::flat_multimap
is going to be in C++23, it would be good if you could match its interface:
Add emplace()
There were also some methods from
std::multimap
that I didn't really think made sense to implement (emplace(Args&&...)
,insert(node_type&&)
) because for emplace you can't really construct the element in place, you're going to have to move it around to sort it, and for insert, I wasn't really sure how I would implement it in tandem with extract.
I would implement this even if it doesn't give you the same performance benefit it has in std::multimap
; it still is useful to be able to forward arguments directly to the constructor instead of having the caller create a temporary object. Also, consider code that is currently using a regular std::multimap
, and you wnat to convert it to flat_multimap
for some reason, then it would be great if you didn't have to find and modify all existing calls to emplace()
.
Make the underlying container type a template parameter
It's great that you at least provided a way for the user to supply their own allocator, but even better would be to make it possible to change the underlying container type completely. For example, a std::deque
could be used, which is a little bit slower to access, but has stable pointers, can hold non-movable types, and has much less overhead when calling resize()
.
std::flat_multimap
also uses two containers: one for the keys and one for the values, which is more cache-friendly. Another benefit is that it's simpler to call the comparator.
Handle non-key_type
keys
Suppose key_type
is some large struct. Normally you would only compare one key_type
to another key_type
. However, it's not that rare to have a type A
that actually be compared to a type B
. Now suppose that I want to check if there is an entry in the map whose key matches some value that has a type different than key_type
. Because you only have one contains()
function that only accepts a const key_type&
parameter, the compiler now has to create a temporary key_type
from that value. This might be expensive.
The STL containers actually have overloads (starting at least for some member functions in C++14) that take arbitrary key types, so they can pass the provided values directly to the comparator without forcing any implicit conversions. So for example, in addition to your existing contains()
you should:
template <typename K>
constexpr bool contains(const K& key) const {
const_iterator first = lower_bound(key);
return !(first == buffer.cend()) and !(comparator(key, first->first));
}
The same goes for all functions that take a key_type
parameter.
Implement the rest of the constructors and member functions
Should I implement the remainder of the
std::multimap
constructors?
Yes, if you want this to be a drop-in replacement you should definitely do that, and also check for other missing member functions.
Remove or default
the special member functions where possible
Do I need to implement the Rule of Five here, because I'm not actually managing any memory myself?
You have implemented your own destructor and copy/move constructors/assignment operators, but they do exactly the same thing as what the compiler would generate itself. I would just remove all of them, this way you will also follow the rule of zero.
Be careful with noexcept
If you mark a member function noexcept
, but something it does could actually throw
, then instead the program will abort, with no possibility for the caller to catch
the error. So you should only do this if you are absolutely sure nothing can throw.
Consider your copy constructor for example: it makes a copy of the underlying std::vector
. But std::vector
's own copy constructor is not noexcept
, and it's easy to see why: it will allocate memory, and memory allocation can fail with std::bad_alloc
.
I would remove noexcept
from all the constructors and assignment operators. If you allow custom containers to be used, then you could consider using noexcept(...)
with a parameter to pass through the noexcept
ness of the underlying operations. Note that most containers have noexcept
move constructors and move assignment operators, but not all of them, so it would be safe for std::vector
, but not if you allow arbitrary underlying containers.
Use comparator
instead of C()
The comparator is an object that might have state. So you should always use the comparator
function instead of creating a default-constructed temporary C()
like you did in insert()
.
If you don't see why, consider:
template<typename K>
struct AscOrDescComparator {
bool descending{};
bool operator<(const K& lhs, const K& rhs) {
if (descending)
return lhs > rhs;
else
return lhs < rhs;
}
};
auto map = flat_multimap<int, int>(AscOrDescComparator<int>{true});
map.insert(...); // uses C(), which is ascending
map.count(...); // uses comparator, which is descending
About std::inplace_merge
I noticed while implementing merge that
std::inplace_merge
isn't constexpr, so I ended up using std::merge instead. Does anyone know why it isn't constexpr?
The problem is that std::inplace_merge
allocates a temporary buffer. Since C++20 you can use new
in limit ways in constexpr
contexts, and I would assume that the temporary buffer would be compatible with that. If there is no other reason why it couldn't be possible, then it might just be that the C++ standards committee hasn't come around to declare this constexpr
yet.
Prefer using
instead of typedef
This is just a minor issue, but I recommend you use using
decalarations instead of typedef
s. using
more clearly separates the type from the name, which is especially important if you want to create aliases for arrays and function pointers.
-
1\$\begingroup\$ I believe the move constructor and move assignment are correctly
noexcept
. They can be implemented withstd::swap
, which isconstexpr
andnoexcept
in C++20. The default constructor also only needs the default constructor ofstd::vector
, which isconstexpr
andnoexcept
. \$\endgroup\$Davislor– Davislor2023年06月20日 21:14:26 +00:00Commented Jun 20, 2023 at 21:14 -
\$\begingroup\$ Oops! I missed a few
C()
when I switched over tocomparator
. My bad! \$\endgroup\$FreezePhoenix– FreezePhoenix2023年06月20日 22:30:36 +00:00Commented Jun 20, 2023 at 22:30 -
\$\begingroup\$ There doesn't seem to be a STL way to implement the zipped iterator required for the iterators with 2 separate containers. \$\endgroup\$FreezePhoenix– FreezePhoenix2023年06月21日 00:33:31 +00:00Commented Jun 21, 2023 at 0:33
-
\$\begingroup\$ Typedef vs using is a minor issue, but it was the first thing that tripped me. Using is so much easier to read IMO. \$\endgroup\$Cris Luengo– Cris Luengo2023年06月21日 03:01:19 +00:00Commented Jun 21, 2023 at 3:01
-
1\$\begingroup\$ @CrisLuengo Wait until you
typedef
a function pointer. \$\endgroup\$Davislor– Davislor2023年06月21日 20:54:02 +00:00Commented Jun 21, 2023 at 20:54
You’ve gotten some good advice from G. Sliepen so far. I’d only add a few things:
Use default
where Appropriate
Your default constructor, copy constructor, move constructor, assignment and move assignment all just default-initialize, copy or move the fields of your class
. If you use the default
implementation, the compiler will do a better job of it. In particular, it can implicitly add constexpr
and noexcept
if and only if the arbitrary comparator object supports them, which would otherwise be a real hassle.
Implement swap
You should immediately use this everyplace that currently does multiple swaps, of the internal data and the comparator. You’ll get several advantages right away:
- One swap of a component cannot fail after the first succeeded, leaving the object in an inconsistent state
- You can add fields, such as a top-level
allocator
, and need to change the code in only one place - The compiler won’t ever forget about one of the fields
Use [[no_unique_address]]
on Your Comparator
In most use cases, the comparator will be a unit type, and this lets the compiler optimize it out.
Consider Restricting the Auxiliary Types
In particular, the clauses:
requires std::is_nothrow_default_constructible<C> &&
std::is_nothrow_swappable<C>
would make it safe to declare your default constructor, move constructor, move assignment, swap and anything else that you can implement from them as noexcept
.
It is extremely unlikely that you will ever actually need a comparator class whose default constructor and swap operation could throw
. (Maybe you’re dynamically allocating a random permutation as the ordering for each instance?) If you ever actually do, you probably just want to remove the affected noexcept
clauses. However, an alternative if you actually want to keep them where possible would be to declare a second template overload, identical but for removing noexcept
, and restricting it to
requires !std::is_nothrow_default_constructible<C> ||
!std::is_nothrow_swappable<C>
Consider Allocator and Comparator Concepts
If you define a concept
for an allocator of T and one for a comparator over T, the compiler can check the requirements immediately at compile time and give a useful error message, instead of crashing somewhere in the middle of an instantiation, in a way that will make no sense to someone who didn’t realize that the type parameter needed to be a comparator.
/* Matches the interface of an allocator, such as std::allocate<T>.
*/
template<class A, typename T>
concept allocator = requires(A a, std::size_t n) {
{a.allocate(n)} -> std::same_as<T*>;
{a.deallocate(a.allocate(n), n)} -> std::same_as<void>;
};
/* Matches the interface of a comparator for T, such as std::less<T>
*/
template<class C, typename T>
concept comparator = requires(T x, C c) {
{c(x,x)} -> std::convertible_to<bool>;
};
lets you write
requires allocator<A, K> &&
comparator<C, K>
boost
, so I ended up implementing this as an exercise for my coding skills. \$\endgroup\$