I'm trying to implement a Priority Search Tree(PST)
following this note: http://cs.brown.edu/courses/cs252/misc/resources/lectures/pdf/notes07.pdf. For convenience, I copied the related peuso-code here:
creating PST
and the note has also provided an example for creating a PST, I just copied the final tree here:
The following is my code.
#include <iostream>
#include <vector>
#include <algorithm>
#include <set>
#include <queue>
template <typename E>
struct Point2D {
static int used_count;
E _x;
E _y;
Point2D() : _x(0), _y(0) {
used_count++;
std::cout << "Point2D(), used_count = " << used_count << "\n";
}
Point2D(const E& x, const E& y) : _x(x), _y(y) {
used_count++;
std::cout << "Point2D(const E& x, const E& y), used_count = " << used_count << "\n";
}
Point2D(int arr[]) : _x(arr[0]), _y(arr[1]) {
used_count++;
std::cout << "Point2D(int arr[]), used_count = " << used_count << "\n";
}
Point2D(const Point2D& other) { // copy constructor
used_count++;
std::cout << "Point2D(const Point2D& other), used_count = " << used_count << "\n";
_x = other._x;
_y = other._y;
}
Point2D(Point2D& other) { // copy constructor
used_count++;
std::cout << "Point2D(Point2D& other), used_count = " << used_count << "\n";
_x = other._x;
_y = other._y;
}
Point2D& operator=(const Point2D& other) { // assignment operator
_x = other._x;
_y = other._y;
return *this;
}
Point2D& operator=(Point2D& other) { // assignment operator
_x = other._x;
_y = other._y;
return *this;
}
Point2D(Point2D&& other) { // move constructor
if (this != &other) {
used_count++;
std::cout << "Point2D(Point2D&& other), used_count = " << used_count << "\n";
_x = other._x;
_y = other._y;
}
}
virtual ~Point2D() {
used_count--;
std::cout << "~Point2D(), used_count = " << used_count << "\n";
}
Point2D operator-(const Point2D& other) {
return Point2D(_x - other._x, _y - other._y);
}
Point2D operator-(Point2D& other) {
return Point2D(_x - other._x, _y - other._y);
}
bool operator<(const Point2D& other) {
return (_y < other._y) || (_y == other._y && _x < other._x);
}
};
template <typename E>
class PSTMultiset {
public:
struct Element {
std::string _name;
Point2D<E> _point;
Element(const std::string& name, int arr[])
: _name(name), _point(arr) {}
};
struct ElementComparatorY {
bool operator()(const Element& l, const Element& r) {
return (l._point._y > r._point._y) ||
(l._point._y == r._point._y && l._point._x > r._point._x);
}
};
struct ElementComparatorX {
bool operator()(const Element& l, const Element& r) {
return (l._point._x < r._point._x) ||
(l._point._x == r._point._x && l._point._y < r._point._y);
}
};
std::multiset<Element, ElementComparatorY> _points;
typedef typename std::multiset<Element, ElementComparatorY>::iterator Iterator;
int _size;
PSTMultiset(int size) : _size(size) {
std::cout << "PSTMultiset(int size)\n";
}
PSTMultiset(std::string names[], E points[][2], int size)
: PSTMultiset(size) { // Nx2 points
for (int i = 0; i < _size; i++) {
_points.insert(Element(names[i], points[i]));
}
std::cout << "PSTMultiset(std::string names[], E points[][2])\n";
}
virtual ~PSTMultiset() {
Clear();
std::cout << "~PSTMultiset()\n";
}
void Print(const std::string& title) {
std::cout << title << "\n";
for (auto iter = _points.begin(); iter != _points.end(); iter++) {
const Element& p = *iter;
std::cout << p._name << ": (" << p._point._x << ", " << p._point._y << ")\n";
}
}
class Node {
public:
Element _element;
Node *_parent;
Node *_left;
Node *_right;
Node(const Element& element, Node* parent)
: _element(element),
_parent(parent),
_left(nullptr),
_right(nullptr) {
}
friend std::ostream& operator<<(std::ostream& out, Node* node) {
if (node == nullptr) {
return out;
}
if (node->_left != nullptr) {
out << "L(" << node->_left->_element._name << ")_";
}
out << node->_element._name
<< ": (" << node->_element._point._x << ", "
<< node->_element._point._y << ")";
if (node->_right != nullptr) {
out << "_R(" << node->_right->_element._name << ")";
}
return out;
}
};
Node *_root;
Node* Build(Node* parent, std::multiset<Element, ElementComparatorY>& points) {
if (points.size() == 0) return nullptr;
if (points.size() == 1) {
Node *node = new Node(*(points.begin()), parent);
return node;
}
auto first = points.begin();
std::multiset<Element, ElementComparatorX> remainPoints;
auto iter = points.begin(); iter++; // remove the first point (the point with greatest Y)
for (; iter != points.end(); iter++) {
remainPoints.insert(*iter);
}
auto mid_iter = remainPoints.begin();
std::advance(mid_iter, remainPoints.size() / 2);
std::multiset<Element, ElementComparatorY> leftPoints(remainPoints.begin(), mid_iter);
std::multiset<Element, ElementComparatorY> rightPoints(mid_iter, remainPoints.end());
Node *node = new Node(*first, parent);
node->_left = Build(node, leftPoints);
node->_right = Build(node, rightPoints);
return node;
}
void Build() {
_root = Build(nullptr, _points);
}
void Clear() { // clear the tree by level-order traverse
if (_root == nullptr) return;
std::queue<Node*> q;
q.push(_root);
int height = 1;
int levelSize = q.size();
while (!q.empty()) {
Node *node = q.front();
std::cout << node << " ";
if (node->_left != nullptr) {
q.push(node->_left);
}
if (node->_right != nullptr) {
q.push(node->_right);
}
delete node;
q.pop();
levelSize--;
if (levelSize == 0) {
height++;
levelSize = q.size();
std::cout << "\n";
}
}
std::cout << "Height: " << height << "\n";
}
};
template<> int Point2D<int>::used_count = 0;
int main() {
std::string names[] = {"A", "B", "C", "D", "E",
"F", "G", "H", "I", "J",
"K", "M", "N"};
int arr[][2] = {{15, 7},
{16, 2},
{12, 1},
{14, -1},
{10, -2},
{-1, 9},
{6, 4},
{7, 6},
{-2, 5},
{2, 3},
{4, 0},
{9, -3},
{1, 8}};
int size = sizeof(arr) / sizeof(arr[0]);
PSTMultiset<int> *t = new PSTMultiset<int>(names, arr, size);
// print the multiset
{
t->Print("before");
}
// build PST
{
t->Build();
}
delete t;
}
The output is as follows:
PSTMultiset(int size)
Point2D(int arr[]), used_count = 1
Point2D(Point2D&& other), used_count = 2
~Point2D(), used_count = 1
Point2D(int arr[]), used_count = 2
Point2D(Point2D&& other), used_count = 3
~Point2D(), used_count = 2
Point2D(int arr[]), used_count = 3
Point2D(Point2D&& other), used_count = 4
~Point2D(), used_count = 3
Point2D(int arr[]), used_count = 4
Point2D(Point2D&& other), used_count = 5
~Point2D(), used_count = 4
Point2D(int arr[]), used_count = 5
Point2D(Point2D&& other), used_count = 6
~Point2D(), used_count = 5
Point2D(int arr[]), used_count = 6
Point2D(Point2D&& other), used_count = 7
~Point2D(), used_count = 6
Point2D(int arr[]), used_count = 7
Point2D(Point2D&& other), used_count = 8
~Point2D(), used_count = 7
Point2D(int arr[]), used_count = 8
Point2D(Point2D&& other), used_count = 9
~Point2D(), used_count = 8
Point2D(int arr[]), used_count = 9
Point2D(Point2D&& other), used_count = 10
~Point2D(), used_count = 9
Point2D(int arr[]), used_count = 10
Point2D(Point2D&& other), used_count = 11
~Point2D(), used_count = 10
Point2D(int arr[]), used_count = 11
Point2D(Point2D&& other), used_count = 12
~Point2D(), used_count = 11
Point2D(int arr[]), used_count = 12
Point2D(Point2D&& other), used_count = 13
~Point2D(), used_count = 12
Point2D(int arr[]), used_count = 13
Point2D(Point2D&& other), used_count = 14
~Point2D(), used_count = 13
PSTMultiset(std::string names[], E points[][2])
before
F: (-1, 9)
N: (1, 8)
A: (15, 7)
H: (7, 6)
I: (-2, 5)
G: (6, 4)
J: (2, 3)
B: (16, 2)
C: (12, 1)
K: (4, 0)
D: (14, -1)
E: (10, -2)
M: (9, -3)
Point2D(const Point2D& other), used_count = 14
Point2D(const Point2D& other), used_count = 15
Point2D(const Point2D& other), used_count = 16
Point2D(const Point2D& other), used_count = 17
Point2D(const Point2D& other), used_count = 18
Point2D(const Point2D& other), used_count = 19
Point2D(const Point2D& other), used_count = 20
Point2D(const Point2D& other), used_count = 21
Point2D(const Point2D& other), used_count = 22
Point2D(const Point2D& other), used_count = 23
Point2D(const Point2D& other), used_count = 24
Point2D(const Point2D& other), used_count = 25
Point2D(const Point2D& other), used_count = 26
Point2D(const Point2D& other), used_count = 27
Point2D(const Point2D& other), used_count = 28
Point2D(const Point2D& other), used_count = 29
Point2D(const Point2D& other), used_count = 30
Point2D(const Point2D& other), used_count = 31
Point2D(const Point2D& other), used_count = 32
Point2D(const Point2D& other), used_count = 33
Point2D(const Point2D& other), used_count = 34
Point2D(const Point2D& other), used_count = 35
Point2D(const Point2D& other), used_count = 36
Point2D(const Point2D& other), used_count = 37
Point2D(const Point2D& other), used_count = 38
Point2D(const Point2D& other), used_count = 39
Point2D(const Point2D& other), used_count = 40
Point2D(const Point2D& other), used_count = 41
Point2D(const Point2D& other), used_count = 42
Point2D(const Point2D& other), used_count = 43
Point2D(const Point2D& other), used_count = 44
Point2D(const Point2D& other), used_count = 45
Point2D(const Point2D& other), used_count = 46
Point2D(const Point2D& other), used_count = 47
Point2D(const Point2D& other), used_count = 48
Point2D(const Point2D& other), used_count = 49
Point2D(const Point2D& other), used_count = 50
Point2D(const Point2D& other), used_count = 51
Point2D(const Point2D& other), used_count = 52
Point2D(const Point2D& other), used_count = 53
~Point2D(), used_count = 52
~Point2D(), used_count = 51
Point2D(const Point2D& other), used_count = 52
Point2D(const Point2D& other), used_count = 53
Point2D(const Point2D& other), used_count = 54
Point2D(const Point2D& other), used_count = 55
Point2D(const Point2D& other), used_count = 56
Point2D(const Point2D& other), used_count = 57
Point2D(const Point2D& other), used_count = 58
~Point2D(), used_count = 57
~Point2D(), used_count = 56
~Point2D(), used_count = 55
~Point2D(), used_count = 54
~Point2D(), used_count = 53
~Point2D(), used_count = 52
~Point2D(), used_count = 51
~Point2D(), used_count = 50
~Point2D(), used_count = 49
~Point2D(), used_count = 48
~Point2D(), used_count = 47
~Point2D(), used_count = 46
~Point2D(), used_count = 45
~Point2D(), used_count = 44
Point2D(const Point2D& other), used_count = 45
Point2D(const Point2D& other), used_count = 46
Point2D(const Point2D& other), used_count = 47
Point2D(const Point2D& other), used_count = 48
Point2D(const Point2D& other), used_count = 49
Point2D(const Point2D& other), used_count = 50
Point2D(const Point2D& other), used_count = 51
Point2D(const Point2D& other), used_count = 52
Point2D(const Point2D& other), used_count = 53
Point2D(const Point2D& other), used_count = 54
Point2D(const Point2D& other), used_count = 55
Point2D(const Point2D& other), used_count = 56
Point2D(const Point2D& other), used_count = 57
Point2D(const Point2D& other), used_count = 58
Point2D(const Point2D& other), used_count = 59
~Point2D(), used_count = 58
~Point2D(), used_count = 57
Point2D(const Point2D& other), used_count = 58
Point2D(const Point2D& other), used_count = 59
Point2D(const Point2D& other), used_count = 60
Point2D(const Point2D& other), used_count = 61
Point2D(const Point2D& other), used_count = 62
Point2D(const Point2D& other), used_count = 63
Point2D(const Point2D& other), used_count = 64
~Point2D(), used_count = 63
~Point2D(), used_count = 62
~Point2D(), used_count = 61
~Point2D(), used_count = 60
~Point2D(), used_count = 59
~Point2D(), used_count = 58
~Point2D(), used_count = 57
~Point2D(), used_count = 56
~Point2D(), used_count = 55
~Point2D(), used_count = 54
~Point2D(), used_count = 53
~Point2D(), used_count = 52
~Point2D(), used_count = 51
~Point2D(), used_count = 50
~Point2D(), used_count = 49
~Point2D(), used_count = 48
~Point2D(), used_count = 47
~Point2D(), used_count = 46
~Point2D(), used_count = 45
~Point2D(), used_count = 44
~Point2D(), used_count = 43
~Point2D(), used_count = 42
~Point2D(), used_count = 41
~Point2D(), used_count = 40
~Point2D(), used_count = 39
~Point2D(), used_count = 38
~Point2D(), used_count = 37
~Point2D(), used_count = 36
~Point2D(), used_count = 35
~Point2D(), used_count = 34
~Point2D(), used_count = 33
~Point2D(), used_count = 32
~Point2D(), used_count = 31
~Point2D(), used_count = 30
~Point2D(), used_count = 29
~Point2D(), used_count = 28
~Point2D(), used_count = 27
~Point2D(), used_count = 26
L(N)_F: (-1, 9)_R(A) ~Point2D(), used_count = 25
L(I)_N: (1, 8)_R(H) ~Point2D(), used_count = 24
L(E)_A: (15, 7)_R(B) ~Point2D(), used_count = 23
I: (-2, 5)_R(J) ~Point2D(), used_count = 22
L(K)_H: (7, 6)_R(G) ~Point2D(), used_count = 21
E: (10, -2)_R(M) ~Point2D(), used_count = 20
L(C)_B: (16, 2)_R(D) ~Point2D(), used_count = 19
J: (2, 3) ~Point2D(), used_count = 18
K: (4, 0) ~Point2D(), used_count = 17
G: (6, 4) ~Point2D(), used_count = 16
M: (9, -3) ~Point2D(), used_count = 15
C: (12, 1) ~Point2D(), used_count = 14
D: (14, -1) ~Point2D(), used_count = 13
Height: 5
~PSTMultiset()
~Point2D(), used_count = 12
~Point2D(), used_count = 11
~Point2D(), used_count = 10
~Point2D(), used_count = 9
~Point2D(), used_count = 8
~Point2D(), used_count = 7
~Point2D(), used_count = 6
~Point2D(), used_count = 5
~Point2D(), used_count = 4
~Point2D(), used_count = 3
~Point2D(), used_count = 2
~Point2D(), used_count = 1
~Point2D(), used_count = 0
From this result, It seems the PST tree has correctly built with the code. But I'm not sure if the code is efficiency. If you had time and convenient, could you please check the code? I greatly appreciate for any suggestions and comments. Thanks in advance.
-
3\$\begingroup\$ @pacmaninbw, thanks for your kind comment! This is not a homework. These days I'm learning the basic data structures and algorithms, so I'm trying to implement them. I eager to learn more about C++ and write elegant code and beautiful design and solutions. \$\endgroup\$mining– mining2020年08月30日 01:05:53 +00:00Commented Aug 30, 2020 at 1:05
1 Answer 1
About using underscores
Avoid starting identifiers with an underscore, or using double underscores. Some uses of underscores are reserved, see this question.
If you really want to use some way to distinguish private and public member variables, consider prefixing using m_
or adding a single underscore at the end.
Useless overloads
I see you often have two overloads for member functions, such as:
Point2D& operator=(const Point2D& other) {...}
Point2D& operator=(Point2D& other) {...}
Unless you plan to modify other
, there is no need for the second version.
Make member functions const
if they do not modify the object
You should mark functions themselves as const
if they do not make any modifications. For example:
bool operator<(const Point2D& other) const {
return (m_y < other.m_y) || (m_y == other.m_y && m_x < other.m_x);
}
Your set is actually a map
Your class name is PSTMultiset
, but instead of a set you actually implement a map from a Point2D<E>
to a std::string
. And your internal use of std::multiset
can thus be replaced with std::multimap
, and you then also no longer need to declare a struct Element
:
template <typename E>
class PSTMultimap {
std::multimap<Point2D<E>, std::string> m_points;
class Node {
...
Point2D<E> m_key;
std::string m_value;
...
};
...
};
Inside Build()
you want to create a std::multimap
with a custom comparison function to sort on the x
coordinate first:
struct Point2DComparatorX {
bool operator()(const Point2D<E>& l, const Point2D<E>& r) {
return (l.m_x < r.m_x) || (l.m_x == r.m_x && l.m_y < r.m_y);
}
};
Node* Build(Node* parent, std::multimap<Point2D<E>, std::string>& points) {
...
std::multimap<Point2D<E>, std::string, Point2DComparatorX> remainPoints;
...
};
Consider not hardcoding the use of Point2D
inside your PST
The only thing PSTMultimap
needs to know about the type of keys is how to order them in two ways. It doesn't need to know you are sorting two-dimensional points. So it might be nicer if you could do something like:
template<typename Key, typename Value, typename Compare1 = std::less<>, typename Compare2 = std::less<>>
class PSTMultimap {
std::multimap<Key, Value, Compare1> m_points;
public:
PSTMultimap(Key keys[], Value values[], int size) ...
class Node {
...
Key m_key;
Value m_value;
...
};
Node* Build(Node* parent, std::multimap<Key, Value>& points) {
...
std::multimap<Key, Value, Compare2> remainPoints;
...
}
...
};
And then let the caller worry about providing the right comparison functions:
std::string names[] = {"A", "B", ...};
Point2D<int> points[] = {{15, 7}, {16, 2}, ...};
int size = ...;
template<typename E>
struct Point2DComparatorX {
bool operator()(const Point2D<E>& l, const Point2D<E>& r) {
return (l.m_x < r.m_x) || (l.m_x == r.m_x && l.m_y < r.m_y);
}
};
PSTMultimap<Point2D<int>, std::string, std::less<Point2D>, Point2DComparatorX<int>> t(names, points, size);
Alternative constructors
Your constructor requires you to store the keys and values in two arrays. But what if you have the information in a different container? It might make sense to provide a constructor that takes two iterators to key/value std::pair
s, like so:
template<typename Iterator>
PSTMultimap(const Iterator &begin, const Iterator &end) {
for (Iterator it = begin; it != end; ++it) {
m_points[it->first] = it->second;
}
}
And then use it, for example, like so:
std::vector<std::pair<Point2D<int>, std::string>> points = {
{{15, 7}, "A"},
{{16, 2}, "B"},
...
};
PSTMultimap<...> t(points.begin(), points.end());
Consider allowing points to be added and removed from an existing PST
Just like regular STL contains, it would be nice to be able to insert()
and erase()
elements. Even better would be to make it look like a regular STL container.
Make Build()
automatic
I understand that you want to be able to print the situation before and after ordering the elements, but in production use, you don't need that, and it is much nicer if your class builds the PST automatically without having to manually call Build()
.
Also, this will remove the need to temporarily store all the elements in a std::multimap<>
member variable until Build()
is called.
Use std::unique_ptr
to manage memory
Instead of having raw pointer variables and calling new
and delete
manually, consider using std::unique_ptr
. Use this for PSTMultimap
's root
and Node
's left
and right
member variables.
Improving efficiency
A major issue with your code is that you build a lot of temporary maps, and even do some unnecessary sorting. When building the PST, what you want to do at each recursive step is:
- Get the max element using
Compare1
, this becomes the root. - Sort the remaining elements using
Compare2
. - Split the remaining elements in two, recurse each half, and add those as children.
- Return the root.
Consider storing all the elements in a std::vector
instead, and for each call to Build()
, give it the begin
and end
position in this vector to use. Then the above steps become more concretely:
- Use
std::max_element()
usingCompare1
on the given range, thenstd::swap()
it to the start of the range. Alternatively, you could usestd::partial_sort(begin, begin + 1, end)
. - Use
std::sort()
frombegin + 1
toend
usingCompare2
. - Find the midpoint, and recurse each half (
being + 1
tomid_iter
andmid_iter
toend
)
So there's just one vector throughout the whole building process, of which smaller and smaller parts get sorted, without needing to allocate more and more maps. It also avoids full sorts just to get the maximum element.
-
4\$\begingroup\$
h3
(###
) weight is fairly enough emphasis for section titles (and saves some vertical space). Good c++ reviews from your side otherwise \$\endgroup\$πάντα ῥεῖ– πάντα ῥεῖ2020年08月29日 19:01:58 +00:00Commented Aug 29, 2020 at 19:01 -
\$\begingroup\$ I greatly appreciate for your so detail answer! So many valuable suggestions and improvements! The most important I learnt from your answer is the valuable design experience using
C++
! I think I'm still in the kindergarden ofC++
. \$\endgroup\$mining– mining2020年08月30日 00:27:05 +00:00Commented Aug 30, 2020 at 0:27 -
\$\begingroup\$ 1. For the naming of private variables within the class, I'll follow your kind suggestion by using
m_
. \$\endgroup\$mining– mining2020年08月30日 00:28:09 +00:00Commented Aug 30, 2020 at 0:28 -
1\$\begingroup\$ @mining Good luck with improving your class and your C++ skills, I'm glad you find the review helpful! About
set
andmap
: there really is no difference exceptmap
stores keys and values, whereasset
only stores keys. As forunique_ptr
: it is not garbage collected, it uses RAII. Butshared_ptr
can be considered garbage collecting, although a simple form using reference counting. \$\endgroup\$G. Sliepen– G. Sliepen2020年08月30日 08:24:42 +00:00Commented Aug 30, 2020 at 8:24 -
1\$\begingroup\$ @G.Sliepen, very grateful for your kind suggestions and detail explainations, which is very helpful and valuable! Thank you! \$\endgroup\$mining– mining2020年08月30日 08:49:11 +00:00Commented Aug 30, 2020 at 8:49