This is my attempt to solve the percolation problem for the Princeton Algorithm-I course, the problem is well-defined here. My implementation is in C++ not Java and the following procedure is followed:
Create an NxN grid using
QuickUnion
class and keep track of each grid site by labeling it (except the first row that is all labeled with zero, to make thepercolates()
andisFull()
functions easier to implement).Each site is a
struct
type that contains itsid
and the booleanopen
state.The solving methodology is as follows: open a random site, check if system percolates and if not repeat.
My problem is with the
open()
function, specifically the part of the code where I try to get the neighbor sites in order to connect the current site with the open neighbor site, where I make multiple checks for the location of the current site and what to do in each case.
QuickUnion.h (the implementation is not attached because it's not the issue here)
#ifndef QUICKUNION_H
#define QUICKUNION_H
#include <vector>
struct Site {
bool open = false;
std::size_t id;
};
typedef std::vector<std::vector<Site>> Matrix2D;
typedef std::pair<int ,int> Location;
class QuickUnion {
public:
QuickUnion(const int N);
bool isConnected(int p, int q);
void Union(int p, int q);
int Root(int p);
Location getLocation(int p);
virtual ~QuickUnion(){ }
Matrix2D grid;
std::size_t count = 0;
};
#endif /* QUICKUNION_H */
Percolation.h
#ifndef PERCOLATION_H
#define PERCOLATION_H
#include "QuickUnion.h"
inline std::size_t getRandom(std::size_t N);
class Percolation {
public:
Percolation(int N); // create n-by-n grid, with all sites blocked
void open(int row, int col); // open site (row, col) if it is not open already
bool isOpen(int row, int col); // is site (row, col) open?
bool isFull(int row, int col); // is site (row, col) full?
bool percolates(); // does the system percolate?
~Percolation();
QuickUnion *qu;
};
#endif /* PERCOLATION_H */
Percolation.cpp
#include <cstdlib>
#include <random>
#include <iostream>
#include <vector>
#include "QuickUnion.h"
#include "Percolation.h"
inline std::size_t getRandom(std::size_t N) {
return ( std::rand() % ( N ) );
}
Percolation::Percolation(int N) {
qu = new QuickUnion(N);
}
Percolation::~Percolation() {
delete qu;
}
void Percolation::open(int row, int col) {
// row-> j & col->i
if (isOpen(row, col))
return;
qu->grid[row][col].open = true;
auto current_site_id = qu->grid[row][col].id;
// connect the just-opened site to the neighbor open sites
// upper boundary. Don't connect to any site, all the opened sites here shall be roots.
if (row == 0) {
if (col == 0) {
std::vector<std::pair<int ,int>> neighbors;
neighbors.push_back(std::make_pair(row + 1, col));
neighbors.push_back(std::make_pair(row + 1, col + 1));
neighbors.push_back(std::make_pair(row, col + 1));
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(id, current_site_id);
}
}
return;
}
if (col == qu->count - 1) {
std::vector<std::pair<int ,int>> neighbors;
neighbors.push_back(std::make_pair(row + 1, col));
neighbors.push_back(std::make_pair(row + 1, col - 1));
neighbors.push_back(std::make_pair(row, col - 1));
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(id, current_site_id);
}
}
return;
} else {
std::vector<std::pair<int ,int>> neighbors;
neighbors.push_back(std::make_pair(row + 1, col));
neighbors.push_back(std::make_pair(row + 1, col - 1));
neighbors.push_back(std::make_pair(row + 1, col + 1));
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(id, current_site_id);
}
}
return;
}
}
// lower boundary
if (row == qu->count - 1) {
if (col == 0) {
std::vector<std::pair<int ,int>> neighbors;
neighbors.push_back(std::make_pair(row - 1, col));
neighbors.push_back(std::make_pair(row - 1, col + 1));
neighbors.push_back(std::make_pair(row, col + 1));
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(current_site_id, id);
}
}
return;
}
if (col == qu->count - 1) {
std::vector<std::pair<int ,int>> neighbors;
neighbors.push_back(std::make_pair(row - 1, col));
neighbors.push_back(std::make_pair(row - 1, col - 1));
neighbors.push_back(std::make_pair(row, col - 1));
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(current_site_id, id);
}
}
return;
} else {
std::vector<std::pair<int ,int>> neighbors;
neighbors.push_back(std::make_pair(row - 1, col));
neighbors.push_back(std::make_pair(row - 1, col - 1));
neighbors.push_back(std::make_pair(row - 1, col + 1));
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(current_site_id, id);
}
}
return;
}
}
// left boundary
if (col == 0) {
std::vector<std::pair<int, int>> neighbors;
neighbors.push_back(std::make_pair(row + 1, col));
neighbors.push_back(std::make_pair(row - 1, col));
neighbors.push_back(std::make_pair(row, col + 1));
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(current_site_id, id);
}
}
return;
}
// right boundary
if (col == qu->count - 1) {
std::vector<std::pair<int, int>> neighbors;
neighbors.push_back(std::make_pair(row + 1, col));
neighbors.push_back(std::make_pair(row - 1, col));
neighbors.push_back(std::make_pair(row, col - 1));
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(current_site_id, id);
}
}
return;
}
else {
std::vector<std::pair<int ,int>> neighbors;
neighbors.push_back(std::make_pair(row + 1, col));
neighbors.push_back(std::make_pair(row + 1, col + 1));
neighbors.push_back(std::make_pair(row + 1, col - 1));
neighbors.push_back(std::make_pair(row - 1, col));
neighbors.push_back(std::make_pair(row - 1, col + 1));
neighbors.push_back(std::make_pair(row - 1, col - 1));
neighbors.push_back(std::make_pair(row, col + 1));
neighbors.push_back(std::make_pair(row, col - 1));
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(current_site_id, id);
}
}
return;
}
return;
}
bool Percolation::isOpen(int row, int col) {
return qu->grid[row][col].open;
}
bool Percolation::isFull(int row, int col) {
return qu->isConnected(qu->grid[row][col].id, 0);
}
bool Percolation::percolates() {
for (int i = 0; i < qu->count; i++) {
if (qu->isConnected(qu->grid[qu->count - 1][i].id, 0))
return true;
}
return false;
}
int main(int argc, char *argv[]) {
if (argc < 2) {
std::cout << "Too few arguments.\n";
return -1;
}
auto size = std::atoi(argv[1]);
Percolation p(size);
int threshold = 0;
int i, j;
while (!p.percolates()) {
i = getRandom(size);
j = getRandom(size);
p.open(j, i);
threshold++;
}
std::cout << "System percolates at " << float(threshold)/float(size*size) << " open sites propability.\n";
}
In terms of performance, how can I speed up the code (it's an algorithm course!)?
Is their an easier way to avoid the ugly code block in
open()
to solve the connectivity versus site location problem?
2 Answers 2
Because no details were provided regarding the implementation of the QuickUnion
class, it's not possible to provide any meaningful comment on performance. However, here are a few things that many help you improve your code.
Consider using a better random number generator
You are currently using
inline std::size_t getRandom(std::size_t N) {
return ( std::rand() % ( N ) );
}
The major problems with this approach is that the low order bits of the random number generator are not particularly random. On my machine, there's a slight but measurable bias toward 0 with that. A better solution, if your compiler and library supports it, would be to use the C++11 std::uniform_int_distribution
. In this case, I'd instead add a std::uniform_int_distribution
to the class as private data members like this:
std::uniform_int_distribution<std::size_t> randomSquare;
static std::mt19937 gen;
Then in the implementation file, define gen
:
std::mt19937 Percolation::gen([]()->std::random_device::result_type{std::random_device rd; return rd();}());
Now whenever you want a random square, use the member function randomSquare()
like this:
randomSquare(0,size*size);
Or if you feel you must keep the x,y
style the code is currently using, use this:
i = randomSquare(0, size);
j = randomSquare(0, size);
Save the constructor argument
The size N
is the only constructor argument but it doesn't appear to be explicitly saved which would be convenient in a number of places. One could refer perhaps to qu->grid.size()
each time, but that seems a little awkward. I'd suggest a private member variable that stores the size for convenience.
Fix the bug
Right now, the open
function begins with this:
if (isOpen(row, col))
return;
However, the threshold
is incremented even if no new square is opened. That's a bug and will skew your calculation.
Make the object do the work
If I were writing this program, I'd prefer to write a main
like this:
int main(int argc, char *argv[]) {
if (argc < 2) {
std::cout << "Too few arguments.\n";
return -1;
}
auto size = std::atoi(argv[1]);
Percolation p(size);
std::cout << "System percolates at " << p.run() << " open sites propability.\n";
}
I'm sure you can figure out what would need to go into a run
function.
Simplify your code
As you have already realized, the open
function is waaaay too long and complex. Consider instead what's required. When a site is opened, you simply need to call qu->Union()
with each of it neighbors. If the site is on the edge of the grid, the neighbor that would be outside the grid simply needs to be skipped. Also, it appears that id
is simply an alias for row + col * size
-- I'd recommend choosing the single id
rather than the row,col
approach or a messy mix of both. Anyway, a much simpler open
might be this:
void Percolation::open(int row, int col) {
if (isOpen(row, col))
return;
qu->grid[row][col].open = true;
auto current_site_id = qu->grid[row][col].id;
// process neighbor above
if (row != 0 && qu->grid[row-1][col].open) {
qu->Union(qu->grid[row-1][col].id, current_site_id);
}
// process neighbor below
if (row != size-1 && qu->grid[row+1][col].open) {
qu->Union(qu->grid[row+1][col].id, current_site_id);
}
// process neighbor left
if (col != 0 && qu->grid[row][col-1].open) {
qu->Union(qu->grid[row][col-1].id, current_site_id);
}
// process neighbor right
if (col != size-1 && qu->grid[row][col+1].open) {
qu->Union(qu->grid[row][col+1].id, current_site_id);
}
}
-
\$\begingroup\$ Great answer Edward. I don't know why I was connecting the just-opened site to all the neighbor sites (total of 8 sites not just 4 as you did in your code), it seems that I understood the problem in the wrong way. Thanks again. \$\endgroup\$Algo– Algo2016年12月12日 15:24:24 +00:00Commented Dec 12, 2016 at 15:24
Your open()
function does seem needlessly complicated. It looks like you are separately connecting every existing cell on the map.
Because the problem has a direction, and the bottom and top rows are special, you are only interested in paths originating from either of those edges.
Also, if it helps, you can think of finding a path through the map as solving a maze. One way to solve a maze is to hug the right-hand wall until you arrive at the exit.
I've jotted these ideas below in pseudo-code. The function takes a list of all open cell addresses. The first for
loop takes O(n) where n is the length of the list. The second for
loop tests paths from the top row down, and terminates either when the counter returns to the starting position or reaches the bottom row. This takes time proportional to the width of the map multiplied by the average length of false paths.
(Edit: Repeatedly exploring the same false path will slow this down in highly interconnected maps, and can be prevented by keeping a list of cells already checked, then breaking out of the do until
loop upon finding a repeat.)
function percolates(all_cells):
// make list of open neighbours for each open cell
for i in all_open_cells:
for side in range(1 to 6): // sides listed clockwise from left
if has_open_neighbour(i, side):
i.neighbours.add(get_neighbour_address(i, side))
current_address = None
// search edge of open neighbours by "hugging" leftmost edge
for i in top_row:
if not i.neighbours: next i
current_address == i.neighbours.first_entry // move from start cell
do until current_address == i:
current_address == current_address.neighbours.first_entry
// move to left-side neighbour or nearest clockwise
if current_address.row == bottom_row:
return True
Hope that maybe gives you some ideas.
Edit:
Reading the link you provided, it seems the approach you've taken is the one they have in their Java implementation of QuickUnion. In terms of run-time, I'd predict it to scale with the size of the map (rows x columns) multiplied by the average length of false paths. This is about O(n)x, where n is the width of the map.
If that's true, and your current approach is the "official" one, then there aren't a lot of options to speed it up.
The multiple if
blocks in your C++ implementation have very similar content, and can definitely be simplified, perhaps with a switch
case
statement. It won't make it any faster, but it will look better.
In fact, if I'm not mistaken, the only thing that is different between the different cases is the sign on your row / columns operations. Why not pass in a multiplier depending on the result of your row / col / qu if
statements? As in:
// make array rows
int a[6] = {1,0,1,-1,0,-1}
int b[6] = {-1,0,-1,1,0,1}
// etc...
int arr[8][6] // or as long as you need
arr = make_array(a,b,c,d,e,f...) // a loop to populate the array
// equivalent of your if statements
switch ( col ) {
case 0: i = 0
case qu: i = 1
// etc... (you might have to nest these)
// your variable part
neighbors.push_back(std::make_pair(row + arr[i][0], col + arr[i][1]));
neighbors.push_back(std::make_pair(row + arr[i][2], col + arr[i][3]));
neighbors.push_back(std::make_pair(row + arr[i][4], col + arr[i][5]));
// The rest of your code is the same in all cases.
for (auto &neighbor: neighbors) {
if (isOpen(neighbor.first, neighbor.second)) {
auto id = qu->grid[neighbor.first][neighbor.second].id;
qu->Union(id, current_site_id);
}
}
return;
This would replace most of your Percolation.cpp
-
\$\begingroup\$ Thanks Ryan for your answer. The
open
function in my code connects the current site with all the neighbor open sites , otherwise thepercolate
state won't be realized. The problem is the connection algorithm in the function where I manually list all the 8 neighbors for arbitrary non-boundary sites and the 3 neighbors for the boundary sites using multiple uglyif
s, and that't mainly my question: How can I avoid this? \$\endgroup\$Algo– Algo2016年12月12日 06:10:16 +00:00Commented Dec 12, 2016 at 6:10
Explore related questions
See similar questions with these tags.
QuickUnion
is likely to have a big impact on overall performance, so if you'd like reviewers to comment on performance, you might want to supply that file. \$\endgroup\$