Sudoku solver program written in modern c++

Question 1

This is a simple program that solves a given Sudoku puzzle recursively. The input is provided as a file that contains the cells, 0 if empty, delimited by a , character.

#include <iostream>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include <utility>
#include <exception>
#include <fstream>
#include <cctype>
#include <algorithm>
using std::cout;
using std::cin;
using std::endl;
using std::vector;
using std::string;
using std::string_view;
using std::pair;
using std::optional;
using puzzle_t = vector<vector<int>>;
using coord = pair<size_t, size_t>;
std::ostream& operator<<(std::ostream& other, const puzzle_t& puzzle) {
 for (size_t i = 0; i < 9; i++) {
 char comma[3] = {'0円', ' ', '0円'};
 for (size_t j = 0; j < 9; j++) {
 other << comma << puzzle[i][j];
 comma[0] = ',';
 }
 other << endl;
 }
 return other;
}
vector<string> split(const string& str, const string& delim) {
 vector<string> tokens;
 string_view str_view = str;
 size_t pos = 0;
 do {
 pos = str_view.find(delim);
 tokens.emplace_back(str_view.substr(0, pos));
 str_view = str_view.substr(pos + 1, str_view.size());
 } while (pos != string_view::npos);
 return tokens;
}
bool safe_isspace(char c) {
 return std::isspace(static_cast<unsigned char>(c));
}
string strip(const string& str) {
 size_t i;
 string result = str;
 for (i = 0; i < result.size() && safe_isspace(result[i]); i++);
 result = result.substr(i, result.length());
 for (i = result.length() - 1; i >= 0 && safe_isspace(result[i]); i--);
 result = result.substr(0, i + 1);
 return result;
}
template <class T>
vector<T> filter_empty(vector<T>&& vec) {
 vector<T> result;
 for (auto& t : vec) {
 if (!t.empty()) {
 result.push_back(t);
 }
 }
 return result;
}
optional<puzzle_t> read_puzzle(std::istream& in) {
 try {
 puzzle_t puzzle;
 while (puzzle.size() < 9) {
 string line;
 std::getline(in, line);
 if (line.empty()) {
 continue;
 }
 vector<string> all_cells = split(line, ",");
 vector<int> row;
 for (auto& str : all_cells) {
 str = strip(str);
 }
 all_cells = filter_empty(std::move(all_cells));
 if (all_cells.size() != 9) {
 return { };
 }
 for (auto& str : all_cells) {
 int value = std::stoi(str);
 if (value < 0 || value > 9) {
 return { };
 }
 row.push_back(value);
 }
 puzzle.push_back(row);
 }
 return { puzzle };
 } catch (const std::exception& exn) {
 return { };
 }
}
bool validate_puzzle(const puzzle_t& puzzle ) {
 auto validate_group = [](vector<int> group) -> bool {
 for (int i = 1; i <= 9; i++) {
 if (std::count(group.begin(), group.end(), i) > 1) {
 return false;
 }
 }
 return true;
 };
 auto get_col = [&puzzle](size_t col_n) -> vector<int> {
 vector<int> col(9);
 for (const auto& row : puzzle) {
 col.push_back(row[col_n]);
 }
 
 return col;
 };
 auto get_group = [&puzzle](size_t i, size_t j) -> vector<int> {
 vector<int> group;
 for (int row = i * 3; row < i * 3 + 3; row++) {
 for (int col = j * 3; col < j * 3 + 3; col++) {
 group.push_back(puzzle[row][col]);
 }
 }
 return group;
 };
 
 return validate_group(puzzle[0]) &&
 validate_group(puzzle[1]) &&
 validate_group(puzzle[2]) &&
 validate_group(puzzle[3]) &&
 validate_group(puzzle[4]) &&
 validate_group(puzzle[5]) &&
 validate_group(puzzle[6]) &&
 validate_group(puzzle[7]) &&
 validate_group(puzzle[8]) &&
 validate_group(get_col(0)) &&
 validate_group(get_col(1)) &&
 validate_group(get_col(2)) &&
 validate_group(get_col(3)) &&
 validate_group(get_col(4)) &&
 validate_group(get_col(5)) &&
 validate_group(get_col(6)) &&
 validate_group(get_col(7)) &&
 validate_group(get_col(8)) &&
 validate_group(get_group(0, 0)) &&
 validate_group(get_group(0, 1)) &&
 validate_group(get_group(0, 2)) &&
 validate_group(get_group(1, 0)) &&
 validate_group(get_group(1, 1)) &&
 validate_group(get_group(1, 2)) &&
 validate_group(get_group(2, 0)) &&
 validate_group(get_group(2, 1)) &&
 validate_group(get_group(2, 2));
}
optional<coord> get_next_empty_coord(const puzzle_t& puzzle) {
 for (size_t i = 0; i < 9; i++) {
 for (size_t j = 0; j < 9; j++) {
 if (puzzle[i][j] == 0) {
 return { std::make_pair(i, j) };
 }
 }
 }
 return { };
}
void solve_recursive(puzzle_t& puzzle, vector<puzzle_t>& results) {
 auto next_coord_opt = get_next_empty_coord(puzzle);
 if (!next_coord_opt) {
 results.push_back(puzzle);
 return;
 }
 auto next_coord = next_coord_opt.value();
 for (int i = 1; i <= 9; i++) {
 puzzle[next_coord.first][next_coord.second] = i;
 if (validate_puzzle(puzzle)) {
 solve_recursive(puzzle, results);
 }
 puzzle[next_coord.first][next_coord.second] = 0;
 }
}
vector<puzzle_t> solve(puzzle_t&& puzzle) {
 vector<puzzle_t> result;
 solve_recursive(puzzle, result);
 return result;
}
bool solver(const string& in_path, const string& out_path) {
 std::ifstream fin(in_path);
 std::ofstream fout(out_path);
 if (!fin || !fout) {
 cout << "IO failure" << endl;
 return false;
 }
 optional<puzzle_t> puzzle = read_puzzle(fin);
 if (!puzzle || puzzle.value().size() != 9 || puzzle.value()[0].size() != 9) {
 cout << "Invalid puzzle" << endl;
 return false;
 }
 auto solutions = solve(std::move(puzzle.value()));
 fout << "Number of solutions found: " << solutions.size() << endl;
 fout << endl;
 
 for (const auto& solution : solutions) {
 fout << solution << endl;
 fout << endl;
 }
 fin.close();
 fout.close();
 return true;
}
int main(int argc, char* argv[]) {
 std::ios::sync_with_stdio(false);
 if (argc == 3) {
 string in_path = argv[1];
 string out_path = argv[2];
 if (solver(in_path, out_path)) {
 return EXIT_SUCCESS;
 } else {
 return EXIT_FAILURE;
 }
 } else {
 cout << "Invalid number of arguments to program." << endl;
 return EXIT_FAILURE;
 }
}

The program is compiled with clang using the following options.

$ clang++ -std=c++17 -Wall -Werror -O3 -o sudoku_solver sudoku_solver.cpp # Normal
$ clang++ -std=c++17 -Wall -Werror -O0 -g -fsanitize=address -fsanitize=undefined -fno-optimize-sibling-calls -o sudoku_solver sudoku_solver.cpp # for debug with ASAN

The program can be run as follows.

$ sudoku_solver <input_file> <output_file>

Here is an example of the program running. Here is an example input. Note, that whitespace is added for human readability, but the program is expected to ignore empty lines.

0, 0, 5, 0, 8, 0, 3, 0, 2
4, 2, 0, 0, 0, 0, 5, 0, 0
6, 0, 0, 0, 0, 4, 0, 0, 0
0, 0, 0, 3, 0, 0, 9, 0, 0
3, 0, 0, 0, 2, 6, 0, 0, 0
0, 0, 0, 0, 0, 0, 0, 7, 0
0, 0, 0, 0, 7, 0, 6, 8, 0
0, 9, 8, 0, 0, 0, 0, 0, 4
0, 0, 0, 5, 0, 0, 0, 0, 0

Here is the output generated by the program.

Number of solutions found: 1
9, 1, 5, 6, 8, 7, 3, 4, 2
4, 2, 7, 9, 1, 3, 5, 6, 8
6, 8, 3, 2, 5, 4, 1, 9, 7
8, 7, 1, 3, 4, 5, 9, 2, 6
3, 4, 9, 7, 2, 6, 8, 5, 1
2, 5, 6, 8, 9, 1, 4, 7, 3
1, 3, 2, 4, 7, 9, 6, 8, 5
5, 9, 8, 1, 6, 2, 7, 3, 4
7, 6, 4, 5, 3, 8, 2, 1, 9

Question 2

Micro-review: std:: size_t. ;)

Question 3

Is there a particular aspect that you want the review to focus on?

Question 4

@harold Primarily style and use of modern c++

Question 5

@adityagnet Maybe use a named constant instead of 9 in a lot of places?

Question 6

So proud of you. That's a lot of work. I did some stuff related years and years ago, feel free to steal any ideas you want: github.com/phorgan1/sudoku-system.git

Question 7

#include <iostream>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include <utility>
#include <exception>
#include <fstream>
#include <cctype>
#include <algorithm>

I find it helps if I keep my includes in alphabetical order - that makes it easy to quickly check whether I have already included one I'm about to add.

using std::cout;
using std::cin;
using std::endl;
using std::vector;
using std::string;
using std::string_view;
using std::pair;
using std::optional;

I don't like these at file scope (but infinitely better than using the whole of namespace std). But that's perhaps just a personal preference.

Despite not using std::size_t, it's written unqualified in many places - a likely portability issue.

//Flat vectors/arrays are faster, but for this small a data set it doesn't really matter.

It's not really the size of data that matters, but more the number of accesses. An array of arrays would probably be a better choice, given that the size is fixed:

using puzzle_t = std::array<9, std::array<9, int>>;

Be careful using identifiers ending in _t - in the global namespace, POSIX reserves those names. Not a problem here, but something to be aware of.

 char comma[3] = {'0円', ' ', '0円'};
 for(size_t j = 0; j < 9; j++) {
 other << comma << puzzle[i][j];
 comma[0] = ',';
 }

It took me a while to work out what's happening here. I think it's clearer and more idiomatic to just switch a pointer between two constant string literals:

 char const *sep = "";
 for (auto cell: puzzle[i]) {
 other << sep << cell;
 sep = ", ";
 }

I think that's easier to follow than manipulating individual characters in an array. You might want to consider using a std::ostream_iterator() to do the delimiter handling for you (though that doesn't do exactly what we want here).

 other << endl;

We don't need to flush output for every line. Just write \n there instead.

bool safe_isspace(char c) {
 return std::isspace(static_cast<unsigned char>(c));
}

Yes, that's a good way to reduce clutter, and to reduce the likelihood of forgetting the cast somewhere.

for (i = 0; i < result.size() && safe_isspace(result[i]); i++);
result = result.substr(i, result.length());
for (i = result.length() - 1; i >= 0 && safe_isspace(result[i]); i--);
result = result.substr(0, i + 1);

Instead of assigning twice to result, it's better to find the start and end positions and use substr() just once. If we use iterators rather than indexes, standard algorithms come to our aid:

using RevIt = std::reverse_iterator<std::string::iterator>;
auto a = std::find_if_not(str.begin(), str.end(), safe_isspace);
auto z = std::find_if_not(str.rbegin(), RevIt(a), safe_isspace).base();
return std::string{a,z};

Look, no arithmetic!

Another loop that can be replaced here:

for (auto& t : vec) {
 if (!t.empty()) {
 result.push_back(t);
 }
}

That looks like a candidate for std::copy_if().

 for (int i = 1; i <= 9; i++) {
 if (std::count(group.begin(), group.end(), i) > 1) {
 return false;
 }
 }

Looks inefficient. Instead of searching for each possible value, why not gather what's there into a std::multiset (or even count into a simple array) and examine the counts in that? That has the advantage (if you put it into the Puzzle class itself) of being reusable for checking whether a solution is complete.

return validate_group(puzzle[0]) &&
 validate_group(puzzle[1]) &&
 validate_group(puzzle[2]) &&
 validate_group(puzzle[3]) &&
 validate_group(puzzle[4]) &&
 validate_group(puzzle[5]) &&
 validate_group(puzzle[6]) &&
 validate_group(puzzle[7]) &&
 validate_group(puzzle[8]) &&
 validate_group(get_col(0)) &&
 validate_group(get_col(1)) &&
 validate_group(get_col(2)) &&
 validate_group(get_col(3)) &&
 validate_group(get_col(4)) &&
 validate_group(get_col(5)) &&
 validate_group(get_col(6)) &&
 validate_group(get_col(7)) &&
 validate_group(get_col(8)) &&
 validate_group(get_group(0, 0)) &&
 validate_group(get_group(0, 1)) &&
 validate_group(get_group(0, 2)) &&
 validate_group(get_group(1, 0)) &&
 validate_group(get_group(1, 1)) &&
 validate_group(get_group(1, 2)) &&
 validate_group(get_group(2, 0)) &&
 validate_group(get_group(2, 1)) &&
 validate_group(get_group(2, 2));

Although quite clear, it might be better transformed into a few loops (possible internal to calls to std::all_of()). That trades one clarity for another - we would then not need to check whether we'd missed one out.

 cout << "Invalid number of arguments to program." << endl;
 return EXIT_FAILURE;

Error messages should go to the standard error stream std::cerr, not std::cout.

Question 8

Hmm, alphabetical includes... never thought about that! I always do it by increasing order of dependencies/complexity.

Question 9

+1 for alphy sorted includes/imports. It makes it really easy to spot a missing import. This is especially helpful in code reviews.

Question 10

also, +1 for calling out endl as meaning "newline AND flush" and not simply a constant for '\n'. I have solved many a performance issue by replacing endl with '\n'.

Question 11

Note that identifiers ending in _t are reserved for use by the Standard Library, so I recommend changing that name. Say what? Any identifier that's not actually reserved for the implementation (_Ugly, two__scores) won't be a macro or "magic" and can be defined in your own scope that's not going to conflict with what's in namespace std. You might be thinking of Posix, which would only apply to the global namespace in C++.

Question 12

@TobySpeight that's a good reason to put a namespace around the whole thing, even for simple one-file programs like this.

Question 13

Everything that Toby said, plus:

In operator<<, the output stream should not be called other. The variable name other should only be used in comparison functions between this and other. It should rather be called out or os.

When I run the program, I get an error message that doesn't help me:

$ ./cmake-build-debug/sudoku.exe sudoku.txt
Invalid number of arguments to program.

The common pattern is to print this instead:

usage: sudoku.exe <input file> <output file>

"Don't tell me what I did wrong, tell me how to do it correctly."

What is the purpose of the commas in the input and output? I don't see any. In my own sudoku solver, I rather used this format:

---3-8---
--2---31-
35-2-1-68
6---1-2-7
--3-9-4--
1-4-8---9
84-6-5-23
-31---5--
---1-9---

Of course, you can choose any other character for unknown cells. Or allow spaces. But spaces + commas is a little too much to me.

Just like Toby, I had a hard time reading the code around char comma[3]. Seeing that a variable called comma didn't actually contain a comma could only mean one thing: That you are a liar, or at least your code is. You should rather write trustworthy code instead, where the variable names match their content and purpose. This one should have been called separator or sep instead, just like Toby said. I usually use sep since that name is long enough in a function with less than 10 lines of code.

In the function get_group, you compare signed with unsigned numbers. You should use consistent data types, which in this case means std::size_t instead of int. In the compiler warnings, you forgot -Wextra for GCC and -Weverything for Clang.

In validate_group you construct a vector with 9 elements each time. This looks like unnecessary memory allocation to me. I'd rather use std::array<int, 9> instead, or maybe even write it without any memory allocation at all. The way you did it in validate_puzzle looks easy to understand though, therefore chances are high that after trying to do it in another way, I'd just throw that attempt away and return to your code instead.

I'm a bit confused when I see a loop like for (int i = 1; i <= 9; i++) since I expect loops with the variable i to start at 0 and use < instead of <=. If you had renamed the variable to candidate or cand for short, I wouldn't be as surprised.

I'd rename validate_puzzle to is_solvable. On second thought, the correct name would rather be is_not_obviously_unsolvable, but that quickly gets ugly. So the appropriate name might be may_be_solvable.

 cout << "Invalid puzzle" << endl;

This is really unfriendly to the user. Again, don't tell me that I did something wrong (without even telling me what this something is), tell me how and where to fix it. Line number and column number are the least that your program must provide.

In main, the argc != 3 is an error condition. Therefore it should be written with a simple if, not an if-else, like this:

int main(int argc, char* argv[]) {
 std::ios::sync_with_stdio(false);
 if (argc != 3) {
 cout << "Invalid number of arguments to program." << endl;
 return EXIT_FAILURE;
 }
 string in_path = argv[1];
 string out_path = argv[2];
 if (!solver(in_path, out_path))
 return EXIT_FAILURE;
 return EXIT_SUCCESS;
}

This way, the main control flow all happens at the same indentation level of the code.

Question 14

"The variable name other should only be used in comparison functions between this and other" this is a specific programming style rather than a general rule.

Question 15

It's a general rule. The word other only makes sense if there is also a this. It's not a programming rule but a language rule. Language rules apply to code reviews as well.

Question 16

"Other" makes sense in any context in which there is a clear comparator, it does not need to be this as you stated. Of course, I agree like any variable name it should be chosen to make sense in context but your specification is overly narrow.

Question 17

@JackAidley Could you drop me a note when you come across a piece of code that uses the variable name other in a meaningful way, without also having a variable called self or this or it at the same time? I'd expect it to be difficult to find such code, that's why I'd like to know.

Question 18

You should separate loading, solving, and saving the solution into different usable functions. Consider that you could have unit tests that have a board embedded directly into the source code, and you would want to call a different function to ingest that (rather than giving it a file name). Your tests might perturb an existing board to generate different cases, and just call solve without re-loading the whole thing from scratch. You can compare the output against the known correct value, and not save it to a file at all.

It's not just to consider unit testing, but such separation makes it easy to maintain. What if you add a GUI or other file formats? That should be possible and easy without having to touch the solve function.

Consider using string_view. In particular, this makes it easy to trim spaces without recopying the input to an output string. If you do all the processing of the individual values on a line while the whole line is still in memory, you can use string_view end to end from split through to the final value.

BTW, I use a split that returns an array<string_view,N> so it does not have to allocate memory as with a vector (or any of the string elements) at all! Since you know how many items are on one line (it's an error if that's not right) you could use that method as well.

Donald Knuth wrote a Sudoku solver called Dancing Links that is very efficient. If I wanted to do this, I would implement his algorithm using modern C++, along with the desired input and output routines.

Toby Speight Toby Speight 87.4k14 gold badges104 silver badges322 bronze badges · Accepted Answer · 2020-11-18 17:35:36Z

#include <iostream>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include <utility>
#include <exception>
#include <fstream>
#include <cctype>
#include <algorithm>

I find it helps if I keep my includes in alphabetical order - that makes it easy to quickly check whether I have already included one I'm about to add.

using std::cout;
using std::cin;
using std::endl;
using std::vector;
using std::string;
using std::string_view;
using std::pair;
using std::optional;

I don't like these at file scope (but infinitely better than using the whole of namespace std). But that's perhaps just a personal preference.

Despite not using std::size_t, it's written unqualified in many places - a likely portability issue.

//Flat vectors/arrays are faster, but for this small a data set it doesn't really matter.

It's not really the size of data that matters, but more the number of accesses. An array of arrays would probably be a better choice, given that the size is fixed:

using puzzle_t = std::array<9, std::array<9, int>>;

Be careful using identifiers ending in _t - in the global namespace, POSIX reserves those names. Not a problem here, but something to be aware of.

 char comma[3] = {'0円', ' ', '0円'};
 for(size_t j = 0; j < 9; j++) {
 other << comma << puzzle[i][j];
 comma[0] = ',';
 }

It took me a while to work out what's happening here. I think it's clearer and more idiomatic to just switch a pointer between two constant string literals:

 char const *sep = "";
 for (auto cell: puzzle[i]) {
 other << sep << cell;
 sep = ", ";
 }

I think that's easier to follow than manipulating individual characters in an array. You might want to consider using a std::ostream_iterator() to do the delimiter handling for you (though that doesn't do exactly what we want here).

 other << endl;

We don't need to flush output for every line. Just write \n there instead.

bool safe_isspace(char c) {
 return std::isspace(static_cast<unsigned char>(c));
}

Yes, that's a good way to reduce clutter, and to reduce the likelihood of forgetting the cast somewhere.

for (i = 0; i < result.size() && safe_isspace(result[i]); i++);
result = result.substr(i, result.length());
for (i = result.length() - 1; i >= 0 && safe_isspace(result[i]); i--);
result = result.substr(0, i + 1);

Instead of assigning twice to result, it's better to find the start and end positions and use substr() just once. If we use iterators rather than indexes, standard algorithms come to our aid:

using RevIt = std::reverse_iterator<std::string::iterator>;
auto a = std::find_if_not(str.begin(), str.end(), safe_isspace);
auto z = std::find_if_not(str.rbegin(), RevIt(a), safe_isspace).base();
return std::string{a,z};

Look, no arithmetic!

Another loop that can be replaced here:

for (auto& t : vec) {
 if (!t.empty()) {
 result.push_back(t);
 }
}

That looks like a candidate for std::copy_if().

 for (int i = 1; i <= 9; i++) {
 if (std::count(group.begin(), group.end(), i) > 1) {
 return false;
 }
 }

Looks inefficient. Instead of searching for each possible value, why not gather what's there into a std::multiset (or even count into a simple array) and examine the counts in that? That has the advantage (if you put it into the Puzzle class itself) of being reusable for checking whether a solution is complete.

return validate_group(puzzle[0]) &&
 validate_group(puzzle[1]) &&
 validate_group(puzzle[2]) &&
 validate_group(puzzle[3]) &&
 validate_group(puzzle[4]) &&
 validate_group(puzzle[5]) &&
 validate_group(puzzle[6]) &&
 validate_group(puzzle[7]) &&
 validate_group(puzzle[8]) &&
 validate_group(get_col(0)) &&
 validate_group(get_col(1)) &&
 validate_group(get_col(2)) &&
 validate_group(get_col(3)) &&
 validate_group(get_col(4)) &&
 validate_group(get_col(5)) &&
 validate_group(get_col(6)) &&
 validate_group(get_col(7)) &&
 validate_group(get_col(8)) &&
 validate_group(get_group(0, 0)) &&
 validate_group(get_group(0, 1)) &&
 validate_group(get_group(0, 2)) &&
 validate_group(get_group(1, 0)) &&
 validate_group(get_group(1, 1)) &&
 validate_group(get_group(1, 2)) &&
 validate_group(get_group(2, 0)) &&
 validate_group(get_group(2, 1)) &&
 validate_group(get_group(2, 2));

Although quite clear, it might be better transformed into a few loops (possible internal to calls to std::all_of()). That trades one clarity for another - we would then not need to check whether we'd missed one out.

 cout << "Invalid number of arguments to program." << endl;
 return EXIT_FAILURE;

Error messages should go to the standard error stream std::cerr, not std::cout.

Hmm, alphabetical includes... never thought about that! I always do it by increasing order of dependencies/complexity.
+1 for alphy sorted includes/imports. It makes it really easy to spot a missing import. This is especially helpful in code reviews.
also, +1 for calling out endl as meaning "newline AND flush" and not simply a constant for '\n'. I have solved many a performance issue by replacing endl with '\n'.
Note that identifiers ending in _t are reserved for use by the Standard Library, so I recommend changing that name. Say what? Any identifier that's not actually reserved for the implementation (_Ugly, two__scores) won't be a macro or "magic" and can be defined in your own scope that's not going to conflict with what's in namespace std. You might be thinking of Posix, which would only apply to the global namespace in C++.
@TobySpeight that's a good reason to put a namespace around the whole thing, even for simple one-file programs like this.

Stack Exchange Network

Sudoku solver program written in modern c++

3 Answers 3

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Sudoku solver program written in modern c++

3 Answers 3

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions