This is a simple program that solves a given Sudoku puzzle recursively. The input is provided as a file that contains the cells, 0 if empty, delimited by a ,
character.
#include <iostream>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include <utility>
#include <exception>
#include <fstream>
#include <cctype>
#include <algorithm>
using std::cout;
using std::cin;
using std::endl;
using std::vector;
using std::string;
using std::string_view;
using std::pair;
using std::optional;
using puzzle_t = vector<vector<int>>;
using coord = pair<size_t, size_t>;
std::ostream& operator<<(std::ostream& other, const puzzle_t& puzzle) {
for (size_t i = 0; i < 9; i++) {
char comma[3] = {'0円', ' ', '0円'};
for (size_t j = 0; j < 9; j++) {
other << comma << puzzle[i][j];
comma[0] = ',';
}
other << endl;
}
return other;
}
vector<string> split(const string& str, const string& delim) {
vector<string> tokens;
string_view str_view = str;
size_t pos = 0;
do {
pos = str_view.find(delim);
tokens.emplace_back(str_view.substr(0, pos));
str_view = str_view.substr(pos + 1, str_view.size());
} while (pos != string_view::npos);
return tokens;
}
bool safe_isspace(char c) {
return std::isspace(static_cast<unsigned char>(c));
}
string strip(const string& str) {
size_t i;
string result = str;
for (i = 0; i < result.size() && safe_isspace(result[i]); i++);
result = result.substr(i, result.length());
for (i = result.length() - 1; i >= 0 && safe_isspace(result[i]); i--);
result = result.substr(0, i + 1);
return result;
}
template <class T>
vector<T> filter_empty(vector<T>&& vec) {
vector<T> result;
for (auto& t : vec) {
if (!t.empty()) {
result.push_back(t);
}
}
return result;
}
optional<puzzle_t> read_puzzle(std::istream& in) {
try {
puzzle_t puzzle;
while (puzzle.size() < 9) {
string line;
std::getline(in, line);
if (line.empty()) {
continue;
}
vector<string> all_cells = split(line, ",");
vector<int> row;
for (auto& str : all_cells) {
str = strip(str);
}
all_cells = filter_empty(std::move(all_cells));
if (all_cells.size() != 9) {
return { };
}
for (auto& str : all_cells) {
int value = std::stoi(str);
if (value < 0 || value > 9) {
return { };
}
row.push_back(value);
}
puzzle.push_back(row);
}
return { puzzle };
} catch (const std::exception& exn) {
return { };
}
}
bool validate_puzzle(const puzzle_t& puzzle ) {
auto validate_group = [](vector<int> group) -> bool {
for (int i = 1; i <= 9; i++) {
if (std::count(group.begin(), group.end(), i) > 1) {
return false;
}
}
return true;
};
auto get_col = [&puzzle](size_t col_n) -> vector<int> {
vector<int> col(9);
for (const auto& row : puzzle) {
col.push_back(row[col_n]);
}
return col;
};
auto get_group = [&puzzle](size_t i, size_t j) -> vector<int> {
vector<int> group;
for (int row = i * 3; row < i * 3 + 3; row++) {
for (int col = j * 3; col < j * 3 + 3; col++) {
group.push_back(puzzle[row][col]);
}
}
return group;
};
return validate_group(puzzle[0]) &&
validate_group(puzzle[1]) &&
validate_group(puzzle[2]) &&
validate_group(puzzle[3]) &&
validate_group(puzzle[4]) &&
validate_group(puzzle[5]) &&
validate_group(puzzle[6]) &&
validate_group(puzzle[7]) &&
validate_group(puzzle[8]) &&
validate_group(get_col(0)) &&
validate_group(get_col(1)) &&
validate_group(get_col(2)) &&
validate_group(get_col(3)) &&
validate_group(get_col(4)) &&
validate_group(get_col(5)) &&
validate_group(get_col(6)) &&
validate_group(get_col(7)) &&
validate_group(get_col(8)) &&
validate_group(get_group(0, 0)) &&
validate_group(get_group(0, 1)) &&
validate_group(get_group(0, 2)) &&
validate_group(get_group(1, 0)) &&
validate_group(get_group(1, 1)) &&
validate_group(get_group(1, 2)) &&
validate_group(get_group(2, 0)) &&
validate_group(get_group(2, 1)) &&
validate_group(get_group(2, 2));
}
optional<coord> get_next_empty_coord(const puzzle_t& puzzle) {
for (size_t i = 0; i < 9; i++) {
for (size_t j = 0; j < 9; j++) {
if (puzzle[i][j] == 0) {
return { std::make_pair(i, j) };
}
}
}
return { };
}
void solve_recursive(puzzle_t& puzzle, vector<puzzle_t>& results) {
auto next_coord_opt = get_next_empty_coord(puzzle);
if (!next_coord_opt) {
results.push_back(puzzle);
return;
}
auto next_coord = next_coord_opt.value();
for (int i = 1; i <= 9; i++) {
puzzle[next_coord.first][next_coord.second] = i;
if (validate_puzzle(puzzle)) {
solve_recursive(puzzle, results);
}
puzzle[next_coord.first][next_coord.second] = 0;
}
}
vector<puzzle_t> solve(puzzle_t&& puzzle) {
vector<puzzle_t> result;
solve_recursive(puzzle, result);
return result;
}
bool solver(const string& in_path, const string& out_path) {
std::ifstream fin(in_path);
std::ofstream fout(out_path);
if (!fin || !fout) {
cout << "IO failure" << endl;
return false;
}
optional<puzzle_t> puzzle = read_puzzle(fin);
if (!puzzle || puzzle.value().size() != 9 || puzzle.value()[0].size() != 9) {
cout << "Invalid puzzle" << endl;
return false;
}
auto solutions = solve(std::move(puzzle.value()));
fout << "Number of solutions found: " << solutions.size() << endl;
fout << endl;
for (const auto& solution : solutions) {
fout << solution << endl;
fout << endl;
}
fin.close();
fout.close();
return true;
}
int main(int argc, char* argv[]) {
std::ios::sync_with_stdio(false);
if (argc == 3) {
string in_path = argv[1];
string out_path = argv[2];
if (solver(in_path, out_path)) {
return EXIT_SUCCESS;
} else {
return EXIT_FAILURE;
}
} else {
cout << "Invalid number of arguments to program." << endl;
return EXIT_FAILURE;
}
}
The program is compiled with clang using the following options.
$ clang++ -std=c++17 -Wall -Werror -O3 -o sudoku_solver sudoku_solver.cpp # Normal
$ clang++ -std=c++17 -Wall -Werror -O0 -g -fsanitize=address -fsanitize=undefined -fno-optimize-sibling-calls -o sudoku_solver sudoku_solver.cpp # for debug with ASAN
The program can be run as follows.
$ sudoku_solver <input_file> <output_file>
Here is an example of the program running. Here is an example input. Note, that whitespace is added for human readability, but the program is expected to ignore empty lines.
0, 0, 5, 0, 8, 0, 3, 0, 2
4, 2, 0, 0, 0, 0, 5, 0, 0
6, 0, 0, 0, 0, 4, 0, 0, 0
0, 0, 0, 3, 0, 0, 9, 0, 0
3, 0, 0, 0, 2, 6, 0, 0, 0
0, 0, 0, 0, 0, 0, 0, 7, 0
0, 0, 0, 0, 7, 0, 6, 8, 0
0, 9, 8, 0, 0, 0, 0, 0, 4
0, 0, 0, 5, 0, 0, 0, 0, 0
Here is the output generated by the program.
Number of solutions found: 1
9, 1, 5, 6, 8, 7, 3, 4, 2
4, 2, 7, 9, 1, 3, 5, 6, 8
6, 8, 3, 2, 5, 4, 1, 9, 7
8, 7, 1, 3, 4, 5, 9, 2, 6
3, 4, 9, 7, 2, 6, 8, 5, 1
2, 5, 6, 8, 9, 1, 4, 7, 3
1, 3, 2, 4, 7, 9, 6, 8, 5
5, 9, 8, 1, 6, 2, 7, 3, 4
7, 6, 4, 5, 3, 8, 2, 1, 9
3 Answers 3
#include <iostream> #include <optional> #include <string> #include <string_view> #include <vector> #include <utility> #include <exception> #include <fstream> #include <cctype> #include <algorithm>
I find it helps if I keep my includes in alphabetical order - that makes it easy to quickly check whether I have already included one I'm about to add.
using std::cout; using std::cin; using std::endl; using std::vector; using std::string; using std::string_view; using std::pair; using std::optional;
I don't like these at file scope (but infinitely better than using
the whole of namespace std
). But that's perhaps just a personal preference.
Despite not using
std::size_t
, it's written unqualified in many places - a likely portability issue.
//Flat vectors/arrays are faster, but for this small a data set it doesn't really matter.
It's not really the size of data that matters, but more the number of accesses. An array of arrays would probably be a better choice, given that the size is fixed:
using puzzle_t = std::array<9, std::array<9, int>>;
Be careful using identifiers ending in _t
- in the global namespace, POSIX reserves those names. Not a problem here, but something to be aware of.
char comma[3] = {'0円', ' ', '0円'}; for(size_t j = 0; j < 9; j++) { other << comma << puzzle[i][j]; comma[0] = ','; }
It took me a while to work out what's happening here. I think it's clearer and more idiomatic to just switch a pointer between two constant string literals:
char const *sep = "";
for (auto cell: puzzle[i]) {
other << sep << cell;
sep = ", ";
}
I think that's easier to follow than manipulating individual characters in an array. You might want to consider using a std::ostream_iterator()
to do the delimiter handling for you (though that doesn't do exactly what we want here).
other << endl;
We don't need to flush output for every line. Just write \n
there instead.
bool safe_isspace(char c) { return std::isspace(static_cast<unsigned char>(c)); }
Yes, that's a good way to reduce clutter, and to reduce the likelihood of forgetting the cast somewhere.
for (i = 0; i < result.size() && safe_isspace(result[i]); i++); result = result.substr(i, result.length()); for (i = result.length() - 1; i >= 0 && safe_isspace(result[i]); i--); result = result.substr(0, i + 1);
Instead of assigning twice to result
, it's better to find the start and end positions and use substr()
just once. If we use iterators rather than indexes, standard algorithms come to our aid:
using RevIt = std::reverse_iterator<std::string::iterator>;
auto a = std::find_if_not(str.begin(), str.end(), safe_isspace);
auto z = std::find_if_not(str.rbegin(), RevIt(a), safe_isspace).base();
return std::string{a,z};
Look, no arithmetic!
Another loop that can be replaced here:
for (auto& t : vec) {
if (!t.empty()) {
result.push_back(t);
}
}
That looks like a candidate for std::copy_if()
.
for (int i = 1; i <= 9; i++) {
if (std::count(group.begin(), group.end(), i) > 1) {
return false;
}
}
Looks inefficient. Instead of searching for each possible value, why not gather what's there into a std::multiset
(or even count into a simple array) and examine the counts in that? That has the advantage (if you put it into the Puzzle class itself) of being reusable for checking whether a solution is complete.
return validate_group(puzzle[0]) && validate_group(puzzle[1]) && validate_group(puzzle[2]) && validate_group(puzzle[3]) && validate_group(puzzle[4]) && validate_group(puzzle[5]) && validate_group(puzzle[6]) && validate_group(puzzle[7]) && validate_group(puzzle[8]) && validate_group(get_col(0)) && validate_group(get_col(1)) && validate_group(get_col(2)) && validate_group(get_col(3)) && validate_group(get_col(4)) && validate_group(get_col(5)) && validate_group(get_col(6)) && validate_group(get_col(7)) && validate_group(get_col(8)) && validate_group(get_group(0, 0)) && validate_group(get_group(0, 1)) && validate_group(get_group(0, 2)) && validate_group(get_group(1, 0)) && validate_group(get_group(1, 1)) && validate_group(get_group(1, 2)) && validate_group(get_group(2, 0)) && validate_group(get_group(2, 1)) && validate_group(get_group(2, 2));
Although quite clear, it might be better transformed into a few loops (possible internal to calls to std::all_of()
). That trades one clarity for another - we would then not need to check whether we'd missed one out.
cout << "Invalid number of arguments to program." << endl;
return EXIT_FAILURE;
Error messages should go to the standard error stream std::cerr
, not std::cout
.
-
\$\begingroup\$ Hmm, alphabetical includes... never thought about that! I always do it by increasing order of dependencies/complexity. \$\endgroup\$Cody Gray– Cody Gray2020年11月18日 21:31:36 +00:00Commented Nov 18, 2020 at 21:31
-
2\$\begingroup\$ +1 for alphy sorted includes/imports. It makes it really easy to spot a missing import. This is especially helpful in code reviews. \$\endgroup\$Sean Perry– Sean Perry2020年11月19日 18:48:45 +00:00Commented Nov 19, 2020 at 18:48
-
2\$\begingroup\$ also, +1 for calling out
endl
as meaning "newline AND flush" and not simply a constant for '\n'. I have solved many a performance issue by replacing endl with '\n'. \$\endgroup\$Sean Perry– Sean Perry2020年11月19日 18:52:49 +00:00Commented Nov 19, 2020 at 18:52 -
\$\begingroup\$ Note that identifiers ending in _t are reserved for use by the Standard Library, so I recommend changing that name. Say what? Any identifier that's not actually reserved for the implementation (
_Ugly
,two__scores
) won't be a macro or "magic" and can be defined in your own scope that's not going to conflict with what's innamespace std
. You might be thinking of Posix, which would only apply to the global namespace in C++. \$\endgroup\$JDługosz– JDługosz2021年11月15日 15:52:56 +00:00Commented Nov 15, 2021 at 15:52 -
1\$\begingroup\$ @TobySpeight that's a good reason to put a
namespace
around the whole thing, even for simple one-file programs like this. \$\endgroup\$JDługosz– JDługosz2021年11月15日 16:10:42 +00:00Commented Nov 15, 2021 at 16:10
Everything that Toby said, plus:
In operator<<
, the output stream should not be called other
. The variable name other
should only be used in comparison functions between this
and other
. It should rather be called out
or os
.
When I run the program, I get an error message that doesn't help me:
$ ./cmake-build-debug/sudoku.exe sudoku.txt
Invalid number of arguments to program.
The common pattern is to print this instead:
usage: sudoku.exe <input file> <output file>
"Don't tell me what I did wrong, tell me how to do it correctly."
What is the purpose of the commas in the input and output? I don't see any. In my own sudoku solver, I rather used this format:
---3-8---
--2---31-
35-2-1-68
6---1-2-7
--3-9-4--
1-4-8---9
84-6-5-23
-31---5--
---1-9---
Of course, you can choose any other character for unknown cells. Or allow spaces. But spaces + commas is a little too much to me.
Just like Toby, I had a hard time reading the code around char comma[3]
. Seeing that a variable called comma didn't actually contain a comma could only mean one thing: That you are a liar, or at least your code is. You should rather write trustworthy code instead, where the variable names match their content and purpose. This one should have been called separator
or sep
instead, just like Toby said. I usually use sep
since that name is long enough in a function with less than 10 lines of code.
In the function get_group
, you compare signed with unsigned numbers. You should use consistent data types, which in this case means std::size_t
instead of int
. In the compiler warnings, you forgot -Wextra
for GCC and -Weverything
for Clang.
In validate_group
you construct a vector with 9 elements each time. This looks like unnecessary memory allocation to me. I'd rather use std::array<int, 9>
instead, or maybe even write it without any memory allocation at all. The way you did it in validate_puzzle
looks easy to understand though, therefore chances are high that after trying to do it in another way, I'd just throw that attempt away and return to your code instead.
I'm a bit confused when I see a loop like for (int i = 1; i <= 9; i++)
since I expect loops with the variable i
to start at 0 and use <
instead of <=
. If you had renamed the variable to candidate
or cand
for short, I wouldn't be as surprised.
I'd rename validate_puzzle
to is_solvable
. On second thought, the correct name would rather be is_not_obviously_unsolvable
, but that quickly gets ugly. So the appropriate name might be may_be_solvable
.
cout << "Invalid puzzle" << endl;
This is really unfriendly to the user. Again, don't tell me that I did something wrong (without even telling me what this something is), tell me how and where to fix it. Line number and column number are the least that your program must provide.
In main
, the argc != 3
is an error condition. Therefore it should be written with a simple if
, not an if-else
, like this:
int main(int argc, char* argv[]) {
std::ios::sync_with_stdio(false);
if (argc != 3) {
cout << "Invalid number of arguments to program." << endl;
return EXIT_FAILURE;
}
string in_path = argv[1];
string out_path = argv[2];
if (!solver(in_path, out_path))
return EXIT_FAILURE;
return EXIT_SUCCESS;
}
This way, the main control flow all happens at the same indentation level of the code.
-
1\$\begingroup\$ "The variable name other should only be used in comparison functions between this and other" this is a specific programming style rather than a general rule. \$\endgroup\$anon– anon2020年11月20日 11:05:58 +00:00Commented Nov 20, 2020 at 11:05
-
\$\begingroup\$ It's a general rule. The word other only makes sense if there is also a this. It's not a programming rule but a language rule. Language rules apply to code reviews as well. \$\endgroup\$Roland Illig– Roland Illig2020年11月20日 11:18:19 +00:00Commented Nov 20, 2020 at 11:18
-
1\$\begingroup\$ "Other" makes sense in any context in which there is a clear comparator, it does not need to be
this
as you stated. Of course, I agree like any variable name it should be chosen to make sense in context but your specification is overly narrow. \$\endgroup\$anon– anon2020年11月20日 12:26:46 +00:00Commented Nov 20, 2020 at 12:26 -
\$\begingroup\$ @JackAidley Could you drop me a note when you come across a piece of code that uses the variable name
other
in a meaningful way, without also having a variable calledself
orthis
orit
at the same time? I'd expect it to be difficult to find such code, that's why I'd like to know. \$\endgroup\$Roland Illig– Roland Illig2020年11月20日 22:19:20 +00:00Commented Nov 20, 2020 at 22:19
You should separate loading, solving, and saving the solution into different usable functions. Consider that you could have unit tests that have a board embedded directly into the source code, and you would want to call a different function to ingest that (rather than giving it a file name). Your tests might perturb an existing board to generate different cases, and just call solve
without re-loading the whole thing from scratch. You can compare the output against the known correct value, and not save it to a file at all.
It's not just to consider unit testing, but such separation makes it easy to maintain. What if you add a GUI or other file formats? That should be possible and easy without having to touch the solve
function.
Consider using string_view
. In particular, this makes it easy to trim spaces without recopying the input to an output string. If you do all the processing of the individual values on a line while the whole line is still in memory, you can use string_view
end to end from split
through to the final value.
BTW, I use a split
that returns an array<string_view,N>
so it does not have to allocate memory as with a vector
(or any of the string
elements) at all! Since you know how many items are on one line (it's an error if that's not right) you could use that method as well.
Donald Knuth wrote a Sudoku solver called Dancing Links that is very efficient. If I wanted to do this, I would implement his algorithm using modern C++, along with the desired input and output routines.
std::
size_t
. ;) \$\endgroup\$9
in a lot of places? \$\endgroup\$