I am solving another problem in leetcode.
Given a set, generate all the subsets of the set.
Input:
[1,2,3]
Output:[ [3],[1],[2],[1,2,3],[1,3],[2,3],[1,2],[]]
The solution is accepted but I would like to improve on my C coding style. I have created a node_t
type to encapsulate the data related each subset. There could be less verbose ways to do this. Two popular methods of solving this problem seem to be:
- backtracking
- a subset equals to binary number ≤ n where n is element count.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node_s {
int *array;
int size;
struct node_s *next;
} node_t;
node_t* alloc_node()
{
return (node_t*)malloc(sizeof(node_t));
}
void free_node(node_t *node)
{
free(node);
}
#define SIZE 3
void print_one(int *one)
{
printf("%d ", *one);
}
void print_1d_array(int *array, int size)
{
for(int i = 0; i < size; i++) {
print_one(&array[i]);
}
printf("\n");
}
void print_2d_array(int **array, int *col_sizes, int size)
{
for (int i = 0; i < size; i++) {
print_1d_array(array[i], col_sizes[i]);
}
}
int** create_solution_array(node_t *head, int ans_size, int **column_sizes)
{
int **ans = (int**)malloc(sizeof(int*)*ans_size);
node_t *iter;
iter = head->next;
int idx = 0;
while(iter) {
ans[idx] = iter->array;
(*column_sizes)[idx] = iter->size;
idx++;
iter = iter->next;
}
return ans;
}
void gen_powerset(int *num, int num_size, int idx, int cur_size, int *cur, node_t **iter_node, int *ans_size) {
if (idx == num_size) {
node_t *new_node = alloc_node();
if (cur_size) {
new_node->array = (int *) malloc(sizeof(int) * cur_size);
new_node->size = cur_size ;
memcpy(new_node->array, cur, cur_size*sizeof(int));
}
(*iter_node)->next = new_node;
*iter_node = new_node;
(*ans_size)++;
return;
}
gen_powerset(num, num_size, idx + 1, cur_size, cur, iter_node, ans_size);
*(cur + cur_size) = num[idx];
gen_powerset(num, num_size, idx + 1, cur_size + 1, cur, iter_node, ans_size);
}
int** subsets(int* nums, int numsSize, int** columnSizes, int* returnSize) {
node_t *head = alloc_node();
node_t *iter = head;
int *cur = (int*)malloc(sizeof(int)*numsSize);
gen_powerset(nums, numsSize, 0, 0, cur, &iter, returnSize);
return create_solution_array(head, *returnSize, columnSizes);
}
int main ()
{
int *nums = (int*)malloc(sizeof(int)*SIZE);
for (int i = 0; i < SIZE; i++) {
nums[i] = i+1;
}
int *col_sizes = (int*)malloc(sizeof(int)*SIZE);
int ans_size;
int ** ans = subsets(nums, SIZE, &col_sizes, &ans_size);
print_2d_array(ans, col_sizes, ans_size);
return 0;
}
-
\$\begingroup\$ @Toby Speight Added problem description. \$\endgroup\$wispymisty– wispymisty2018年09月07日 13:25:17 +00:00Commented Sep 7, 2018 at 13:25
2 Answers 2
I'll just work through this from the top:
#include <stdio.h> #include <stdlib.h> #include <string.h>
Looks good; we need these.
typedef struct node_s { int *array; int size; struct node_s *next; } node_t;
It's usual to use the same structure tag as the typename we're going to use: struct node_t
.
If size
is the number of elements in the array, a more natural type might be size_t
.
node_t* alloc_node() { return (node_t*)malloc(sizeof(node_t)); }
All this does is to allocate the memory. Consider initialising the members to sane values, too:
node_t* alloc_node()
{
node_t *n = malloc(sizeof *n);
if (n) {
n->array = NULL;
n->size = 0;
n->next = NULL;
}
return n;
}
We're writing C, so it's not advised to cast the result of malloc()
. But we can't assume it succeeds!
It's a good habit to use the assigned-to variable in the argument of sizeof
, rather than its type; that saves us the mental work of having to check that the type actually matches. That's why I've used sizeof *n
in place of sizeof (node_t)
here; you'll see just following an example where we're further from the declaration and this idiom has more benefit.
It's possibly a good idea to create the array at the same time:
node_t* alloc_node(size_t array_count)
{
node_t *n = malloc(sizeof *n);
if (n) {
n->array = malloc(sizeof *n->array * array_count);
n->size = n->array ? array_count : 0;
n->next = NULL;
}
return n;
}
void free_node(node_t *node) { free(node); }
Assuming that the node owns its array, we need to free that, too:
void free_node(node_t *node)
{
if (node) {
free(node->array);
}
free(node);
}
Does the node own its next
as well? If so, we need to free that, or perhaps return it.
#define SIZE 3
Why is this in the middle here? It should be right at the beginning (or perhaps just before main()
).
void print_one(int *one) { printf("%d ", *one); }
I'm not convinced there's much benefit to encapsulating this as a function.
void print_1d_array(int *array, int size) { for(int i = 0; i < size; i++) { print_one(&array[i]); } printf("\n"); }
We should use size_t
for the count here:
void print_1d_array(int *array, size_t size)
{
for (size_t i = 0; i < size; ++i) {
printf("%d ", array[i]);
}
printf("\n");
}
void print_2d_array(int **array, int *col_sizes, int size) { for (int i = 0; i < size; i++) { print_1d_array(array[i], col_sizes[i]); } }
Another size_t
(okay, I'll stop mentioning these now).
int** create_solution_array(node_t *head, int ans_size, int **column_sizes) { int **ans = (int**)malloc(sizeof(int*)*ans_size); node_t *iter; iter = head->next; int idx = 0; while(iter) { ans[idx] = iter->array; (*column_sizes)[idx] = iter->size; idx++; iter = iter->next; } return ans; }
Aside from points from earlier, that while
loop has an initialisation and an advancement, so it's really a for
loop:
int** create_solution_array(node_t *head, size_t ans_size, size_t **column_sizes)
{
int **ans = malloc(sizeof *ans * ans_size);
if (ans) {
int idx = 0;
for (node_t *iter = head->next; iter; iter = iter->next) {
ans[idx] = iter->array;
(*column_sizes)[idx] = iter->size;
++idx;
}
}
return ans;
}
void gen_powerset(int *num, int num_size, int idx, int cur_size, int *cur, node_t **iter_node, int *ans_size) { if (idx == num_size) { node_t *new_node = alloc_node(); if (cur_size) { new_node->array = (int *) malloc(sizeof(int) * cur_size); new_node->size = cur_size ; memcpy(new_node->array, cur, cur_size*sizeof(int)); } (*iter_node)->next = new_node; *iter_node = new_node; (*ans_size)++; return; } gen_powerset(num, num_size, idx + 1, cur_size, cur, iter_node, ans_size); *(cur + cur_size) = num[idx]; gen_powerset(num, num_size, idx + 1, cur_size + 1, cur, iter_node, ans_size); }
*(cur + cur_size)
is normally written cur[cur_size]
.
We need to check whether our allocations succeeded:
/* true if successful, false on any error (e.g. out of memory) */
bool gen_powerset(int *num, size_t num_size, size_t idx,
size_t cur_size, int *cur, node_t **iter_node,
size_t *ans_size)
{
if (idx == num_size) {
node_t *new_node = alloc_node(cur_size);
if (!new_node || new_node->size != cur_size) {
return false;
}
if (cur_size) {
memcpy(new_node->array, cur, sizeof *cur * cur_size);
}
(*iter_node)->next = new_node;
*iter_node = new_node;
++*ans_size;
return true;
}
cur[cur_size] = num[idx];
return gen_powerset(num, num_size, idx + 1, cur_size, cur, iter_node, ans_size)
&& gen_powerset(num, num_size, idx + 1, cur_size + 1, cur, iter_node, ans_size);
}
int** subsets(int* nums, int numsSize, int** columnSizes, int* returnSize) { node_t *head = alloc_node(); node_t *iter = head; int *cur = (int*)malloc(sizeof(int)*numsSize); gen_powerset(nums, numsSize, 0, 0, cur, &iter, returnSize); return create_solution_array(head, *returnSize, columnSizes); }
Again, check that malloc()
succeeded, and check that gen_powerset()
succeeded.
int main () { int *nums = (int*)malloc(sizeof(int)*SIZE); for (int i = 0; i < SIZE; i++) { nums[i] = i+1; } int *col_sizes = (int*)malloc(sizeof(int)*SIZE); int ans_size; int ** ans = subsets(nums, SIZE, &col_sizes, &ans_size); print_2d_array(ans, col_sizes, ans_size); return 0; }
Having carefully created free_node()
, we completely forgot to use it. Valgrind reveals a substantial leak due to this. We need to free the other allocated memory, too.
-
\$\begingroup\$ Thanks @Toby Speight, I am rewriting the solutions with your pointers in mind and comparing how close. I have come across most of the points that you have mentioned but, in practice, they didn't come out in force. I think it will be help to slow down a bit and savor every line I write. \$\endgroup\$wispymisty– wispymisty2018年09月07日 18:58:52 +00:00Commented Sep 7, 2018 at 18:58
I subscribe to everything @TobySpeight said.
I also believe that you could make your code a lot clearer (and your coding style better) by obeying a few important rules:
When in doubt, choose the simplest algorithm
Binary representation of numbers between 0 and the size of the powerset are the simplest tool to compute the subsets of the powerset; 0 means the absence, 1 the presence of an element of the initial set:
// {1, 2, 3}
// 000 -> {}
// 001 -> {3}
// 010 -> {2}
// ...
// 111 -> {1,2,3}
Since the size of the powerset is 2^N
, where N
is the cardinality of the set, and given a function taking the original set and a number to return the subset characterized by this number, a simple array and a simple for
loop are enough to achieve our objective:
for (int i = 0; i < pow(2, set.size); ++i)
powerset[i] = subset(set, i);
That is quite simpler than linked nodes etc.
When in doubt, bundle together what goes together well
Outside of special cases, like the C-string, a pointer without a size won't be of much use to represent a range. So keep the pointer and the size together, rather than having arrays of pointers and arrays of sizes side by side, as in your subsets
function:
typedef struct {
int* items;
size_t size;
} Set;
typedef struct {
Set* subsets;
size_t size;
} Powerset;
Whether you want to fill them, print them, free them, you'll need to have both informations, because you can't deduce one from the other.
A working example:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
typedef struct {
int* items;
size_t size;
} Set;
typedef struct {
Set* subsets;
size_t size;
} Powerset;
int is_bit_on(int n, int pos) {
return (n >> pos) & 1;
}
int set_bits(int n) {
// any set bits counting function will do fine
// this one is gcc only
return __builtin_popcount(n);
// but you could have to write your own;
// see https://stackoverflow.com/questions/109023/how-to-count-the-number-of-set-bits-in-a-32-bit-integer
}
Set subset(Set set, int step) {
int* subset = malloc(sizeof(int) * set_bits(step));
if (subset == NULL) {
Set failure = { .items = NULL, .size = 0 };
return failure;
}
int elem_n = 0;
for (; set.size > 0; --set.size) {
if (is_bit_on(step, set.size - 1))
subset[elem_n++] = set.items[set.size-1];
}
Set ret = { .items = subset, .size = elem_n };
return ret;
}
Powerset powerset(Set set) {
size_t powerset_size = pow(2, set.size);
Powerset powerset = {
.subsets = malloc(sizeof(Set) * powerset_size),
.size = powerset_size
};
Powerset failure = { .subsets = NULL, .size = 0 };
if (powerset.subsets == NULL) return failure;
for (size_t i = 0; i < powerset_size; ++i) {
powerset.subsets[i] = subset(set, i);
if (powerset.subsets[i].items == NULL) {
for (size_t j = 0; j < i; ++j) {
free(powerset.subsets[j].items);
}
return failure;
}
}
return powerset;
}
void free_powerset(Powerset powerset) {
for (size_t i = 0; i < powerset.size; ++i) {
free(powerset.subsets[i].items);
}
free(powerset.subsets);
}
void print_array(Set array) {
for (size_t i = 0; i < array.size; ++i)
printf("%d, ", array.items[i]);
printf("\n");
}
int main(void) {
int items[] = {1, 2, 3};
Set set = { .items = items, .size = 3};
Powerset test = powerset(set);
if (test.subsets == NULL) {
printf("Bad allocation");
return 1;
}
for (size_t i = 0; i < test.size; ++i)
print_array(test.subsets[i]);
free_powerset(test);
return 0;
}
-
\$\begingroup\$ Good solution, but it’s worth noting that it’s limited to sets no larger than the size of an int. \$\endgroup\$Edward– Edward2018年09月07日 16:05:44 +00:00Commented Sep 7, 2018 at 16:05
-
\$\begingroup\$ @Edward Indeed, but this is an acceptable limitation since a set of 32 elements already has a power set of 2^32 elements, i.e more than 4 billions elements, which would take a good time to print... \$\endgroup\$papagaga– papagaga2018年09月07日 16:09:55 +00:00Commented Sep 7, 2018 at 16:09
-
\$\begingroup\$ Thanks papagaga. I have less experience using builtin functions. I think it is better to use those as they are fully blessed by relevant compilers ? I also like the fact that you are using inline struct initialization. That is really convenient and descriptive. Need to have an arsenal of "when-in-doubt" techniques ingrained in me. Yesterday, I was thinking of how to do inline initialization of function pointer or action based on a type field in type hierarchy. Just want to add some form of reflections with minimal effort. Thanks again for your answer. I will also subscribe to @Toby Speight. \$\endgroup\$wispymisty– wispymisty2018年09月07日 19:07:47 +00:00Commented Sep 7, 2018 at 19:07
-
\$\begingroup\$ If you wanted to deal with sets containing more than 15 elements (where did you get 32 from?), then you could use one of the fixed-size unsigned types from
<stdint.h>
, of course. \$\endgroup\$Toby Speight– Toby Speight2018年09月10日 07:46:28 +00:00Commented Sep 10, 2018 at 7:46
Explore related questions
See similar questions with these tags.