You are given a binary tree in which each node contains an integer value (which might be positive or negative). Design an algorithm to count the number of paths that sum to a given value. The path does not need to start or end at the root or a leaf, but it must go downwards (traveling only from parent nodes to child nodes).
The question asks for count only but I went one step ahead and found the paths too.
The following is my attempt at this code. Please let me know how I can improve it. Also I think the time complexity of the code is \$O(n^2)\$ because of the following reasoning:
findPathsWithSumHelper
is called n times for each node in the tree.- This function goes through all the nodes below the
root
to find paths. - The total work done is \$n + 2 * \frac{n}{2} + 4 * \frac{n}{4} + \dots n * \frac{n}{n}\$ which equals \$O(n^2)\$.
- The work done in copying the arrays using
emplace_back
is \$O(n)\$ at each node in the tree and adds a constant to \$O(n^2)\$.
#include <iostream>
#include <vector>
using namespace std;
struct Tree{
struct Tree * left, *right;
int data;
Tree(int val): data(val), left(NULL), right(NULL){};
};
//insert nodes randomly in a BT
void insertNodeBT(Tree * &root, int val){
Tree * newNode = new Tree(val);
if(root==NULL){
root = newNode;
return;
}
int r = rand()%2;
if(r)
insertNodeBT(root->left, val);
else
insertNodeBT(root->right, val);
}
vector<vector<int>> findPathsWithSumHelper(Tree * root, int target, int prev, vector<int> path){
vector<vector<int>> res;
if(root == NULL){
return res;
}
path.push_back(root->data);
if(prev + root->data == target){
res.push_back(path);
}
vector<vector<int>> temp;
temp = findPathsWithSumHelper(root->left, target, prev+root->data, path);
for(auto i : temp){
res.emplace_back(move(i));
}
temp = findPathsWithSumHelper(root->right, target, prev+root->data, path);
for(auto i : temp){
res.emplace_back(move(i));
}
return res;
}
vector<vector<int>> findPathsWithSum(Tree * root, int target){
vector<vector<int>> res;
if(root == NULL){
return res;
}
vector<int> path;
res = findPathsWithSumHelper(root, target, 0, path);
vector<vector<int>> temp;
temp = findPathsWithSum(root->left, target);
for(auto i : temp){
res.emplace_back(move(i));
}
temp = findPathsWithSum(root->right, target);
for(auto i : temp){
res.emplace_back(move(i));
}
return res;
}
int main() {
Tree *root = NULL;
insertNodeBT(root, 4);
insertNodeBT(root, 6);
insertNodeBT(root, 2);
insertNodeBT(root, 7);
insertNodeBT(root, 5);
insertNodeBT(root, 1);
insertNodeBT(root, 3);
insertNodeBT(root, -1);
vector<vector<int>> res = findPathsWithSum(root, 6);
for (auto i : res){
for (auto j : i) {
cout << j << " ";
}
cout<<endl;
}
return 0;
}
Following is a better solution but it only returns the count and not the actual paths. I feel it is \$O(nlogn)\$ but not sure.
int findPathsWithSum(Tree * root, int target, unordered_map<int, int> m){
if(root==NULL)
return 0;
int res=0;
for(auto &it : m){
if(it.first+root->data == target)
res++;
m[it.first+root->data]++;
m.erase(it.first);
}
if(root->data == target)
res++;
m[root->data]++;
res += findPathsWithSum(root->left, target, m);
res += findPathsWithSum(root->right, target, m);
return res;
}
1 Answer 1
Simplification: remove temp vectors
Currently, your code spends a lot of effort merging partial return vectors with the final return vector. You can eliminate the use of these temp vectors if you just pass in the final return vector as an argument to each recursive call. Here is what your functions would look like with this change, which not only looks simpler but also speeds things up quite a bit:
void findPathsWithSumHelper(Tree * root, int target, int prev,
vector<int> path, vector<vector<int>> &res)
{
if (root == NULL)
return;
path.push_back(root->data);
prev += root->data;
if (prev == target)
res.push_back(path);
findPathsWithSumHelper(root->left, target, prev, path, res);
findPathsWithSumHelper(root->right, target, prev, path, res);
}
void findPathsWithSum1(Tree * root, int target, vector<vector<int>> &res)
{
if(root == NULL)
return;
vector<int> path;
findPathsWithSumHelper(root, target, 0, path, res);
findPathsWithSum1(root->left, target, res);
findPathsWithSum1(root->right, target, res);
}
vector<vector<int>> findPathsWithSum(Tree * root, int target)
{
vector<vector<int>> res;
findPathsWithSum1(root, target, res);
return res;
}
I tested with a complete binary tree of depth 16 (64K nodes) of all value 0, searching for value 0. This returns 983041 paths (every path in the entire tree). With the original code, this took 4 seconds. With the simplified code, it took 0.78 seconds.
Simplification: Reduce recursions
Right now, your program recurses in two ways. The first way recurses to the child, adding the child to an existing path. The second way starts a new path at the current node and then recurses to the children.
You can reduce the amount of recursion by always adding to the existing path, but at each level, you must try each point in the path as a possible head of a path. I rewrote your functions to do that and found that this change speeds up the program even further:
static void findPathsHelper(const Tree *node, int target, int depth,
vector<int> &path, int pathSum, vector<vector<int>> &ret)
{
if (node == NULL)
return;
path.push_back(node->data);
pathSum += node->data;
depth++;
int sum = pathSum;
// Try each point in the path as a new head of a path.
for (int i=0; i<depth; i++) {
if (sum == target) {
vector<int> subPath(depth - i);
for (int j=i; j < depth; j++) {
subPath[j-i] = path[j];
}
ret.push_back(move(subPath));
}
sum -= path[i];
}
findPathsHelper(node->left, target, depth, path, pathSum, ret);
findPathsHelper(node->right, target, depth, path, pathSum, ret);
path.pop_back();
}
vector<vector<int>> findPathsWithSum(const Tree *root, int target)
{
vector<int> path;
vector<vector<int>> ret;
findPathsHelper(root, target, 0, path, 0, ret);
return ret;
}
Test program and timings
Here is the code I used to test the various implementations. I commented out the part that printed the results in order to reduce time spent on IO. I mostly used complete binary trees with all nodes having value 0 and searching for value 0 in order to generate the worst case scenario, although I did test other cases including using randomized values (code not shown).
Tree *createCompleteBT(int depth, int value)
{
if (depth == 0)
return NULL;
Tree *newNode = new Tree(value);
newNode->left = createCompleteBT(depth - 1, value);
newNode->right = createCompleteBT(depth - 1, value);
return newNode;
}
// Run with: depth value find
int main(int argc, char *argv[])
{
int depth = 16;
int value = 0;
int find = 0;
if (argc > 1)
depth = atoi(argv[1]);
if (argc > 2)
value = atoi(argv[2]);
if (argc > 3)
find = atoi(argv[3]);
Tree *root = createCompleteBT(depth, value);
vector<vector<int>> res = findPathsWithSum(root, find);
cout << res.size() << endl;
#if 0
for (auto i : res){
for (auto j : i) {
cout << j << " ";
}
cout<<endl;
}
#endif
return 0;
}
Here are the timing results for depth 16 trees:
Original program : 4.06 seconds
Removed temp vectors: 0.78 seconds
Reduced recursion : 0.22 seconds
By the way, for a balanced tree, I believe the complexity is \$O(n \log^2 n)\$. You need to visit each node once (\$n\$), and then for each node search a path from the root (\$\log n\$), and if the path matches the target, you need to add that path (\$\log n\$ again). For a tree that is completely linear, the time would be \$O(n^3)\$. For an arbitrary tree, the complexity should be \$O(n*d^2)\$ where \$n\$ is the number of nodes and \$d\$ is the depth of the tree.
insertNodeBST()
function works, it always sorts the values. But for the question, will that always be the case? Is it possible to end up with a binary tree that has, say 5 with 278 to the left or -43 to right? \$\endgroup\$insertNodeBST()
twice. Is that intentional? I don't think that will compile. \$\endgroup\$