Let's suppose you have a complete binary tree (i.e. each internal node has exactly two non empty descendants). Each node contains a nonzero integer. You are given the task of encoding and decoding the tree into/from a list of integers.
The tree is stored internally something like:
struct node {
int data;
struct node *left, *right;
};
And you have to implement two functions:
int *encode(struct node *root);
struct node *decode(int *array);
It is up to you how you encode and decode.
Points for:
- minimum encoding length
- complexity (ideally linear in number of nodes)
- originality
No points for source code length and you are not restricted to C.
Tree example:
5
/ \
3 2
/ \
2 1
/ \
9 9
8 Answers 8
This Haskell program encodes a tree of n nodes in n Integers. The trick is that it encodes the node's data doubled, and then uses the lower-order bit to indicate if this is a leaf node, or an interior node.
Technically, the Parser
monad here is over-kill, since there is only one parser created, decoder
and I could have put the parser chaining logic directly there. But this way the decoder is very clear, and the Parser
despite it's small size, is a reasonable simple parsing framework.
import Control.Monad (ap)
data Tree = Leaf Integer | Node Integer Tree Tree
deriving (Eq, Show)
encode :: Tree -> [Integer]
encode (Leaf n) = [n*2]
encode (Node n t u) = (n*2+1) : encode t ++ encode u
decode :: [Integer] -> Maybe Tree
decode = fullyParse decoder
where
decoder :: Parser Integer Tree
decoder = do
i <- next
let n = i `div` 2
if even i
then return (Leaf n)
else return (Node n) `ap` decoder `ap` decoder
-- A simple Parsing Monad
data Parser a b = P { runParser :: [a] -> Maybe (b, [a]) }
instance Monad (Parser a) where
return a = P ( \ts -> Just (a, ts) )
p >>= q = P ( \ts -> runParser p ts >>= (\(v,ts') -> runParser (q v) ts') )
fail _ = P ( const Nothing )
next :: Parser a a
next = P n
where n (t:ts) = Just (t,ts)
n _ = Nothing
fullyParse :: Parser a b -> [a] -> Maybe b
fullyParse p ts = runParser p ts >>= consumedResult
where
consumedResult (v,[]) = Just v
consumedResult _ = Nothing
-- Example
main :: IO ()
main = do
putStrLn $ "example: " ++ show ex
putStrLn $ "encoding: " ++ show encEx
putStrLn $ "decoding: " ++ show decEx
putStrLn $ "worked? " ++ show worked
where
ex = Node 5
(Leaf 3)
(Node 2
(Node 2
(Leaf 9)
(Leaf 9)
)
(Leaf 1)
)
encEx = encode ex
decEx = decode encEx
worked = maybe False (ex ==) decEx
Running this gets you:
> runhaskell TreeEncoding.hs
example: Node 5 (Leaf 3) (Node 2 (Node 2 (Leaf 9) (Leaf 9)) (Leaf 1))
encoding: [11,6,5,5,18,18,2]
decoding: Just (Node 5 (Leaf 3) (Node 2 (Node 2 (Leaf 9) (Leaf 9)) (Leaf 1)))
worked? True
In C
#include <stdlib.h>
#include <stdio.h>
struct Node;
typedef struct Node Node;
struct Node
{
int data;
Node* left;
Node* right;
};
/* Private Functions */
static int* encodeNode(Node* tree, int* store);
static Node* decodeNode(int** store);
/* Public Functions */
Node* newNode(int data,Node* left,Node* right);
void deleteTree(Node* tree);
int countNodesTree(Node* tree);
int* encode(Node *tree);
Node* decode(int* store);
void printTree(Node* tree);
Node* newNode(int data,Node* left,Node* right)
{
Node* result = (Node*)malloc(sizeof(Node));
result->data = data;
result->left = left;
result->right = right;
return result;
}
void deleteTree(Node* tree)
{
if (tree == NULL)
{ return;
}
deleteTree(tree->left);
deleteTree(tree->right);
free(tree);
}
int countNodesTree(Node* tree)
{
if (tree == NULL)
{ return 0;
}
return countNodesTree(tree->left)
+ countNodesTree(tree->right)
+ 1;
}
void printTree(Node* tree)
{
if (tree == NULL)
{
fprintf(stdout, "- ");
}
else
{
fprintf(stdout, "%d ", tree->data);
printTree(tree->left);
printTree(tree->right);
}
};
The encode:
int* encode(Node *tree)
{
int nodeCount = countNodesTree(tree);
int* result = (int*)malloc(sizeof(int) * (nodeCount * 2 + 1));
// Put the node count in the first element.
// This makes it easy to also serialize this object for transport
// i.e. you can put it in a file or a stream (socket) and easily recover it.
result[0] = nodeCount;
encodeNode(tree, result + 1);
return result;
}
int* encodeNode(Node* tree, int* store)
{
if (tree != NULL)
{
store[0] = tree->data;
/*
* Slight overkill. for this question.
* But works and makes future enhancement easy
*/
store[1] = (tree->left == NULL ? 0 : 1)
+ (tree->right == NULL ? 0 : 2);
store += 2;
store = encodeNode(tree->left, store);
store = encodeNode(tree->right, store);
}
return store;
}
The decode:
Node* decode(int* store)
{
if (store == NULL)
{ fprintf(stderr, "Bad Input terminating: encode() always return non NULL\n");
exit(1);
}
if (store[0] == 0)
{
return NULL;
}
store++;
return decodeNode(&store);
}
Node* decodeNode(int** store)
{
int value = (*store)[0];
int flag = (*store)[1];
(*store) += 2;
Node* left = flag & 1 ? decodeNode(store) : NULL;
Node* right = flag & 2 ? decodeNode(store) : NULL;
return newNode(value, left, right);
}
Main:
int main()
{
Node* t = newNode(5,
newNode(3, NULL, NULL),
newNode(2,
newNode(2,
newNode(9, NULL, NULL),
newNode(9, NULL, NULL)
),
newNode(1, NULL, NULL)
)
);
printTree(t);
fprintf(stdout,"\n");
int* e = encode(t);
Node* d = decode(e);
printTree(d);
fprintf(stdout,"\n");
free(e);
deleteTree(d);
deleteTree(t);
}
Note. Each node is encoded as two integers (plus one int for the count of nodes).
So the supplied tree encodes like this:
7, 5, 3, 3, 0, 2, 3, 2, 3, 9, 0, 9, 0 1, 0
^ ^
^ ^ Node 1
^
Count
In Ruby, with the same encoding than @MtnViewMark :
class Node
def initialize(data, left = nil, right = nil)
@data, @left, @right = data, left, right
end
def encode
"%d %s %s" % [@data<<1|1, @left.encode, @right.encode]
end
class << self
def decode(str)
_decode(str.split.map &:to_i)
end
private
def _decode(a)
n = a.shift
if n & 1 == 1
Node.new(n>>1, _decode(a), _decode(a))
else
Leaf.new(n>>1)
end
end
end
end
class Leaf < Node
def encode
(@data<<1).to_s
end
end
tree=Node.decode("11 6 5 5 18 18 2")
print tree.encode
The cost is one integer per node (data << 1 | has_childs
) :
11 6 5 5 18 18 2
-
\$\begingroup\$ Wow - that looks lean and elegant. However, it doesn't take an Array of int, does it? \$\endgroup\$user unknown– user unknown2012年03月17日 08:11:07 +00:00Commented Mar 17, 2012 at 8:11
~ 1.03 N
It seems all the answers so far take least 2*N * 32-bits to store. (Except the solutions in languages which allows integer values longer than 32 bits, like the Haskell and Ruby solutions - but those are still going to take extra bytes to encode whenever the data is greater than 16K.)
Here is a solution which only takes N+ceiling(N/32)+1 ints of storage. This approaches 1.03125 N for large N, and is under 1.1 N for all N greater than 20.
The idea is to store an extra bit for each node where 1 is "hasChildren". These bits are packed into N/32 words up front.
int* encodeHelper(Node* n, int* code, int* pos, int* flag)
{
int hasKids = (n->left!=0);
code[*flag/32]|=hasKids<<(*flag&31);
*flag+=1;
if (hasKids) bencodeHelper(n->left, code, pos, flag);
code[*pos]=n->data;
*pos+=1;
if (hasKids) bencodeHelper(n->right, code, pos, flag);
return code;
}
int* encode(Node* h, int* sizeOut)
{
int nnodes=countNodes(h);
int nflags = (int)ceil(nnodes/32.0);
int pos=nflags+1;
int flag=32;
int* out;
*sizeOut = 1+nnodes+nflags;
out = calloc(*sizeOut, sizeof(int));
if (!h) return out;
out[0]=nflags+1; //store start of data
return encodeHelper(h,out,&pos,&flag);
}
Node* decodeHelper(int* code, int* pos, int* flag)
{
Node*n = calloc(1, sizeof(Node));
int hasKids = code[*flag/32]>>(*flag&31)&1;
*flag+=1;
if (hasKids) n->left = bdecodeHelper(code, pos, flag);
n->data = code[*pos];
*pos+=1;
if (hasKids) n->right = bdecodeHelper(code, pos, flag);
return n;
}
Node* decode(int* code)
{
int flag=32;
int pos=code[0];
if (!pos) return NULL;
return decodeHelper(code, &pos, &flag);
}
Given a binary tree with n
nodes, this encodes it in a list of 2n + 1
integers. Both the encoding and decoding algorithms have O(n)
complexity.
I use the 0 integer as a sentinel marker when encoding, indicating when I unfold the recursion. Then when I'm decoding, I put the tree nodes I'm creating on a stack (of sorts) and use the 0s in the list to keep track of where to add the next node. I haven't tried, but I'm pretty sure the decoding would break if the tree was not complete.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Prototypes
struct BTnode;
struct BTnode * bt_add_left(struct BTnode * node, int data);
struct BTnode * bt_add_right(struct BTnode * node, int data);
int bt_depth(struct BTnode * tree);
int bt_encode_preorder(int * list, struct BTnode * tree, int index);
struct BTnode * bt_node_create(int data);
int bt_node_delete(struct BTnode * node);
void bt_print_preorder(struct BTnode * tree);
int * encode(struct BTnode * tree);
struct BTnode * decode(int * list);
// Binary tree node
struct BTnode
{
int data;
struct BTnode *left, *right;
};
// Add node to this node's left
struct BTnode * bt_add_left(struct BTnode * node, int data)
{
struct BTnode * newnode = bt_node_create(data);
node->left = newnode;
return newnode;
}
// Add node to this node's right
struct BTnode * bt_add_right(struct BTnode * node, int data)
{
struct BTnode * newnode = bt_node_create(data);
node->right = newnode;
return newnode;
}
// Determine depth of the tree
int bt_depth(struct BTnode * tree)
{
int depth;
int leftdepth = 0;
int rightdepth = 0;
if( tree == NULL ) return 0;
if( tree->left != NULL )
leftdepth = bt_depth(tree->left);
if( tree->right != NULL )
rightdepth = bt_depth(tree->right);
depth = leftdepth;
if(rightdepth > leftdepth)
depth = rightdepth;
return depth + 1;
}
// Recursively add node values to integer list, using 0 as an unfolding sentinel
int bt_encode_preorder(int * list, struct BTnode * tree, int index)
{
list[ index++ ] = tree->data;
// This assumes the tree is complete (i.e., if the current node does not have
// a left child, then it does not have a right child either)
if( tree->left != NULL )
{
index = bt_encode_preorder(list, tree->left, index);
index = bt_encode_preorder(list, tree->right, index);
}
// Add sentinel
list[ index++ ] = 0;
return index;
}
// Allocate memory for a node
struct BTnode * bt_node_create(int data)
{
struct BTnode * newnode = (struct BTnode *) malloc(sizeof(struct BTnode));
newnode->left = NULL;
newnode->right = NULL;
newnode->data = data;
return newnode;
}
// Free node memory
int bt_node_delete(struct BTnode * node)
{
int data;
if(node == NULL)
return 0;
data = node->data;
if(node->left != NULL)
bt_node_delete(node->left);
if(node->right != NULL)
bt_node_delete(node->right);
free(node);
return data;
}
// Print all values from the tree in pre-order
void bt_print_preorder(struct BTnode * tree)
{
printf("%d ", tree->data);
if(tree->left != NULL)
bt_print_preorder(tree->left);
if(tree->right != NULL)
bt_print_preorder(tree->right);
}
// Decode binary tree structure from a list of integers
struct BTnode * decode(int * list)
{
struct BTnode * tree;
struct BTnode * nodestack[ list[0] ];
int i,j;
// Handle trivial case
if( list == NULL ) return NULL;
tree = bt_node_create( list[1] );
nodestack[ 1 ] = tree;
j = 1;
for(i = 2; i < list[0]; i++)
{
if( list[i] == 0 )
{
//printf("popping\n");
j--;
}
else
{
if( nodestack[j]->left == NULL )
{
//printf("Adding %d to left of %d\n", list[i], nodestack[j]->data);
nodestack[ j+1 ] = bt_add_left(nodestack[j], list[i]);
j++;
}
else
{
//printf("Adding %d to right of %d\n", list[i], nodestack[j]->data);
nodestack[ j+1 ] = bt_add_right(nodestack[j], list[i]);
j++;
}
}
}
return tree;
}
// Encode binary tree structure as a list of integers
int * encode(struct BTnode * tree)
{
int maxnodes, depth, length;
int * list;
int j;
// Handle trivial case
if(tree == NULL) return NULL;
// Calculate maximum number of nodes in the tree from the tree depth
maxnodes = 1;
depth = bt_depth(tree);
for(j = 0; j < depth; j++)
{
maxnodes += pow(2, j);
}
// Allocate memory for the list; we need two ints for each value plus the
// first value in the list to indicate length
list = (int *) malloc( ((maxnodes * 2)+1) * sizeof(int));
length = bt_encode_preorder(list, tree, 1);
list[ 0 ] = length;
return list;
}
int main()
{
struct BTnode * tree;
struct BTnode * newtree;
int * list;
int i;
/* Provided example
5
/ \
3 2
/ \
2 1
/ \
9 9
*/
tree = bt_node_create(5);
bt_add_left(tree, 3);
struct BTnode * temp = bt_add_right(tree, 2);
bt_add_right(temp, 1);
temp = bt_add_left(temp, 2);
bt_add_left(temp, 9);
bt_add_right(temp, 9);
printf("T (traversed in pre-order): ");
bt_print_preorder(tree);
printf("\n");
list = encode(tree);
printf("T (encoded as integer list): ");
for(i = 1; i < list[0]; i++)
printf("%d ", list[i]);
printf("\n");
newtree = decode(list);
printf("T' (decoded from int list): ");
bt_print_preorder(newtree);
printf("\n\n");
// Free memory
bt_node_delete(tree);
bt_node_delete(newtree);
free(list);
return 0;
}
Saved this as encode.c
then compiled and executed. This program uses the example tree you provided, and I've tested it on a few others successfully.
$ gcc -Wall -lm -o encode encode.c
$ ./encode
T (traversed in pre-order): 5 3 2 2 9 9 1
T (encoded as integer list): 5 3 0 2 2 9 0 9 0 0 1 0 0 0
T' (decoded from int list): 5 3 2 2 9 9 1
-
\$\begingroup\$ It is pretty much what I had in mind :). \$\endgroup\$Alexandru– Alexandru2011年02月02日 21:10:12 +00:00Commented Feb 2, 2011 at 21:10
-
\$\begingroup\$ won't this fail decoding if the data contains a 0? \$\endgroup\$AShelly– AShelly2011年02月04日 03:03:17 +00:00Commented Feb 4, 2011 at 3:03
-
\$\begingroup\$ @AShelly He explicitly said that 0 would not be included in the tree. If it were, then yes this would fail. \$\endgroup\$Daniel Standage– Daniel Standage2011年02月04日 03:26:41 +00:00Commented Feb 4, 2011 at 3:26
My code encodes the tree in a preorder traversal, each leaf in two ints (its data followed by 0) and each internal node in one int (its data followed by its left child, then its right). For a complete binary tree (as you define it) with n nodes, n must be odd, and there are (n+1)/2 leaves and (n-1)/2 internal nodes, so that's 3n/2+1/2 integers for the encoding.
warning: untested, just typed it in.
struct node {
int data;
struct node *left, *right;
};
void encodeInternal(struct node *root, vector<int> *buf) {
buf->push_back(root->data);
if (root->left) {
encodeInternal(root->left, buf);
encodeInternal(root->right, buf);
} else {
buf->push_back(0);
}
}
int *encode(struct node *root) {
vector<int> buf;
encodeInternal(root, &buf);
return &buf[0];
}
struct decodeResult {
int encoded_size;
struct node *n;
}
struct decodeResult decodeInternal(int *array) {
struct node *n = (struct node*)malloc(sizeof(struct node));
n->data = array[0];
if (array[1] == 0) {
n->left = n->right = NULL;
return (decodeResult){2, n};
} else {
decodeResult L = decodeInternal(array + 1);
decodeResult R = decodeInternal(array + 1 + L.encoded_size);
n->left = L.n;
n->right = R.n;
return (decodeResult){1 + L.encoded_size + R.encoded_size, n};
}
}
struct node *decode(int *array) {
return decodeInternal(array).n;
}
Scala:
trait Node {
def encode (): Array[Int]
}
case object Node {
def decode (a: Array[Int]): InnerNode = {
if (a.length == 1) InnerNode (a(0)) else {
val r = InnerNode (a(1))
val l = decode (a.tail.tail)
InnerNode (a(0), l, r)
}
}
}
case object Leaf extends Node {
def encode (): Array[Int] = Array.empty
}
case class InnerNode (val data: Int, var l: Node=Leaf, var r: Node=Leaf) extends Node {
def encode (): Array[Int] = Array (data) ++ r.encode () ++ l.encode ()
}
object BinTreeTest extends App {
println (Node.decode (Array (1, 2, 3, 4, 5)).encode.mkString (", "))
}
(削除)
This is an approach which uses deprecated syntax, but compiles without error in Scala 2.9.1 . It generates a Tree and decodes every encoded Tree to the same Array as used for encoding. Maybe I get somehow rid of the deprecated warnings today.
(削除ここまで)
Wow - that was a simple one. First idea worked immediately.
Here's my try. It stores the tree in an array of size 2**depth+1. It
uses a[0]
to hold the size, and a[size]
to hold the index of the
first "empty node" it encounters in a depth-first traversal. (An
empty node is the place where a child would be stored if the parent
had one). Each empty node holds the index of the next empty node that
will be encountered.
This scheme avoids reserving bits to mark the presense children, so each node can use the full integer range. It also allows unbalanced trees - a parent can have only one child.
output:
empty tree: [0]
head node only: [2,5,0]
example tree: [16,5,3,2,5,14,2,1,0,0, 0,0,9,9,15,0,4];
The Encoder:
//utility
int findDepth(Node* n) {
int l = 0 ,r = 0;
if (n) {
l = 1 + findDepth(n->left);
r = 1 + findDepth(n->right);
}
return ( l > r ) ? l : r;
}
//Encode Function
int* encodeTree(Node* head) {
int* out;
int depth = findDepth(head);
int size = depth>0;
while (depth--) size*=2;
out = calloc(size+1,sizeof(int));
out[0]=size;
encodeNode(head, out,1, out+size);
return out;
}
void encodeNode(Node* n, int* a, int idx, int* pEmpty) {
if (n) {
a[idx]=n->data;
encodeNode(n->left,a,idx*2,pEmpty);
encodeNode(n->right,a,idx*2+1,pEmpty);
}
else if (idx<a[0]) {
*pEmpty = idx;
pEmpty = a+idx;
}
}
The Decoder:
//Decode Function
Node* decodeArray(int* a) {
return (a[0]) ? decodeNode(a,1,a+a[0]) : NULL;
}
Node* decodeNode(int* a, int idx, int* pEmpty) {
Node* n = NULL;
if (idx== *pEmpty)
*pEmpty=a[idx];
else {
n = calloc(1,sizeof(Node));
n->data = a[idx];
if (idx*2<a[0]) {
n->left = decodeNode(a, idx*2, pEmpty);
n->right = decodeNode(a, idx*2+1, pEmpty);
}
}
return n;
}
(thanks @daniel sobral for fixing the formatting)
int *
is a black box for user. \$\endgroup\$