7
\$\begingroup\$

For class I have to write a program that reads (plaintext) numbers from a file, and inserts them into an AVL tree stored in another (binary) file. I cannot have more than about 10 nodes in memory at one time. I've written the code and it works fine, but I feel it's running slower than it could. I can currently process a file with 100,000 numbers in 34 seconds. How can I make my code faster?

Here's a link to the full assignment details.

#include <fstream>
#include <iostream>
#include <ctime>
const int C_NULL = -1;
struct node
{
 node ( ) : value ( C_NULL ), left ( C_NULL ), right ( C_NULL ),
 height ( C_NULL ), parent ( C_NULL ), location ( C_NULL ) { };
 node ( int v, int l, int r, int h, int p, int loc )
 : value ( v ), left ( l ), right ( r ),
 height ( h ), parent ( p ), location ( loc ) { };
 node ( std::fstream & file, const int loc ) { read ( file, loc ); };
 int value, left, right, height, parent, location;
 void read ( std::fstream & file, const int loc );
 void write ( std::fstream & file );
 void print ( )
 {
 std::cout.width ( 10 );
 std::cout << value;
 std::cout.width ( 10 );
 std::cout << left;
 std::cout.width ( 10 );
 std::cout << right;
 std::cout.width ( 10 );
 std::cout << parent;
 std::cout.width ( 10 );
 std::cout << height;
 std::cout.width ( 10 );
 std::cout << location << '\n';
 }
};
const int C_NODE_SIZE = sizeof ( node );
void node::read ( std::fstream & file, const int loc )
{
 if ( loc != C_NULL )
 {
 file.seekg ( loc * C_NODE_SIZE );
 file.read ( (char*) this, C_NODE_SIZE );
 }
 else * this = node ( );
}
void node::write ( std::fstream & file )
{
 if ( location != C_NULL )
 {
 file.seekp ( location * C_NODE_SIZE );
 file.write ( (char*) this, C_NODE_SIZE );
 }
}
node seedTree ( std::ifstream & input, std::fstream & output );
bool insertValue ( std::fstream & file, const int value, const int numNodes, node & root, node & newNode );
void UpdateHeightAndCheckBalance ( std::fstream & file, node & parent, node & child, const int height );
void rebalanceLeft ( std::fstream & file, node & left, node & parent, const node & right );
void rebalanceRight ( std::fstream & file, const node & left, node & parent, node & right );
void fixRootLocation ( std::fstream & file, node & root );
void getSelectValues ( std::fstream & file, int & smallest, int & biggest );
void printNodes ( std::fstream & file );
int main ( int argc, char * argv[] )
{
 if ( argc < 3 ) return 0;
 std::ifstream input ( argv[1] );
 std::fstream output ( argv[2], std::fstream::in | std::fstream::out |
 std::fstream::trunc | std::fstream::binary );
 if ( !input || !output ) return 0; // file open fail
 // insert first two numbers to get things started
 node root = seedTree ( input, output ), child;
 if ( root.height > 0 )
 {
 int value, nodes = root.height + 1;
 while ( input >> value )
 {
 if ( insertValue ( output, value, nodes++, root, child ) )
 UpdateHeightAndCheckBalance ( output, root, child, 2 );
 fixRootLocation ( output, root );
 }
 }
 int smallest, biggest;
 getSelectValues ( output, smallest, biggest );
 std::cout << "Height: " << root.height
 << "\nRoot Value: " << root.value
 << "\nSmallest Leaf Value: " << smallest
 << "\nBiggest Leaf Value: " << biggest
 << "\nTime: " << clock ( ) / CLOCKS_PER_SEC;
 printNodes ( output );
 std::cin.ignore ( );
}
node seedTree ( std::ifstream & input, std::fstream & output )
{
 node root ( C_NULL, C_NULL, C_NULL, 0, C_NULL, 0 );
 input >> root.value;
 if ( input ) // if there is at least one number in the file
 {
 node child ( C_NULL, C_NULL, C_NULL, 0, 0, 1 );
 input >> child.value;
 if ( input ) // if there is a second number
 {
 root.height = 1;
 if ( root.value < child.value ) // second = right child of first
 root.right = 1;
 else // second = left child of first
 root.left = 1;
 child.write ( output );
 }
 root.write ( output );
 }
 return root;
}
bool insertValue ( std::fstream & file, const int value, const int numNodes, node & root, node & newNode )
{
 newNode = node ( value, C_NULL, C_NULL, 0, C_NULL, numNodes );
 bool balanceIssue = true;
 while ( true )
 {
 if ( root.value < newNode.value ) // move right
 {
 if ( root.right == C_NULL ) // write new node
 {
 if ( root.height == 0 ) root.height = 1;
 else balanceIssue = false; // if the parent height isn't updated, the tree will not become unbalanced
 root.right = numNodes;
 root.write ( file );
 newNode.parent = root.location;
 newNode.write ( file );
 break;
 }
 else root.read ( file, root.right ); // move down tree
 }
 else // move left
 {
 if ( root.left == C_NULL ) // write new node
 {
 if ( root.height == 0 ) root.height = 1;
 else balanceIssue = false; // if the parent height isn't updated, the tree will not become unbalanced
 root.left = numNodes;
 root.write ( file );
 newNode.parent = root.location;
 newNode.write ( file );
 break;
 }
 else root.read ( file, root.left ); // move down tree
 }
 }
 if ( balanceIssue )
 {
 newNode = root;
 root.read ( file, root.parent );
 }
 return balanceIssue;
}
void UpdateHeightAndCheckBalance ( std::fstream & file, node & parent, node & child, const int height )
{
 if ( parent.height < height )
 {
 parent.height = height; // update parent height
 parent.write ( file ); // write parent to file
 }
 else return;
 { // check balance
 node left, right;
 if ( parent.left == child.location )
 {
 left = child;
 right.read ( file, parent.right );
 }
 else
 {
 left.read ( file, parent.left );
 right = child;
 }
 const int balance = left.height - right.height;
 if ( balance > 1 ) // left heavy
 return rebalanceLeft ( file, left, parent, right );
 else if ( balance < -1 ) // right heavy
 return rebalanceRight ( file, left, parent, right );
 }
 if ( parent.parent == C_NULL ) return;
 child = parent;
 parent.read ( file, parent.parent );
 UpdateHeightAndCheckBalance ( file, parent, child, height + 1 );
}
void rebalanceLeft ( std::fstream & file, node & left, node & parent, const node & right )
{
 node grandparent ( file, parent.parent );
 const node leftleft ( file, left.left );
 node leftright ( file, left.right );
 if ( node ( file, left.left ).height < leftright.height ) // left right case
 {
 if ( grandparent.left == parent.location )
 grandparent.left = leftright.location;
 else grandparent.right = leftright.location;
 parent.parent = leftright.location;
 parent.left = leftright.right;
 left.parent = leftright.location;
 left.right = leftright.left;
 leftright.parent = grandparent.location;
 leftright.left = left.location;
 leftright.right = parent.location;
 { // set 'left' height and left.right.parent
 node newleftright ( file, left.right );
 newleftright.parent = left.location;
 newleftright.write ( file );
 if ( leftleft.height > newleftright.height ) left.height = leftleft.height + 1;
 else left.height = newleftright.height + 1;
 }
 { // set 'parent' height and parent.left.parent
 node newrightleft ( file, parent.left );
 newrightleft.parent = parent.location;
 newrightleft.write ( file );
 if ( newrightleft.height > right.height ) parent.height = newrightleft.height + 1;
 else parent.height = right.height + 1;
 }
 if ( left.height < parent.height ) leftright.height = parent.height + 1;
 else leftright.height = left.height + 1;
 }
 else // left left case
 {
 if ( grandparent.left == parent.location )
 grandparent.left = left.location;
 else grandparent.right = left.location;
 parent.parent = left.location;
 parent.left = left.right;
 left.parent = grandparent.location;
 left.right = parent.left;
 leftright.parent = parent.location;
 // set 'parent' height
 if ( leftright.height > right.height ) parent.height = leftright.height + 1;
 else parent.height = right.height + 1;
 // set 'left' height
 if ( leftleft.height > parent.height ) left.height = leftleft.height + 1;
 else left.height = parent.height + 1;
 }
 grandparent.write ( file );
 parent.write ( file );
 left.write ( file );
 leftright.write ( file );
}
void rebalanceRight ( std::fstream & file, const node & left, node & parent, node & right )
{
 node grandparent ( file, parent.parent );
 node rightleft ( file, right.left );
 const node rightright ( file, right.right );
 if ( rightleft.height > node ( file, right.right ).height ) // right left case
 {
 if ( grandparent.left == parent.location )
 grandparent.left = rightleft.location;
 else grandparent.right = rightleft.location;
 parent.parent = rightleft.location;
 parent.right = rightleft.left;
 right.parent = rightleft.location;
 right.left = rightleft.right;
 rightleft.parent = grandparent.location;
 rightleft.left = parent.location;
 rightleft.right = right.location;
 { // set 'parent' height and parent.right.parent
 node newleftright ( file, parent.right );
 newleftright.parent = parent.location;
 newleftright.write ( file );
 if ( left.height > newleftright.height ) parent.height = left.height + 1;
 else parent.height = newleftright.height + 1;
 }
 { // set 'right' height and right.left.parent
 node newrightleft ( file, right.left );
 newrightleft.parent = right.location;
 newrightleft.write ( file );
 if ( newrightleft.height > rightright.height ) right.height = newrightleft.height + 1;
 else right.height = rightright.height + 1;
 }
 if ( parent.height < right.height ) rightleft.height = right.height + 1;
 else rightleft.height = parent.height + 1;
 }
 else // right right case
 {
 if ( grandparent.left == parent.location )
 grandparent.left = right.location;
 else grandparent.right = right.location;
 parent.parent = right.location;
 parent.right = right.left;
 right.parent = grandparent.location;
 right.left = parent.location;
 rightleft.parent = parent.location;
 // set 'parent' height
 if ( left.height > rightleft.height ) parent.height = left.height + 1;
 else parent.height = rightleft.height + 1;
 // set 'right' height
 if ( parent.height > rightright.height ) right.height = parent.height + 1;
 else right.height = rightright.height + 1;
 }
 grandparent.write ( file );
 parent.write ( file );
 right.write ( file );
 rightleft.write ( file );
}
void fixRootLocation ( std::fstream & file, node & root )
{
 root.read ( file, 0 );
 if ( root.parent != C_NULL )
 {
 node newRoot ( file, root.parent );
 root.location = newRoot.location;
 newRoot.location = 0;
 root.parent = 0;
 if ( newRoot.left == 0 )
 {
 newRoot.left = root.location;
 node temp ( file, newRoot.right );
 temp.parent = 0;
 temp.write ( file );
 }
 else
 {
 newRoot.right = root.location;
 node temp ( file, newRoot.left );
 temp.parent = 0;
 temp.write ( file );
 }
 root.write ( file );
 newRoot.write ( file );
 if ( root.left != C_NULL )
 {
 node temp ( file, root.left );
 temp.parent = root.location;
 temp.write ( file );
 }
 if ( root.right != C_NULL )
 {
 node temp ( file, root.right );
 temp.parent = root.location;
 temp.write ( file );
 }
 root = newRoot;
 }
}
void getSelectValues ( std::fstream & file, int & smallest, int & biggest )
{
 // set starting values
 smallest = INT_MAX;
 biggest = INT_MIN;
 file.seekg ( 0, std::fstream::end ); // seek to end of file
 int fSize = int ( file.tellg ( ) ); // get file size
 if ( fSize > C_NODE_SIZE ) // check that file has more than one node
 {
 node temp ( file, 1 ); // start at first node after root
 while ( file.tellg ( ) < fSize )
 {
 temp.read ( file, int ( file.tellg ( ) ) / C_NODE_SIZE );
 if ( temp.height == 0 )
 {
 if ( temp.value < smallest ) smallest = temp.value;
 if ( temp.value > biggest ) biggest = temp.value;
 }
 }
 }
 else
 {
 node root ( file, 0 );
 smallest = root.value;
 biggest = root.value;
 }
}
void printNodes ( std::fstream & file )
{
 // print header
 std::cout << '\n';
 std::cout.width ( 10 );
 std::cout << "Value";
 std::cout.width ( 10 );
 std::cout << "Left";
 std::cout.width ( 10 );
 std::cout << "Right";
 std::cout.width ( 10 );
 std::cout << "Parent";
 std::cout.width ( 10 );
 std::cout << "Height";
 std::cout.width ( 10 );
 std::cout << "Location" << '\n';
 file.seekg ( 0, std::ios_base::end ); // seek to end of file
 int fSize = int ( file.tellg ( ) ); // get file size
 file.seekg ( 0 ); // seek to start of file
 // print nodes
 while ( file.tellg ( ) < fSize ) node ( file, int ( file.tellg ( ) ) / C_NODE_SIZE ).print ( );
}
greybeard
7,4013 gold badges21 silver badges55 bronze badges
asked Mar 1, 2015 at 6:48
\$\endgroup\$

2 Answers 2

8
\$\begingroup\$

This code appears to have a lot of excessive whitespace. I do understand that putting a space between parentheses is a type of style, but you also add blank lines in many unnecessary places.

The biggest example are your function prototypes:

node seedTree ( std::ifstream & input, std::fstream & output );
bool insertValue ( std::fstream & file, const int value, const int numNodes, node & root, node & newNode );
void UpdateHeightAndCheckBalance ( std::fstream & file, node & parent, node & child, const int height );
void rebalanceLeft ( std::fstream & file, node & left, node & parent, const node & right );
void rebalanceRight ( std::fstream & file, const node & left, node & parent, node & right );
void fixRootLocation ( std::fstream & file, node & root );
void getSelectValues ( std::fstream & file, int & smallest, int & biggest );
void printNodes ( std::fstream & file );

You really don't need a blank line after each one. It just adds to the length of the code, but not in a good way. If some pieces of code are similar in functionality, leave them together (such as here). Otherwise, add a blank line between them. Adding whitespace to "isolate" code doesn't always improve readability and can even make the code more frustrating to read.

I'm also not sure why you're doing this in a single line:

while ( file.tellg ( ) < fSize ) node ( file, int ( file.tellg ( ) ) / C_NODE_SIZE ).print ( );

You've added many blank lines everywhere else, but here, you've done just the opposite. It would take more unnecessary time to understand this line.

Here's how it should look:

while (file.tellg() < fSize)
{
 node(file, int(file.tellg()) / C_NODE_SIZE).print();
}

Note that I've added curly braces here, which is good practice.

Speaking of curly braces, there are areas where you don't need them, such as here:

{ // set 'left' height and left.right.parent
 node newleftright ( file, left.right );
 newleftright.parent = left.location;
 newleftright.write ( file );
 if ( leftleft.height > newleftright.height ) left.height = leftleft.height + 1;
 else left.height = newleftright.height + 1;
}

I assume you're using this as a way of grouping or isolating a block of code. This may not even help you at all and could be an indication that some kind of refactoring is needed. Otherwise, it would be okay to just remove these curly braces altogether. Try not to clutter you code with such things that may only hurt readability and won't make it any functionally better.

answered May 27, 2015 at 1:37
\$\endgroup\$
0
3
\$\begingroup\$

main:

  • If the program fails to run correctly, we should exit with an error code, instead of returning 0. It might be nice to print a message saying what went wrong (to std::cerr). We can also avoid magic numbers and use EXIT_FAILURE and EXIT_SUCCESS from the <cstdlib> header.

  • node root = seedTree(input, output), child; Please don't declare variables like this. It's much cleaner to put one variable declaration per line.

  • We should try to minimize variable scope. It looks like the child variable can be declared inside the inner while loop.

  • int value, nodes = root.height + 1; Again, one variable declaration per line please.

seedTree:

  • Is there a reason we need a separate function for the first two nodes? I feel like insertValue should probably be able to handle these as well.

  • We can avoid one level of extra indent and reduce the complexity by returning early if we fail to read a root value: if (!input) return root;

insertValue:

  • The if branches inside the loop are almost identical. We should be able to factor them into a function.

  • It might be simpler to should call the rebalancing function from inside insertValue.

rebalancing:

  • You should be able to abstract out two functions: leftRotate and rightRotate that can be used to handle all the cases.

performance:

  • Since you're constructing the tree on disk (disk I/O is slow), and rebalancing with every inserted node (because it's an AVL) it's going to be slow. I wouldn't worry about it.
answered Feb 5, 2020 at 11:18
\$\endgroup\$

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.