Skip to main content
Code Review

Return to Question

replaced http://codereview.stackexchange.com/ with https://codereview.stackexchange.com/
Source Link

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree.

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree.

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree.

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

added 1 character in body; edited tags; edited title
Source Link
Jamal
  • 35.2k
  • 13
  • 134
  • 238

Removing duplicate words from an unsorted list

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree .

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

Is this an efficient way of removing duplicate words from an unsorted list?

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

Removing duplicate words from an unsorted list

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree.

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

Source Link
2013Asker
  • 2k
  • 3
  • 18
  • 23

Is this an efficient way of removing duplicate words from an unsorted list?

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

lang-c

AltStyle によって変換されたページ (->オリジナル) /