Skip to main content
Code Review

Return to Question

replaced http://codereview.stackexchange.com/ with https://codereview.stackexchange.com/
Source Link

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree.

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree.

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree.

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

added 1 character in body; edited tags; edited title
Source Link
Jamal
  • 35.2k
  • 13
  • 134
  • 238

Removing duplicate words from an unsorted list

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree .

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

Is this an efficient way of removing duplicate words from an unsorted list?

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

Removing duplicate words from an unsorted list

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree.

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

Source Link
2013Asker
  • 2k
  • 3
  • 18
  • 23

Is this an efficient way of removing duplicate words from an unsorted list?

I was using brute force to remove duplicate words since the lists were really small. But I want a solution that won't become too slow if the input grows.

This function creates a binary tree and inserts all words appearing on the list, then collects the unique words without sorting. Duplicate words are handled during insertion. For the tree, I'm using about the same code from Unbalanced binary search tree

#include "bst.h"
#include <strings.h>
#include <stdlib.h>
#define LIST_TERMINATOR 1
// Cursor into final_list; advanced by insert() as the traversal visits nodes.
static size_t i = 0;
// Output array filled in by the traversal callback below.
static char **final_list;

// Traversal callback: append one visited word to the output array.
static void insert(void *word)
{
 final_list[i] = word;
 ++i;
}
/*
 * compare_words: adapter giving the BST a case-insensitive ordering.
 * A real wrapper replaces the original cast of strcasecmp's pointer,
 * which is undefined behavior when the BST calls through the cast type.
 */
static int compare_words(const void *a, const void *b)
{
 return strcasecmp(a, b);
}

/*
 * unique_words: collect the distinct words of a NULL-terminated array.
 *
 * words: NULL-terminated array of strings; not modified.
 * Returns a newly malloc'd, NULL-terminated array of pointers that alias
 * the input strings (caller frees the array only, not the strings), or
 * NULL on allocation failure.
 *
 * NOTE(review): uses the file-scope `i`/`final_list` state, so it is
 * neither reentrant nor thread-safe.
 */
char **unique_words(const char **words)
{
 //Binary search tree holding one node per distinct word
 BST unique;
 bst_init(&unique, compare_words);
 //Insert every word; the tree drops duplicates during insertion
 while(*words != NULL){
  if(bst_insert(&unique, (void *)*words) == BST_NO_MEMORY){
   bst_free(&unique);
   return NULL;
  }
  ++words;
 }
 //Result array, sized with room for the NULL terminator
 final_list = malloc(sizeof(char *) * (unique.node_count + LIST_TERMINATOR));
 if(final_list == NULL){
  bst_free(&unique);
  return NULL;
 }
 //Reset the shared cursor BEFORE iterating (not only after success):
 //a previously failed call must not leave a stale offset that would
 //make this call write past the end of the fresh final_list.
 i = 0;
 //Collect words without sorting, so if the list is merged with another
 //and passed again, the tree won't degenerate into a linked list
 if(bst_iterate_top_down(&unique, insert) == BST_NO_MEMORY){
  free(final_list);
  bst_free(&unique);
  i = 0; //iteration may have advanced the cursor before failing
  return NULL;
 }
 final_list[i] = NULL;
 bst_free(&unique);
 //Clear state for the next call
 i = 0;
 return final_list;
}

Would sorting the input then removing duplicates be faster?

lang-c

AltStyle によって変換されたページ (->オリジナル) /