Code directory structure:
./Computing$ ls -LR
.:
fileIO list record file.txt mergeSort.c mergeSort.exe type.h
./record:
record.c record.h
./fileIO:
file.h fileReading.c
./list:
arrayImpl.c linkedListImpl.c list.h
Compilation procedure:
./Computing$
./Computing$ gcc -Wall -Werror -I. -DARRAY ./list/*.c ./record/*.c ./fileIO/*.c mergeSort.c
-o mergeSort.exe
file.txt
Age,LastName,FirstName
50,B,A
30,A,B
20,X,D
10,F,A
10,A,B
90,V,E
60,N,M
type.h
/*********** type.h **************/
#include<stdbool.h>
#include<stddef.h>
#include<stdlib.h>
#include<stdio.h>
record.h
#include"list/list.h"
/*************Usage- start ****************/
typedef struct Record Record;
void copyData(List *, char []);
const int getAge(const Record *);
const char* getName(const Record *);
void printRecord(List *, int);
/***************Usage - end ******************/
record.c
#include"record/record.h"
#define DELIMITER "," //Bad style
/************Repre-start **************/
typedef struct Record{
int age;
char *firstName;
char *lastName;
}Record;
/************Repre-start **************/
/******************Usage - start ***********/
void copyData(List *records, char buf[]){
Record *record = malloc(sizeof(Record));
char *data = strtok(buf, DELIMITER);
record->age = atoi(data);
data = strtok(NULL, DELIMITER);
record->firstName = strdup(data);
data = strtok(NULL, DELIMITER);
record->lastName = strdup(data);
listInsertItem(records, record);
}
void printRecord(List *records, int index){
const Record *record = listGetItem(records, index);
fprintf(stdout, "%d,%s,%s\n", record->age,
record->firstName,
record->lastName);
}
const int getAge(const Record *record){
return record->age;
}
const char* getName(const Record *record){
return record->firstName;
}
/******************Usage - end ***********/
file.h
#include"list/list.h"
#ifndef FILE_H
#define FILE_H
#include"record/record.h"
/***************Usage -start **********/
typedef struct DBCache DBCache;
void readData(FILE *, DBCache *, char[]);
void readHeader(FILE *, DBCache *, char[]);
DBCache *initCache(FILE *);
List * getRecords(DBCache*);
void printRecords(DBCache*);
/***************Usage - end ***********/
#endif
fileReading.c
#include"fileIO/file.h"
#define MALLOC_FAILURE "malloc() failed"
#define FILE_HANDLE_FAILURE "Unable to open file"
#define DELIMITER ","
#define MAX_RECORD_SIZE 256
/***************Repr-start ***************/
typedef struct DBCache{
List *records;
char *header;
size_t fileSize;
char *fileDataStream;
}DBCache;
/*************** Repr-end ******************/
static void checkHandleFailure(void *, char *);
static void noNewLine(char[]);
/**************Usage-start **************/
List * getRecords(DBCache *cache){
return cache->records;
}
void printRecords(DBCache *cache){
fprintf(stdout, "%s\n", cache->header);
fprintf(stdout, "------------------------------\n");
for(int index = 0; index < listGetSize(getRecords(cache)); index++){
print(getRecords(cache), index);
}
fprintf(stdout, "Records#: %d\n", listGetSize(getRecords(cache)));
}
void readData(FILE *pFile, DBCache *cache, char buf[]){
if(pFile == NULL){
checkHandleFailure(pFile, FILE_HANDLE_FAILURE);
}
for(int index =0;fgets(buf, MAX_RECORD_SIZE, pFile) != NULL;index++){
copyData(getRecords(cache), buf);
}
}
void readHeader(FILE *pFile, DBCache* cache, char buf[]){
if(pFile == NULL){
checkHandleFailure(pFile, FILE_HANDLE_FAILURE);
}
fseek(pFile, 0, SEEK_SET);
if(fgets(buf, MAX_RECORD_SIZE, pFile) == NULL){
fprintf(stderr, "EOF encountered\n");
exit(EXIT_FAILURE);
}
noNewLine(buf);
cache->header = malloc(strlen(buf) + 1);
checkHandleFailure(cache->header, MALLOC_FAILURE);
strcpy(cache->header, buf);
memset(buf, 0, MAX_RECORD_SIZE);
}
DBCache *initCache(FILE *pFile){
DBCache *cache = malloc(1*sizeof(DBCache));
checkHandleFailure(cache, MALLOC_FAILURE);
cache->records = createList();
cache->header = NULL;
fseek(pFile, 0, SEEK_END);
cache->fileSize = ftell(pFile) + 1;
cache->fileDataStream = malloc(cache->fileSize * sizeof(char));
fseek(pFile, 0, SEEK_SET);
fgets(cache->fileDataStream, cache->fileSize, pFile);
return cache;
}
/***************Usage-end ***************/
/******************Helper -start ***********/
static void noNewLine(char buf[]){
int len = strlen(buf);
if(len > 0){
if(buf[len-1] == '\n'){
buf[len-1] = '0円';
}else{
fprintf(stderr, "Corrupted buffer\n");
exit(EXIT_FAILURE);
}
}
}
static void checkHandleFailure(void *ptr, char *msg){
if(ptr == NULL){
fprintf(stderr, "%s\n", msg);
exit(EXIT_FAILURE);
}
}
/******************Helper - end ***********/
list.h
/************ list.h ************/
/*
List is an ordered collection of homogenuous type elements(unique or duplicate).
List is not designed to have collection of heterogenuous type elements
All elements in a List are related.
List is mutable
Each element has a position.
If an element is deleted, then still the remaining elements sit in new order.
Array implementation of List
Linked implementation of List
*/
#ifndef LIST_H /* Header guard */
#define LIST_H
#include"type.h"
/***************** Usage-start ************/
#if defined(ARRAY) || (LINKED_LIST)
/* To ensure Encapsulation(i.e., maintain invariants of array & linked list)
So, Just provide the `List` declartion, to avoid mis-use of `List`
*/
typedef struct List List;
#else
#error "Wrong list implementation macro name !!!"
#endif
void listInsertItem(List *, void *newItem);
void *listDeleteItem(List *, int listIndex);
void *listDeleteLastItem(List *);
void *listDeleteFirstItem(List *);
const void *listGetItem(List *, const int index); /* 'index' is array index */
int listGetSize(List *);
List* createList(void);
void freeList(List *);
/*********** Searching & Sorting algorithm - start*************/
typedef int (*compareTo)(const void *key, const void *item);
typedef bool (*isLess)(const void *key, const void *item);
typedef bool (*isEqual)(const void *key, const void *item);
void *linearSearch(const void *key, List *, size_t listSize, isEqual);
/*
bsearch() function returns a pointer to a matching member
of the array, or NULL if no match is found
*/
void *binarySearch(const void *key, List *, size_t listSize, compareTo);
/*'listSize' elements. First argument points to list.*/
void insertionSort(List *, size_t, isLess);
void selectionSort(List *, size_t listSize, compareTo);
void shellSort(List *, size_t listSize, compareTo);
void mergeSort(List *, size_t, isLess);
void quickSort(List *, size_t listSize, compareTo);
void quickSelct(List *, size_t listSize, compareTo);
/**************** Sorting algorithm - end*************/
#endif /* LIST_H */
/***************** Usage-end ***************/
arrayImpl.c
/***************** arrayImpl.c **************/
#include"list/list.h"
#if defined(ARRAY)
typedef enum {DOUBLE_THE_LIST, HALF_THE_LIST}ListResizeOperation;
static List *resizeList(List *, ListResizeOperation);
static void *bSearchRecur(const void *, void**, int, int, compareTo);
static void *bSearchIter(const void *, void **, int, int, compareTo);
static void swap(void **, int i, int j);
static void insSort(void **, size_t, isLess);
static void merge(void **, void **, int, int, int, isLess);
static void mSort(void **, void **, int, int, isLess);
/************ Representation - start ************/
typedef struct List{
void **array;
/* Housekeeping - Array enhancement/shrink */
int lastItemPosition;
int size;
}List;
#define INITIAL_LIST_SIZE 50
#define FIRST_ITEM_INDEX 0
/********************* Representation - end ************/
/************* Usage - start ***************/
List *createList(){
List *list = malloc(sizeof(List));
if(list != NULL){
list->array = malloc(INITIAL_LIST_SIZE*sizeof(void*));
if(list->array != NULL){
/* Is it safe to initialise zero to array of pointers? */
list->array = memset(list->array, 0, INITIAL_LIST_SIZE*sizeof(void *));
list->lastItemPosition = -1;
list->size = INITIAL_LIST_SIZE;
}else{
free(list);
list = NULL;
}
}
return list;
}
void freeList(List *list){
if(list != NULL){
if(list->array != NULL){
int index = 0;
while( index < list->size){
free(list->array[index++]);
}
free(list->array);
}else{
fprintf(stderr, "Invalid list sent to freeList()\n");
}
free(list);
}
}
int listGetSize(List *list){
if(list != NULL){
return list->lastItemPosition + 1;
}else{
fprintf(stderr, "List is NULL\n ");
return -1;
}
}
const void *listGetItem(List *list, const int index){
if((index >=0) && (index < list->size)){
return (const void *)list->array[index];
}else{
return NULL;
}
}
void listInsertItem(List *arrayList, void *newItem){
if(arrayList == NULL){
fprintf(stderr, "listInsertItem() -Invalid list \n");
return;
}
/* House keeping - Enhance the array */
if(arrayList->lastItemPosition + 1 == arrayList->size){
arrayList = resizeList(arrayList, DOUBLE_THE_LIST);
if(arrayList == NULL){
fprintf(stderr, "insertItem() - Unable to allocate memory \n");
exit(1);
}
}
/* Insert new element - O(1) operation */
arrayList->array[++(arrayList->lastItemPosition)] = newItem;
}
void *listDeleteItem(List *arrayList, int listIndex){
if(arrayList == NULL){
fprintf(stderr, "Invalid list \n");
return NULL;
}
void *returnElement = arrayList->array[listIndex];
/* Delete operation - O(n) operation */
for(int accumulator = listIndex; accumulator <= arrayList->lastItemPosition; accumulator++){
arrayList->array[accumulator] = arrayList->array[accumulator + 1];
}
arrayList->lastItemPosition--;
/* House keeping - Half the list */
if(arrayList->size > INITIAL_LIST_SIZE){ /* Minimum size maintained */
if((arrayList->lastItemPosition + 1) == ((arrayList->size)/2)){
arrayList = resizeList(arrayList, HALF_THE_LIST);
if(arrayList == NULL){
fprintf(stderr, "deleteItem() - Unable to allocate memory \n");
exit(1);
}
}
}
return returnElement; /* User must free this element*/
}
void * listDeleteLastItem(List *arrayList){
return listDeleteItem(arrayList, arrayList->lastItemPosition);
}
void *listDeleteFirstItem(List *arrayList){
return listDeleteItem(arrayList, FIRST_ITEM_INDEX);
}
/**************Searching & Sorting -start **************/
void *linearSearch(const void *key, List *list, size_t listSize, isEqual equal){
if(list != NULL && (listSize > 0)){
void ** array = list->array;
for(int index =0; index < listSize; index++){
if(equal(key, (array[index])) ){
return array[index];
}
}
}
return NULL;
}
void *binarySearch(const void *key, List *list, size_t listSize, compareTo compare){
if(list != NULL && (listSize > 0)){
return bSearchIter(key, list->array, 0, listSize-1, compare);
return bSearchRecur(key, list->array, 0, listSize-1, compare);
}
return NULL;
}
void insertionSort(List *list, size_t listSize, isLess less){
if(list!=NULL && (list->size > 0)){
insSort(list->array, listSize, less);
}
}
void mergeSort(List *list, size_t listSize, isLess less){
if(list != NULL){
void **aux = malloc(list->size * sizeof(void*)); //Auxillary shadow copy
if(aux != NULL){
printf("Size of list: %d\n", listSize);
mSort(list->array, aux, 0, listSize-1, less);
}else{
fprintf(stderr, "mergeSort() - Malloc failure");
exit(EXIT_FAILURE);
}
}else{
fprintf(stderr, "mergeSort() - List is NULL");
}
}
/**************Searching & Sorting -end **************/
/******************** Usage - end *******************/
/************* helper function - start ********/
static void mSort(void **array, void **aux, int low, int high, isLess less){
if(high <= low) return;
int mid = (low + high)/2;
mSort(array, aux, low, mid, less);
mSort(array, aux, mid+1, high, less);
merge(array, aux, low, mid, high, less);
}
static void merge(void **array, void **aux, int low, int mid, int high, isLess less){
for(int index = low; index <= high; index++){
aux[index] = array[index]; //Shallow copy
}
printf("Low-%d, Mid-%d, High-%d\n", low, mid, high);
int leftIndex = low; int rightIndex = mid+1;
printf("leftIndex-%d, rightIndex-%d\n", leftIndex, rightIndex);
for(int index = low; index <= high; index++){
if(leftIndex > mid) /* right array exhausted */ array[index] = aux[rightIndex++];
else if(rightIndex > high) /*left array exhausted*/ array[index] = aux[leftIndex++];
else if( less(aux[rightIndex], aux[leftIndex]) ) array[index] = aux[rightIndex++];
else array[index] = aux[leftIndex++];
}
}
static void swap(void **array, int i, int j){
void *tempPointer = array[i];
array[i] = array[j];
array[j] = tempPointer;
}
static void insSort(void **array, size_t listSize, isLess less){
for(int sortedBoundaryIndex = -1; sortedBoundaryIndex < (long long)listSize - 1; sortedBoundaryIndex++){
/*
-1 mean sorted pool is yet to form.
0 mean first element is in sorted pool
*/
for(int unSortedElementIndex = sortedBoundaryIndex + 1; unSortedElementIndex > 0; unSortedElementIndex--){
/* Within this loop, sorted pool does not exist, as new element is being compared*/
if(less(array[unSortedElementIndex], array[unSortedElementIndex-1])){
swap(array, unSortedElementIndex, unSortedElementIndex-1);
}else{
break; //If the unsorted element is > or ==, no swap, Stable sort
}
}
}
}
static void *bSearchIter(const void *key, void **array, int lowerBound, int upperBound, compareTo compare){
int mid =0;
while(lowerBound <= upperBound){
mid = (lowerBound + upperBound)/2;
if(compare(key, array[mid]) == 0){
return array[mid];
}else if(compare(key, array[mid]) < 0){
upperBound = mid-1;
}else if(compare(key, array[mid]) > 0){
lowerBound = mid + 1;
}
}/* end while */
return NULL;
}
static void *bSearchRecur(const void *key, void **array, int lowerBound, int upperBound, compareTo compare){
if(lowerBound > upperBound) return NULL;
int mid = (lowerBound + upperBound)/2;
if(compare(key, array[mid]) == 0){
return array[mid];
}else if(compare(key, array[mid]) < 0){
return bSearchRecur(key, array, lowerBound, mid-1, compare);
}else { // compare() > 0
return bSearchRecur(key, array, mid+1, upperBound, compare);
}
}
/* resizeList() is not visible to Linker (ld) */
static List *resizeList(List *list, ListResizeOperation opType){
if(opType == DOUBLE_THE_LIST){
list->array = realloc(list->array, 2*(list->size)*sizeof(void *));
if(list->array == NULL){ return NULL; }
list->lastItemPosition = list->lastItemPosition;;
list->size = 2*(list->size);
}else if(opType == HALF_THE_LIST){
list->array = realloc(list->array, ((list->size)/2)*sizeof(void *));
if(list->array == NULL){ return NULL; }
list->lastItemPosition = list->lastItemPosition;
list->size = (list->size)/2;
}
return list;
}
/************* helper function - end *************/
#endif
linkedListImpl.c
/**********linkedListImpl.c ***********/
#include"type.h"
#if defined(LINKED_LIST)
/***************** Representation - start ******************/
/* struct members are not visible to other .c files */
struct DListNode{
void *item;
struct DListNode *next;
struct DListNode *prev;
};
/* Should be used in this .c file, only, so static */
typedef struct DListNode DListNode;
static DListNode* createNode(void *);
/*
Reason to introduce 'List' type:
Problem 1:
Say, user code has 'x' and 'y' pointing to the same shopping list that is built using 'Node' type.
Some part of user code update list with new item using 'x'
'y' is not in sync with this updation
Node *x = someCreatedList;
Node *y = x;
Node *z = malloc(sizeof(Node));
z->next = x;
x = z; //y misses that reference.
Solution:
Maintain a List type, whose job is to point to head(first node) of the list.
User code will go via reference of List type
Problem 2:
It make sense to have references of 'Node' type pointing to NULL
Applying operation[insertItem()] on NULL pointer will cause runtime errors
Solution:
Run operations over List type because it does not make sense to have reference of SList type pointing to NULL.
To solve problem1 & problem2, here is 'List' type
*/
/* struct members are not visible to other .c files */
typedef struct List{
DListNode *head;
int size; /*size attribute is not part of list definition, but quick way to help user code */
}List;
#define SENTINEL_NODE_DATA_ITEM (void *)0
/************ Representation - end *************/
/******** Usage - start **********/
List *createList(){
/*
Amidst performing insert/delete operations on 'List',
To reduce the number of special checks, we designate one node as 'SENTINEL'
After using sentinel, there will be no NULL assignments/check in code.
*/
List *list = (List *)malloc(sizeof(List));
if(list != NULL){
DListNode *sentinel = createNode(SENTINEL_NODE_DATA_ITEM);
list->head = sentinel;
list->head->next = list->head;
list->head->prev = list->head;
list->size = 0;
return list;
}else{
return NULL;
}
}
bool freeList(List *list){
if(list != NULL){
if(list->size > 0){
int index = 0;
DListNode *currentNode, *nextNode;
currentNode = list->head->next;
do{
nextNode = currentNode->next;
free(currentNode->item);
free(currentNode);
currentNode = nextNode;
}while(++index < list->size);
return true;
}else{
return true;
}
}else{
return false;
}
}
int listGetSize(List *list){
if(list != NULL){
return list->size;
}else{
fprintf(stderr, "List is NULL\n ");
return -1;
}
}
const void *listGetItem(List *list, int index){
if((index >=0) && (index < list->size)){
DListNode *node = list->head->next;
while(index-- > 0){
node = node->next;
}
return (const void *)node->item;
}else{
fprintf(stderr, "Invalid index \n");
return NULL;
}
}
/* O(1) operation - insert() operation */
void listInsertItem(List *linkedList, void *newItem){
DListNode *newNode = createNode(newItem);
if(linkedList->size == 0){
linkedList->head->next = linkedList->head->prev = newNode;
}else{
/* Link with current last node in the linked list*/
newNode->prev = linkedList->head->prev;
linkedList->head->prev->next = newNode;
/* Link with Sentinel node */
newNode->next = linkedList->head;
linkedList->head->prev = newNode;
}
}
/* O(n) - delete() operation*/
void *listDeleteItem(List *linkedList, int listIndex){
int accumulator = 0;
DListNode *nodeToDelete = linkedList->head->next;
if(listIndex < linkedList->size){
while(accumulator++ < listIndex){
nodeToDelete = nodeToDelete->next;
}
nodeToDelete->prev->next = nodeToDelete->next;
nodeToDelete->next->prev = nodeToDelete->prev;
linkedList->size++;
void *item = nodeToDelete->item;
free(nodeToDelete);
return item; /* User must delete by casting to free(item); */
}else{
fprintf(stderr, "deleteItem() - Invalid Index");
return NULL;
}
}
/* O(1) - deleteLastItem() operation */
void *listDeleteLastItem(List *linkedList){
if(linkedList->size){
DListNode *nodeToDelete = linkedList->head->prev;
void *item = nodeToDelete->item;
nodeToDelete->prev->next = nodeToDelete->next;
nodeToDelete->next->prev = nodeToDelete->prev;
free(nodeToDelete);
return item; /* User must free this item,by casting, free(item) */
}else{
return NULL;
}
}
/* O(1) - deleteFirstItem() operation */
void *listDeleteFirstItem(List *linkedList){
if(linkedList->size){
DListNode *nodeToDelete = linkedList->head->next;
void *item = nodeToDelete->item;
nodeToDelete->next->prev = nodeToDelete->prev;
nodeToDelete->prev->next = nodeToDelete->next;
free(nodeToDelete);
return item; /* User must free this item,by casting, free(item) */
}else{
return NULL;
}
}
/********** Usage - end *************/
/********** Helper function - start ****************/
/* createNode() is not visible to the linker(ld) */
static DListNode *createNode(void * value){
DListNode *newNode= malloc(sizeof(DListNode));
newNode->next = newNode;
newNode->prev = newNode;
newNode->item = value;
return newNode;
}
/******** Helper function - end ********/
#endif
User code:
mergeSort.c
#include"fileIO/file.h"
#define MAX_RECORD_SIZE 256 // Bad style
bool less(const void *key, const void *item){
if( getAge(key) < getAge(item) ){
printf("Return true\n");
return true;
}else{
printf("Return false\n");
return false;
}
}
int main(){
FILE *pFile = fopen("file.txt", "r");
char buf[MAX_RECORD_SIZE];
DBCache *cache = initCache(pFile);
readHeader(pFile, cache, buf);
readData(pFile, cache, buf);
printRecords(cache);
mergeSort(getRecords(cache), listGetSize(getRecords(cache)), less);
fprintf(stdout, "Sorted data \n\n");
printRecords(cache);
}
Points of concern:
- Not sure about the approach,
fgets()
reading 256 bytes. Need improvement. - Not sure about the approach,
DELIMITER
macro is being used. - Not sure, if the code in
record
folder should be written by user or author offileIO/
code. getName()
is providing first name, bit confusing.
Question:
1) From an abstraction and encapsulation perspective, can I still improve this code?
2)
Is it possible to write List records
instead of List *records
in the user code, by providing an opaque pointer? Can I further optimize the code?
3)
Does merge-sort performance improve using a doubly linked list implementation provided by linkedListImpl.c
? I've yet to implement merge-sort using a doubly linked list.
Note: Working in Cygwin environment
1 Answer 1
Although this review describes multiple phases as if they were performed strictly in sequence, there was actually some weaving back and forth. Even so, I believe the presentation is reasonably coherent as shown.
Phase 1 — Compiling the code
Taking the code from the question and compiling it with the script given — using GCC 6.3.0 on macOS Sierra 10.12.2 — yields the following:
$ gcc -Wall -Werror -I. -DARRAY ./list/arrayImpl.c ./list/linkedListImpl.c ./record/record.c ./fileIO/fileReading.c mergeSort.c -o mergeSort.exe
./list/arrayImpl.c: In function ‘createList’:
./list/arrayImpl.c:45:23: error: implicit declaration of function ‘memset’ [-Werror=implicit-function-declaration]
list->array = memset(list->array, 0, INITIAL_LIST_SIZE*sizeof(void *));
^~~~~~
./list/arrayImpl.c:45:23: error: incompatible implicit declaration of built-in function ‘memset’ [-Werror]
./list/arrayImpl.c:45:23: note: include ‘<string.h>’ or provide a declaration of ‘memset’
./list/arrayImpl.c: In function ‘mergeSort’:
./list/arrayImpl.c:186:30: error: format ‘%d’ expects argument of type ‘int’, but argument 2 has type ‘size_t {aka long unsigned int}’ [-Werror=format=]
printf("Size of list: %d\n", listSize);
^
cc1: all warnings being treated as errors
./record/record.c: In function ‘copyData’:
./record/record.c:19:19: error: implicit declaration of function ‘strtok’ [-Werror=implicit-function-declaration]
char *data = strtok(buf, DELIMITER);
^~~~~~
./record/record.c:19:19: error: initialization makes pointer from integer without a cast [-Werror=int-conversion]
./record/record.c:22:10: error: assignment makes pointer from integer without a cast [-Werror=int-conversio]
data = strtok(NULL, DELIMITER);
^
./record/record.c:23:25: error: implicit declaration of function ‘strdup’ [-Werror=implicit-function-declaration]
record->firstName = strdup(data);
^~~~~~
./record/record.c:23:25: error: incompatible implicit declaration of built-in function ‘strdup’ [-Werror]
./record/record.c:25:10: error: assignment makes pointer from integer without a cast [-Werror=int-conversio]
data = strtok(NULL, DELIMITER);
^
cc1: all warnings being treated as errors
./fileIO/fileReading.c: In function ‘printRecords’:
./fileIO/fileReading.c:34:5: error: implicit declaration of function ‘print’ [-Werror=implicit-function-declaration]
print(getRecords(cache), index);
^~~~~
./fileIO/fileReading.c: In function ‘readHeader’:
./fileIO/fileReading.c:68:26: error: implicit declaration of function ‘strlen’ [-Werror=implicit-function-declaration]
cache->header = malloc(strlen(buf) + 1);
^~~~~~
./fileIO/fileReading.c:68:26: error: incompatible implicit declaration of built-in function ‘strlen’ [-Werror]
./fileIO/fileReading.c:68:26: note: include ‘<string.h>’ or provide a declaration of ‘strlen’
./fileIO/fileReading.c:70:3: error: implicit declaration of function ‘strcpy’ [-Werror=implicit-function-declaration]
strcpy(cache->header, buf);
^~~~~~
./fileIO/fileReading.c:70:3: error: incompatible implicit declaration of built-in function ‘strcpy’ [-Werro]
./fileIO/fileReading.c:70:3: note: include ‘<string.h>’ or provide a declaration of ‘strcpy’
./fileIO/fileReading.c:71:3: error: implicit declaration of function ‘memset’ [-Werror=implicit-function-declaration]
memset(buf, 0, MAX_RECORD_SIZE);
^~~~~~
./fileIO/fileReading.c:71:3: error: incompatible implicit declaration of built-in function ‘memset’ [-Werro]
./fileIO/fileReading.c:71:3: note: include ‘<string.h>’ or provide a declaration of ‘memset’
./fileIO/fileReading.c: In function ‘noNewLine’:
./fileIO/fileReading.c:98:13: error: incompatible implicit declaration of built-in function ‘strlen’ [-Werror]
int len = strlen(buf);
^~~~~~
./fileIO/fileReading.c:98:13: note: include ‘<string.h>’ or provide a declaration of ‘strlen’
cc1: all warnings being treated as errors
$
At first, I suspected that you're using GCC 4.x which defaults to compiling in C90 mode, whereas GCC 5.x and 6.x default to C11 mode. However, even with GCC 4.8.1 (the oldest version I have on my machine), I get a lot of errors.
Fixing that lot requires lots of fiddly stuff — mainly adding #include <string.h>
to files, but also things such as changing print(...)
to printRecord(...)
, and changing a %d
to %zu
to print a size_t
value correctly, dropping the const
from the return type of getAge()
, and not comparing int
with size_t
. Since I compile with more stringent options, I also had to add void
to int main(void)
and List *createList(void)
, and I made less()
into a static function since it isn't called by name from outside the file where it is defined.
$ gcc -Wall -Werror -Wextra -Wmissing-prototypes -Wstrict-prototypes \
> -Wold-style-definition -Wold-style-declaration -I. -DARRAY \
> ./list/arrayImpl.c ./list/linkedListImpl.c ./record/record.c \
> ./fileIO/fileReading.c mergeSort.c -o mergeSort.exe
$
This gets the code to compile cleanly under fussy options — it was more work than I expected.
I observe that list/list.h
suggests that you should be able to use -DLINKED_LIST
instead of -DARRAY
. With the stringent options used above (especially -Wold-style-definition
and -Wmissing-prototypes
), there are quite a lot of warnings. That turns out to be because list/linkedListImpl.c
does not include list/list.h
. When it does include that, you get a type conflict between the definition bool freeList(List *list)
and the declaration void freeList(List *)
. Since the linked list variant of the function returns true
or false
, the declaration should be fixed. When it is fixed, the code compiles but doesn't link because there is no mergeSort()
function implemented.
Then switching back to -DARRAY
, the freeList()
in list/arrayImpl.c
is now the wrong type. There's no insight found from looking at where it is called — it isn't called. It is relatively simple to modify the code to return a bool
— true
if there was anything to delete and false
if not, which is the protocol in the linked list version. I'm not sure whether that's actually the best choice; it isn't clear what anyone would do with that information.
This leaves a reviewer wondering how much of the code presented for review is really relevant. Indeed, when list/linkedListImpl.c
is dropped from the compilation, the program links and runs.
Phase 2 — Running the code
Running the code produces the output:
Age,LastName,FirstName
------------------------------
50,B,A
30,A,B
20,X,D
10,F,A
10,A,B
90,V,E
60,N,M
Records#: 7
Size of list: 7
Low-0, Mid-0, High-1
leftIndex-0, rightIndex-1
Return true
Low-2, Mid-2, High-3
leftIndex-2, rightIndex-3
Return true
Low-0, Mid-1, High-3
leftIndex-0, rightIndex-2
Return true
Return true
Low-4, Mid-4, High-5
leftIndex-4, rightIndex-5
Return false
Low-4, Mid-5, High-6
leftIndex-4, rightIndex-6
Return false
Return true
Low-0, Mid-3, High-6
leftIndex-0, rightIndex-4
Return false
Return true
Return false
Return false
Return false
Sorted data
Age,LastName,FirstName
------------------------------
10,F,A
10,A,B
20,X,D
30,A,B
50,B,A
60,N,M
90,V,E
Records#: 7
This much seems to be more or less correct, though when there are two entries with the same age, the entries are not sorted in a determinate order (there is no secondary match based on name, for example).
I also tested with a 200-entry file of numbers 1..99, with random names of all upper-case letters, 4-15 long for 'first name' and 6-15 long for 'last name'. That seemed to work apart from the indeterminate sorting of entries with equal age (of which there were a lot, of course).
Phase 3 — Studying the code style (layout)
There are multiple issues that I don't like in the coding style — just in the layout. This stuff is inherently subjective; you can decide for yourself whether to pay attention to my diatribes. Consistency is, IMO, crucial. (I have seen much worse code for consistency, but I've seen better too.)
For example:
#include"record/record.h"
#include<stdbool.h>
There should be a space between #include
and the header name.
I don't like the layout that treats keywords and function calls the same — there should be space between keywords such as if
, for
, while
and the following condition.
for(int index = low; index <= high; index++){
if(leftIndex > mid) /* right array exhausted */ array[index] = aux[rightIndex++];
else if(rightIndex > high) /*left array exhausted*/ array[index] = aux[leftIndex++];
else if( less(aux[rightIndex], aux[leftIndex]) ) array[index] = aux[rightIndex++];
else array[index] = aux[leftIndex++];
}
I particularly don't like the occasional space after the open parenthesis. I'm also unclear on why there's a blank line at the start of this loop, but not the previous loop in the same function. Again, consistency.
if(list->array == NULL){ return NULL; }
if((arrayList->lastItemPosition + 1) == ((arrayList->size)/2)){
And I find the braces around return NULL;
on the same line as the conditional extremely unpleasant to read. The number of parentheses in some of the code seems excessive to me, too. You do seem to be consistent in the use of one variant of 1TBS (One True Brace Style). That's good. It's not the style I prefer, but consistency is good and more important than the style chosen.
Much of the indentation is at 2 spaces; sometimes it is at 4 spaces. If you like 2-space indentation, you'd be happy at Google; I prefer 4-space indentation. If the code is consistent, either is acceptable.
There are semi-random numbers of blank lines scattered around the source, sometimes 1 or 2 or 3 or 4, with no particular rhyme or reason for the spacing on display. Again, consistency is most important.
There's the redundant code in list/arrayImpl.c
:
void *binarySearch(const void *key, List *list, size_t listSize, compareTo compare){
if(list != NULL && (listSize > 0)){
return bSearchIter(key, list->array, 0, listSize-1, compare);
return bSearchRecur(key, list->array, 0, listSize-1, compare);
}
return NULL;
}
The second return is never reached — it should be removed. I'm mildly puzzled that even with -O3
in the compilation options, GCC 6.3.0 didn't complain about the dead code. It's safe to assume it removed it, anyway, but it shouldn't be there. When the call is removed, the bSearchRecur()
function is unused; it needs to be removed too. Or maybe there's some preprocessor conditional compilation that has been lost.
The header list/list.h
has multiple-inclusion protection; the header record/record.h
does not. With C11 you can get away with redeclaring a typedef
as long as the two declarations are the same, so the header guard isn't crucial under C11. I still prefer it to be there — old school anal retentiveness no doubt.
In list/list.h
, you have:
void *linearSearch(const void *key, List *, size_t listSize, isEqual);
/*
bsearch() function returns a pointer to a matching member
of the array, or NULL if no match is found
*/
void *binarySearch(const void *key, List *, size_t listSize, compareTo);
Presumably the bsearch()
referred to is actually binarySearch()
rather than bsearch()
from <stdlib.h>
, but then why the misspelling. If it is the standard bsearch()
, the comment is accurate but its relevance is not immediately clear. If there was a comment about "like the standard bsearch()
, these two search functions return a pointer to the matching element of the array, or null if no match is found", then it would be more coherent.
Phase 4 — Studying the code style (functionality)
In mergeSort.c
, we find:
#include"fileIO/file.h"
#define MAX_RECORD_SIZE 256 // Bad style
...implementation of `less()`...
int main(void){
FILE *pFile = fopen("file.txt", "r");
char buf[MAX_RECORD_SIZE];
DBCache *cache = initCache(pFile);
readHeader(pFile, cache, buf);
readData(pFile, cache, buf);
printRecords(cache);
mergeSort(getRecords(cache), listGetSize(getRecords(cache)), less);
fprintf(stdout, "Sorted data \n\n");
printRecords(cache);
}
It isn't clear why MAX_RECORD_SIZE
is inherently bad. What is bad is that the code opens "file.txt"
and doesn't verify that it was successful. It is always crucial to check file open operations. That can be fixed and an appropriate error reported on standard error.
We then have a variable buf
that is passed to readHeader()
and readData()
, but there is no indication to those functions of how big a buffer was passed — and the size is fixed in mergeSort.c
and not specified in a header. That's bad. There's also no way for the two input functions to indicate whether they were successful or not. And there doesn't seem to be any use for buf
in main()
other than to be passed to those functions.
When the code in fileIO/fileReading.c
is examined, we find that MAX_RECORD_SIZE
is defined in that file too — that means the value is defined in (at least) two places, which is at least one too many. The readHeader()
function does actually do a check that it is passed a non-null file stream, but so does readData()
. That check is in the wrong place; the function that opens the file should check that it was successful — not least because it can report the file name whereas the file I/O functions can't because they're not given the file name.
The variable buf
should not exist in main()
. The two input functions can perfectly well define their own local variables for the job. That reduces the connectedness of the two files, and avoids the double maintenance problem from having MAX_RECORD_SIZE
defined twice.
When we look at the top of readHeader()
, we find:
void readHeader(FILE *pFile, DBCache* cache, char buf[]){
if(pFile == NULL){
checkHandleFailure(pFile, FILE_HANDLE_FAILURE);
}
...
When we look at checkHandleFailure()
, we find:
static void checkHandleFailure(void *ptr, char *msg){
if(ptr == NULL){
fprintf(stderr, "%s\n", msg);
exit(EXIT_FAILURE);
}
}
So, the pointer is checked twice, as if it might change between the two checks. That seems a bit pointless.
Going back to main()
, it initializes a DBCache
, using the file pointer before the calls to readHeader()
and readData()
. Although it contains a call to checkHandleFailure()
, that's for a malloc()
failure. It uses the file stream without checking that it is valid — which means undefined behaviour and most probably a crash if the file couldn't be opened. This is another reason for checking the file stream once, immediately after the call to fopen()
!
The DBCache
contains some information about the file size, and a buffer called fileDataStream
that is big enough for the whole file. It reads the header line into the buffer, too. Then readHeader()
re-reads the header line into a different buffer. In fact, after DBCache
has finished, the fileDataStream
buffer is never touched again. It isn't freed, either, but that may be because there isn't a function to release the DBCache
(a freeCache()
perhaps). The fileSize
element of the cache isn't used again, either.
Phase 5 — Bailing out early
That's as much review as I'm prepared to do on this. I hope it is some help, but there is a lot of work required to lick this code into shape.
-
\$\begingroup\$ Why
memset
call lacks declaration error? I havestring.h
intype.h
which is inlist.h
. Am usinggcc version 5.4.0 (GCC)
\$\endgroup\$overexchange– overexchange2017年01月05日 09:39:16 +00:00Commented Jan 5, 2017 at 9:39 -
\$\begingroup\$ Because when I copied the code from the question onto my machine in a simulacrum of your directory structure and ran the command line you gave to compile the code, these were the errors I got. The file
type.h
was not being included. \$\endgroup\$Jonathan Leffler– Jonathan Leffler2017年01月05日 14:52:17 +00:00Commented Jan 5, 2017 at 14:52
Explore related questions
See similar questions with these tags.