I'm writing a C program called "mylogger". It is a simple program that merges several log files in order of timestamp.
For example, we have server.log:
#SERVER
DEBUG,2011-10-21 14:32:00,server starting
DEBUG,2011-10-21 14:32:01,server started
WARN,2011-10-21 14:32:10,client connected
DEBUG,2011-10-21 14:32:15,received string
INFO,2011-10-21 14:32:18,sent result
ERROR,2011-10-21 14:32:19,error sending
DEBUG,2011-10-21 14:32:22,client disconnected
and client.log:
#CLIENT
WARN,2011-10-21 14:32:09,session started
INFO,2011-10-21 14:32:14,calling server
ERROR,2011-10-21 14:32:20,error while calling
DEBUG,2011-10-21 14:32:21,disconnecting
Then, when you type ./mylogger
in a terminal, the program reads these two files and writes a combinedlogs.log file:
DEBUG,2011-10-21 14:32:00,server starting
DEBUG,2011-10-21 14:32:01,server started
WARN,2011-10-21 14:32:09,session started
WARN,2011-10-21 14:32:10,client connected
INFO,2011-10-21 14:32:14,calling server
DEBUG,2011-10-21 14:32:15,received string
INFO,2011-10-21 14:32:18,sent result
ERROR,2011-10-21 14:32:19,error sending
ERROR,2011-10-21 14:32:20,error while calling
DEBUG,2011-10-21 14:32:21,disconnecting
DEBUG,2011-10-21 14:32:22,client disconnected
And here is all my code.
main.c
reads the working directory (either the one given on the command line or, by default, the current working directory) and filters its entries so that only .log files are read. It then opens each file and passes its contents, line by line, to the function parseLine,
which converts each line into a logline object. Each object is inserted into a linked list, and the function sortList is then called
to sort the list by timestamp. Finally, the sorted list is written to a file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <stdbool.h>
#include "structDef.h"
/* Declares getcwd(); without it the implicit declaration causes a "comparison between pointer and integer" warning */
#include <unistd.h>
/*
 * Report whether `name` consists of at least one character followed by the
 * exact, case-sensitive suffix ".log". A name that is only ".log" is rejected.
 */
bool has_log_extension(char const *name)
{
    static const char suffix[] = ".log";
    const size_t suffix_len = sizeof suffix - 1;
    const size_t name_len = strlen(name);

    /* The name must be strictly longer than the suffix itself. */
    if (name_len <= suffix_len) {
        return false;
    }
    return memcmp(name + (name_len - suffix_len), suffix, suffix_len) == 0;
}
/* Main driver */
int main(int argc, char **argv) {
char cwd[1024];
char *pathName;
loglist *head, *tail;
head = tail = NULL;
logline *i;
logline j;
if((i = (logline *)malloc(sizeof(logline))) == NULL) {
printf("Allocation falild!\n");
exit(1);
}
/* Get current working directory */
if (argc == 1) {
if (getcwd(cwd, sizeof(cwd)) != NULL) {
pathName = cwd;
} else {
perror("getcwd() error");
}
}
/* Get directory from user input */
else if (argc == 2) {
pathName = argv[1];
fprintf(stdout, "input working dir: %s\n", pathName);
}
/* Invalid user input from command line */
else {
fprintf(stderr, "Error: invalid No. of command line arguments.\n"); exit(1);
}
/* End of user input */
/* Open .log files in specified directory (default is cwd). */
struct dirent *entry;
DIR *dp;
dp = opendir(pathName);
if (dp == NULL)
{
perror("opendir");
return -1;
}
/* Save filenames into a string array */
int maxLogFiles = 16;
char **logfiles = malloc(maxLogFiles * sizeof(char *));
char filetoOpen[maxLogFiles][20];
int fileCount = 0;
while((entry = readdir(dp)))
if (has_log_extension(entry->d_name)) {
strcpy(filetoOpen[fileCount], entry->d_name);
if(fileCount == maxLogFiles -1) {
maxLogFiles *= 2;
}
fileCount++;
}
closedir(dp);
/*Now its time to open the files*/
int k;
char * buffer = 0;
long length;
FILE *f;
for(k = 0; k < fileCount; k++) {
f = fopen (filetoOpen[k], "r");
if(f == NULL) {
printf("\n Unable to open %s \n", filetoOpen[k]);
}
if (f) {
fseek (f, 0, SEEK_END);
length = ftell (f);
fseek (f, 0, SEEK_SET);
buffer = malloc (length);
if (buffer) {
fread (buffer, 1, length, f);
}
fclose (f);
}
if (buffer) {
/* If the log file is not start with #, skip this log file */
if(buffer[0] != '#') {
printf("\nlogfile %s is not start with #. Skipped...\n", logfiles[0]);
break;
}
char * curLine = buffer;
while(curLine)
{
char * nextLine = strchr(curLine, '\n');
if (nextLine) *nextLine = '0円'; /* Temporarily terminate the current line */
i = parseLine(curLine);
if(i != 0) {
strcpy(j.level, i->level);
strcpy(j.timestamp, i->timestamp);
strcpy(j.message, i->message);
insert_node(&head, &tail, j);
}
if (nextLine) *nextLine = '\n'; /* Then restore newline-char, just to be tidy */
curLine = nextLine ? (nextLine+1) : NULL;
}
}
}
sortList(head);
printf("\n --Sorted list: \n");
printLines(head);
writeToFile(head);
return 0;
}
The error handling in main.c is not strong enough I think.
functionA.c
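/*
 * This file implements four functions:
 * 1. insert_node - append a log line to the linked list
 * 2. parseLine   - build a logline from one line of text
 * 3. printLines  - print the list to the console
 * 4. writeToFile - write the list to combinedlogs.log
 */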
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "prototypeA.h"
/*
 * Append a copy of `v` to the end of a singly linked list.
 *
 * h: address of the head pointer; set to the new node when the list is empty.
 * t: address of the tail pointer; always updated to the new node.
 * v: the log line to store (copied by value into the node).
 *
 * Terminates the program with exit status 1 when the node cannot be allocated.
 */
void insert_node(loglist **h, loglist **t, logline v) {
    loglist *node = malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "Allocation failed!\n");
        exit(1);
    }

    node->line = v;
    node->next = NULL;

    if (*h == NULL) {
        /* First element: it is both head and tail. */
        *h = node;
    } else {
        (*t)->next = node;
    }
    *t = node;
} /* End of insert */
/*
 * Copy the `len`-byte field starting at `src` into `dst` (capacity `dst_size`)
 * and NUL-terminate it. A field that does not fit is truncated rather than
 * written past the end of `dst`.
 */
static void copy_field(char *dst, size_t dst_size, const char *src, size_t len)
{
    if (len >= dst_size) {
        len = dst_size - 1;
    }
    memcpy(dst, src, len);
    dst[len] = '\0';
}

/*
 * Construct a logline with 3 fields (level, timestamp, message) from a line
 * of the form "LEVEL,TIMESTAMP,MESSAGE".
 *
 * Only the first two commas separate fields, so the message may itself
 * contain commas. A trailing CR/LF is not copied into the message.
 * The input line is not modified.
 *
 * Returns NULL for a NULL or empty line, a header line starting with '#',
 * or a line with fewer than three fields. Otherwise returns a newly
 * allocated logline that the caller must free().
 * Terminates the program with exit status 1 when allocation fails.
 */
logline* parseLine(char *line) {
    if (line == NULL || line[0] == '\0' || line[0] == '#') {
        return NULL;
    }

    const char *level_end = strchr(line, ',');
    if (level_end == NULL) {
        return NULL;
    }

    const char *stamp = level_end + 1;
    const char *stamp_end = strchr(stamp, ',');
    if (stamp_end == NULL) {
        return NULL;
    }

    const char *message = stamp_end + 1;
    size_t message_len = strcspn(message, "\r\n");

    logline *i = malloc(sizeof *i);
    if (i == NULL) {
        fprintf(stderr, "Allocation failed!\n");
        exit(1);
    }

    copy_field(i->level, sizeof i->level, line, (size_t)(level_end - line));
    copy_field(i->timestamp, sizeof i->timestamp, stamp, (size_t)(stamp_end - stamp));
    copy_field(i->message, sizeof i->message, message, message_len);
    return i;
}
/*
 * Dump the list to stdout, one record per line in the form
 * "level timestamp message". An empty list is reported with a notice instead.
 */
void printLines(loglist* l) {
    if (l == NULL) {
        printf("The list is empty.\n");
        return;
    }

    for (loglist *node = l; node != NULL; node = node->next) {
        const logline *rec = &node->line;
        printf("%s %s %s\n", rec->level, rec->timestamp, rec->message);
    }
}/* End of print */
/*
 * Write every record of the list to "combinedlogs.log" (created or truncated
 * in the current working directory), one per line, using the same
 * comma-separated layout as the input logs: "level,timestamp,message".
 *
 * An empty list only produces a notice on stdout; no file is created.
 * Terminates the program with exit status 1 when the file cannot be opened
 * or written. A failed close is reported on stderr.
 */
void writeToFile(loglist* l) {
    if (l == NULL) {
        printf("The list is empty.\n");
        return;
    }

    printf("\n Writing...\n");

    /* Create the file to write */
    FILE *sortedLogs = fopen("combinedlogs.log", "w");
    if (sortedLogs == NULL) {
        fprintf(stderr, "Could not open file: combinedlogs.log \n");
        exit(1);
    }

    /* Write data */
    for (; l != NULL; l = l->next) {
        if (fprintf(sortedLogs, "%s,%s,%s\n",
                    l->line.level, l->line.timestamp, l->line.message) < 0) {
            fprintf(stderr, "Could not write to file: combinedlogs.log \n");
            fclose(sortedLogs);
            exit(1);
        }
    }

    /* Close file; fclose() flushes, so a failure here means data was lost. */
    if (fclose(sortedLogs) == EOF) {
        fprintf(stderr, "Could not close file: combinedlogs.log \n");
        return;
    }
    printf("\n Done! \n");
}/* End of write */
functionB.c
/*
 * This file implements sorting of the log list by timestamp.
 */
#include <string.h>
#include <stdlib.h>
#include "prototypeB.h"
/*
 * Exchange the log lines stored in two list nodes.
 * Only the payloads move; the nodes' `next` links are left untouched.
 */
void swap(loglist *p1, loglist *p2)
{
    logline held = p2->line;

    p2->line = p1->line;
    p1->line = held;
}
/*
 * Sort the list in place by ascending timestamp (strcmp order).
 *
 * Nodes are never relinked; only the log lines stored in them move, so the
 * caller's head pointer stays valid. The sort is stable: lines with equal
 * timestamps keep their original relative order.
 *
 * Returns `inlist` (NULL for an empty list).
 */
loglist* sortList(loglist* inlist) {
    if (inlist == NULL) {
        return NULL;
    }

    for (loglist *slot = inlist; slot->next != NULL; slot = slot->next) {
        /* Find the first node holding the smallest remaining timestamp. */
        loglist *min = slot;
        for (loglist *probe = slot->next; probe != NULL; probe = probe->next) {
            if (strcmp(min->line.timestamp, probe->line.timestamp) > 0) {
                min = probe;
            }
        }
        if (min == slot) {
            continue;
        }

        /*
         * Move the minimum into `slot` and shift every line in [slot, min)
         * one node towards `min`. A plain swap would reorder lines that
         * share a timestamp.
         */
        logline carry = min->line;
        for (loglist *shift = slot; ; shift = shift->next) {
            logline displaced = shift->line;
            shift->line = carry;
            carry = displaced;
            if (shift == min) {
                break;
            }
        }
    }
    return inlist;
}
prototypeA.h
/*
 * Prototypes of the list and I/O helpers implemented in functionA.c.
 */
#ifndef PROTOTYPE_A_H
#define PROTOTYPE_A_H

#include "structDef.h"

/* Append a copy of v to the list whose head/tail pointers are *h and *t. */
void insert_node(loglist **h, loglist **t, logline v);
/* NOTE(review): no definition of print_list appears in functionA.c - confirm it is still needed. */
void print_list(loglist *h);
/* Build a newly allocated logline from one line of text. */
logline* parseLine(char *line);
/* NOTE(review): no definition of deleteList appears in functionA.c - confirm it is still needed. */
void deleteList(loglist* l);
/* Print every line of the list to stdout. */
void printLines(loglist* l);
/* Write every line of the list to combinedlogs.log. */
void writeToFile(loglist* l);

#endif /* PROTOTYPE_A_H */
prototypeB.h
/*
 * Prototypes of the sorting helpers implemented in functionB.c.
 */
#ifndef PROTOTYPE_B_H
#define PROTOTYPE_B_H

#include "structDef.h"

/* NOTE(review): no definition of mergeLists appears in functionB.c - confirm it is still needed. */
loglist* mergeLists(loglist* resultlist, loglist* inlist);
/* Sort the list by the timestamp field of its log lines. */
loglist* sortList(loglist* inlist);

#endif /* PROTOTYPE_B_H */
structDef.h
/*
 * Data types shared by all translation units: one parsed log line and the
 * singly linked list node that stores it.
 *
 * The include guard lets several headers include this file without
 * redefining the structs.
 */
#ifndef STRUCT_DEF_H
#define STRUCT_DEF_H

/* One parsed log entry; every field is a NUL-terminated string. */
typedef struct logline {
    char level[20];      /* first comma-separated field, e.g. "DEBUG" */
    char timestamp[20];  /* second field; compared with strcmp() when sorting */
    char message[100];   /* third field */
} logline;

/* Node of a singly linked list of log lines. */
typedef struct loglist {
    logline line;            /* payload, stored by value */
    struct loglist *next;    /* following node, or NULL at the end of the list */
} loglist;

#endif /* STRUCT_DEF_H */
makefile
# Build mylogger from its three translation units.
CC = gcc
CFLAGS = -g -Wall -Wextra
OBJECTS = main.o functionA.o functionB.o
HEADERS = structDef.h prototypeA.h prototypeB.h

mylogger: $(OBJECTS)
	$(CC) $(CFLAGS) $(OBJECTS) -o mylogger

# Every object depends on the headers so that a header change triggers a rebuild.
%.o: %.c $(HEADERS)
	$(CC) $(CFLAGS) -c $<

.PHONY: clean
clean:
	rm -f mylogger $(OBJECTS)
1 Answer 1
Is there any guarantee that the lines in each source log file are already sorted, i.e. if line A comes before line B, then the timestamp of A is no later than that of B? If so, you can merge the log files using the merge step of merge sort: open all the log files together, repeatedly output the line with the smallest timestamp among the current lines of all files, and advance only that file to its next line. With this approach you can also write the already-sorted lines to the final log file as you go, which reduces the memory consumption of your program. Even if the lines in the source log files are not sorted, you can insert each line in sorted order by traversing the list and placing the line at the right position (instead of always inserting at the end and sorting later).
sort -u -t , -k 2 -k 1 *.log
instead (additionally with -m
if the source files are themselves already sorted).