I am writing possibly my first real C project, and it is almost finished. But as I was coming to a close I noticed something.
This is a simple operation to split a cookie string into a ~2D array. I don't expect you to read all of the code if you don't want to. It works, but that is not my current worry.
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <openssl/rand.h>
#include <zlib.h>
using namespace std;
typedef struct _http_cookie_t
{
const char * key;
const char * val;
unsigned int key_len;
unsigned int val_len;
} http_cookie_t;
int cookies_get_length(char * s);
int cookies_contains(http_cookie_t ** cookies, int length, const char * string);
int cookies_create_map(http_cookie_t ** cookies, int length, char * string);
int cookies_separate_string(char ** cookies, int length, char * string);
int cookies_create_single(http_cookie_t * cookie, char * src);
int main() {
const char * src = "UREF1=F2E2D08C;UREF2=F2E2D08C";
const int length = cookies_get_length((char *)src);
http_cookie_t ** cookies = (http_cookie_t **)malloc(length * sizeof(struct http_cookie_t));
cookies_create_map(cookies, length, (char *)src);
if (cookies_contains(cookies, length, "UREF")) {
printf("WAS FOUND");
} else {
printf("NOT FOUND");
}
return 0;
}
int cookies_contains(http_cookie_t ** cookies, int length, const char * string)
{
int located = 0;
int i;
for (i = 0; i < length; i++) {
http_cookie_t * c = cookies[i];
if (strcmp(string, c->key)) {
located = 1;
break;
}
}
return located;
}
int cookies_create_map(http_cookie_t ** cookies, int length, char * string)
{
char * strings[length];
cookies_separate_string(strings, length, string);
int i;
for (i = 0; i<length; i++) {
cookies[i] = (http_cookie_t *)malloc(sizeof(struct http_cookie_t));
cookies_create_single(cookies[i], strings[i]);
}
return 1;
}
int cookies_separate_string(char ** strings, int length, char * src)
{
const char semicolon[2] = ";";
char *d = (char *)malloc(strlen(src) + 1);
if (d == NULL) {
return 0;
}
strcpy(d, src);
char * v = strtok(d, semicolon);
int i = 0;
while (v != NULL) {
strings[i] = (char *)malloc(strlen(v));
strings[i] = v;
i++;
v = strtok(NULL, semicolon);
}
return 1;
}
int cookies_create_single(http_cookie_t * cookie, char * src)
{
const char sign[2] = "=";
char *d = (char *)malloc(strlen(src) + 1);
if (d == NULL) {
return 0;
}
strcpy(d, src);
char * v = strtok(d, sign);
char * strings[2];
int i = 0;
while (v != NULL) {
if (i < 2) {
strings[i] = (char *)malloc(strlen(v));
strings[i] = v;
}
v = strtok(NULL, sign);
i++;
}
if (i < 2) {
return 0;
}
cookie->key = strings[0];
cookie->val = strings[1];
cookie->key_len = (int)strlen(cookie->key);
cookie->val_len = (int)strlen(cookie->val);
return 1;
}
int cookies_get_length(char * s)
{
int i;
for (i=0; s[i]; s[i]==';' ? i++ : *s++);
return i + 1;
}
My concern is that I was able to create the exact same procedure in PHP which looks like this:
<?php
$src = 'UREF1=F2E2D08C;UREF2=F2E2D08C';
$cookies = array();
foreach (explode(';', $src) as $pair) {
$pair = explode('=', $pair);
$cookies[$pair[0]] = $pair[1];
}
echo '<pre>';
print_r($cookies);
Now I need some advice, taking into account this being my first C program and having likely made a few mistakes in optimization.
But please either explain why my C solution is massively larger, or explain how I can simplify it. You may ignore the PHP comparison context of this question.
=====================================================================
I modified the script since this question was originally posted. I took a large amount of advise from other questions I asked and updated the program accordingly.
The updated version is:
- C only
- Frees memory
- Includes better use of typedef
It is essentially a refactor of the previous version and is built as the actual apache module.
#include "httpd.h"
#include "http_config.h"
#include "http_protocol.h"
#include "ap_config.h"
#include <ctype.h>
#include "openssl/rand.h"
#include "zlib.h"
const char * COOKIE_KEY = "FGID";
typedef struct
{
const char * key;
const char * val;
unsigned int key_len;
unsigned int val_len;
} http_cookie_t;
char * cookie_create(request_rec *r);
typedef struct {
int total;
int max;
} explode_count_t;
typedef struct {
char ** elements;
int len;
int max;
} explode_t;
explode_count_t * explode_count(char * haystack, char * needle);
void remove_explode_count_t(explode_count_t * count);
explode_t * explode(char * delimiter, char * string);
void remove_explode_t(explode_t * explode);
char * str_dup(char * source);
char * trim (char *string, char junk);
char * ltrim(char *string, char junk);
char * rtrim(char* string, char junk);
char * create_unique_ref();
/* The sample content handler */
static int cookie_uref_handler(request_rec *r)
{
const char * src = apr_table_get(r->headers_in, "Cookie");
char * hash;
if (src == NULL) {
hash = cookie_create(r);
} else {
char * header = str_dup((char *)src);
explode_t * pairs = explode(header, ";");
int has_cookie = 0;
int i = 0;
for (i; i < pairs->len; i++) {
char * element = trim(pairs->elements[i], ' ');
explode_t * single_pair = explode(element, "=");
if (single_pair->len == 2 && strcmp(single_pair->elements[0], COOKIE_KEY) == 0) {
has_cookie = 1;
hash = malloc(strlen(single_pair->elements[1]));
strcpy(hash, single_pair->elements[1]);
}
remove_explode_t(single_pair);
}
remove_explode_t(pairs);
if (! has_cookie) {
hash = cookie_create(r);
}
}
free(hash);
return DECLINED;
}
static void cookie_uref_register_hooks(apr_pool_t *p)
{
ap_hook_handler(cookie_uref_handler, NULL, NULL, APR_HOOK_MIDDLE);
}
/* Dispatch list for API hooks */
module AP_MODULE_DECLARE_DATA cookie_uref_module = {
STANDARD20_MODULE_STUFF,
NULL, /* create per-dir config structures */
NULL, /* merge per-dir config structures */
NULL, /* create per-server config structures */
NULL, /* merge per-server config structures */
NULL, /* table of config file commands */
cookie_uref_register_hooks /* register hooks */
};
char * cookie_create(request_rec *r)
{
char cookie_header[100];
char * hash = create_unique_ref();
sprintf(cookie_header, "%s=%s; Path=/; Max-Age=3600; Version=1", COOKIE_KEY, hash);
apr_table_set(r->headers_out, "Set-Cookie", cookie_header);
return hash;
}
char * str_dup(char * source)
{
char * copy = malloc(strlen(source));
strcpy(copy, source);
return copy;
}
explode_count_t * explode_count(char * haystack, char * needle)
{
explode_count_t * count = malloc(sizeof(explode_count_t));
count->total = 1;
count->max = 0;
int hay_length = (int)strlen(haystack);
int ndl_length = (int)strlen(needle);
int i = 0;
int j = 0;
int m = 0;
for (i; i < hay_length; i++) {
if (haystack[i] == needle[j]) {
if (j == ndl_length - 1) {
count->total++;
j = 0;
if (m > count->max) {
count->max = m;
}
m = 0;
} else {
j++;
}
} else if(haystack[i] == needle[0]) {
j = 1;
} else {
j = 0;
m++;
if (m > count->max) {
count->max = m;
}
}
}
return count;
}
void remove_explode_count_t(explode_count_t * count)
{
free(count);
}
explode_t * explode(char * string, char * delimiter)
{
explode_t * explode = malloc(sizeof(explode_t));
explode_count_t * count = explode_count(string, delimiter);
explode->elements = malloc(sizeof(char *) * count->total);
explode->len = count->total;
explode->max = count->max;
if (explode->max > 0) {
char * copy = malloc(strlen(string));
strcpy(copy, string);
int i = 0;
char * v = strtok(copy, delimiter);
while (v != NULL) {
explode->elements[i] = malloc(strlen(v) + 1);
strcpy(explode->elements[i], v);
v = strtok(NULL, delimiter);
i++;
}
free(copy);
} else {
explode->elements[0] = "";
}
remove_explode_count_t(count);
return explode;
}
void remove_explode_t(explode_t * explode)
{
int i = 0;
for (i; i < explode->len; i++) {
free(explode->elements[i]);
}
free(explode->elements);
free(explode);
}
char* trim (char *string, char junk)
{
return ltrim(rtrim(string, junk), junk);
}
char* ltrim(char *string, char junk)
{
char* original = string;
char *p = original;
int trimmed = 0;
do
{
if (*original != junk || trimmed)
{
trimmed = 1;
*p++ = *original;
}
}
while (*original++ != '0円');
return string;
}
char* rtrim(char* string, char junk)
{
char* original = string + strlen(string);
while(*--original == junk);
*(original + 1) = '0円';
return string;
}
char * create_unique_ref()
{
char * hash = malloc(sizeof(char) * 80);
unsigned long crc = crc32(0L, Z_NULL, 0);
unsigned int length = 30;
unsigned char buf[30] = "";
RAND_bytes(buf, length);
unsigned char *c = NULL;
for (c = buf; *c != '0円'; c++) {
crc = crc32(crc, buf, length);
}
sprintf(hash, "%x", (int)crc);
char *p = NULL;
for (p = hash; *p; *p = (char)toupper(*p), p++);
return hash;
}
A side note is that this does crash on phpmyadmin, I have yet to discover why.
-
1\$\begingroup\$ This is actually a C++ program, but it has some C-isms in it. Regardless, this is not a code review site. But I will throw you a bone: check out Boost and C++ 11 for newer libraries and features that might make your code much simpler. \$\endgroup\$user31517– user315172014年11月07日 17:40:44 +00:00Commented Nov 7, 2014 at 17:40
-
\$\begingroup\$ @snowman this is a testing ground I'm using it happens to be a C++ project. This code is being moved to a C Apache module. \$\endgroup\$Flosculus– Flosculus2014年11月07日 18:44:23 +00:00Commented Nov 7, 2014 at 18:44
-
\$\begingroup\$ You're wondering why an interpreted language which aims is to make life easier is... easier to write than a language which aims is to make everything fast and efficient? Your answer in right in your question. \$\endgroup\$Olivier Pons– Olivier Pons2014年12月16日 11:43:42 +00:00Commented Dec 16, 2014 at 11:43
-
\$\begingroup\$ @OlivierPons I added the note at the bottom of the question to clarify that I know that now. \$\endgroup\$Flosculus– Flosculus2014年12月16日 11:55:17 +00:00Commented Dec 16, 2014 at 11:55
-
\$\begingroup\$ @OlivierPons While what you say is true, please note that the OP isn't experienced in C. There might be other - less obvious - reasons for the size difference in the two snippets. \$\endgroup\$yannis– yannis2014年12月16日 14:09:55 +00:00Commented Dec 16, 2014 at 14:09
1 Answer 1
Here are some things that may help your improve your code.
Decide which language you're using
The code looks mostly like C, but won't compile as C because it has a few things that are C++. I'm going to assume you're really intending to write C for this review. If it were C++, I'd suggest a completely different approach.
Remove things that aren't C
Remove #include <iostream>
and using namespace std
because they are not C.
Use your typedef
s
The code correctly and reasonably defines a typedef
for http_cookie_t
but then uses lines like this:
cookies[i] = (http_cookie_t *)malloc(sizeof(struct http_cookie_t));
First, that won't (or shouldn't) compile because the struct actually has a leading underscore (_http_cookie_t
), and second, you can use the typedef
to simplify the code:
cookies[i] = (http_cookie_t *)malloc(sizeof(http_cookie_t));
Use pointers rather than indexing
In C, it's usually more efficient to use pointers rather than indexing. Even if you don't care too much about the performance of the code, this also makes the code more idomatic C. You almost certainly don't want to use both except in very special circumstances. So in cookies_get_length
, for instance, the code is currently this:
int cookies_get_length(char * s)
{
int i;
for (i=0; s[i]; s[i]==';' ? i++ : *s++);
return i + 1;
}
This is broken code because of the last part of the for
clause. The expression s[i]==';' ? i++ : *s++
says that if s[i]
is a semicolon, increment i
, otherwise increment s*
. If you feed this code this string ";"
it will never terminate. If you feed it a NULL
pointer, it will crash. Do you see why? A better way to write the loop would be this:
int cookies_get_count(const char * s)
{
int cookie_count = 0;
if (s == NULL)
return cookie_count;
for (++cookie_count; *s; ++s)
if (*s == ';')
++cookie_count;
return cookie_count;
}
Note that the code bails out early if it gets a NULL
pointer, that the loop is much simpler and that the name is more descriptive -- it's really a count of cookies, rather than the length. Also, the parameter is declared as const char *s
.
Don't cast result of malloc
The malloc
call returns a void *
and one of the special aspects of C is that such a type does not need to be cast to be converted into another pointer type. So for example, this line:
http_cookie_t ** cookies = (http_cookie_t **)malloc(length * sizeof(http_cookie_t));
could be shortened to this:
http_cookie_t ** cookies = malloc(length * sizeof(http_cookie_t));
However, you should check the return value to assure it isn't NULL
because that is an indication that the program has run out of memory and is a serious error.
Eliminate unused variables
In the cookies_separate_string
routine, the parameter length
is never used and can be eliminated.
Use const
where practical
In all of your functions, the last parameter is a pointer to a string. In all cases, that string is unmodified by the function and so can (and should) be declared as const char *
.
Consider a more efficient algorithm
Right now, the code makes many passes through the same string, but it's quite possible to do everything in a single pass. Such an algorithm would likely shorten the code considerably.
Don't leak memory
This code calls malloc
several places but never free
. This means that the routines are leaking memory. It would be much better to get into the habit of using free
for each call to malloc
and then assuring that you don't leak memory.
-
\$\begingroup\$ Thanks for your review. I have updated the question with the code after I refactored it a few weeks ago. It is the actual code intended to be used so an updated review of this would be appreciated. \$\endgroup\$Flosculus– Flosculus2014年12月16日 16:31:50 +00:00Commented Dec 16, 2014 at 16:31