I am trying to write an HTTP/1.0 (RFC 1945) parser in C, so I started with the Request-Line. Ideally, the parser should return one of the three:
- The request is valid.
- The request is malformed, but appended data could make it valid.
- The request is malformed and no appended data can make it valid.
The idea is to know whether there is still a point receiving data of a request from a connection. But internal errors can happen, and I have chosen some limitations, so error are also returned.
I present 3 files: the header file of the Request-Line parser (httpParse.h
), the implementation (httpParse.c
) and the file containing the error definitions (error.h
). The library should not compile, because the functions is_http_method()
and is_http_uri()
are not here defined, since they are more about complex BNF. They'd be fined in httpChar.h
.
httpParse.h
/* This file is of the project MarkServer, which is under the MIT licence. For
* more information, see github.com/Schilive/MarkServer.
*
* This file was originally created in 2023年10月05日
*/
/* This file declares functions to parse HTTP requests, but not responses.
*/
#ifndef MARKSERVER_HTTPPARSE_H
#define MARKSERVER_HTTPPARSE_H
#include <stdio.h>
#include "error.h"
#define HTTP_METHOD_MAX_LENGTH 255
#define HTTP_URI_MAX_LENGTH 1023
struct http_version {
unsigned int major;
unsigned int minor;
};
/* The parsed Request-Line. */
struct http_request_line {
char method[HTTP_METHOD_MAX_LENGTH + 1];
char uri[HTTP_URI_MAX_LENGTH];
struct http_version http_version;
};
/* Parses the Request-Line in a string, checking the logic.
*
* @param [in] fName The filename of the file starting with the
* Request-Line.
* @param [out] pRes The parsed Request-Line. Only changed if no
* error is returned.
* @return The error value.
* @except ERROR_INVALID_PARAMETER.
* @except ERROR_REQUEST_INCOMPLETE
* @except ERROR_INTERNAL
* @except ERROR_REQUEST_TOO_LONG
* @except ERROR_BAD_REQUEST
*/
enum error parse_http_request_line( const char *restrict fName,
struct http_request_line *restrict pRes);
#endif /* MARKSERVER_HTTPPARSE_H */
httpParse.c
/* This file is of the project MarkServer, which is under the MIT licence. For
* more information, see github.com/Schilive/MarkServer.
*
* This file was originally created in 2023年10月05日
*/
/* This file implements the declarations on "httpParse.h".
*/
#include "httpParse.h"
#include <string.h>
#include <stdbool.h>
#include "httpChar.h"
#include <stdlib.h>
#include <limits.h>
#define HTTP_VERSION_TOKEN_MAX_LENGTH 63
#define HTTP_REQUEST_LINE_MAX_LENGTH (HTTP_METHOD_MAX_LENGTH + 1 + \
HTTP_URI_MAX_LENGTH + 1 + HTTP_VERSION_TOKEN_MAX_LENGTH)
struct request_line_tokens {
char method[HTTP_METHOD_MAX_LENGTH + 1];
char uri[HTTP_URI_MAX_LENGTH + 1];
char version[HTTP_VERSION_TOKEN_MAX_LENGTH + 1];
};
/* Returns the next character to be read, without changing the file pointer. */
static int seek_file(FILE *f)
{
int ch = fgetc(f);
if (ch != EOF)
ungetc(ch, f);
return ch;
}
/* Buffers the Request-Line of the start of the file, as a string, whether
* the content of the fields are correct or not. That is, until CRLF.
*
* @param f The file to be read.
* @param reqLine The buffer.
* @param maxLen The maximum length of the buffer. If the buffer is too
* small, then ERROR_REQUEST_TOO_LONG is returned.
* @except ERROR_REQUEST_INCOMPLETE
* @except ERROR_INTERNAL
* @except ERROR_REQUEST_TOO_LONG
*/
static enum error read_request_line(FILE *restrict f, char *restrict reqLine)
{
char reqLineTxt[HTTP_REQUEST_LINE_MAX_LENGTH];
size_t reqLineLen = 0;
while (1) {
int ch = fgetc(f);
if (ch == EOF)
return ferror(f)
? ERROR_REQUEST_INCOMPLETE
: ERROR_INTERNAL;
if (reqLineLen == HTTP_REQUEST_LINE_MAX_LENGTH)
return ERROR_REQUEST_TOO_LONG;
if (ch == '\r' && seek_file(f) == '\n')
break;
reqLineTxt[reqLineLen] = (char)ch;
reqLineLen++;
}
memcpy(reqLine, reqLineTxt, reqLineLen);
reqLine[reqLineLen] = 0;
return ERROR_SUCCESS;
}
/* Tokenizes the Request-Line string.
*
* @except ERROR_BAD_REQUEST
*/
static enum error tokenize_request_line_str(const char *restrict reqLine,
struct request_line_tokens *restrict pRes)
{
char *sp1 = strchr(reqLine, ' ');
if (!sp1)
return ERROR_BAD_REQUEST;
char *sp2 = strchr(sp1 + 1, ' ');
if (!sp2)
return ERROR_BAD_REQUEST;
size_t methodLen = sp1 - reqLine;
size_t uriLen = sp2 - (sp1 + 1);
size_t versLen = strlen(sp2 + 1);
memcpy(pRes->method, reqLine, methodLen);
memcpy(pRes->uri, sp1 + 1, uriLen);
memcpy(pRes->version, sp2 + 1, versLen);
pRes->method[methodLen] = 0;
pRes->uri[uriLen] = 0;
pRes->version[versLen] = 0;
return ERROR_SUCCESS;
}
/* Returns whether the method format is correct. */
static bool is_valid_method(const char *str)
{
return is_http_method(str, strlen(str));
}
/* Returns whether the URI format is correct. */
static bool is_valid_uri(const char *str)
{
return is_http_requestURI(str, strlen(str));
}
/* Parses the HTTP version string, verifying that its format is correct.
* Returns the major and minor versions.
*
* @except ERROR_BAD_REQUEST
*/
static enum error parse_http_version(const char *restrict vers,
unsigned long *restrict pMajor,
unsigned long *restrict pMinor)
{
char prefix[] = "HTTP/";
size_t prefixLen = sizeof(prefix) - 1;
if (strncmp(vers, prefix, 5) != 0)
return ERROR_BAD_REQUEST;
char *pPeriod = strchr(vers, '.');
if (!pPeriod)
return ERROR_BAD_REQUEST;
char *next;
errno = 0;
unsigned long ulMajor = strtoul(vers + prefixLen, &next, 10);
if (errno)
return ERROR_BAD_REQUEST;
if (next != pPeriod || next == vers)
return ERROR_BAD_REQUEST;
unsigned long ulMinor = strtoul(pPeriod + 1, &next, 10);
if (errno)
return ERROR_BAD_REQUEST;
if (*next != 0 || next == pPeriod + 1)
return ERROR_BAD_REQUEST;
if (ulMajor >= ULONG_MAX || ulMinor >= ULONG_MAX)
return ERROR_BAD_REQUEST;
*pMajor = (unsigned int)ulMajor;
*pMinor = (unsigned int)ulMinor;
return ERROR_SUCCESS;
}
/* Parse tokens into the struct 'request_line'
*
* @except ERROR_BAD_REQUEST
*/
static enum error parse_request_line_tokens(
const struct request_line_tokens *restrict pReqLineToks,
struct http_request_line *restrict pRes)
{
if (!is_valid_method(pReqLineToks->method))
return ERROR_BAD_REQUEST;
if (!is_valid_uri(pReqLineToks->uri))
return ERROR_BAD_REQUEST;
unsigned long major, minor;
enum error err =
parse_http_version(pReqLineToks->version, &major, &minor);
if (err)
return err;
strcpy(pRes->method, pReqLineToks->method);
strcpy(pRes->uri, pReqLineToks->uri);
pRes->http_version.major = major;
pRes->http_version.minor = minor;
return ERROR_SUCCESS;
}
enum error parse_http_request_line(const char *restrict fName,
struct http_request_line *restrict pRes)
{
if (!fName || !pRes)
return ERROR_INVALID_PARAMETER;
FILE *f = fopen(fName, "rb");
if (!f)
return ERROR_INVALID_PARAMETER;
char reqLineStr[HTTP_REQUEST_LINE_MAX_LENGTH + 1];
enum error err = read_request_line(f, reqLineStr);
if (err)
return err;
struct request_line_tokens reqLineToks;
err = tokenize_request_line_str(reqLineStr, &reqLineToks);
if (err)
return err;
err = parse_request_line_tokens(&reqLineToks, pRes);
if (err)
return err;
return ERROR_SUCCESS;
}
error.h
/* This file is of the project MarkServer, which is under the MIT licence. For
* more information, see github.com/Schilive/MarkServer.
*
* This file was originally created in 2023年10月05日
*/
#ifndef MARKSERVER_ERROR_H
#define MARKSERVER_ERROR_H
enum error {
ERROR_SUCCESS,
ERROR_REQUEST_INCOMPLETE,
ERROR_INVALID_PARAMETER,
ERROR_REQUEST_TOO_LONG, /* Something of the HTTP request was considered
* too long. */
ERROR_INTERNAL,
ERROR_BAD_REQUEST /* Request is malformed by the standard. */
};
#endif /* MARKSERVER_ERROR_H */
-
\$\begingroup\$ This is a great question, but can you please add error.h to the code to be reviewed? \$\endgroup\$pacmaninbw– pacmaninbw ♦2023年10月19日 12:54:51 +00:00Commented Oct 19, 2023 at 12:54
-
\$\begingroup\$ @pacmaninbw, I will, And thank you. I think I was going to add it and forgot. \$\endgroup\$Schilive– Schilive2023年10月19日 17:00:49 +00:00Commented Oct 19, 2023 at 17:00
1 Answer 1
read_request_line
is too complicated. There is no need to read it character by character. You are reading fromFILE *
, so a simplefgets
will do the job. Just test that the line ends with\r\n
, and you are done.Too many copies made.
I don't see a need for
recLineTxt
. Reading directly intoreqLine
is perfectly OK.I don't see a need for
struct http_request_line
to contain actual strings. Pointers intoreqLine
will work (mutatingreqLine
is perfectly OK too), e.g.pRes->method = reqLine; sp = strchr(reqLine, ' '); if (!sp) { return ERROR_BAD_REQUEST; } *sp++ = 0; // etc
Ditto for
reqLineToks
. You may parse the line directly intopRes
, soparse_request_line_tokens
wouldn't need allstrcpy
s and assignments.parse_http_version
can be streamlined. There is no need to computepPeriod
separately. Considerstrtoul(vers + prefixLen, &pPeriod, 10)
test for*pPeriod == '.'
, and haveulMinor = strtoul(pPeriod + 1, &next, 10)
Also, the cast of
unsigned long
tounsigned int
, followed by an assignmebt tounsigned long
is weird.is_http_requestURI
, which in my opinion is the most challenging and interesting part, is missing.
Edit: a concrete example. This is how I would approach it (forgive the broad strokes, error checking is omitted for clarity):
struct http_request {
char * method;
char * uri;
unsigned long major;
unsigned long minor;
};
static enum error parse_http_request_line(char * line, struct http_request * req)
{
req->method = line;
char * sp = strchr(line, ' ');
*sp++ = 0;
req->uri = sp;
sp = strchr(line, ' ');
*sp++ = 0
parse_http_version(sp, req);
return ERROR_SUCCESS;
}
enum error handle_request_line(FILE * src, struct http_request_line * req)
{
char request[HTTP_REQUEST_LINE_MAX_LENGTH + 1];
fgets(request, HTTP_REQUEST_LINE_MAX_LENGTH + 1, src);
struct http_request req;
parse_http_request_line(line, & req);
// Actual busyness logic here. The caller only sets up a
// file pointer, and does not care about the request.
}
-
\$\begingroup\$ Thank you for the review! If you'd like to see
is_http_requestURI()
, you can checkhttpChar.c/h
on here. It is not pretty though. About the copiesin functions, it is true: they are static functions, not interface, so there is no need. Aboutfgets()
, I did not remember it is -- very useful. \$\endgroup\$Schilive– Schilive2023年10月19日 18:11:55 +00:00Commented Oct 19, 2023 at 18:11 -
\$\begingroup\$ I just did not understand what you said about pointers about
http_request_line
. Do you mean having points to the start of the field of the Request-Line? \$\endgroup\$Schilive– Schilive2023年10月19日 18:16:31 +00:00Commented Oct 19, 2023 at 18:16 -
\$\begingroup\$ @Schilive See edit. \$\endgroup\$vnp– vnp2023年10月19日 18:37:58 +00:00Commented Oct 19, 2023 at 18:37
-
\$\begingroup\$ vnp, thank you for the example. But wouldn't I have to
malloc()
the string to do it? \$\endgroup\$Schilive– Schilive2023年10月19日 19:02:15 +00:00Commented Oct 19, 2023 at 19:02 -
\$\begingroup\$ No, you don't have to. Essentially, split
reqLine
into 3 fragments in-place, by placing nul bytes where the spaces were, and keep the pointers in thestruct http_request_line
. If I am still unclear, let me know, and I will give a more complete example. \$\endgroup\$vnp– vnp2023年10月19日 19:40:15 +00:00Commented Oct 19, 2023 at 19:40