4
\$\begingroup\$

The title should be self-explanatory. This is a simple URL parser I wrote in C. The function takes a URL from the user and produces a struct containing the information needed to request the resource at that URL over HTTP(S).

enum url_protocol {
 PROTOCOL_HTTP,
 PROTOCOL_HTTPS
};
/*
 * Components of a URL as filled in by parse_url.
 * Every string is heap-allocated by parse_url and released by free_url;
 * the struct itself is supplied by the caller.
 */
struct url {
 /* Protocol to use */
 enum url_protocol protocol;
 /* Credentials; NULL when the URL does not carry them */
 char *username;
 char *password;
 /* Host and service (the text after the host's ':'); service is NULL when absent */
 char *host;
 char *service;
 /* Path to request; parse_url stores "/" when the URL has none */
 char *path;
};
/* Frees the strings held by url; declared early because parse_url calls it on its error path. */
void
free_url(struct url *url);
/*
 * Copy the len bytes at src into a newly allocated string stored in *dst.
 * Returns 0 on success, -1 if the span is empty or the allocation fails.
 */
static int
dup_span(char **dst, const char *src, size_t len)
{
	if (len == 0) {
		return -1;
	}
	*dst = strndup(src, len);
	return *dst ? 0 : -1;
}

/*
 * Parse an http or https URL of the form
 *
 *     scheme://[username[:password]@]host[:service][/path][?query][#fragment]
 *
 * start  NUL-terminated URL text; it is only read, never modified.
 * url    Output. It is always overwritten; on success its strings are
 *        heap-allocated and must be released with free_url().
 *
 * An IPv6 literal host is stored with its surrounding brackets.  The path
 * keeps everything from the end of the authority onwards (query and
 * fragment included) and always starts with '/'.
 *
 * Returns 0 on success.  Returns -1 if the URL is malformed, uses another
 * scheme, has an empty component, or memory runs out; *url is then zeroed,
 * so it holds nothing to free and a later free_url() on it is harmless.
 */
int
parse_url(char *start, struct url *url)
{
	const char *cur, *end, *delim, *auth_end;
	size_t scheme_len, tail_len;

	memset(url, 0, sizeof(*url));
	if (!start) {
		goto err;
	}

	/* Find the end of the scheme */
	end = strchr(start, ':');
	if (!end) {
		goto err;
	}

	/*
	 * Require an exact scheme match.  Comparing only the first
	 * (end - start) bytes would also accept prefixes such as "h" or "".
	 */
	scheme_len = (size_t)(end - start);
	if (scheme_len == 4 && !memcmp(start, "http", 4)) {
		url->protocol = PROTOCOL_HTTP;
	} else if (scheme_len == 5 && !memcmp(start, "https", 5)) {
		url->protocol = PROTOCOL_HTTPS;
	} else {
		goto err;
	}

	/* The scheme must be followed by "//"; || stops at the NUL if the string is short */
	if (end[1] != '/' || end[2] != '/') {
		goto err;
	}
	cur = end + 3;

	/*
	 * The authority ends at the first '/', '?' or '#'.  All delimiter
	 * searches below are confined to it so that an '@' or ':' inside the
	 * path or query is not mistaken for credentials or a port.
	 */
	auth_end = cur + strcspn(cur, "/?#");

	/* Parse credentials */
	end = memchr(cur, '@', (size_t)(auth_end - cur));
	if (end) {
		delim = memchr(cur, ':', (size_t)(end - cur));
		if (delim) { /* Username and password */
			if (dup_span(&url->username, cur, (size_t)(delim - cur))) {
				goto err;
			}
			++delim;
			if (dup_span(&url->password, delim, (size_t)(end - delim))) {
				goto err;
			}
		} else { /* Only username */
			if (dup_span(&url->username, cur, (size_t)(end - cur))) {
				goto err;
			}
		}
		cur = end + 1;
	}

	/* Locate the ':' that separates host and service, if there is one */
	end = auth_end;
	if (*cur == '[') {
		/* IPv6 literal: its own colons are not the separator */
		const char *close = memchr(cur, ']', (size_t)(end - cur));

		if (!close) {
			goto err; /* unterminated literal */
		}
		if (close + 1 == end) {
			delim = NULL;
		} else if (close[1] == ':') {
			delim = close + 1;
		} else {
			goto err; /* junk between ']' and the end of the host */
		}
	} else {
		delim = memchr(cur, ':', (size_t)(end - cur));
	}

	if (delim) { /* Host and service */
		if (dup_span(&url->host, cur, (size_t)(delim - cur))) {
			goto err;
		}
		++delim;
		if (dup_span(&url->service, delim, (size_t)(end - delim))) {
			goto err;
		}
	} else { /* Only host */
		if (dup_span(&url->host, cur, (size_t)(end - cur))) {
			goto err;
		}
	}

	/* Default path is a single / */
	if (*auth_end == '/') {
		url->path = strdup(auth_end);
	} else if (*auth_end == '\0') {
		url->path = strdup("/");
	} else {
		/* Authority ended at '?' or '#': the path is empty, so prefix "/" */
		tail_len = strlen(auth_end);
		url->path = malloc(tail_len + 2);
		if (url->path) {
			url->path[0] = '/';
			memcpy(url->path + 1, auth_end, tail_len + 1);
		}
	}
	if (!url->path) {
		goto err;
	}
	return 0;

err:
	free_url(url);
	/* Clear the freed pointers so the caller cannot double-free them */
	memset(url, 0, sizeof(*url));
	return -1;
}
/*
 * Release every string owned by url and reset the pointers to NULL.
 *
 * The struct itself is not freed and its protocol field is left alone.
 * Resetting the pointers makes a repeated call on the same struct a no-op
 * instead of a double free.  A NULL url is ignored.
 */
void
free_url(struct url *url)
{
	if (!url) {
		return;
	}

	/* free(NULL) is a no-op, so absent components need no special case */
	free(url->username);
	url->username = NULL;
	free(url->password);
	url->password = NULL;
	free(url->host);
	url->host = NULL;
	free(url->service);
	url->service = NULL;
	free(url->path);
	url->path = NULL;
}
```
asked Oct 11, 2020 at 22:57
\$\endgroup\$
1
  • \$\begingroup\$ Is it meant to be RFC 1738 compatible? \$\endgroup\$ Commented Oct 12, 2020 at 10:15

1 Answer 1

3
\$\begingroup\$

Just some minor things in your code. It is a bit strange that you have a function called free_url but no matching init_url that allocates the struct and does the memset. My suggestion is to add such a function and move the memset out of parse_url into it:

int
parse_url(char *start, struct url *url)
{
 char *end, *delim;
 memset(url, 0, sizeof(*url)); <- Move this to init_url

And your function

 /*
  * Allocate a zero-filled struct url on the heap.
  *
  * Returns the new struct, or NULL if the allocation fails.  The caller
  * releases its strings with free_url() and the struct itself with free().
  */
 struct url *init_url(void)
 {
     /* calloc allocates and zero-fills in one step; no cast is needed in C */
     struct url *u = calloc(1, sizeof *u);

     return u;
 }

My recommendation is to build a good suite of unit tests for anything that parses user input; it will make your code more robust and leave it with fewer errors.

Hope it helps

answered Oct 12, 2020 at 9:51
\$\endgroup\$
1
  • \$\begingroup\$ There is no init_url on purpose: as it is used now, the struct is allocated on the caller's stack instead of on the heap. The only job of free_url is to free the strings. \$\endgroup\$ Commented Oct 12, 2020 at 18:43

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.