This is a follow up to: struct sockaddr_storage initialization by network format string
First of all thanks to @Haris, @Toby Speight, @G. Sliepen and @chux - Reinstate for their help. I learned a lot and try to use it for future projects.
I removed the Addr
type for this code to keep it clean and based on the basic structures used in network programming.
I've split the code into miscellaneous code in misc.c
and each function returning a socket in this case has its own file. There is an internal header file netstring_internal.h
containing definitions for misc.c
and the user header file netstring.h
.
Hope I did realize your suggestions as good as possible. If you think I missed something you can let me know.
netstring_internal.h:
- I put the macro for getting the size of an array in it. Very useful and simple solution
- defining a constant for the host buffer size
#ifndef _INTERNAL_NETSTRING_H /* BEGIN INCLUDE GUARD */
#define _INTERNAL_NETSTRING_H
#if(_POSIX_C_SOURCE != 200112L)
#undef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200112L
#endif
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#define DOMAIN_BUFSIZ 256 /* used vor netstring parser */
#define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0]))
/* make protocl_table defined in misc.c visible to other files */
extern const char *protocol_table[];
/* numerical identifiert for the protocol table */
enum netstr_protocols {tcp, tcp4, tcp6, udp, udp4, udp6};
/* error number returned by all functions beginning with -2...-n because functions will return and socket and socket return -1 on error otherwise will be > 0 */
enum netstr_error {
NETSTR_PROTO = -2,
NETSTR_HOST = -3,
NETSTR_PORT = -4,
NETSTR_INET_PTON = -5,
};
/* netstring internal */
/*! @function
@param str1 null terminated first string to compare
@param str2 null terminated first string to compare
@param delim charcter until compare happens
*/
bool netstr_compare_until_delim(const char *str1, const char *str2, char delim);
/*! @function
@abstract internal function to compare beginning of string to a lookup table containing predefined protocol formats
@param str const char pointer to string
@return int: index of to be used with protocol_table
*/
int netstr_check_protocol(const char **str);
/*! @function
@abstract parsing string for host as (ipv6 format, ipv4 dotted notation or domain name), writing everything valid to buffer
@param host_start: double char pointer with starting location for the host part
@param dest: pointer to host buffer
@return index of to be used with protocol_table
*/
int netstring_write_domain(const char **host_start, char *dest);
/*! @function
@abstract interal function that takes the assumed string location of port and translating it to int if valid
@param str double char pointer to char array (used to update it to the address behind end of string number)
@return port if port > 0 and port < 65536 and -1 on error
*/
int netstr_check_port(const char **str);
/*! @function
@abstract internal function used to check for domain lookup and overwriting if any
@param host pointer to the buffer containing the host
@param sz size of host buffer
@return 0 on succes or NETSTR_HOST error value
*/
int netstr_resolve_host(char *host, int sz);
/*! @function
@abstract internal function used to get the appropriate socket type for the socket
@param proto: internal defined protocol value
@return socket type defined by <sys/socket.h>
*/
int netstr_get_proto(int proto);
/*! @function
@abstract internal function to setup struct sockaddr_storage by ipv6 flag, host and port
@param *sockaddr_storage pointer to struct sockaddr_storage
@param ipv6 bool to check if use ipv6 or ipv4 structure
@param host null terminated host buffer
@param port port number
@return 0 on success, -1 if error
*/
int netstr_setup_sockaddr(void *sockaddr_storage, bool ipv6, const char*host, int port);
#endif /* END INCLUDE GUARD */
misc.c:
- using simplified
compare_until_delim
by Toby Speight - split down into some functions S can use for this format
- I use braces around every
if
/else if
/else
- I use
strtol
with checking forerrno
for error - I removed the need of
stncpy_s
becausenetstring_write_domain
will write host/domain into buffer while parsing
#include "netstring_internal.h"
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <netdb.h>
#include <arpa/inet.h>
/* protocol table containing supported protocol for socket configuration */
const char *protocol_table[] =
{
"tcp:",
"tcp4:",
"tcp6:",
"udp:",
"udp4:",
"upd6:",
};
/*! @function
@param str1 null terminated first string to compare
@param str2 null terminated first string to compare
@param delim charcter until compare happens
*/
bool netstr_compare_until_delim(const char *str1, const char *str2, char delim)
{
const char end[] = "0円0円";
while (*str1 || *str2) {
if (*str1 == delim) str1 = end;
if (*str2 == delim) str2 = end;
if (*str1++ != *str2++) return false;
}
return true;
}
/*! @function
@abstract internal function to compare beginning of string to a lookup table containing predefined protocol formats
@param str const char pointer to string
@return int: index of to be used with protocol_table
*/
int netstr_check_protocol(const char **str)
{
int i;
int table_size = sizeof(protocol_table) / sizeof(*protocol_table);
for(i = 0; i < table_size ; i++) {
if(netstr_compare_until_delim(protocol_table[i], *str, ':')) {
*str = *str + strlen(protocol_table[i]);
return i;
}
}
return -1; /* not found */
}
/*! @function
@abstract parsing string for host as (ipv6 format, ipv4 dotted notation or domain name), writing everything valid to buffer
@param host_start: double char pointer with starting location for the host part
@param dest: pointer to host buffer
@return index of to be used with protocol_table
*/
int netstring_write_domain(const char **host_start, char *dest)
{
bool ipv6 = false;
const char *start = *host_start;
*dest = '0円';
/* if first char after protocol is [ check for ] to say it is an ipv6 address */
if(*start == '[') {
/* go latest until null byte */
start++; /* go behind '[' */
while(*start != '0円') {
/* if enclosure bracket ] was found set ipv6 true and increment service_start */
if(*start == ']') {
*dest = '0円';
ipv6 = true;
start++;
break;
}
*dest++ = *start++;
}
/* service_start should point to ':' in format string right before service/port */
}
/* ip not ipv6 assume ipv4 or domain name */
if(ipv6 == false) {
/* go latest until null byte */
while(*start != '0円') {
/* service_start points to seperator ':', increment domain_p first and set to null byte */
if(*start == ':') {
*dest = '0円';
break;
}
/* write a copy to domain_buffer */
*dest++ = *start++;
}
/* service_start should point to ':' in format string right before service/port too */
}
if(*start != ':') {
return NETSTR_HOST; /* return position in host string */
}
*host_start = ++start; /* overwrite external string with ne location */
return ipv6;
}
/*! @function
@abstract interal function that takes the assumed string location of port and translating it to int if valid
@param str double char pointer to char array (used to update it to the address behind end of string number)
@return port if port > 0 and port < 65536 and -1 on error
*/
int netstr_check_port(const char **str)
{
long port;
port = strtol(*str, (char **) str, 10);
if (errno == EINVAL || errno == ERANGE)
return -1;
if (port < 1 && port > 65535)
return -1;
return (int) port;
}
/*! @function
@abstract internal function used to check for domain lookup and overwriting if any
@param host pointer to the buffer containing the host
@param sz size of host buffer
@return 0 on succes or NETSTR_HOST error value
*/
int netstr_resolve_host(char *host, int sz)
{
struct hostent *entry = gethostbyname(host);
if (entry == NULL)
return NETSTR_HOST;
strncpy(host, inet_ntoa(*(struct in_addr*)entry->h_addr_list[0]), sz);
return 0;
}
/*! @function
@abstract internal function used to get the appropriate socket type for the socket
@param proto: internal defined protocol value
@return socket type defined by <sys/socket.h>
*/
int netstr_get_proto(int proto)
{
switch(proto) {
case tcp:
case tcp4:
case tcp6:
proto = SOCK_STREAM;
break;
case udp:
case udp4:
case udp6:
proto = SOCK_DGRAM;
break;
default: /* this should never happen */
return proto = -1;
}
return proto;
}
/*! @function
@abstract internal function to setup struct sockaddr_storage by ipv6 flag, host and port
@param *sockaddr_storage pointer to struct sockaddr_storage
@param ipv6 bool to check if use ipv6 or ipv4 structure
@param host null terminated host buffer
@param port port number
@return 0 on success, -1 if error
*/
int netstr_setup_sockaddr(void *sockaddr_storage, bool ipv6, const char *host, int port)
{
void *in_addr = NULL;
if (ipv6 == true) {
((struct sockaddr_in6 *) sockaddr_storage)->sin6_family = AF_INET6;
((struct sockaddr_in6 *) sockaddr_storage)->sin6_port = htons(port);
in_addr = &((struct sockaddr_in6 *) sockaddr_storage)->sin6_addr;
}
else { /* ipv4 */
((struct sockaddr_in *) sockaddr_storage)->sin_family = AF_INET;
((struct sockaddr_in *) sockaddr_storage)->sin_port = htons(port);
in_addr = &((struct sockaddr_in *) sockaddr_storage)->sin_addr;
}
if (inet_pton(((struct sockaddr*)sockaddr_storage)->sa_family, host, in_addr) != 1) {
return -1;
}
return 0;
}
netstring_to_addrinfo.c
- I implemented one function that uses
struct addrinfo
//#include <stdnet.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <unistd.h>
#include <time.h>
#include "netstring_internal.h"
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
/*! @function
@abstract returns a configured socket for given network format string and setup of struct addrinfo (freeing struct addrinfo is for user)
@param netstr network format string like 'proto:host:port' e.g. tcp:www.google.de:443 or udp:8.8.8.8:dns
@param storage: pointer to stuct sockaddr_storage
@return configured socket
*/
int netstring_to_addrinfo(const char *netstr, struct addrinfo **info)
{
int proto = -1, ipv6 = 0, err = 0;
char host[DOMAIN_BUFSIZ];
struct addrinfo hint;
memset(&hint, 0, sizeof hint);
/* get protocol if any otherwise return with error */
proto = netstr_check_protocol(&netstr);
if(proto < 0) {
err = NETSTR_PROTO;
goto CLEANUP_ERROR;
}
/* write host (domain, ip or ipv6) to host buffer and return on error */
ipv6 = netstring_write_domain(&netstr, host);
if (ipv6 == NETSTR_HOST) {
err = ipv6;
goto CLEANUP_ERROR;
}
/* set hint based on ipv6 or ipv4 */
if (ipv6 == true) {
hint.ai_family = AF_INET6;
} else {
hint.ai_family = AF_INET;
}
/* get the correct socket type from protocl found in netstr */
proto = netstr_get_socktype(proto);
if(proto == -1) {
err = NETSTR_PROTO;
goto CLEANUP_ERROR;
}
hint.ai_socktype = proto;
/* lookup domain if any and return on error */
err = getaddrinfo(host, netstr, &hint, info);
if (err) {
err = errno;
goto CLEANUP_ERROR;
}
/* setup socket with infos from getaddrinfo */
int sock;
sock = socket((*info)->ai_family, (*info)->ai_socktype, (*info)->ai_protocol);
if (sock == -1) {
err = errno;
freeaddrinfo((*info));
goto CLEANUP_ERROR;
}
return sock;
CLEANUP_ERROR:
return err;
}
netstring_to_socket.c:
- I kept the version with
struct sockaddr_storage
anyway
#include "netstring_internal.h"
/*! @function
@abstract returns a configured socket for given network format string and setup of struct sockaddr_storage
@param netstr network format string like 'proto:host:port'
@param storage: pointer to stuct sockaddr_storage
@return configured socket
*/
int netstring_to_socket(const char *ipstr, struct sockaddr_storage *storage)
{
int proto = -1, port = -1, err = 0;
bool ipv6 = 0;
char host[DOMAIN_BUFSIZ];
/* get protocol if any otherwise return with error */
proto = netstr_check_protocol(&ipstr);
if (proto == -1) {
err = NETSTR_PROTO;
goto CLEANUP_ERROR;
}
/* write host (domain, ip or ipv6) to host buffer and return on error */
ipv6 = netstring_write_domain(&ipstr, host);
if (ipv6 == NETSTR_HOST) {
err = NETSTR_HOST;
goto CLEANUP_ERROR;
}
/* check if port is a valid port */
if ((port = netstr_check_port(&ipstr)) < 0) {
err = NETSTR_PORT;
goto CLEANUP_ERROR;
}
/* if not ipv6 expecting hostname or ip address so resolve hostname and overwrite current host */
if(ipv6 == false) {
if(netstr_resolve_host(host, sizeof host)) {
err = NETSTR_HOST;
goto CLEANUP_ERROR;
}
}
/* setting up sockaddr_storage and return on error if any */
err = netstr_setup_sockaddr(storage, ipv6, host, port);
if (err) {
err = NETSTR_INET_PTON;
goto CLEANUP_ERROR;
}
/* get the correct socket type from protocol found in netstr */
proto = netstr_get_socktype(proto);
if (proto == -1) {
err = NETSTR_PROTO;
goto CLEANUP_ERROR;
}
/* setup socket and return it */
return socket(((struct sockaddr_in *) storage)->sin_family, proto, 0);
CLEANUP_ERROR:
return err;
}
-
2\$\begingroup\$ I have rolled back Rev 3 → 2. Please see What to do when someone answers. \$\endgroup\$Sᴀᴍ Onᴇᴌᴀ– Sᴀᴍ Onᴇᴌᴀ ♦2023年03月02日 19:36:23 +00:00Commented Mar 2, 2023 at 19:36
5 Answers 5
Why not always use getaddrinfo()
?
I kept the version with
struct sockaddr_storage
anyway
Why? It is inferior in every possible way, except that it might work on some very old/rare systems that do have struct sockaddr_in6
and inet_pton()
but not getaddrinfo()
. Unless you really need to support those, I would not bother with it.
Make sure names are precise
Your netstring_to_addrinfo()
does more than convert a string to "an" addrinfo
, it also creates a socket. Either I would put that in the name as well (netstring_to_socket_and_addrinfo()
), or I would have it not create a socket to begin with. An addrinfo
has all the information needed to create a proper socket for the address it holds. But more importantly, getaddrinfo()
returns a pointer to a list of addrinfo
s, since more than one address can be associated with a hostname. You cannot just create a socket for the first addrinfo
in the list, as the next one might be for a different address family.
-
\$\begingroup\$ But wouldn't I try to connect to one until it works? I now wrote a functions that goes through the linked list until
connect
returns 0 and then return a socket for this connection. Would that be better then? \$\endgroup\$mortytheshorty– mortytheshorty2023年03月02日 15:44:15 +00:00Commented Mar 2, 2023 at 15:44 -
\$\begingroup\$ I renamed it to
netstring_to_addrinfo
\$\endgroup\$mortytheshorty– mortytheshorty2023年03月02日 16:06:35 +00:00Commented Mar 2, 2023 at 16:06 -
3\$\begingroup\$ A
connect_to_netstring()
function, that takes a string, callsgetaddrinfo()
, then tries toconnect()
to each of the addresses until one works, and returns the filedescriptor of that connection, would indeed be a nice function to have. Note however that aconnect()
that succeeds might not mean you connected to the right thing; if you just return thestruct addrinfo*
, the calling code can decide what to do if communication with the peer doesn't go as intended, where one possibility is to try to connect to the next address in the list. \$\endgroup\$G. Sliepen– G. Sliepen2023年03月02日 17:08:16 +00:00Commented Mar 2, 2023 at 17:08
There's a subtle mistake in this macro definition:
#define ARRAYSIZE(x) (sizeof(x) / sizeof(x[0]))
When we put parens around the expansions of macro arguments, we need to put them immediately around the arguments. (x[0])
fails here, as []
has very high precedence. Example: ARRAYSIZE(using_buf ? buf : default_storage)
.
A corrected version is
#define ARRAYSIZE(x) (sizeof (x) / sizeof (x)[0])
-
\$\begingroup\$ Okay nice. I did not think about such a scenario. I changed it it. \$\endgroup\$mortytheshorty– mortytheshorty2023年03月02日 15:38:57 +00:00Commented Mar 2, 2023 at 15:38
-
2\$\begingroup\$ @mortytheshorty In addition, if
x
is an expression with side-effects, this macro would evaluate it twice. However, here, that is not the case. \$\endgroup\$Davislor– Davislor2023年03月02日 22:55:48 +00:00Commented Mar 2, 2023 at 22:55 -
\$\begingroup\$ @Davislor do you have an example what you mean with side effects? \$\endgroup\$mortytheshorty– mortytheshorty2023年03月03日 07:50:01 +00:00Commented Mar 3, 2023 at 7:50
-
1\$\begingroup\$ Not quite @Davislor; this macro doesn't evaluate
x
even once (sincex
is expanded only in arguments tosizeof
, which is an unevaluated context). Of course, that's also different to calling a function, where the side-effects occur exactly once. \$\endgroup\$Toby Speight– Toby Speight2023年03月03日 08:03:57 +00:00Commented Mar 3, 2023 at 8:03 -
2\$\begingroup\$ @mortytheshorty, Davislor is referring to invocations such as
ARRAYSIZE(++a)
- because a macro is a text substitution, we get two instances of++a
in the expansion. That can be surprising and misleading in macros that are used like functions. \$\endgroup\$Toby Speight– Toby Speight2023年03月03日 08:06:19 +00:00Commented Mar 3, 2023 at 8:06
Declare only where needed:
//long port;
//port = strtol(*str, (char **) str, ...);
long port = strtol(*str, (char **) str, ...);
Simplify initialization:
//struct addrinfo hint;
//memset(&hint, 0, sizeof hint);
struct addrinfo hint = { 0 };
Limit the scope of variables whenever possible:
//int i;
//int table_size = sizeof(protocol_table) / sizeof(*protocol_table);
// for(i = 0; i < table_size ; i++)
for (size_t i = 0; ARRAYSIZE (table_size); i++) {
// ...
}
- A loop variable should be defined in the initial part of the
for
loop. - Use
size_t
for sizes, cardinalities, and ordinal numbers. - As
table_size()
is not used anywhere else in the function, it can be eliminated.
Do not compare against true
/ false
:
See : Why Use !boolean_variable
over boolean_variable == false
.
//if (ipv6 == true) {
// hint.ai_family = AF_INET6;
// } else {
// hint.ai_family = AF_INET;
// }
And the above code snippet can be simplified to just:
hints.ai_family = ipv6 ? AF_INET6 : AF_INET;
-
\$\begingroup\$ I don't know why I did not add
ARRAYSIZE
to replace this. Must have been dreaming. C89 does not allow it and I don't know... some say, write for C89 so it can run everywhere some say don't write for C89 and I sit inbetween them don't know what to do. When I learned C from a book I always put declarations into the initial part of thefor
loop but then all code did not compile with C89 so I stopped doing that. But I changed those tosize_t
\$\endgroup\$mortytheshorty– mortytheshorty2023年03月02日 18:31:45 +00:00Commented Mar 2, 2023 at 18:31 -
\$\begingroup\$ Instead of learning an ancient C standard, consider C17. \$\endgroup\$Madagascar– Madagascar2023年03月03日 05:54:57 +00:00Commented Mar 3, 2023 at 5:54
Two minor points about this bit:
#if(_POSIX_C_SOURCE != 200112L)
#undef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200112L
#endif
First, if you define _POSIX_C_SOURCE
, it’s good defensive coding to also define _XOPEN_SOURCE
. (Unless you genuinely care about the minor distinctions.) Otherwise, it’s possible that the build system could set -D_XOPEN_SOURCE
to an incompatible value, and override your attempt to set the version.
Second, it’s a bad idea to silently redefine the macro if it was already set to a different version. If you need that one specific version of the POSIX API, but some other module you’re linking requires a different version, you want the compiler to halt and tell you about the logic error in your program.
Otherwise, some things that could go wrong are:
- This file might have been included after other headers, meaning those were loaded with inconsistent versions.
- The program requires a later version of the API, but you disabled it.
- The program requires an earlier version of the API, in which some function wasn’t deprecated.
- Another module defines a different value, and now the object files will not link.
So, I would change this to:
#define _POSIX_C_SOURCE 200112L
#define _XOPEN_SOURCE 600
If you want to be robust to the project bumping the version, you might try:
#if (!defined(_POSIX_C_SOURCE) && !defined(_XOPEN_SOURCE))
# define _POSIX_C_SOURCE 200112L
# define _XOPEN_SOURCE 600
#elif (_POSIX_C_SOURCE < 200112L || _XOPEN_SOURCE < 600)
# error "Inconsistent versions of POSIX specified."
#endif
-
\$\begingroup\$ I used this because I had problems with VS Code. I got
incomplete type errors
forstruct addrinfo
but I fixed it by adding_GNU_SOURCE
to thec_cpp_properties.json
. But nice example how to handle such problems! \$\endgroup\$mortytheshorty– mortytheshorty2023年03月02日 21:37:20 +00:00Commented Mar 2, 2023 at 21:37
Improve naming uniformity
Instead of ....
netstring_internal.h
_INTERNAL_NETSTRING_H
netstr_error
NETSTR_PROTO
protocol_table
netstring_to_addrinfo.c
DOMAIN_BUFSIZ
Consider
netstr_internal.h
_NETSTR_INTERNAL_H
netstr_error
NETSTR_PROTO
netstr_protocol_table
netstr_to_addrinfo.c
NETSTR_DOMAIN_BUFSIZ
... the user header file netstring.h
Where is netstring.h
??
Conversion error checking
Conversion fails to certainly detect no conversion and can trigger an incorrect error if errno
was non-zero prior to the call.
// long port;
//port = strtol(*str, (char **) str, 10);
//if (errno == EINVAL || errno == ERANGE)
// return -1;
errno = 0; // Add
char *startptr = *str;
long port = strtol(startptr, (char **) str, 10);
if (startptr == *str || errno) { // Stronger test
return -1;
}
For the sake of consistency, I'd consider setting the range error too in
if (port < 1 && port > 65535) {
errno = ERANGE: // Maybe add
return -1;
}
-
\$\begingroup\$ I renamed the identifiers to keep a good structure but I don't think I should set
ERANGE
if port is lower or bigger because error handling should be done by the functions that use them and theERANGE
error is used with overflow I think. And since I was not advised to usestruct sockaddrXXX
the portchecking function is currently not in use. BUT what I think I should/could do and miss is returning the error code from my internal header. This is whatnetstr_write_domain
does. It returnsNETSTR_HOST
error code. So thanks for highlighting this. \$\endgroup\$mortytheshorty– mortytheshorty2023年03月05日 22:04:20 +00:00Commented Mar 5, 2023 at 22:04