Dynamic library to intercept oddsock hostname lookups

Question 1

Just for the fun of it, I decided I would try to create sort of an intermediate DNS system for the oddsock SOCKS proxy. With this, the domain name extension .unet is statically resolved when requested. All of this is hooked into the oddsocks proxy system via inserting into the DYLD before libevent does. Tell me what you think.

#include <netdb.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <event2/event.h>
#include <event2/buffer.h>
#include <event2/bufferevent.h>
#include <event2/dns.h>
#include <time.h>
#include <curl/curl.h>
#include <curl/easy.h>
struct unetent { char name[64], ip[13]; };
struct lnode
{
 struct unetent data;
 struct lnode *next;
};
struct lnode * list_insert(struct lnode *list, char *name, char *ip);
int list_size(struct lnode *list);
struct unetent * list_get(struct lnode *list, int p);
struct lnode * list_getp(struct lnode *list, int p);
struct lnode * insert_from_file(struct lnode *list, FILE *f, int *code);
int (*original_bufferevent_socket_connect_hostname)(void *bev, void *evdns_base, int family, const char *hostname, int port) = NULL;
struct lnode *node_list = NULL;
time_t refresh_list_after = 0;
struct lnode * list_insert(struct lnode *list, char *name, char *ip)
{
 struct lnode *ptr = malloc(sizeof(struct lnode)), *tmp = NULL;
 strcpy(ptr->data.name, name);
 strcpy(ptr->data.ip, ip);
 ptr->next = NULL;
 if (list == NULL) return ptr;
 for (tmp = list; tmp->next != NULL; tmp = tmp->next);
 tmp->next = ptr;
 return list;
}
int list_size(struct lnode *list)
{
 int i;
 struct lnode *tmp = list;
 for (i = 0; tmp != NULL; i++, tmp = tmp->next);
 return i;
}
struct unetent * list_get(struct lnode *list, int p)
{
 int i;
 struct lnode *tmp = list;
 for (i = 0; i < p; i++)
 tmp = tmp->next;
 return &tmp->data;
}
struct lnode * list_getp(struct lnode *list, int p)
{
 int i;
 struct lnode *tmp = list;
 if (p < 0) return NULL;
 for (i = 0; i < p; i++)
 tmp = tmp->next;
 return tmp;
}
struct lnode * insert_from_file(struct lnode *list, FILE *f, int *code)
{
 struct unetent tmp;
 if (code == NULL)
 code = malloc(sizeof(int));
 *code = fread(&tmp, sizeof(struct unetent), 1, f);
 if (*code > 0)
 return list_insert(list, tmp.name, tmp.ip);
 return list;
}
struct lnode * list_move_front(struct lnode *list, int nn)
{
 struct lnode * p = list_getp(list, nn - 1);
 struct lnode * c = list_getp(list, nn + 1);
 struct lnode * n = list_getp(list, nn + 0);
 if (nn == 0) return list;
 p->next = c;
 n->next = list;
 list = n;
 return list;
}
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
 size_t written = fwrite(ptr, size, nmemb, stream);
 return written;
}
void update_list()
{
 time_t curr = time(NULL);
 CURL *curl;
 FILE *fp;
 CURLcode res;
 char *url = "http://97.85.72.250/out.dat";
 char *outfilename = "/tmp/undernet.dat";
 int readcode = 1;
 if (curr > refresh_list_after || node_list == NULL)
 {
 printf("\t[unet] Refreshing node cache...\n");
 curl = curl_easy_init();
 if (curl)
 {
 fp = fopen(outfilename,"wb");
 curl_easy_setopt(curl, CURLOPT_URL, url);
 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
 curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
 res = curl_easy_perform(curl);
 curl_easy_cleanup(curl);
 fclose(fp);
 }
 fp = fopen(outfilename, "rb");
 while (readcode > 0)
 node_list = insert_from_file(node_list, fp, &readcode);
 refresh_list_after = time(NULL) + (3600); //refresh every minute
 unlink(outfilename);
 }
}
const char *lookup_ip_from_nodelist(const char *host)
{
 int i;
 const int end = list_size(node_list);
 struct unetent *tmp = NULL;
 for (i = 0; i < end; i++)
 {
 tmp = list_get(node_list, i);
 if (strcmp(tmp->name, host) == 0)
 {
 node_list = list_move_front(node_list, i);
 return (const char *)tmp->ip;
 }
 }
 return (const char *)host;
}
char isunet(const char *name)
{
 const char *endurit = strstr(name, "/");
 const char *enduri = endurit ? endurit : name + strlen(name);
 return (strstr(name, ".unet") == enduri - strlen(".unet"));
}
const char *undernet_translate_URI(const char *name)
{
 if (isunet(name))
 {
 update_list();
 return lookup_ip_from_nodelist(name);
 }
 return name;
}
int bufferevent_socket_connect_hostname(struct bufferevent *bev, struct evdns_base *evdns_base, int family, const char *hostname, int port)
{
 if (original_bufferevent_socket_connect_hostname == NULL)
 original_bufferevent_socket_connect_hostname = dlsym(RTLD_NEXT, "bufferevent_socket_connect_hostname");
 return original_bufferevent_socket_connect_hostname(bev, evdns_base, family, undernet_translate_URI(hostname), port);
}

Question 2

Concept

There are architecturally cleaner ways to achieve a similar result.

Configure the machine on which oddsock is running to point to your own proxying nameserver, such as dnsmasq. That nameserver resolves hostnames against a local database, and falls back to an upstream DNS server if not found.
If you don't want to affect the name resolution configuration for any other processes running on the same machine as oddsock, then monkey-patching oddsock is a reasonable hack. However, instead of overriding bufferevent_socket_connect_hostname(), you should probably create a hook for evdns_base_new() instead. With that hook, you could...
- Insert a call to evdns_base_nameserver_ip_add() to tell oddsock to use the aforementioned proxying nameserver, or
- Fetch the mappings from http://97.85.72.250/out.dat into a temporary file, then insert a call to evdns_base_load_hosts() to load them all. It might be a good idea to add an alarm(2) and SIGALRM handler that reloads the host definitions.

These solutions don't require any meddling after the initial setup hook. Almost all of your code disappears.

Data structure

Linked lists generally suck. There are few situations where they shine, and this is certainly not one of them.

The number of entries is known in advance: it's the number of bytes in out.dat divided by sizeof struct unetent. You could just malloc() an array.

The only reason you have code to manipulate the list at all is to put the most recently used entries in front for performance. If you had used a hashtable instead, then everything would have been O(1), and you wouldn't be rearranging entries needlessly.

Performance

Since update_list() is called from within undernet_translate_URI(), the refreshing of the host definitions happens synchronously. As a result, once a minute, a request would take longer than normal.

list_move_front() calls list_getp(list, nn + ...) three times. Each call takes O(nn) time. But c is just n->next. Usually, n is p->next (as long as n is not the head node).

Bugs

Writing to a hard-coded temporary file (/tmp/undernet.dat) is a security risk. Use mkstemp() or tmpfile() or open(..., O_TMPFILE) in Linux for Workgroups.

insert_from_file() would leak memory if code is NULL. I don't recommend malloc() and free() at all in that situation, though. It would be more idiomatic C to return the status code, and manipulate list by reference. Better yet, incorporate the while loop so that one function loads the entire file. (By the way, while (readcode > 0) would be better as a do-while loop.)

Summary

Interesting idea. However, it could be done with a cleaner architecture, which would eliminate most of the code. Furthermore, linked list is a poor choice of data structure, and it's not worth optimizing that.

Question 3

I actually considered scrapping the remote dl and going for straight sql entries. I dont know if it was a bad choice to decide to keep local (thank you for the hash table idea, completely forgot that one) or if i should go full remote.

Question 4

SQL? No. Be practical: think like a system administrator, not a programmer. Strive for a solution that involves configuration only, no code, and standard protocols. That means setting up a DNS server — dnsmasq is your best bet, I think.

Question 5

And your suggestion is that I make the update_list work async? Or that I should time it to run out of cycle, when it isnt being depended on? Also: my only concern with not using something like SQL is that the list could potentially be very large. I would like to steer away from third party servers as I do this as a learning experience, and just some fun.

Question 6

No, my suggestion is to use a nameserver for name resolution, because that is precisely what DNS is for. The UDP-based protocol is efficient, the DNS server will have code that is fast, results will be authoritative and management will be centralized. Configure oddsock to use a nameserver that resolves entries in the .unet zone and don't write any code. If you'd like, follow-up on chat or ask Super User about how to accomplish this (possibly Server Fault, but this doesn't seem like a kind of question that a professional sysadmin would ask).

Question 7

I'd recommend staying consistent with curly brace usage for conditionals. It can help with readability and also ease maintainability if you end up needing to add additional lines.
```
if (someCondition)
{
 // do something...
 // can still do something else...
}
```
However, it's okay to leave them out if you have a single-line conditional that you know will never require more than one line:
```
if (someConditional) return something;
```
Your loop counters should be declared right before the loop statement as opposed to the start of the function (before the other variables). Doing so will keep a minimum scope on the counter, and everything will be in one place should the loop ever need to be removed.

write_data() doesn't need two lines:

size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
 size_t written = fwrite(ptr, size, nmemb, stream);
 return written;
}

It can just have one line

size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
 return fwrite(ptr, size, nmemb, stream);
}

If the return value of fwrite() will just be returned right away, then another variable isn't needed. The name also doesn't add much to what is already known about the function.

Question 8

sorry. My program for formatting to stackexchange sites looks for braces to do tabbing when it reformats. Since it was missing the brace, it got untabbed. I will correct those.

Question 9

should be good now

Question 10

@phyrrus9: I've edited my answer to address something similar instead. Since this wasn't intentional, I'll let it slide. For future reference, please don't make corrections on the original code from answers, even if it's a simple fix.

Question 11

I agree with that, and most of my code does follow that syntax. If I don't have a reason to put it all on one line, then it has curly braces. For this, however, this is the debugging code, and therefore does not include final formatting and is intended to be as less line-wastey as possible

Question 12

I was looking for everything. I appreciate that comment, in future posts I will ensure to include them if stated that it makes it easier to read. Thanks.

Question 13

This review addresses only list management.

naming

I'd prefer list_get_data and list_get_node over list_get and list_getp. list_insert is in fact list_append, and shall return ptr (the caller already knows what list is, while ptr carries valuable information, as we shall see).

overall code organization

I don't like node_list being global. Globals are bad.

list_move_front, list_get, list_getp need null pointer checking.

A need to access a list by index raise suspicions (see below).

efficiency

For lists your program manages it is probably not relevant. Nevertheless, insert_from_file exhibits a quadratic complexity. Should list_insert return a pointer to a newly created node, the complexity would become linear.

suggested improvements

list_move_front doesn't need to traverse the list 3 times, as it does. It is enough to do it once.

Notice that lists with a simple payload (such as in your case - a single pointer) are subject to a very simple node extraction: you don't need to know the previous one (see below).

The way list_move_front is called adds two more traversals (one explicit, and one due to list_size()).

I'd suggest having

struct lnode * list_find(struct lnode * head, const char * name)
 while (head != NULL)
 if (strcmp(head->name, name)) break;
 else head = head->next;
 return head;
}

Now lookup_ip_from_nodelist can be rewritten as

struct lnode * node = list_find(node_list, hostname);
if (node == NULL) return hostname;
node_list = detach_node(node);
return node->ip;

detach_node implements the trick mentioned above (a seemingly special case of node->next being NULL is addressed by having a sentinel node):

struct lnode * detach_node(struct lnode * node)
{
 struct lnode * next = node->next;
 node_next = next->next;
 char * ip = node->ip;
 node->ip = next->ip;
 next->ip = tmp;
 return next;
}

Notice that all index-based helpers (list_size, list_get, list_getp) are eliminated.

200_success 200_success 146k22 gold badges190 silver badges479 bronze badges · Accepted Answer · 2014-05-21 23:21:34Z

Concept

There are architecturally cleaner ways to achieve a similar result.

Configure the machine on which oddsock is running to point to your own proxying nameserver, such as dnsmasq. That nameserver resolves hostnames against a local database, and falls back to an upstream DNS server if not found.
If you don't want to affect the name resolution configuration for any other processes running on the same machine as oddsock, then monkey-patching oddsock is a reasonable hack. However, instead of overriding bufferevent_socket_connect_hostname(), you should probably create a hook for evdns_base_new() instead. With that hook, you could...
- Insert a call to evdns_base_nameserver_ip_add() to tell oddsock to use the aforementioned proxying nameserver, or
- Fetch the mappings from http://97.85.72.250/out.dat into a temporary file, then insert a call to evdns_base_load_hosts() to load them all. It might be a good idea to add an alarm(2) and SIGALRM handler that reloads the host definitions.

These solutions don't require any meddling after the initial setup hook. Almost all of your code disappears.

Data structure

Linked lists generally suck. There are few situations where they shine, and this is certainly not one of them.

The number of entries is known in advance: it's the number of bytes in out.dat divided by sizeof struct unetent. You could just malloc() an array.

The only reason you have code to manipulate the list at all is to put the most recently used entries in front for performance. If you had used a hashtable instead, then everything would have been O(1), and you wouldn't be rearranging entries needlessly.

Performance

Since update_list() is called from within undernet_translate_URI(), the refreshing of the host definitions happens synchronously. As a result, once a minute, a request would take longer than normal.

list_move_front() calls list_getp(list, nn + ...) three times. Each call takes O(nn) time. But c is just n->next. Usually, n is p->next (as long as n is not the head node).

Bugs

Writing to a hard-coded temporary file (/tmp/undernet.dat) is a security risk. Use mkstemp() or tmpfile() or open(..., O_TMPFILE) in Linux for Workgroups.

insert_from_file() would leak memory if code is NULL. I don't recommend malloc() and free() at all in that situation, though. It would be more idiomatic C to return the status code, and manipulate list by reference. Better yet, incorporate the while loop so that one function loads the entire file. (By the way, while (readcode > 0) would be better as a do-while loop.)

Summary

Interesting idea. However, it could be done with a cleaner architecture, which would eliminate most of the code. Furthermore, linked list is a poor choice of data structure, and it's not worth optimizing that.

I actually considered scrapping the remote dl and going for straight sql entries. I dont know if it was a bad choice to decide to keep local (thank you for the hash table idea, completely forgot that one) or if i should go full remote.
SQL? No. Be practical: think like a system administrator, not a programmer. Strive for a solution that involves configuration only, no code, and standard protocols. That means setting up a DNS server — dnsmasq is your best bet, I think.
And your suggestion is that I make the update_list work async? Or that I should time it to run out of cycle, when it isnt being depended on? Also: my only concern with not using something like SQL is that the list could potentially be very large. I would like to steer away from third party servers as I do this as a learning experience, and just some fun.
No, my suggestion is to use a nameserver for name resolution, because that is precisely what DNS is for. The UDP-based protocol is efficient, the DNS server will have code that is fast, results will be authoritative and management will be centralized. Configure oddsock to use a nameserver that resolves entries in the .unet zone and don't write any code. If you'd like, follow-up on chat or ask Super User about how to accomplish this (possibly Server Fault, but this doesn't seem like a kind of question that a professional sysadmin would ask).

Stack Exchange Network

Dynamic library to intercept oddsock hostname lookups

3 Answers 3

Concept

Data structure

Performance

Bugs

Summary

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Dynamic library to intercept oddsock hostname lookups

3 Answers 3

Concept

Data structure

Performance

Bugs

Summary

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions