Just for the fun of it, I decided I would try to create sort of an intermediate DNS system for the oddsock SOCKS proxy. With this, the domain name extension .unet is statically resolved when requested. All of this is hooked into the oddsocks proxy system via inserting into the DYLD before libevent does. Tell me what you think.
#include <netdb.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <event2/event.h>
#include <event2/buffer.h>
#include <event2/bufferevent.h>
#include <event2/dns.h>
#include <time.h>
#include <curl/curl.h>
#include <curl/easy.h>
struct unetent { char name[64], ip[13]; };
struct lnode
{
struct unetent data;
struct lnode *next;
};
struct lnode * list_insert(struct lnode *list, char *name, char *ip);
int list_size(struct lnode *list);
struct unetent * list_get(struct lnode *list, int p);
struct lnode * list_getp(struct lnode *list, int p);
struct lnode * insert_from_file(struct lnode *list, FILE *f, int *code);
int (*original_bufferevent_socket_connect_hostname)(void *bev, void *evdns_base, int family, const char *hostname, int port) = NULL;
struct lnode *node_list = NULL;
time_t refresh_list_after = 0;
struct lnode * list_insert(struct lnode *list, char *name, char *ip)
{
struct lnode *ptr = malloc(sizeof(struct lnode)), *tmp = NULL;
strcpy(ptr->data.name, name);
strcpy(ptr->data.ip, ip);
ptr->next = NULL;
if (list == NULL) return ptr;
for (tmp = list; tmp->next != NULL; tmp = tmp->next);
tmp->next = ptr;
return list;
}
int list_size(struct lnode *list)
{
int i;
struct lnode *tmp = list;
for (i = 0; tmp != NULL; i++, tmp = tmp->next);
return i;
}
struct unetent * list_get(struct lnode *list, int p)
{
int i;
struct lnode *tmp = list;
for (i = 0; i < p; i++)
tmp = tmp->next;
return &tmp->data;
}
struct lnode * list_getp(struct lnode *list, int p)
{
int i;
struct lnode *tmp = list;
if (p < 0) return NULL;
for (i = 0; i < p; i++)
tmp = tmp->next;
return tmp;
}
struct lnode * insert_from_file(struct lnode *list, FILE *f, int *code)
{
struct unetent tmp;
if (code == NULL)
code = malloc(sizeof(int));
*code = fread(&tmp, sizeof(struct unetent), 1, f);
if (*code > 0)
return list_insert(list, tmp.name, tmp.ip);
return list;
}
struct lnode * list_move_front(struct lnode *list, int nn)
{
struct lnode * p = list_getp(list, nn - 1);
struct lnode * c = list_getp(list, nn + 1);
struct lnode * n = list_getp(list, nn + 0);
if (nn == 0) return list;
p->next = c;
n->next = list;
list = n;
return list;
}
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
size_t written = fwrite(ptr, size, nmemb, stream);
return written;
}
void update_list()
{
time_t curr = time(NULL);
CURL *curl;
FILE *fp;
CURLcode res;
char *url = "http://97.85.72.250/out.dat";
char *outfilename = "/tmp/undernet.dat";
int readcode = 1;
if (curr > refresh_list_after || node_list == NULL)
{
printf("\t[unet] Refreshing node cache...\n");
curl = curl_easy_init();
if (curl)
{
fp = fopen(outfilename,"wb");
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
res = curl_easy_perform(curl);
curl_easy_cleanup(curl);
fclose(fp);
}
fp = fopen(outfilename, "rb");
while (readcode > 0)
node_list = insert_from_file(node_list, fp, &readcode);
refresh_list_after = time(NULL) + (3600); //refresh every minute
unlink(outfilename);
}
}
const char *lookup_ip_from_nodelist(const char *host)
{
int i;
const int end = list_size(node_list);
struct unetent *tmp = NULL;
for (i = 0; i < end; i++)
{
tmp = list_get(node_list, i);
if (strcmp(tmp->name, host) == 0)
{
node_list = list_move_front(node_list, i);
return (const char *)tmp->ip;
}
}
return (const char *)host;
}
char isunet(const char *name)
{
const char *endurit = strstr(name, "/");
const char *enduri = endurit ? endurit : name + strlen(name);
return (strstr(name, ".unet") == enduri - strlen(".unet"));
}
const char *undernet_translate_URI(const char *name)
{
if (isunet(name))
{
update_list();
return lookup_ip_from_nodelist(name);
}
return name;
}
int bufferevent_socket_connect_hostname(struct bufferevent *bev, struct evdns_base *evdns_base, int family, const char *hostname, int port)
{
if (original_bufferevent_socket_connect_hostname == NULL)
original_bufferevent_socket_connect_hostname = dlsym(RTLD_NEXT, "bufferevent_socket_connect_hostname");
return original_bufferevent_socket_connect_hostname(bev, evdns_base, family, undernet_translate_URI(hostname), port);
}
3 Answers 3
Concept
There are architecturally cleaner ways to achieve a similar result.
- Configure the machine on which
oddsock
is running to point to your own proxying nameserver, such asdnsmasq
. That nameserver resolves hostnames against a local database, and falls back to an upstream DNS server if not found. - If you don't want to affect the name resolution configuration for any other processes running on the same machine as
oddsock
, then monkey-patchingoddsock
is a reasonable hack. However, instead of overridingbufferevent_socket_connect_hostname()
, you should probably create a hook forevdns_base_new()
instead. With that hook, you could...- Insert a call to
evdns_base_nameserver_ip_add()
to telloddsock
to use the aforementioned proxying nameserver, or - Fetch the mappings from
http://97.85.72.250/out.dat
into a temporary file, then insert a call toevdns_base_load_hosts()
to load them all. It might be a good idea to add analarm(2)
andSIGALRM
handler that reloads the host definitions.
- Insert a call to
These solutions don't require any meddling after the initial setup hook. Almost all of your code disappears.
Data structure
Linked lists generally suck. There are few situations where they shine, and this is certainly not one of them.
The number of entries is known in advance: it's the number of bytes in out.dat
divided by sizeof struct unetent
. You could just malloc()
an array.
The only reason you have code to manipulate the list at all is to put the most recently used entries in front for performance. If you had used a hashtable instead, then everything would have been O(1), and you wouldn't be rearranging entries needlessly.
Performance
Since update_list()
is called from within undernet_translate_URI()
, the refreshing of the host definitions happens synchronously. As a result, once a minute, a request would take longer than normal.
list_move_front()
calls list_getp(list, nn + ...)
three times. Each call takes O(nn) time. But c
is just n->next
. Usually, n
is p->next
(as long as n
is not the head node).
Bugs
Writing to a hard-coded temporary file (/tmp/undernet.dat
) is a security risk. Use mkstemp()
or tmpfile()
or open(..., O_TMPFILE)
in Linux for Workgroups.
insert_from_file()
would leak memory if code
is NULL
. I don't recommend malloc()
and free()
at all in that situation, though. It would be more idiomatic C to return the status code, and manipulate list
by reference. Better yet, incorporate the while
loop so that one function loads the entire file. (By the way, while (readcode > 0)
would be better as a do-while loop.)
Summary
Interesting idea. However, it could be done with a cleaner architecture, which would eliminate most of the code. Furthermore, linked list is a poor choice of data structure, and it's not worth optimizing that.
-
\$\begingroup\$ I actually considered scrapping the remote dl and going for straight sql entries. I dont know if it was a bad choice to decide to keep local (thank you for the hash table idea, completely forgot that one) or if i should go full remote. \$\endgroup\$phyrrus9– phyrrus92014年05月22日 00:51:18 +00:00Commented May 22, 2014 at 0:51
-
\$\begingroup\$ SQL? No. Be practical: think like a system administrator, not a programmer. Strive for a solution that involves configuration only, no code, and standard protocols. That means setting up a DNS server —
dnsmasq
is your best bet, I think. \$\endgroup\$200_success– 200_success2014年05月22日 07:44:35 +00:00Commented May 22, 2014 at 7:44 -
\$\begingroup\$ And your suggestion is that I make the update_list work async? Or that I should time it to run out of cycle, when it isnt being depended on? Also: my only concern with not using something like SQL is that the list could potentially be very large. I would like to steer away from third party servers as I do this as a learning experience, and just some fun. \$\endgroup\$phyrrus9– phyrrus92014年05月22日 17:51:51 +00:00Commented May 22, 2014 at 17:51
-
\$\begingroup\$ No, my suggestion is to use a nameserver for name resolution, because that is precisely what DNS is for. The UDP-based protocol is efficient, the DNS server will have code that is fast, results will be authoritative and management will be centralized. Configure
oddsock
to use a nameserver that resolves entries in the .unet zone and don't write any code. If you'd like, follow-up on chat or ask Super User about how to accomplish this (possibly Server Fault, but this doesn't seem like a kind of question that a professional sysadmin would ask). \$\endgroup\$200_success– 200_success2014年05月22日 19:31:42 +00:00Commented May 22, 2014 at 19:31
I'd recommend staying consistent with curly brace usage for conditionals. It can help with readability and also ease maintainability if you end up needing to add additional lines.
if (someCondition) { // do something... // can still do something else... }
However, it's okay to leave them out if you have a single-line conditional that you know will never require more than one line:
if (someConditional) return something;
Your loop counters should be declared right before the loop statement as opposed to the start of the function (before the other variables). Doing so will keep a minimum scope on the counter, and everything will be in one place should the loop ever need to be removed.
write_data()
doesn't need two lines:size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) { size_t written = fwrite(ptr, size, nmemb, stream); return written; }
It can just have one line
size_t write_data(void *ptr, size_t size, size_t nmemb, FILE *stream) { return fwrite(ptr, size, nmemb, stream); }
If the return value of
fwrite()
will just be returned right away, then another variable isn't needed. The name also doesn't add much to what is already known about the function.
-
\$\begingroup\$ sorry. My program for formatting to stackexchange sites looks for braces to do tabbing when it reformats. Since it was missing the brace, it got untabbed. I will correct those. \$\endgroup\$phyrrus9– phyrrus92014年05月21日 16:39:25 +00:00Commented May 21, 2014 at 16:39
-
\$\begingroup\$ should be good now \$\endgroup\$phyrrus9– phyrrus92014年05月21日 16:40:28 +00:00Commented May 21, 2014 at 16:40
-
3\$\begingroup\$ @phyrrus9: I've edited my answer to address something similar instead. Since this wasn't intentional, I'll let it slide. For future reference, please don't make corrections on the original code from answers, even if it's a simple fix. \$\endgroup\$Jamal– Jamal2014年05月21日 16:55:09 +00:00Commented May 21, 2014 at 16:55
-
\$\begingroup\$ I agree with that, and most of my code does follow that syntax. If I don't have a reason to put it all on one line, then it has curly braces. For this, however, this is the debugging code, and therefore does not include final formatting and is intended to be as less line-wastey as possible \$\endgroup\$phyrrus9– phyrrus92014年05月21日 16:58:39 +00:00Commented May 21, 2014 at 16:58
-
2\$\begingroup\$ I was looking for everything. I appreciate that comment, in future posts I will ensure to include them if stated that it makes it easier to read. Thanks. \$\endgroup\$phyrrus9– phyrrus92014年05月21日 17:03:57 +00:00Commented May 21, 2014 at 17:03
This review addresses only list management.
naming
I'd prefer list_get_data
and list_get_node
over list_get
and list_getp
.
list_insert
is in fact list_append
, and shall return ptr
(the caller already knows what list
is, while ptr
carries valuable information, as we shall see).
overall code organization
I don't like node_list
being global. Globals are bad.
list_move_front
, list_get
, list_getp
need null pointer checking.
A need to access a list by index raise suspicions (see below).
efficiency
For lists your program manages it is probably not relevant. Nevertheless, insert_from_file
exhibits a quadratic complexity. Should list_insert
return a pointer to a newly created node, the complexity would become linear.
suggested improvements
list_move_front
doesn't need to traverse the list 3 times, as it does. It is enough to do it once.
Notice that lists with a simple payload (such as in your case - a single pointer) are subject to a very simple node extraction: you don't need to know the previous one (see below).
The way list_move_front
is called adds two more traversals (one explicit, and one due to list_size()
).
I'd suggest having
struct lnode * list_find(struct lnode * head, const char * name)
while (head != NULL)
if (strcmp(head->name, name)) break;
else head = head->next;
return head;
}
Now lookup_ip_from_nodelist
can be rewritten as
struct lnode * node = list_find(node_list, hostname);
if (node == NULL) return hostname;
node_list = detach_node(node);
return node->ip;
detach_node
implements the trick mentioned above (a seemingly special case of node->next
being NULL is addressed by having a sentinel node):
struct lnode * detach_node(struct lnode * node)
{
struct lnode * next = node->next;
node_next = next->next;
char * ip = node->ip;
node->ip = next->ip;
next->ip = tmp;
return next;
}
Notice that all index-based helpers (list_size
, list_get
, list_getp
) are eliminated.
Explore related questions
See similar questions with these tags.