Annotation of libwww/Library/src/HTTP.c, revision 1.49
1.44 frystyk 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
1.1 timbl 2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
17: /* Implements:
18: */
19: #include "HTTP.h"
20:
/* Protocol version string appended to the request line. */
#define HTTP_VERSION "HTTP/1.0"

/* Defined when the protocol version spoken is greater than 0.9. */
#define HTTP2

/* Initial size of a line buffer. */
#define INIT_LINE_SIZE 1024

/* Minimum read size. */
#define LINE_EXTEND_THRESH 256

/* Maximum length kept of the protocol version returned by the server. */
#define VERSION_LENGTH 20
27:
28: /* Uses:
29: */
1.1 timbl 30: #include "HTParse.h"
31: #include "HTUtils.h"
32: #include "tcp.h"
33: #include "HTTCP.h"
34: #include "HTFormat.h"
1.2 timbl 35: #include <ctype.h>
36: #include "HTAlert.h"
37: #include "HTMIME.h"
1.5 timbl 38: #include "HTML.h" /* SCW */
39: #include "HTInit.h" /* SCW */
1.21 luotonen 40: #include "HTAccess.h" /* HTRequest */
1.14 luotonen 41: #include "HTAABrow.h" /* Access Authorization */
1.20 timbl 42: #include "HTTee.h" /* Tee off a cache stream */
43: #include "HTFWriter.h" /* Write to cache file */
1.1 timbl 44:
1.2 timbl 45: struct _HTStream {
46: HTStreamClass * isa; /* all we need to know */
47: };
48:
1.6 timbl 49: extern char * HTAppName; /* Application name: please supply */
50: extern char * HTAppVersion; /* Application version: please supply */
51:
1.37 luotonen 52: PUBLIC long HTProxyBytes = 0; /* Number of bytes transferred thru proxy */
53: extern BOOL using_proxy; /* are we using a proxy gateway? */
54: PUBLIC char * HTProxyHeaders = NULL; /* Headers to pass as-is */
1.23 luotonen 55:
1.21 luotonen 56: PRIVATE void parse_401_headers ARGS2(HTRequest *, req,
57: HTInputSocket *, isoc)
58: {
59: HTAAScheme scheme;
60: char *line;
61: int num_schemes = 0;
62: HTList *valid_schemes = HTList_new();
63: HTAssocList **scheme_specifics = NULL;
64: char *template = NULL;
65:
66: /* Read server reply header lines */
67:
68: if (TRACE)
69: fprintf(stderr, "Server 401 reply header lines:\n");
70:
71: while (NULL != (line = HTInputSocket_getUnfoldedLine(isoc)) &&
72: *line != 0) {
73:
74: if (TRACE) fprintf(stderr, "%s\n", line);
75:
76: if (strchr(line, ':')) { /* Valid header line */
77:
78: char *p = line;
79: char *fieldname = HTNextField(&p);
80: char *arg1 = HTNextField(&p);
81: char *args = p;
82:
83: if (0==strcasecomp(fieldname, "WWW-Authenticate:")) {
84: if (HTAA_UNKNOWN != (scheme = HTAAScheme_enum(arg1))) {
85: HTList_addObject(valid_schemes, (void*)scheme);
86: if (!scheme_specifics) {
87: int i;
88: scheme_specifics = (HTAssocList**)
89: malloc(HTAA_MAX_SCHEMES * sizeof(HTAssocList*));
90: if (!scheme_specifics)
91: outofmem(__FILE__, "parse_401_headers");
92: for (i=0; i < HTAA_MAX_SCHEMES; i++)
93: scheme_specifics[i] = NULL;
94: }
95: scheme_specifics[scheme] = HTAA_parseArgList(args);
96: num_schemes++;
97: }
98: else if (TRACE) {
99: fprintf(stderr, "Unknown scheme `%s' %s\n",
100: (arg1 ? arg1 : "(null)"),
101: "in WWW-Authenticate: field");
102: }
103: }
104:
105: else if (0==strcasecomp(fieldname, "WWW-Protection-Template:")) {
106: if (TRACE)
107: fprintf(stderr, "Protection template set to `%s'\n", arg1);
108: StrAllocCopy(template, arg1);
109: }
110:
111: } /* if a valid header line */
112: else if (TRACE) {
113: fprintf(stderr, "Invalid header line `%s' ignored\n", line);
114: } /* else invalid header line */
1.44 frystyk 115: free(line);
1.21 luotonen 116: } /* while header lines remain */
1.44 frystyk 117: FREE(line);
1.21 luotonen 118: req->valid_schemes = valid_schemes;
119: req->scheme_specifics = scheme_specifics;
120: req->prot_template = template;
121: }
122:
123:
124:
1.1 timbl 125: /* Load Document from HTTP Server HTLoadHTTP()
126: ** ==============================
127: **
128: ** Given a hypertext address, this routine loads a document.
129: **
130: **
131: ** On entry,
132: ** arg is the hypertext reference of the article to be loaded.
133: **
134: ** On exit,
135: ** returns >=0 If no error, a good socket number
136: ** <0 Error.
137: **
138: ** The socket must be closed by the caller after the document has been
139: ** read.
140: **
141: */
1.19 timbl 142: PUBLIC int HTLoadHTTP ARGS1 (HTRequest *, request)
1.1 timbl 143: {
1.22 luotonen 144: CONST char * arg = NULL;
1.1 timbl 145: int s; /* Socket number for returned data */
146: int status; /* tcp return */
1.10 timbl 147: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 148: HTStream * target = NULL; /* Unconverted data */
1.46 frystyk 149: BOOL cache_http = YES; /* Enable caching of HTTP-retrieved files */
150:
1.2 timbl 151: CONST char* gate = 0; /* disable this feature */
1.1 timbl 152: SockA soc_address; /* Binary network address */
153: SockA * sin = &soc_address;
1.40 frystyk 154: BOOL extensions = YES; /* Assume good HTTP server */
1.36 frystyk 155:
1.37 luotonen 156: if (HTImProxy) HTProxyBytes = 0;
1.23 luotonen 157:
1.37 luotonen 158: arg = HTAnchor_physical(request->anchor);
1.22 luotonen 159:
1.1 timbl 160: if (!arg) return -3; /* Bad if no name sepcified */
161: if (!*arg) return -2; /* Bad if name had zero length */
162:
163: /* Set up defaults:
164: */
165: #ifdef DECNET
1.2 timbl 166: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
167: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 168: #else /* Internet */
1.2 timbl 169: sin->sin_family = AF_INET; /* Family = internet, host order */
170: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 171: #endif
172:
1.10 timbl 173: sprintf(crlf, "%c%c", CR, LF); /* To be corect on Mac, VM, etc */
174:
1.1 timbl 175: if (TRACE) {
176: if (gate) fprintf(stderr,
177: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
178: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
179: }
180:
181: /* Get node name and optional port number:
182: */
183: {
184: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
185: int status = HTParseInet(sin, p1); /* TBL 920622 */
1.49 ! luotonen 186: if (status) {
! 187: HTAddError2(request,"No such host:",p1);
! 188: free(p1);
! 189: return status; /* No such host for example */
! 190: }
1.1 timbl 191: free(p1);
192: }
193:
1.15 luotonen 194: /*
195: ** Compose authorization information (this was moved here
196: ** from after the making of the connection so that the connection
197: ** wouldn't have to wait while prompting username and password
198: ** from the user). -- AL 13.10.93
199: */
200: #ifdef ACCESS_AUTH
1.21 luotonen 201: HTAA_composeAuth(request);
202: if (TRACE) {
203: if (request->authorization)
204: fprintf(stderr, "HTTP: Sending Authorization: %s\n",
205: request->authorization);
206: else
207: fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
1.15 luotonen 208: }
209: #endif /* ACCESS_AUTH */
1.1 timbl 210:
1.10 timbl 211: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 212: */
213: #ifdef DECNET
214: s = socket(AF_DECnet, SOCK_STREAM, 0);
215: #else
216: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
217: #endif
218: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
219: if (status < 0) {
220: if (TRACE) fprintf(stderr,
1.30 frystyk 221: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n",
222: arg, errno);
1.49 ! luotonen 223: HTAddErrorN(request,"Unable to connect to remote host, errno:",
! 224: errno);
1.1 timbl 225: return HTInetStatus("connect");
226: }
227:
228: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
229:
1.17 timbl 230:
231: /* Compose and send command
232: ** ------------------------
233: */
234: {
235: char *command; /* The whole command */
236:
1.1 timbl 237: /* Ask that node for the document,
238: ** omitting the host name & anchor if not gatewayed.
239: */
1.37 luotonen 240: if (gate) { /* This is no longer used, and could be thrown away */
1.17 timbl 241: command = malloc(4 + strlen(arg)+ 2 + 31);
242: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
243: strcpy(command, "GET ");
244: strcat(command, arg);
245: } else { /* not gatewayed */
246: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
247: command = malloc(4 + strlen(p1)+ 2 + 31);
248: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
1.23 luotonen 249: if (request->method != METHOD_INVALID) {
250: strcpy(command, HTMethod_name(request->method));
1.22 luotonen 251: strcat(command, " ");
252: }
253: else {
254: strcpy(command, "GET ");
255: }
1.37 luotonen 256: /* if we are using a proxy gateway don't copy in the first slash
257: ** of say: /gopher://a;lkdjfl;ajdf;lkj/;aldk/adflj
258: ** so that just gohper://.... is sent.
259: */
260: if (using_proxy)
261: strcat(command, p1+1);
262: else
263: strcat(command, p1);
1.17 timbl 264: free(p1);
265: }
1.2 timbl 266: #ifdef HTTP2
1.17 timbl 267: if (extensions) {
268: strcat(command, " ");
269: strcat(command, HTTP_VERSION);
270: }
1.2 timbl 271: #endif
1.17 timbl 272:
273: strcat(command, crlf); /* CR LF, as in rfc 977 */
274:
1.37 luotonen 275: if (extensions && HTImProxy && HTProxyHeaders) {
276: StrAllocCat(command, HTProxyHeaders);
277: }
278: else if (extensions) {
1.21 luotonen 279:
1.17 timbl 280: int i;
281: HTAtom * present = WWW_PRESENT;
282: char line[256]; /*@@@@ */
1.21 luotonen 283: HTList *conversions[2];
284:
1.22 luotonen 285: if (!HTConversions) {
286: HTConversions = HTList_new();
1.34 frystyk 287: /* HTFormatInit(HTConversions); App may do this not us tbl940210 */
1.22 luotonen 288: }
1.28 timbl 289:
1.21 luotonen 290: conversions[0] = HTConversions;
291: conversions[1] = request->conversions;
292:
1.34 frystyk 293:
1.21 luotonen 294: for (i=0; i<2; i++) {
295: HTList *cur = conversions[i];
296: HTPresentation *pres;
297:
298: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
299: if (pres->rep_out == present) {
300: if (pres->quality != 1.0) {
1.35 frystyk 301: sprintf(line, "Accept: %s; q=%.3f%c%c",
1.21 luotonen 302: HTAtom_name(pres->rep),
303: pres->quality, CR, LF);
304: } else {
305: sprintf(line, "Accept: %s%c%c",
306: HTAtom_name(pres->rep), CR, LF);
307: }
308: StrAllocCat(command, line);
1.17 timbl 309: }
310: }
1.2 timbl 311: }
1.22 luotonen 312:
1.37 luotonen 313: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
1.17 timbl 314: HTAppName ? HTAppName : "unknown",
315: HTAppVersion ? HTAppVersion : "0.0",
316: HTLibraryVersion, CR, LF);
1.37 luotonen 317: StrAllocCat(command, line);
1.45 luotonen 318:
319: #ifdef ACCESS_AUTH
320: if (request->authorization != NULL) {
321: sprintf(line, "Authorization: %s%c%c",
322: request->authorization, CR, LF);
323: StrAllocCat(command, line);
324: }
325: #endif /* ACCESS_AUTH */
326:
1.37 luotonen 327: }
1.22 luotonen 328:
1.17 timbl 329: StrAllocCat(command, crlf); /* Blank line means "end" */
330:
331: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
332:
333: /* Translate into ASCII if necessary
334: */
1.4 timbl 335: #ifdef NOT_ASCII
1.17 timbl 336: {
337: char * p;
338: for(p = command; *p; p++) {
339: *p = TOASCII(*p);
340: }
1.1 timbl 341: }
1.3 timbl 342: #endif
1.17 timbl 343:
344: status = NETWRITE(s, command, (int)strlen(command));
345: free(command);
346: if (status<0) {
347: if (TRACE) fprintf(stderr,
348: "HTTPAccess: Unable to send command.\n");
1.49 ! luotonen 349: HTAddErrorN(request,"Couldn't send request, errno:",errno);
1.1 timbl 350: return HTInetStatus("send");
1.17 timbl 351: }
352: } /* compose and send command */
353:
1.2 timbl 354:
1.17 timbl 355: /* Read the response
356: ** -----------------
1.11 timbl 357: **
358: ** HTTP0 servers must return ASCII style text, though it can in
359: ** principle be just text without any markup at all.
360: ** Full HTTP servers must return a response
361: ** line and RFC822 style header. The response must therefore in
362: ** either case have a CRLF somewhere soon.
363: **
364: ** This is the theory. In practice, there are (1993) unfortunately
365: ** many binary documents just served up with HTTP0.9. This
366: ** means we have to preserve the binary buffer (on the assumption that
367: ** conversion from ASCII may lose information) in case it turns
368: ** out that we want the binary original.
1.2 timbl 369: */
1.37 luotonen 370: if (HTImProxy) {
1.24 luotonen 371:
1.22 luotonen 372: /*
373: ** Server as a gateway -- send body of the message
374: ** received from client (if any).
375: */
376: if (request->isoc && request->content_length > 0) {
377: int remain = request->content_length;
378: int i = remain;
379: char * buf;
380:
381: while (remain > 0 &&
382: (buf = HTInputSocket_getBlock(request->isoc, &i))) {
383: int status = NETWRITE(s, buf, i);
384: if (status < 0) {
1.27 luotonen 385: CTRACE(stderr, "HTTPAccess.. Unable to forward body\n");
1.49 ! luotonen 386: HTAddErrorN(request,
! 387: "Couldn't forward message body, errno:",errno);
1.22 luotonen 388: return HTInetStatus("send");
389: }
390: remain -= i;
391: i = remain;
392: }
393: }
1.23 luotonen 394:
395: /*
1.22 luotonen 396: ** Load results directly to client
397: */
1.37 luotonen 398: HTProxyBytes = HTCopy(s, request->output_stream);
1.25 luotonen 399: (*request->output_stream->isa->free)(request->output_stream);
1.37 luotonen 400:
1.22 luotonen 401: return HT_LOADED;
402: }
403: else { /* read response */
1.21 luotonen 404:
1.17 timbl 405: HTFormat format_in; /* Format arriving in the message */
1.21 luotonen 406: HTInputSocket *isoc = HTInputSocket_new(s);
407: char * status_line = HTInputSocket_getStatusLine(isoc);
1.2 timbl 408:
1.11 timbl 409: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
410: ** First time we have enough, look at the stub in ASCII
411: ** and get out of here if it doesn't look right.
412: **
413: ** We also check for characters above 128 in the first few bytes, and
414: ** if we find them we forget the html default.
415: **
416: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
417: ** will be taken as a HTTP 1.0 server. Failure.
418: ** An HTTP 0.9 server returning a binary document with
419: ** characters < 128 will be read as ASCII.
420: */
1.36 frystyk 421:
422: /* If HTTP 0 response, then DO NOT CACHE (Henrik 14/02-94) */
423: if (!status_line) {
1.21 luotonen 424: if (HTInputSocket_seemsBinary(isoc)) {
425: format_in = HTAtom_for("www/unknown");
426: }
427: else {
428: format_in = WWW_HTML;
429: }
1.46 frystyk 430: cache_http = NO; /* Do not cache */
1.21 luotonen 431: goto copy;
432: } /* end kludge */
433:
434: if (status_line) { /* Decode full HTTP response */
435: /*
436: ** We now have a terminated server status line, and we have
437: ** checked that it is most probably a legal one. Parse it.
438: */
439: char server_version[VERSION_LENGTH+1];
440: int server_status;
441:
442: if (TRACE)
443: fprintf(stderr, "HTTP Status Line: Rx: %.70s\n", status_line);
1.17 timbl 444:
1.21 luotonen 445: sscanf(status_line, "%20s%d", server_version, &server_status);
1.2 timbl 446:
1.21 luotonen 447: format_in = HTAtom_for("www/mime");
1.7 timbl 448:
1.21 luotonen 449: switch (server_status / 100) {
1.2 timbl 450:
1.21 luotonen 451: default: /* bad number */
452: HTAlert("Unknown status reply from server!");
453: break;
1.17 timbl 454:
1.21 luotonen 455: case 3: /* Various forms of redirection */
456: HTAlert(
1.17 timbl 457: "Redirection response from server is not handled by this client");
1.21 luotonen 458: break;
1.17 timbl 459:
1.21 luotonen 460: case 4: /* Access Authorization problem */
1.14 luotonen 461: #ifdef ACCESS_AUTH
1.21 luotonen 462: switch (server_status) {
463: case 401:
464: parse_401_headers(request, isoc);
465:
466: if (TRACE) fprintf(stderr, "%s %d %s\n",
467: "HTTP: close socket", s,
468: "to retry with Access Authorization");
1.24 luotonen 469: if (HTAA_retryWithAuth(request, HTLoadHTTP)) {
1.21 luotonen 470: status = HT_LOADED;/* @@ THIS ONLY WORKS ON LINEMODE */
471: goto clean_up;
472: }
473: /* else falltrough */
474: default:
1.14 luotonen 475: {
1.21 luotonen 476: char *p1 = HTParse(gate ? gate : arg, "",
477: PARSE_HOST);
478: char * message;
479:
480: if (!(message = (char*)malloc(strlen(status_line) +
481: strlen(p1) + 100)))
482: outofmem(__FILE__, "HTTP 4xx status");
1.14 luotonen 483: sprintf(message,
1.21 luotonen 484: "HTTP server at %s replies:\n%s\n\n%s\n",
485: p1, status_line,
486: ((server_status == 401)
487: ? "Access Authorization package giving up.\n"
488: : ""));
1.22 luotonen 489: status = HTLoadError(request, server_status, message);
1.14 luotonen 490: free(message);
491: free(p1);
492: goto clean_up;
493: }
1.21 luotonen 494: } /* switch */
495: goto clean_up;
496: break;
497: #else
498: /* case 4 without Access Authorization falls through */
499: /* to case 5 (previously "I think I goofed"). -- AL */
500: #endif /* ACCESS_AUTH */
501:
502: case 5: /* I think you goofed */
503: {
504: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
505: char * message = (char*)malloc(strlen(status_line) +
506: strlen(p1) + 100);
507: if (!message) outofmem(__FILE__, "HTTP 5xx status");
508: sprintf(message,
509: "HTTP server at %s replies:\n%s", p1, status_line);
1.22 luotonen 510: status = HTLoadError(request, server_status, message);
1.21 luotonen 511: free(message);
512: free(p1);
513: goto clean_up;
514: }
515: break;
1.17 timbl 516:
1.21 luotonen 517: case 2: /* Good: Got MIME object */
518: break;
1.17 timbl 519:
1.21 luotonen 520: } /* switch on response code */
1.17 timbl 521:
1.21 luotonen 522: } /* Full HTTP reply */
1.17 timbl 523:
524:
1.3 timbl 525: /* Set up the stream stack to handle the body of the message
1.33 timbl 526: **
527: ** In the special case of user asking for source and the message
528: ** being in MIME, we force the MIME decoding to occur, as it is really
529: ** HTTP decoding. If the user really wants the HTTP headers, he
530: ** can ask for them as www/mime.
1.3 timbl 531: */
1.21 luotonen 532:
1.13 duns 533: copy:
1.21 luotonen 534:
1.43 frystyk 535: if ((format_in == WWW_MIME)
536: && (request->output_format == WWW_SOURCE)) {
1.33 timbl 537: target = HTMIMEConvert(request, NULL, format_in,
538: request->output_format, request->output_stream);
539: } else {
1.47 luotonen 540: target = HTStreamStack(format_in, request, NO);
1.33 timbl 541: }
542:
1.17 timbl 543: if (!target) {
544: char buffer[1024]; /* @@@@@@@@ */
545: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
546: HTAtom_name(format_in), HTAtom_name(request->output_format));
547: fprintf(stderr, "HTTP: %s", buffer);
1.22 luotonen 548: status = HTLoadError(request, 501, buffer);
1.17 timbl 549: goto clean_up;
550: }
551:
1.19 timbl 552: /* @@ Bug: The decision of whether or not to cache should also be
1.21 luotonen 553: ** made contingent on a IP address match or non match.
554: */
1.48 timbl 555:
556: if (HTCacheDir && cache_http) {
1.19 timbl 557: target = HTTee(target, HTCacheWriter(request, NULL, format_in,
1.21 luotonen 558: request->output_format,
559: request->output_stream));
1.19 timbl 560: }
561:
1.11 timbl 562: /* Push the data down the stream
1.3 timbl 563: ** We have to remember the end of the first buffer we just read
1.2 timbl 564: */
1.30 frystyk 565: if (format_in == WWW_HTML) {
1.17 timbl 566: target = HTNetToText(target); /* Pipe through CR stripper */
567: }
1.21 luotonen 568:
1.17 timbl 569: (*target->isa->put_block)(target,
1.21 luotonen 570: isoc->input_pointer,
571: isoc->input_limit - isoc->input_pointer);
1.41 frystyk 572:
1.17 timbl 573: HTCopy(s, target);
574:
575: (*target->isa->free)(target);
576: status = HT_LOADED;
1.11 timbl 577:
1.2 timbl 578: /* Clean up
1.1 timbl 579: */
1.17 timbl 580:
581: clean_up:
582: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
583: (void) NETCLOSE(s);
1.41 frystyk 584: if (isoc)
585: HTInputSocket_free(isoc);
586: if (status_line)
1.38 frystyk 587: free(status_line); /* Leak fix Henrik 18/02-94 */
588: return status; /* Good return */
1.3 timbl 589:
1.17 timbl 590: } /* read response */
591: } /* load HTTP */
1.1 timbl 592:
593: /* Protocol descriptor
594: */
595:
1.17 timbl 596: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };
1.21 luotonen 597:
Webmaster