Annotation of libwww/Library/src/HTTP.c, revision 1.20
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
1.12 timbl 17: /* MOSAIC_HACK2 is a kludge to guess the file type of a transferred
18: ** file from the URL. It is STRICTLY illegal to do this!
19: */
20:
1.2 timbl 21: /* Implements:
22: */
23: #include "HTTP.h"
24:
/* Protocol version token appended to the request line when
** extensions are enabled.
*/
#define HTTP_VERSION "HTTP/1.0"

/* Defined when the client speaks a protocol version above 0.9;
** only its presence is tested (#ifdef), it has no value.
*/
#define HTTP2

/* Initial size of the response line buffers. */
#define INIT_LINE_SIZE 1024

/* The line buffers are doubled whenever fewer than this many
** bytes remain free before a read.
*/
#define LINE_EXTEND_THRESH 256

/* Maximum characters kept of the protocol version word returned
** by the server.
*/
#define VERSION_LENGTH 20
31:
32: /* Uses:
33: */
1.1 timbl 34: #include "HTParse.h"
35: #include "HTUtils.h"
36: #include "tcp.h"
37: #include "HTTCP.h"
38: #include "HTFormat.h"
1.2 timbl 39: #include <ctype.h>
40: #include "HTAlert.h"
41: #include "HTMIME.h"
1.5 timbl 42: #include "HTML.h" /* SCW */
43: #include "HTInit.h" /* SCW */
1.14 luotonen 44: #include "HTAABrow.h" /* Access Authorization */
1.20 ! timbl 45: #include "HTTee.h" /* Tee off a cache stream */
! 46: #include "HTFWriter.h" /* Write to cache file */
1.1 timbl 47:
1.2 timbl 48: struct _HTStream {
49: HTStreamClass * isa; /* all we need to know */
50: };
51:
52:
1.6 timbl 53: extern char * HTAppName; /* Application name: please supply */
54: extern char * HTAppVersion; /* Application version: please supply */
55:
1.19 timbl 56: PUBLIC BOOL HTCacheHTTP = YES; /* Enable caching of HTTP-retrieved files */
57:
1.1 timbl 58: /* Load Document from HTTP Server HTLoadHTTP()
59: ** ==============================
60: **
61: ** Given a hypertext address, this routine loads a document.
62: **
63: **
64: ** On entry,
65: ** arg is the hypertext reference of the article to be loaded.
66: **
67: ** On exit,
68: ** returns >=0 If no error, a good socket number
69: ** <0 Error.
70: **
71: ** The socket must be closed by the caller after the document has been
72: ** read.
73: **
74: */
1.19 timbl 75: PUBLIC int HTLoadHTTP ARGS1 (HTRequest *, request)
1.1 timbl 76: {
1.19 timbl 77: CONST char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 78: int s; /* Socket number for returned data */
79: int status; /* tcp return */
1.10 timbl 80: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 81: HTStream * target = NULL; /* Unconverted data */
1.15 luotonen 82: char *auth = NULL; /* Authorization information */
1.3 timbl 83:
1.2 timbl 84: CONST char* gate = 0; /* disable this feature */
1.1 timbl 85: SockA soc_address; /* Binary network address */
86: SockA * sin = &soc_address;
1.2 timbl 87: BOOL extensions = YES; /* Assume good HTTP server */
1.17 timbl 88:
1.1 timbl 89: if (!arg) return -3; /* Bad if no name sepcified */
90: if (!*arg) return -2; /* Bad if name had zero length */
91:
92: /* Set up defaults:
93: */
94: #ifdef DECNET
1.2 timbl 95: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
96: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 97: #else /* Internet */
1.2 timbl 98: sin->sin_family = AF_INET; /* Family = internet, host order */
99: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 100: #endif
101:
1.10 timbl 102: sprintf(crlf, "%c%c", CR, LF); /* To be corect on Mac, VM, etc */
103:
1.1 timbl 104: if (TRACE) {
105: if (gate) fprintf(stderr,
106: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
107: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
108: }
109:
110: /* Get node name and optional port number:
111: */
112: {
113: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
114: int status = HTParseInet(sin, p1); /* TBL 920622 */
115: free(p1);
116: if (status) return status; /* No such host for example */
117: }
118:
1.2 timbl 119: retry:
1.15 luotonen 120:
121: /*
122: ** Compose authorization information (this was moved here
123: ** from after the making of the connection so that the connection
124: ** wouldn't have to wait while prompting username and password
125: ** from the user). -- AL 13.10.93
126: */
127: #ifdef ACCESS_AUTH
128: #define FREE(x) if (x) {free(x); x=NULL;}
129: {
130: char *docname;
131: char *hostname;
132: char *colon;
133: int portnumber;
134:
135: docname = HTParse(arg, "", PARSE_PATH);
136: hostname = HTParse((gate ? gate : arg), "", PARSE_HOST);
137: if (hostname &&
138: NULL != (colon = strchr(hostname, ':'))) {
1.16 duns 139: *(colon++) = '0円'; /* Chop off port number */
1.15 luotonen 140: portnumber = atoi(colon);
141: }
142: else portnumber = 80;
143:
144: auth = HTAA_composeAuth(hostname, portnumber, docname);
145:
146: if (TRACE) {
147: if (auth)
148: fprintf(stderr, "HTTP: Sending authorization: %s\n", auth);
149: else
150: fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
151: }
152: FREE(hostname);
153: FREE(docname);
154: }
155: #endif /* ACCESS_AUTH */
1.1 timbl 156:
1.10 timbl 157: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 158: */
159: #ifdef DECNET
160: s = socket(AF_DECnet, SOCK_STREAM, 0);
161: #else
162: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
163: #endif
164: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
165: if (status < 0) {
166: if (TRACE) fprintf(stderr,
167: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
1.17 timbl 168:
1.1 timbl 169: return HTInetStatus("connect");
170: }
171:
172: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
173:
1.17 timbl 174:
175: /* Compose and send command
176: ** ------------------------
177: */
178: {
179: char *command; /* The whole command */
180:
1.1 timbl 181: /* Ask that node for the document,
182: ** omitting the host name & anchor if not gatewayed.
183: */
1.17 timbl 184: if (gate) {
185: command = malloc(4 + strlen(arg)+ 2 + 31);
186: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
187: strcpy(command, "GET ");
188: strcat(command, arg);
189: } else { /* not gatewayed */
190: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
191: command = malloc(4 + strlen(p1)+ 2 + 31);
192: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
193: strcpy(command, "GET ");
194: strcat(command, p1);
195: free(p1);
196: }
1.2 timbl 197: #ifdef HTTP2
1.17 timbl 198: if (extensions) {
199: strcat(command, " ");
200: strcat(command, HTTP_VERSION);
201: }
1.2 timbl 202: #endif
1.17 timbl 203:
204: strcat(command, crlf); /* CR LF, as in rfc 977 */
205:
206: if (extensions) {
207:
208: int n;
209: int i;
210: HTAtom * present = WWW_PRESENT;
211: char line[256]; /*@@@@ */
212:
213: /* if (!request->conversions) HTFormatInit(request->conversions); */
214: n = HTList_count(request->conversions);
215:
216: for(i=0; i<n; i++) {
217: HTPresentation * pres =
218: HTList_objectAt(request->conversions, i);
219: if (pres->rep_out == present) {
220: if (pres->quality != 1.0) {
221: sprintf(line, "Accept: %s q=%.3f%c%c",
222: HTAtom_name(pres->rep), pres->quality, CR, LF);
223: } else {
224: sprintf(line, "Accept: %s%c%c",
225: HTAtom_name(pres->rep), CR, LF);
226: }
227: StrAllocCat(command, line);
228:
229: }
1.2 timbl 230: }
1.17 timbl 231:
232: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
233: HTAppName ? HTAppName : "unknown",
234: HTAppVersion ? HTAppVersion : "0.0",
235: HTLibraryVersion, CR, LF);
236: StrAllocCat(command, line);
237:
1.14 luotonen 238: #ifdef ACCESS_AUTH
1.17 timbl 239: if (auth != NULL) {
240: sprintf(line, "%s%c%c", auth, CR, LF);
241: StrAllocCat(command, line);
242: }
243: #endif /* ACCESS_AUTH */
1.14 luotonen 244: }
1.17 timbl 245:
246: StrAllocCat(command, crlf); /* Blank line means "end" */
247:
248: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
249:
250: /* Translate into ASCII if necessary
251: */
1.4 timbl 252: #ifdef NOT_ASCII
1.17 timbl 253: {
254: char * p;
255: for(p = command; *p; p++) {
256: *p = TOASCII(*p);
257: }
1.1 timbl 258: }
1.3 timbl 259: #endif
1.17 timbl 260:
261: status = NETWRITE(s, command, (int)strlen(command));
262: free(command);
263: if (status<0) {
264: if (TRACE) fprintf(stderr,
265: "HTTPAccess: Unable to send command.\n");
1.1 timbl 266: return HTInetStatus("send");
1.17 timbl 267: }
268: } /* compose and send command */
269:
1.2 timbl 270:
1.17 timbl 271: /* Read the response
272: ** -----------------
1.11 timbl 273: **
274: ** HTTP0 servers must return ASCII style text, though it can in
275: ** principle be just text without any markup at all.
276: ** Full HTTP servers must return a response
277: ** line and RFC822 style header. The response must therefore in
278: ** either case have a CRLF somewhere soon.
279: **
280: ** This is the theory. In practice, there are (1993) unfortunately
281: ** many binary documents just served up with HTTP0.9. This
282: ** means we have to preserve the binary buffer (on the assumption that
283: ** conversion from ASCII may lose information) in case it turns
284: ** out that we want the binary original.
1.2 timbl 285: */
1.3 timbl 286:
1.17 timbl 287: { /* read response */
288:
289: char * eol = 0; /* End of line if found */
290: char * start_of_data; /* Start of body of reply */
291: int length; /* Number of valid bytes in buffer */
292: char * text_buffer = NULL;
293: char * binary_buffer = NULL;
294: HTFormat format_in; /* Format arriving in the message */
295:
296: { /* local variablees for loop*/
1.2 timbl 297:
298: /* Get numeric status etc */
299:
1.17 timbl 300: BOOL end_of_file = NO;
301: HTAtom * encoding = HTAtom_for("7bit");
302: int buffer_length = INIT_LINE_SIZE; /* Why not? */
303:
304: binary_buffer = (char *) malloc(buffer_length * sizeof(char));
305: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
306: text_buffer = (char *) malloc(buffer_length * sizeof(char));
307: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
308: length = 0;
1.2 timbl 309:
1.17 timbl 310: do { /* Loop to read in the first line */
311:
312: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
313:
314: if (buffer_length - length < LINE_EXTEND_THRESH) {
315: buffer_length = buffer_length + buffer_length;
316: binary_buffer = (char *) realloc(
317: binary_buffer, buffer_length * sizeof(char));
318: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
319: text_buffer = (char *) realloc(
320: text_buffer, buffer_length * sizeof(char));
321: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
322: }
323: status = NETREAD(s, binary_buffer + length,
324: buffer_length - length -1);
325: if (status < 0) {
326: HTAlert("Unexpected network read error on response");
327: NETCLOSE(s);
328: return status;
329: }
330:
331: if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
332: status);
333:
334: if (status == 0) {
335: end_of_file = YES;
336: break;
337: }
338: binary_buffer[length+status] = 0;
339:
340:
1.11 timbl 341: /* Make an ASCII *copy* of the buffer
342: */
1.2 timbl 343: #ifdef NOT_ASCII
1.17 timbl 344: if (TRACE) fprintf(stderr,
345: "Local codes CR=%d, LF=%d\n", CR, LF);
1.11 timbl 346: #endif
1.17 timbl 347: {
348: char * p;
349: char * q;
350: for(p = binary_buffer+length, q=text_buffer+length;
351: *p; p++, q++) {
352: *q = FROMASCII(*p);
353: }
354:
355: *q++ = 0;
1.11 timbl 356: }
1.17 timbl 357:
1.11 timbl 358: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
359: ** First time we have enough, look at the stub in ASCII
360: ** and get out of here if it doesn't look right.
361: **
362: ** We also check for characters above 128 in the first few bytes, and
363: ** if we find them we forget the html default.
364: **
365: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
366: ** will be taken as a HTTP 1.0 server. Failure.
367: ** An HTTP 0.9 server returning a binary document with
368: ** characters < 128 will be read as ASCII.
369: */
370: #define STUB_LENGTH 20
1.17 timbl 371: if (length < STUB_LENGTH && length+status >= STUB_LENGTH) {
372: if(strncmp("HTTP/", text_buffer, 5)!=0) {
373: char *p;
374: start_of_data = text_buffer; /* reparse whole reply */
375: for(p=binary_buffer; p <binary_buffer+STUB_LENGTH;p++) {
376: if (((int)*p)&128) {
377: format_in = HTAtom_for("www/unknown");
378: length = length + status;
379: goto copy; /* out of while loop */
380: }
1.11 timbl 381: }
382: }
1.2 timbl 383: }
1.17 timbl 384: /* end kludge */
385:
386:
387: eol = strchr(text_buffer + length, 10);
388: if (eol) {
389: *eol = 0; /* Terminate the line */
390: if (eol[-1] == CR) eol[-1] = 0; /* Chop trailing CR */
391: /* = corrected to == -- AL */
392: }
393:
394: length = length + status;
395:
396: } while (!eol && !end_of_file); /* No LF */
397:
398: } /* Scope of loop variables */
1.2 timbl 399:
1.7 timbl 400:
401: /* We now have a terminated unfolded line. Parse it.
402: ** -------------------------------------------------
1.2 timbl 403: */
404:
1.17 timbl 405: if (TRACE)fprintf(stderr, "HTTP: Rx: %.70s\n", text_buffer);
406:
407: {
408: int fields;
409: char server_version [VERSION_LENGTH+1];
410: int server_status;
411:
412:
413: #ifdef OLD_CODE /* old buggy servers should not exist now tbl9311 */
1.2 timbl 414: /* Kludge to work with old buggy servers. They can't handle the third word
415: ** so we try again without it.
416: */
1.17 timbl 417: if (extensions &&
418: 0==strcmp(text_buffer, /* Old buggy server? */
419: "Document address invalid or access not authorised")) {
420: extensions = NO;
421: if (binary_buffer) free(binary_buffer);
422: if (text_buffer) free(text_buffer);
423: if (TRACE) fprintf(stderr,
424: "HTTP: close socket %d to retry with HTTP0\n", s);
425: NETCLOSE(s);
426: goto retry; /* @@@@@@@@@@ */
427: }
428: #endif
429:
430: fields = sscanf(text_buffer, "%20s%d",
431: server_version,
432: &server_status);
433:
434: if (fields < 2 ||
435: strncmp(server_version, "HTTP/", 5)!=0) { /* HTTP0 reply */
436: format_in = WWW_HTML;
437: start_of_data = text_buffer; /* reread whole reply */
438: if (eol) *eol = '\n'; /* Reconstitute buffer */
439:
440: } else { /* Full HTTP reply */
1.2 timbl 441:
1.17 timbl 442: /* Decode full HTTP response */
1.2 timbl 443:
1.17 timbl 444: format_in = HTAtom_for("www/mime");
445: start_of_data = eol ? eol + 1 : text_buffer + length;
446:
447: switch (server_status / 100) {
1.3 timbl 448:
1.17 timbl 449: default: /* bad number */
450: HTAlert("Unknown status reply from server!");
451: break;
452:
453: case 3: /* Various forms of redirection */
454: HTAlert(
455: "Redirection response from server is not handled by this client");
456: break;
457:
458: case 4: /* Access Authorization problem */
1.14 luotonen 459: #ifdef ACCESS_AUTH
1.17 timbl 460: switch (server_status) {
461: case 401:
462: length -= start_of_data - text_buffer;
463: if (HTAA_shouldRetryWithAuth(start_of_data, length, s)) {
464: /* Clean up before retrying */
465: if (binary_buffer) free(binary_buffer);
466: if (text_buffer) free(text_buffer);
467: if (TRACE)
468: fprintf(stderr, "%s %d %s\n",
469: "HTTP: close socket", s,
470: "to retry with Access Authorization");
471: (void)NETCLOSE(s);
472: goto retry;
473: break;
474: }
475: else {
476: /* FALL THROUGH */
477: }
478: default:
479: {
480: char *p1 = HTParse(gate ? gate : arg, "",
481: PARSE_HOST);
482: char * message;
483:
484: if (!(message = (char*)malloc(strlen(text_buffer) +
485: strlen(p1) + 100)))
486: outofmem(__FILE__, "HTTP 4xx status");
487: sprintf(message,
488: "HTTP server at %s replies:\n%s\n\n%s\n",
489: p1, text_buffer,
490: ((server_status == 401)
491: ? "Access Authorization package giving up.\n"
492: : ""));
493: status = HTLoadError(request->output_stream,
494: server_status, message);
495: free(message);
496: free(p1);
497: goto clean_up;
498: }
499: } /* switch */
500: goto clean_up;
501: break;
502: #else
503: /* case 4 without Access Authorization falls through */
504: /* to case 5 (previously "I think I goofed"). -- AL */
505: #endif /* ACCESS_AUTH */
506:
507: case 5: /* I think you goofed */
1.14 luotonen 508: {
509: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
1.17 timbl 510: char * message = (char*)malloc(
511: strlen(text_buffer)+strlen(p1) + 100);
512: if (!message) outofmem(__FILE__, "HTTP 5xx status");
1.14 luotonen 513: sprintf(message,
1.17 timbl 514: "HTTP server at %s replies:\n%s", p1, text_buffer);
515: status = HTLoadError(request->output_stream, server_status, message);
1.14 luotonen 516: free(message);
517: free(p1);
518: goto clean_up;
519: }
1.17 timbl 520: break;
521:
522: case 2: /* Good: Got MIME object */
523: break;
524:
525: } /* switch on response code */
526:
527: } /* Full HTTP reply */
528:
529: } /* scope of fields */
530:
1.3 timbl 531: /* Set up the stream stack to handle the body of the message
532: */
1.17 timbl 533:
1.13 duns 534: copy:
1.17 timbl 535:
1.18 timbl 536: target = HTStreamStack(format_in, request);
1.17 timbl 537:
538: if (!target) {
539: char buffer[1024]; /* @@@@@@@@ */
540: if (binary_buffer) free(binary_buffer);
541: if (text_buffer) free(text_buffer);
542: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
543: HTAtom_name(format_in), HTAtom_name(request->output_format));
544: fprintf(stderr, "HTTP: %s", buffer);
545: status = HTLoadError(request->output_stream, 501, buffer);
546: goto clean_up;
547: }
548:
1.19 timbl 549: /* @@ Bug: The decision of whether or not to cache should also be
550: made contingent on a IP address match or non match */
551:
552: if (HTCacheHTTP) {
553: target = HTTee(target, HTCacheWriter(request, NULL, format_in,
554: request->output_format, request->output_stream));
555: }
556:
1.11 timbl 557: /* Push the data down the stream
1.3 timbl 558: ** We have to remember the end of the first buffer we just read
1.2 timbl 559: */
1.17 timbl 560: if (format_in == WWW_HTML) {
561: target = HTNetToText(target); /* Pipe through CR stripper */
562: }
563:
564: (*target->isa->put_block)(target,
565: binary_buffer + (start_of_data - text_buffer),
566: length - (start_of_data - text_buffer));
567: HTCopy(s, target);
568:
569: (*target->isa->free)(target);
570: status = HT_LOADED;
1.11 timbl 571:
1.2 timbl 572: /* Clean up
1.1 timbl 573: */
1.17 timbl 574:
575: clean_up:
576: if (binary_buffer) free(binary_buffer);
577: if (text_buffer) free(text_buffer);
578:
579: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
580: (void) NETCLOSE(s);
581:
582: return status; /* Good return */
1.3 timbl 583:
1.17 timbl 584: } /* read response */
585: } /* load HTTP */
1.1 timbl 586:
587: /* Protocol descriptor
588: */
589:
1.17 timbl 590: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };
Webmaster