Annotation of libwww/Library/src/HTTP.c, revision 1.19
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
1.12 timbl 17: /* MOSAIC_HACK2 is a kludge to guess the file type of a transferred
18: ** file from the URL. It is STRICTLY illegal to do this!
19: */
20:
1.2 timbl 21: /* Implements:
22: */
23: #include "HTTP.h"
24:
25: #define HTTP_VERSION "HTTP/1.0"
26: #define HTTP2 /* Version is greater than 0.9 */
27:
28: #define INIT_LINE_SIZE 1024 /* Start with line buffer this big */
29: #define LINE_EXTEND_THRESH 256 /* Minimum read size */
30: #define VERSION_LENGTH 20 /* for returned protocol version */
31:
32: /* Uses:
33: */
1.1 timbl 34: #include "HTParse.h"
35: #include "HTUtils.h"
36: #include "tcp.h"
37: #include "HTTCP.h"
38: #include "HTFormat.h"
1.2 timbl 39: #include <ctype.h>
40: #include "HTAlert.h"
41: #include "HTMIME.h"
1.5 timbl 42: #include "HTML.h" /* SCW */
43: #include "HTInit.h" /* SCW */
1.14 luotonen 44: #include "HTAABrow.h" /* Access Authorization */
1.1 timbl 45:
1.2 timbl 46: struct _HTStream {
47: HTStreamClass * isa; /* all we need to know */
48: };
49:
50:
1.6 timbl 51: extern char * HTAppName; /* Application name: please supply */
52: extern char * HTAppVersion; /* Application version: please supply */
53:
1.19 ! timbl 54: PUBLIC BOOL HTCacheHTTP = YES; /* Enable caching of HTTP-retrieved files */
! 55:
1.1 timbl 56: /* Load Document from HTTP Server HTLoadHTTP()
57: ** ==============================
58: **
59: ** Given a hypertext address, this routine loads a document.
60: **
61: **
62: ** On entry,
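63: ** request carries the anchor whose physical address is the article to be loaded.
64: **
65: ** On exit,
66: ** returns HT_LOADED after the document has been pushed down the stream stack,
67: ** otherwise an error status (e.g. -3 or -2 for a missing or empty address).
68: **
69: ** Once a response has been read, the socket is closed by this routine before it
70: ** returns; the caller does not close it.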
71: **
72: */
1.19 ! timbl 73: PUBLIC int HTLoadHTTP ARGS1 (HTRequest *, request)
1.1 timbl 74: {
1.19 ! timbl 75: CONST char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 76: int s; /* Socket number for returned data */
77: int status; /* tcp return */
1.10 timbl 78: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 79: HTStream * target = NULL; /* Unconverted data */
1.15 luotonen 80: char *auth = NULL; /* Authorization information */
1.3 timbl 81:
1.2 timbl 82: CONST char* gate = 0; /* disable this feature */
1.1 timbl 83: SockA soc_address; /* Binary network address */
84: SockA * sin = &soc_address;
1.2 timbl 85: BOOL extensions = YES; /* Assume good HTTP server */
1.17 timbl 86:
1.1 timbl 87: if (!arg) return -3; /* Bad if no name specified */
88: if (!*arg) return -2; /* Bad if name had zero length */
89:
90: /* Set up defaults:
91: */
92: #ifdef DECNET
1.2 timbl 93: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
94: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 95: #else /* Internet */
1.2 timbl 96: sin->sin_family = AF_INET; /* Family = internet, host order */
97: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 98: #endif
99:
1.10 timbl 100: sprintf(crlf, "%c%c", CR, LF); /* To be correct on Mac, VM, etc */
101:
1.1 timbl 102: if (TRACE) {
103: if (gate) fprintf(stderr,
104: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
105: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
106: }
107:
108: /* Get node name and optional port number:
109: */
110: {
111: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
112: int status = HTParseInet(sin, p1); /* TBL 920622 */
113: free(p1);
114: if (status) return status; /* No such host for example */
115: }
116:
1.2 timbl 117: retry:
1.15 luotonen 118:
119: /*
120: ** Compose authorization information (this was moved here
121: ** from after the making of the connection so that the connection
122: ** wouldn't have to wait while prompting username and password
123: ** from the user). -- AL 13.10.93
124: */
125: #ifdef ACCESS_AUTH
126: #define FREE(x) if (x) {free(x); x=NULL;}
127: {
128: char *docname;
129: char *hostname;
130: char *colon;
131: int portnumber;
132:
133: docname = HTParse(arg, "", PARSE_PATH);
134: hostname = HTParse((gate ? gate : arg), "", PARSE_HOST);
135: if (hostname &&
136: NULL != (colon = strchr(hostname, ':'))) {
1.16 duns 137: *(colon++) = '\0'; /* Chop off port number */
1.15 luotonen 138: portnumber = atoi(colon);
139: }
140: else portnumber = 80;
141:
142: auth = HTAA_composeAuth(hostname, portnumber, docname);
143:
144: if (TRACE) {
145: if (auth)
146: fprintf(stderr, "HTTP: Sending authorization: %s\n", auth);
147: else
148: fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
149: }
150: FREE(hostname);
151: FREE(docname);
152: }
153: #endif /* ACCESS_AUTH */
1.1 timbl 154:
1.10 timbl 155: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 156: */
157: #ifdef DECNET
158: s = socket(AF_DECnet, SOCK_STREAM, 0);
159: #else
160: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
161: #endif
162: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
163: if (status < 0) {
164: if (TRACE) fprintf(stderr,
165: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
1.17 timbl 166:
1.1 timbl 167: return HTInetStatus("connect");
168: }
169:
170: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
171:
1.17 timbl 172:
173: /* Compose and send command
174: ** ------------------------
175: */
176: {
177: char *command; /* The whole command */
178:
1.1 timbl 179: /* Ask that node for the document,
180: ** omitting the host name & anchor if not gatewayed.
181: */
1.17 timbl 182: if (gate) {
183: command = malloc(4 + strlen(arg)+ 2 + 31);
184: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
185: strcpy(command, "GET ");
186: strcat(command, arg);
187: } else { /* not gatewayed */
188: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
189: command = malloc(4 + strlen(p1)+ 2 + 31);
190: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
191: strcpy(command, "GET ");
192: strcat(command, p1);
193: free(p1);
194: }
1.2 timbl 195: #ifdef HTTP2
1.17 timbl 196: if (extensions) {
197: strcat(command, " ");
198: strcat(command, HTTP_VERSION);
199: }
1.2 timbl 200: #endif
1.17 timbl 201:
202: strcat(command, crlf); /* CR LF, as in rfc 977 */
203:
204: if (extensions) {
205:
206: int n;
207: int i;
208: HTAtom * present = WWW_PRESENT;
209: char line[256]; /*@@@@ */
210:
211: /* if (!request->conversions) HTFormatInit(request->conversions); */
212: n = HTList_count(request->conversions);
213:
214: for(i=0; i<n; i++) {
215: HTPresentation * pres =
216: HTList_objectAt(request->conversions, i);
217: if (pres->rep_out == present) {
218: if (pres->quality != 1.0) {
219: sprintf(line, "Accept: %s q=%.3f%c%c",
220: HTAtom_name(pres->rep), pres->quality, CR, LF);
221: } else {
222: sprintf(line, "Accept: %s%c%c",
223: HTAtom_name(pres->rep), CR, LF);
224: }
225: StrAllocCat(command, line);
226:
227: }
1.2 timbl 228: }
1.17 timbl 229:
230: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
231: HTAppName ? HTAppName : "unknown",
232: HTAppVersion ? HTAppVersion : "0.0",
233: HTLibraryVersion, CR, LF);
234: StrAllocCat(command, line);
235:
1.14 luotonen 236: #ifdef ACCESS_AUTH
1.17 timbl 237: if (auth != NULL) {
238: sprintf(line, "%s%c%c", auth, CR, LF);
239: StrAllocCat(command, line);
240: }
241: #endif /* ACCESS_AUTH */
1.14 luotonen 242: }
1.17 timbl 243:
244: StrAllocCat(command, crlf); /* Blank line means "end" */
245:
246: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
247:
248: /* Translate into ASCII if necessary
249: */
1.4 timbl 250: #ifdef NOT_ASCII
1.17 timbl 251: {
252: char * p;
253: for(p = command; *p; p++) {
254: *p = TOASCII(*p);
255: }
1.1 timbl 256: }
1.3 timbl 257: #endif
1.17 timbl 258:
259: status = NETWRITE(s, command, (int)strlen(command));
260: free(command);
261: if (status<0) {
262: if (TRACE) fprintf(stderr,
263: "HTTPAccess: Unable to send command.\n");
1.1 timbl 264: return HTInetStatus("send");
1.17 timbl 265: }
266: } /* compose and send command */
267:
1.2 timbl 268:
1.17 timbl 269: /* Read the response
270: ** -----------------
1.11 timbl 271: **
272: ** HTTP0 servers must return ASCII style text, though it can in
273: ** principle be just text without any markup at all.
274: ** Full HTTP servers must return a response
275: ** line and RFC822 style header. The response must therefore in
276: ** either case have a CRLF somewhere soon.
277: **
278: ** This is the theory. In practice, there are (1993) unfortunately
279: ** many binary documents just served up with HTTP0.9. This
280: ** means we have to preserve the binary buffer (on the assumption that
281: ** conversion from ASCII may lose information) in case it turns
282: ** out that we want the binary original.
1.2 timbl 283: */
1.3 timbl 284:
1.17 timbl 285: { /* read response */
286:
287: char * eol = 0; /* End of line if found */
288: char * start_of_data; /* Start of body of reply */
289: int length; /* Number of valid bytes in buffer */
290: char * text_buffer = NULL;
291: char * binary_buffer = NULL;
292: HTFormat format_in; /* Format arriving in the message */
293:
294: { /* local variables for loop */
1.2 timbl 295:
296: /* Get numeric status etc */
297:
1.17 timbl 298: BOOL end_of_file = NO;
299: HTAtom * encoding = HTAtom_for("7bit");
300: int buffer_length = INIT_LINE_SIZE; /* Why not? */
301:
302: binary_buffer = (char *) malloc(buffer_length * sizeof(char));
303: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
304: text_buffer = (char *) malloc(buffer_length * sizeof(char));
305: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
306: length = 0;
1.2 timbl 307:
1.17 timbl 308: do { /* Loop to read in the first line */
309:
310: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
311:
312: if (buffer_length - length < LINE_EXTEND_THRESH) {
313: buffer_length = buffer_length + buffer_length;
314: binary_buffer = (char *) realloc(
315: binary_buffer, buffer_length * sizeof(char));
316: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
317: text_buffer = (char *) realloc(
318: text_buffer, buffer_length * sizeof(char));
319: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
320: }
321: status = NETREAD(s, binary_buffer + length,
322: buffer_length - length -1);
323: if (status < 0) {
324: HTAlert("Unexpected network read error on response");
325: NETCLOSE(s);
326: return status;
327: }
328:
329: if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
330: status);
331:
332: if (status == 0) {
333: end_of_file = YES;
334: break;
335: }
336: binary_buffer[length+status] = 0;
337:
338:
1.11 timbl 339: /* Make an ASCII *copy* of the buffer
340: */
1.2 timbl 341: #ifdef NOT_ASCII
1.17 timbl 342: if (TRACE) fprintf(stderr,
343: "Local codes CR=%d, LF=%d\n", CR, LF);
1.11 timbl 344: #endif
1.17 timbl 345: {
346: char * p;
347: char * q;
348: for(p = binary_buffer+length, q=text_buffer+length;
349: *p; p++, q++) {
350: *q = FROMASCII(*p);
351: }
352:
353: *q++ = 0;
1.11 timbl 354: }
1.17 timbl 355:
1.11 timbl 356: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
357: ** First time we have enough, look at the stub in ASCII
358: ** and get out of here if it doesn't look right.
359: **
360: ** We also check for bytes with the top bit set (>= 128) in the first few bytes, and
361: ** if we find them we forget the html default.
362: **
363: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
364: ** will be taken as a HTTP 1.0 server. Failure.
365: ** An HTTP 0.9 server returning a binary document with
366: ** characters < 128 will be read as ASCII.
367: */
368: #define STUB_LENGTH 20
1.17 timbl 369: if (length < STUB_LENGTH && length+status >= STUB_LENGTH) {
370: if(strncmp("HTTP/", text_buffer, 5)!=0) {
371: char *p;
372: start_of_data = text_buffer; /* reparse whole reply */
373: for(p=binary_buffer; p <binary_buffer+STUB_LENGTH;p++) {
374: if (((int)*p)&128) {
375: format_in = HTAtom_for("www/unknown");
376: length = length + status;
377: goto copy; /* out of while loop */
378: }
1.11 timbl 379: }
380: }
1.2 timbl 381: }
1.17 timbl 382: /* end kludge */
383:
384:
385: eol = strchr(text_buffer + length, 10);
386: if (eol) {
387: *eol = 0; /* Terminate the line */
388: if (eol[-1] == CR) eol[-1] = 0; /* Chop trailing CR */
389: /* = corrected to == -- AL */
390: }
391:
392: length = length + status;
393:
394: } while (!eol && !end_of_file); /* No LF */
395:
396: } /* Scope of loop variables */
1.2 timbl 397:
1.7 timbl 398:
399: /* We now have a terminated unfolded line. Parse it.
400: ** -------------------------------------------------
1.2 timbl 401: */
402:
1.17 timbl 403: if (TRACE)fprintf(stderr, "HTTP: Rx: %.70s\n", text_buffer);
404:
405: {
406: int fields;
407: char server_version [VERSION_LENGTH+1];
408: int server_status;
409:
410:
411: #ifdef OLD_CODE /* old buggy servers should not exist now tbl9311 */
1.2 timbl 412: /* Kludge to work with old buggy servers. They can't handle the third word
413: ** so we try again without it.
414: */
1.17 timbl 415: if (extensions &&
416: 0==strcmp(text_buffer, /* Old buggy server? */
417: "Document address invalid or access not authorised")) {
418: extensions = NO;
419: if (binary_buffer) free(binary_buffer);
420: if (text_buffer) free(text_buffer);
421: if (TRACE) fprintf(stderr,
422: "HTTP: close socket %d to retry with HTTP0\n", s);
423: NETCLOSE(s);
424: goto retry; /* @@@@@@@@@@ */
425: }
426: #endif
427:
428: fields = sscanf(text_buffer, "%20s%d",
429: server_version,
430: &server_status);
431:
432: if (fields < 2 ||
433: strncmp(server_version, "HTTP/", 5)!=0) { /* HTTP0 reply */
434: format_in = WWW_HTML;
435: start_of_data = text_buffer; /* reread whole reply */
436: if (eol) *eol = '\n'; /* Reconstitute buffer */
437:
438: } else { /* Full HTTP reply */
1.2 timbl 439:
1.17 timbl 440: /* Decode full HTTP response */
1.2 timbl 441:
1.17 timbl 442: format_in = HTAtom_for("www/mime");
443: start_of_data = eol ? eol + 1 : text_buffer + length;
444:
445: switch (server_status / 100) {
1.3 timbl 446:
1.17 timbl 447: default: /* bad number */
448: HTAlert("Unknown status reply from server!");
449: break;
450:
451: case 3: /* Various forms of redirection */
452: HTAlert(
453: "Redirection response from server is not handled by this client");
454: break;
455:
456: case 4: /* Access Authorization problem */
1.14 luotonen 457: #ifdef ACCESS_AUTH
1.17 timbl 458: switch (server_status) {
459: case 401:
460: length -= start_of_data - text_buffer;
461: if (HTAA_shouldRetryWithAuth(start_of_data, length, s)) {
462: /* Clean up before retrying */
463: if (binary_buffer) free(binary_buffer);
464: if (text_buffer) free(text_buffer);
465: if (TRACE)
466: fprintf(stderr, "%s %d %s\n",
467: "HTTP: close socket", s,
468: "to retry with Access Authorization");
469: (void)NETCLOSE(s);
470: goto retry;
471: break;
472: }
473: else {
474: /* FALL THROUGH */
475: }
476: default:
477: {
478: char *p1 = HTParse(gate ? gate : arg, "",
479: PARSE_HOST);
480: char * message;
481:
482: if (!(message = (char*)malloc(strlen(text_buffer) +
483: strlen(p1) + 100)))
484: outofmem(__FILE__, "HTTP 4xx status");
485: sprintf(message,
486: "HTTP server at %s replies:\n%s\n\n%s\n",
487: p1, text_buffer,
488: ((server_status == 401)
489: ? "Access Authorization package giving up.\n"
490: : ""));
491: status = HTLoadError(request->output_stream,
492: server_status, message);
493: free(message);
494: free(p1);
495: goto clean_up;
496: }
497: } /* switch */
498: goto clean_up;
499: break;
500: #else
501: /* case 4 without Access Authorization falls through */
502: /* to case 5 (previously "I think I goofed"). -- AL */
503: #endif /* ACCESS_AUTH */
504:
505: case 5: /* I think you goofed */
1.14 luotonen 506: {
507: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
1.17 timbl 508: char * message = (char*)malloc(
509: strlen(text_buffer)+strlen(p1) + 100);
510: if (!message) outofmem(__FILE__, "HTTP 5xx status");
1.14 luotonen 511: sprintf(message,
1.17 timbl 512: "HTTP server at %s replies:\n%s", p1, text_buffer);
513: status = HTLoadError(request->output_stream, server_status, message);
1.14 luotonen 514: free(message);
515: free(p1);
516: goto clean_up;
517: }
1.17 timbl 518: break;
519:
520: case 2: /* Good: Got MIME object */
521: break;
522:
523: } /* switch on response code */
524:
525: } /* Full HTTP reply */
526:
527: } /* scope of fields */
528:
1.3 timbl 529: /* Set up the stream stack to handle the body of the message
530: */
1.17 timbl 531:
1.13 duns 532: copy:
1.17 timbl 533:
1.18 timbl 534: target = HTStreamStack(format_in, request);
1.17 timbl 535:
536: if (!target) {
537: char buffer[1024]; /* @@@@@@@@ */
538: if (binary_buffer) free(binary_buffer);
539: if (text_buffer) free(text_buffer);
540: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
541: HTAtom_name(format_in), HTAtom_name(request->output_format));
542: fprintf(stderr, "HTTP: %s", buffer);
543: status = HTLoadError(request->output_stream, 501, buffer);
544: goto clean_up;
545: }
546:
1.19 ! timbl 547: /* @@ Bug: The decision of whether or not to cache should also be
! 548: made contingent on an IP address match or non-match */
! 549:
! 550: if (HTCacheHTTP) {
! 551: target = HTTee(target, HTCacheWriter(request, NULL, format_in,
! 552: request->output_format, request->output_stream));
! 553: }
! 554:
1.11 timbl 555: /* Push the data down the stream
1.3 timbl 556: ** We have to remember the end of the first buffer we just read
1.2 timbl 557: */
1.17 timbl 558: if (format_in == WWW_HTML) {
559: target = HTNetToText(target); /* Pipe through CR stripper */
560: }
561:
562: (*target->isa->put_block)(target,
563: binary_buffer + (start_of_data - text_buffer),
564: length - (start_of_data - text_buffer));
565: HTCopy(s, target);
566:
567: (*target->isa->free)(target);
568: status = HT_LOADED;
1.11 timbl 569:
1.2 timbl 570: /* Clean up
1.1 timbl 571: */
1.17 timbl 572:
573: clean_up:
574: if (binary_buffer) free(binary_buffer);
575: if (text_buffer) free(text_buffer);
576:
577: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
578: (void) NETCLOSE(s);
579:
580: return status; /* Good return */
1.3 timbl 581:
1.17 timbl 582: } /* read response */
583: } /* load HTTP */
1.1 timbl 584:
585: /* Protocol descriptor
586: */
587:
1.17 timbl 588: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };
Webmaster