Annotation of libwww/Library/src/HTTP.c, revision 1.14
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
1.12 timbl 17: /* MOSAIC_HACK2 is a kludge to guess the file type of the transferred
18: ** file from the URL. It is STRICTLY illegal to do this!
19: */
20:
1.2 timbl 21: /* Implements:
22: */
23: #include "HTTP.h"
24:
/* Protocol version token appended to the GET request line when extensions are on */
25: #define HTTP_VERSION "HTTP/1.0"
/* When defined, the request line may carry HTTP_VERSION (see the ifdef HTTP2 in HTLoadHTTP) */
26: #define HTTP2 /* Version is greater than 0.9 */
27:
28: #define INIT_LINE_SIZE 1024 /* Initial size of the two reply buffers; doubled on demand */
29: #define LINE_EXTEND_THRESH 256 /* Buffers are grown when fewer than this many bytes are free */
30: #define VERSION_LENGTH 20 /* Size of server_version; the sscanf width 20 must match it */
31:
32: /* Uses:
33: */
1.1 timbl 34: #include "HTParse.h"
35: #include "HTUtils.h"
36: #include "tcp.h"
37: #include "HTTCP.h"
38: #include "HTFormat.h"
1.2 timbl 39: #include <ctype.h>
40: #include "HTAlert.h"
41: #include "HTMIME.h"
1.5 timbl 42: #include "HTML.h" /* SCW */
43: #include "HTInit.h" /* SCW */
1.14 ! luotonen 44: #include "HTAABrow.h" /* Access Authorization */
1.1 timbl 45:
/* Local view of a stream object. This file only uses the class pointer,
** through which it calls put_block and free on the target stream.
*/
1.2 timbl 46: struct _HTStream {
47: HTStreamClass * isa; /* all we need to know */
48: };
49:
50:
/* Defined outside this file. Both are used to build the User-Agent request
** header in HTLoadHTTP; a null pointer is replaced there by the fallback
** text unknown (name) or 0.0 (version).
*/
1.6 timbl 51: extern char * HTAppName; /* Application name: please supply */
52: extern char * HTAppVersion; /* Application version: please supply */
53:
1.1 timbl 54: /* Load Document from HTTP Server HTLoadHTTP()
55: ** ==============================
56: **
57: ** Given a hypertext address, this routine connects to the server, sends a
** GET request, reads and classifies the first line of the reply, and pushes
** the body down a stream stack built from the detected input format.
58: **
59: **
60: ** On entry,
61: ** arg is the hypertext reference of the article to be loaded.
** anAnchor is passed on to HTStreamStack.
** format_out is the format wanted by the caller.
** sink is the stream receiving the converted output (and error pages).
62: ** gate is no longer a parameter: it is a local fixed to 0, so the gateway
** branches in the body are never taken.
63: **
64: ** On exit,
65: ** returns HT_LOADED once the body has been copied to the target stream,
66: ** the result of HTLoadError() for 4xx/5xx replies or when no
** converter is found,
** -3 if arg is null, -2 if arg is empty,
** the nonzero result of HTParseInet() if the host lookup fails,
** the result of HTInetStatus() if connect or write fails,
** the negative NETREAD result if reading the reply fails.
67: **
68: ** The socket is opened and closed inside this routine; the caller does
69: ** not receive it.
70: **
71: */
1.2 timbl 72: PUBLIC int HTLoadHTTP ARGS4 (
73: CONST char *, arg,
74: /* CONST char *, gate, */
75: HTParentAnchor *, anAnchor,
76: HTFormat, format_out,
77: HTStream*, sink)
1.1 timbl 78: {
79: int s; /* Socket number for returned data */
80: char *command; /* The whole command */
1.3 timbl 81: char * eol = 0; /* End of line if found */
1.7 timbl 82: char * start_of_data; /* Start of body of reply */
1.11 timbl 83: int length; /* Number of valid bytes in buffer */
1.1 timbl 84: int status; /* tcp return */
1.10 timbl 85: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 86: HTStream * target = NULL; /* Unconverted data */
87: HTFormat format_in; /* Format arriving in the message */
88:
1.2 timbl 89: CONST char* gate = 0; /* disable this feature */
1.1 timbl 90: SockA soc_address; /* Binary network address */
91: SockA * sin = &soc_address;
1.2 timbl 92: BOOL had_header = NO; /* Have we had at least one header? (not referenced again in this routine) */
1.11 timbl 93: char * text_buffer = NULL; /* Copy of the reply passed through FROMASCII; used for parsing */
94: char * binary_buffer = NULL; /* Reply bytes exactly as read from the socket */
1.2 timbl 95: BOOL extensions = YES; /* Assume good HTTP server */
1.1 timbl 96: if (!arg) return -3; /* Bad if no name specified */
97: if (!*arg) return -2; /* Bad if name had zero length */
98:
99: /* Set up defaults:
100: */
101: #ifdef DECNET
1.2 timbl 102: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
103: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 104: #else /* Internet */
1.2 timbl 105: sin->sin_family = AF_INET; /* Family = internet, host order */
106: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 107: #endif
108:
1.10 timbl 109: sprintf(crlf, "%c%c", CR, LF); /* To be correct on Mac, VM, etc */
110:
1.1 timbl 111: if (TRACE) {
112: if (gate) fprintf(stderr,
113: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
114: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
115: }
116:
117: /* Get node name and optional port number:
118: */
119: {
120: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
121: int status = HTParseInet(sin, p1); /* TBL 920622 */
122: free(p1);
123: if (status) return status; /* No such host for example */
124: }
125:
/* Re-entry point for a second attempt: reached again after the old-server
** kludge clears extensions, or after a 401 when the authorization code asks
** for a retry. The address lookup above is not repeated.
*/
1.2 timbl 126: retry:
1.1 timbl 127:
1.10 timbl 128: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 129: */
130: #ifdef DECNET
131: s = socket(AF_DECnet, SOCK_STREAM, 0);
132: #else
133: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
134: #endif
/* NOTE(review): the result of socket() is not checked before connect(),
** and the error return below does not NETCLOSE(s).
*/
135: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
136: if (status < 0) {
137: if (TRACE) fprintf(stderr,
138: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
139: /* free(command); BUG OUT TBL 921121 */
140: return HTInetStatus("connect");
141: }
142:
143: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
144:
145: /* Ask that node for the document,
146: ** omitting the host name & anchor if not gatewayed.
** The extra 2 + 31 bytes in each malloc leave room for the space,
** HTTP_VERSION, CR LF and terminator appended with strcat below.
147: */
148: if (gate) {
1.2 timbl 149: command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 150: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
151: strcpy(command, "GET ");
152: strcat(command, arg);
153: } else { /* not gatewayed */
154: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 155: command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 156: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
157: strcpy(command, "GET ");
158: strcat(command, p1);
159: free(p1);
160: }
1.2 timbl 161: #ifdef HTTP2
162: if (extensions) {
163: strcat(command, " ");
164: strcat(command, HTTP_VERSION);
165: }
166: #endif
1.10 timbl 167:
168: strcat(command, crlf); /* CR LF, as in rfc 977 */
1.1 timbl 169:
/* With extensions on, add request headers: one Accept line per registered
** presentation whose output is WWW_PRESENT, then User-Agent, then (if
** ACCESS_AUTH is defined) any authorization line. From here on the command
** is grown with StrAllocCat rather than strcat.
*/
1.2 timbl 170: if (extensions) {
171:
172: int n;
173: int i;
174: HTAtom * present = WWW_PRESENT;
175: char line[256]; /*@@@@ */
/* NOTE(review): line is filled by unbounded sprintf calls below; a long
** type name, application name or authorization string would overrun its
** 256 bytes.
*/
176:
177: if (!HTPresentations) HTFormatInit();
178: n = HTList_count(HTPresentations);
179:
180: for(i=0; i<n; i++) {
181: HTPresentation * pres = HTList_objectAt(HTPresentations, i);
182: if (pres->rep_out == present) {
183: if (pres->quality != 1.0) {
1.3 timbl 184: sprintf(line, "Accept: %s q=%.3f%c%c",
185: HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 186: } else {
1.3 timbl 187: sprintf(line, "Accept: %s%c%c",
188: HTAtom_name(pres->rep), CR, LF);
1.2 timbl 189: }
190: StrAllocCat(command, line);
191:
192: }
193: }
1.6 timbl 194:
195: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
196: HTAppName ? HTAppName : "unknown",
197: HTAppVersion ? HTAppVersion : "0.0",
198: HTLibraryVersion, CR, LF);
199: StrAllocCat(command, line);
1.14 ! luotonen 200:
! 201: #ifdef ACCESS_AUTH
! 202: #define FREE(x) if (x) {free(x); x=NULL;}
! 203: {
! 204: char *docname;
! 205: char *hostname;
! 206: char *colon;
! 207: int portnumber;
! 208: char *auth;
! 209:
/* Split the address into document path, host name and port (default 80)
** so the authorization module can look up credentials for it.
*/
! 210: docname = HTParse(arg, "", PARSE_PATH);
! 211: hostname = HTParse((gate ? gate : arg), "", PARSE_HOST);
! 212: if (hostname &&
! 213: NULL != (colon = strchr(hostname, ':'))) {
! 214: *(colon++) = NULL; /* Chop off port number */
/* NOTE(review): NULL is stored into a char above; the string terminator 0
** is what is meant.
*/
! 215: portnumber = atoi(colon);
! 216: }
! 217: else portnumber = 80;
! 218:
! 219: if (NULL!=(auth=HTAA_composeAuth(hostname, portnumber, docname))) {
! 220: sprintf(line, "%s%c%c", auth, CR, LF);
! 221: StrAllocCat(command, line);
! 222: }
! 223: if (TRACE) {
! 224: if (auth)
! 225: fprintf(stderr, "HTTP: Sending authorization: %s\n", auth);
! 226: else
! 227: fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
! 228: }
/* auth is not freed here; NOTE(review): ownership of the string returned
** by HTAA_composeAuth is not visible in this file - confirm.
*/
! 229: FREE(hostname);
! 230: FREE(docname);
! 231: }
! 232: #endif /* ACCESS_AUTH */
1.2 timbl 233: }
1.14 ! luotonen 234:
1.10 timbl 235: StrAllocCat(command, crlf); /* Blank line means "end" */
236:
237: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
238:
239: /* Translate into ASCII if necessary
240: */
1.4 timbl 241: #ifdef NOT_ASCII
1.1 timbl 242: {
243: char * p;
244: for(p = command; *p; p++) {
245: *p = TOASCII(*p);
246: }
1.4 timbl 247: }
1.3 timbl 248: #endif
1.1 timbl 249:
250: status = NETWRITE(s, command, (int)strlen(command));
251: free(command);
/* NOTE(review): the error return below leaves socket s open. */
252: if (status<0) {
253: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
254: return HTInetStatus("send");
255: }
256:
1.2 timbl 257:
1.7 timbl 258: /* Read the first line of the response
259: ** -----------------------------------
1.11 timbl 260: **
261: ** HTTP0 servers must return ASCII style text, though it can in
262: ** principle be just text without any markup at all.
263: ** Full HTTP servers must return a response
264: ** line and RFC822 style header. The response must therefore in
265: ** either case have a CRLF somewhere soon.
266: **
267: ** This is the theory. In practice, there are (1993) unfortunately
268: ** many binary documents just served up with HTTP0.9. This
269: ** means we have to preserve the binary buffer (on the assumption that
270: ** conversion from ASCII may lose information) in case it turns
271: ** out that we want the binary original.
1.2 timbl 272: */
1.3 timbl 273:
1.2 timbl 274: {
275:
276: /* Get numeric status etc */
277:
278: BOOL end_of_file = NO;
279: HTAtom * encoding = HTAtom_for("7bit");
280: int buffer_length = INIT_LINE_SIZE; /* Why not? */
281:
/* Both buffers always have the same size and the same fill level (length),
** so an offset into one is valid in the other.
*/
1.11 timbl 282: binary_buffer = (char *) malloc(buffer_length * sizeof(char));
283: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
284: text_buffer = (char *) malloc(buffer_length * sizeof(char));
285: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
286: length = 0;
1.2 timbl 287:
1.7 timbl 288: do { /* Loop to read in the first line */
1.2 timbl 289:
290: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
291:
292: if (buffer_length - length < LINE_EXTEND_THRESH) {
293: buffer_length = buffer_length + buffer_length;
1.11 timbl 294: binary_buffer = (char *) realloc(
295: binary_buffer, buffer_length * sizeof(char));
296: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
297: text_buffer = (char *) realloc(
298: text_buffer, buffer_length * sizeof(char));
299: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
1.2 timbl 300: }
/* One byte is held back so the data can be zero-terminated after the read. */
1.11 timbl 301: status = NETREAD(s, binary_buffer + length,
1.2 timbl 302: buffer_length - length -1);
303: if (status < 0) {
/* NOTE(review): this return does not free binary_buffer and text_buffer. */
304: HTAlert("Unexpected network read error on response");
1.9 timbl 305: NETCLOSE(s);
1.2 timbl 306: return status;
307: }
1.10 timbl 308:
309: if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
310: status);
311:
1.2 timbl 312: if (status == 0) {
/* NOTE(review): if this happens on the very first read, text_buffer has
** not been written yet when it is parsed after the loop.
*/
313: end_of_file = YES;
314: break;
315: }
1.11 timbl 316: binary_buffer[length+status] = 0;
317:
318:
319: /* Make an ASCII *copy* of the buffer
320: */
1.2 timbl 321: #ifdef NOT_ASCII
1.10 timbl 322: if (TRACE) fprintf(stderr, "Local codes CR=%d, LF=%d\n", CR, LF);
1.11 timbl 323: #endif
1.2 timbl 324: {
/* NOTE(review): the copy stops at the first zero byte, so anything after
** an embedded NUL in this read is not converted into text_buffer.
*/
325: char * p;
1.11 timbl 326: char * q;
327: for(p = binary_buffer+length, q=text_buffer+length;
328: *p; p++, q++) {
329: *q = FROMASCII(*p);
330: }
331:
332: *q++ = 0;
333: }
334:
335: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
336: ** First time we have enough, look at the stub in ASCII
337: ** and get out of here if it doesn't look right.
338: **
339: ** We also check for characters above 128 in the first few bytes, and
340: ** if we find them we forget the html default.
341: **
342: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
343: ** will be taken as a HTTP 1.0 server. Failure.
344: ** An HTTP 0.9 server returning a binary document with
345: ** characters < 128 will be read as ASCII.
346: */
347: #define STUB_LENGTH 20
/* The condition below is true only on the read that first brings the total
** to STUB_LENGTH bytes or more.
*/
348: if (length < STUB_LENGTH && length+status >= STUB_LENGTH) {
349: if(strncmp("HTTP/", text_buffer, 5)!=0) {
350: char *p;
351: start_of_data = text_buffer; /* reparse whole reply */
352: for(p=binary_buffer; p <binary_buffer+STUB_LENGTH;p++) {
1.13 duns 353: if (((int)*p)&128) { /* high bit set: treat the reply as binary */
1.11 timbl 354: format_in = HTAtom_for("www/unknown");
1.13 duns 355: length = length + status;
356: goto copy; /* out of while loop */
1.11 timbl 357: }
358: }
1.2 timbl 359: }
360: }
1.11 timbl 361: /* end kludge */
362:
363:
/* Look for LF (code 10) in the newly converted text only. */
364: eol = strchr(text_buffer + length, 10);
365: if (eol) {
366: *eol = 0; /* Terminate the line */
/* NOTE(review): when the reply starts with LF, eol equals text_buffer and
** eol[-1] reads one byte before the buffer.
*/
1.14 ! luotonen 367: if (eol[-1] == CR) eol[-1] = 0; /* Chop trailing CR */
! 368: /* = corrected to == -- AL */
1.11 timbl 369: }
1.2 timbl 370:
371: length = length + status;
372:
1.7 timbl 373: } while (!eol && !end_of_file); /* No LF */
374:
375: } /* Scope of loop variables */
1.2 timbl 376:
1.7 timbl 377:
378: /* We now have a terminated unfolded line. Parse it.
379: ** -------------------------------------------------
1.2 timbl 380: */
381:
1.11 timbl 382: if (TRACE)fprintf(stderr, "HTTP: Rx: %.70s\n", text_buffer);
1.7 timbl 383:
384: {
385: int fields;
386: char server_version [VERSION_LENGTH+1];
387: int server_status;
388:
1.2 timbl 389:
390: /* Kludge to work with old buggy servers. They can't handle the third word
391: ** so we try again without it.
392: */
1.7 timbl 393: if (extensions &&
1.11 timbl 394: 0==strcmp(text_buffer, /* Old buggy server? */
1.7 timbl 395: "Document address invalid or access not authorised")) {
396: extensions = NO;
/* The freed pointers are not reset, but both are reallocated after retry
** before any further use.
*/
1.11 timbl 397: if (binary_buffer) free(binary_buffer);
398: if (text_buffer) free(text_buffer);
1.7 timbl 399: if (TRACE) fprintf(stderr,
400: "HTTP: close socket %d to retry with HTTP0\n", s);
401: NETCLOSE(s);
402: goto retry; /* @@@@@@@@@@ */
403: }
1.11 timbl 404: /* end kludge */
1.2 timbl 405:
/* Expect a status line of the form: version token, then numeric status.
** The width 20 in the format matches VERSION_LENGTH.
*/
1.11 timbl 406: fields = sscanf(text_buffer, "%20s%d",
1.7 timbl 407: server_version,
408: &server_status);
409:
1.11 timbl 410: if (fields < 2 ||
411: strncmp(server_version, "HTTP/", 5)!=0) { /* HTTP0 reply */
1.7 timbl 412: format_in = WWW_HTML;
1.11 timbl 413: start_of_data = text_buffer; /* reread whole reply */
1.9 timbl 414: if (eol) *eol = '\n'; /* Reconstitute buffer */
1.2 timbl 415:
1.11 timbl 416: } else { /* Full HTTP reply */
1.7 timbl 417:
418: /* Decode full HTTP response */
419:
1.3 timbl 420: format_in = HTAtom_for("www/mime");
/* The body (headers included) starts right after the status line, or at
** the end of what was read if no line end was found.
*/
1.11 timbl 421: start_of_data = eol ? eol + 1 : text_buffer + length;
1.3 timbl 422:
1.2 timbl 423: switch (server_status / 100) {
424:
/* NOTE(review): the default and 3xx cases only raise an alert and break,
** so the reply is still passed on as www/mime below.
*/
1.3 timbl 425: default: /* bad number */
426: HTAlert("Unknown status reply from server!");
427: break;
428:
1.2 timbl 429: case 3: /* Various forms of redirection */
1.7 timbl 430: HTAlert(
1.3 timbl 431: "Redirection response from server is not handled by this client");
432: break;
433:
1.14 ! luotonen 434: case 4: /* Access Authorization problem */
! 435: #ifdef ACCESS_AUTH
! 436: switch (server_status) {
! 437: case 401:
/* Hand the part of the reply after the status line, its remaining length
** and the socket to the authorization module; a true result means retry.
*/
! 438: length -= start_of_data - text_buffer;
! 439: if (HTAA_shouldRetryWithAuth(start_of_data, length, s)) {
! 440: /* Clean up before retrying */
! 441: if (binary_buffer) free(binary_buffer);
! 442: if (text_buffer) free(text_buffer);
! 443: if (TRACE)
! 444: fprintf(stderr, "%s %d %s\n",
! 445: "HTTP: close socket", s,
! 446: "to retry with Access Authorization");
! 447: (void)NETCLOSE(s);
! 448: goto retry;
! 449: break;
! 450: }
! 451: else {
! 452: /* FALL THROUGH */
! 453: }
! 454: default:
/* Any other 4xx status, or a 401 that will not be retried: report the
** status line to the user through HTLoadError and finish.
*/
! 455: {
! 456: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
! 457: char * message;
! 458:
! 459: if (!(message = (char*)malloc(strlen(text_buffer) +
! 460: strlen(p1) + 100)))
! 461: outofmem(__FILE__, "HTTP 4xx status");
! 462: sprintf(message,
! 463: "HTTP server at %s replies:\n%s\n\n%s\n",
! 464: p1, text_buffer,
! 465: ((server_status == 401)
! 466: ? "Access Authorization package giving up.\n"
! 467: : ""));
! 468: status = HTLoadError(sink, server_status, message);
! 469: free(message);
! 470: free(p1);
! 471: goto clean_up;
! 472: }
! 473: } /* switch */
! 474: goto clean_up;
! 475: break;
! 476: #else
! 477: /* case 4 without Access Authorization falls through */
! 478: /* to case 5 (previously "I think I goofed"). -- AL */
! 479: #endif /* ACCESS_AUTH */
! 480:
1.2 timbl 481: case 5: /* I think you goofed */
1.6 timbl 482: {
483: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
484: char * message = (char*)malloc(
1.11 timbl 485: strlen(text_buffer)+strlen(p1) + 100);
1.6 timbl 486: if (!message) outofmem(__FILE__, "HTTP 5xx status");
487: sprintf(message,
1.11 timbl 488: "HTTP server at %s replies:\n%s", p1, text_buffer);
1.8 timbl 489: status = HTLoadError(sink, server_status, message);
1.6 timbl 490: free(message);
491: free(p1);
492: goto clean_up;
493: }
1.3 timbl 494: break;
1.2 timbl 495:
496: case 2: /* Good: Got MIME object */
497: break;
498:
1.7 timbl 499: } /* switch on response code */
500:
501: } /* Full HTTP reply */
502:
503: } /* scope of fields */
1.2 timbl 504:
1.3 timbl 505: /* Set up the stream stack to handle the body of the message
506: */
507:
/* Also the jump target of the binary-reply kludge in the read loop, which
** arrives here with format_in set to www/unknown.
*/
1.13 duns 508: copy:
509:
1.3 timbl 510: target = HTStreamStack(format_in,
511: format_out,
512: sink , anAnchor);
513:
514: if (!target) {
515: char buffer[1024]; /* @@@@@@@@ */
/* NOTE(review): both buffers are freed here without resetting the
** pointers, and then freed again at clean_up - a double free.
*/
1.11 timbl 516: if (binary_buffer) free(binary_buffer);
517: if (text_buffer) free(text_buffer);
1.3 timbl 518: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
519: HTAtom_name(format_in), HTAtom_name(format_out));
520: fprintf(stderr, "HTTP: %s", buffer);
1.6 timbl 521: status = HTLoadError(sink, 501, buffer);
522: goto clean_up;
1.3 timbl 523: }
524:
525:
1.11 timbl 526: /* Push the data down the stream
1.3 timbl 527: ** We have to remember the end of the first buffer we just read
1.2 timbl 528: */
1.11 timbl 529: if (format_in == WWW_HTML) {
530: target = HTNetToText(target); /* Pipe through CR stripper */
531: }
532:
/* start_of_data points into text_buffer; the same offset is applied to
** binary_buffer so that the unconverted bytes already read are forwarded.
** HTCopy is then given the socket and the target for the rest of the reply.
*/
533: (*target->isa->put_block)(target,
534: binary_buffer + (start_of_data - text_buffer),
535: length - (start_of_data - text_buffer));
536: HTCopy(s, target);
1.3 timbl 537:
538: (*target->isa->free)(target);
1.8 timbl 539: status = HT_LOADED;
1.2 timbl 540:
541: /* Clean up
** Common exit: release both reply buffers, close the socket and return
** whatever status was set on the way here.
1.1 timbl 542: */
1.3 timbl 543:
1.6 timbl 544: clean_up:
1.11 timbl 545: if (binary_buffer) free(binary_buffer);
546: if (text_buffer) free(text_buffer);
1.3 timbl 547:
1.1 timbl 548: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
1.6 timbl 549: (void) NETCLOSE(s);
1.1 timbl 550:
1.8 timbl 551: return status; /* HT_LOADED or the HTLoadError result */
1.3 timbl 552:
1.1 timbl 553: }
1.7 timbl 554:
1.1 timbl 555:
556: /* Protocol descriptor
** Associates the scheme name http with the HTLoadHTTP loader above.
** The third initializer is left as 0.
** NOTE(review): the meaning of the third HTProtocol field is not visible
** in this file - confirm against the HTProtocol declaration.
557: */
558:
1.13 duns 559: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };
Webmaster