Annotation of libwww/Library/src/HTTP.c, revision 1.11
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
17: /* Implements:
18: */
19: #include "HTTP.h"
20:
21: #define HTTP_VERSION "HTTP/1.0" /* Version token appended to the GET request line */
22: #define HTTP2 /* Version is greater than 0.9: enables sending HTTP_VERSION */
23:
24: #define INIT_LINE_SIZE 1024 /* Initial size of the reply line buffers */
25: #define LINE_EXTEND_THRESH 256 /* Double the buffers when less than this much room is left */
26: #define VERSION_LENGTH 20 /* Max chars kept of the returned protocol version */
27:
28: /* Uses:
29: */
1.1 timbl 30: #include "HTParse.h"
31: #include "HTUtils.h"
32: #include "tcp.h"
33: #include "HTTCP.h"
34: #include "HTFormat.h"
1.2 timbl 35: #include <ctype.h>
36: #include "HTAlert.h"
37: #include "HTMIME.h"
1.5 timbl 38: #include "HTML.h" /* SCW */
39: #include "HTInit.h" /* SCW */
1.1 timbl 40:
1.2 timbl 41: struct _HTStream {
42: HTStreamClass * isa; /* Class pointer: this module only calls put_block and free through it */
43: };
44:
45:
1.6 timbl 46: extern char * HTAppName; /* Application name: please supply ("unknown" is sent in User-Agent if null) */
47: extern char * HTAppVersion; /* Application version: please supply ("0.0" is sent in User-Agent if null) */
48:
1.1 timbl 49: /* Load Document from HTTP Server HTLoadHTTP()
50: ** ==============================
51: **
52: ** Given a hypertext address, this routine loads a document.
53: **
54: **
55: ** On entry,
56: ** arg is the hypertext reference of the article to be loaded.
57: ** gate is nill if no gateway, else the gateway address.
58: **
59: ** On exit,
60: ** returns >=0 If no error, a good socket number
61: ** <0 Error.
62: **
63: ** The socket must be closed by the caller after the document has been
64: ** read.
65: **
66: */
1.2 timbl 67: PUBLIC int HTLoadHTTP ARGS4 (
68: CONST char *, arg,
69: /* CONST char *, gate, */
70: HTParentAnchor *, anAnchor,
71: HTFormat, format_out,
72: HTStream*, sink)
1.1 timbl 73: {
74: int s; /* Socket number for returned data */
75: char *command; /* The whole command */
1.3 timbl 76: char * eol = 0; /* End of line if found */
1.7 timbl 77: char * start_of_data; /* Start of body of reply */
1.11 ! timbl 78: int length; /* Number of valid bytes in buffer */
1.1 timbl 79: int status; /* tcp return */
1.10 timbl 80: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 81: HTStream * target = NULL; /* Unconverted data */
82: HTFormat format_in; /* Format arriving in the message */
83:
1.2 timbl 84: CONST char* gate = 0; /* disable this feature */
1.1 timbl 85: SockA soc_address; /* Binary network address */
86: SockA * sin = &soc_address;
1.2 timbl 87: BOOL had_header = NO; /* Have we had at least one header? */
1.11 ! timbl 88: char * text_buffer = NULL;
! 89: char * binary_buffer = NULL;
1.2 timbl 90: BOOL extensions = YES; /* Assume good HTTP server */
1.1 timbl 91: if (!arg) return -3; /* Bad if no name sepcified */
92: if (!*arg) return -2; /* Bad if name had zero length */
93:
94: /* Set up defaults:
95: */
96: #ifdef DECNET
1.2 timbl 97: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
98: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 99: #else /* Internet */
1.2 timbl 100: sin->sin_family = AF_INET; /* Family = internet, host order */
101: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 102: #endif
103:
1.10 timbl 104: sprintf(crlf, "%c%c", CR, LF); /* To be corect on Mac, VM, etc */
105:
1.1 timbl 106: if (TRACE) {
107: if (gate) fprintf(stderr,
108: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
109: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
110: }
111:
112: /* Get node name and optional port number:
113: */
114: {
115: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
116: int status = HTParseInet(sin, p1); /* TBL 920622 */
117: free(p1);
118: if (status) return status; /* No such host for example */
119: }
120:
1.2 timbl 121: retry:
1.1 timbl 122:
1.10 timbl 123: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 124: */
125: #ifdef DECNET
126: s = socket(AF_DECnet, SOCK_STREAM, 0);
127: #else
128: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
129: #endif
130: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
131: if (status < 0) {
132: if (TRACE) fprintf(stderr,
133: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
134: /* free(command); BUG OUT TBL 921121 */
135: return HTInetStatus("connect");
136: }
137:
138: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
139:
140: /* Ask that node for the document,
141: ** omitting the host name & anchor if not gatewayed.
142: */
143: if (gate) {
1.2 timbl 144: command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 145: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
146: strcpy(command, "GET ");
147: strcat(command, arg);
148: } else { /* not gatewayed */
149: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 150: command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 151: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
152: strcpy(command, "GET ");
153: strcat(command, p1);
154: free(p1);
155: }
1.2 timbl 156: #ifdef HTTP2
157: if (extensions) {
158: strcat(command, " ");
159: strcat(command, HTTP_VERSION);
160: }
161: #endif
1.10 timbl 162:
163: strcat(command, crlf); /* CR LF, as in rfc 977 */
1.1 timbl 164:
1.2 timbl 165: if (extensions) {
166:
167: int n;
168: int i;
169: HTAtom * present = WWW_PRESENT;
170: char line[256]; /*@@@@ */
171:
172: if (!HTPresentations) HTFormatInit();
173: n = HTList_count(HTPresentations);
174:
175: for(i=0; i<n; i++) {
176: HTPresentation * pres = HTList_objectAt(HTPresentations, i);
177: if (pres->rep_out == present) {
178: if (pres->quality != 1.0) {
1.3 timbl 179: sprintf(line, "Accept: %s q=%.3f%c%c",
180: HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 181: } else {
1.3 timbl 182: sprintf(line, "Accept: %s%c%c",
183: HTAtom_name(pres->rep), CR, LF);
1.2 timbl 184: }
185: StrAllocCat(command, line);
186:
187: }
188: }
1.6 timbl 189:
190: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
191: HTAppName ? HTAppName : "unknown",
192: HTAppVersion ? HTAppVersion : "0.0",
193: HTLibraryVersion, CR, LF);
194: StrAllocCat(command, line);
1.2 timbl 195: }
1.3 timbl 196:
1.10 timbl 197: StrAllocCat(command, crlf); /* Blank line means "end" */
198:
199: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
200:
201: /* Translate into ASCII if necessary
202: */
1.4 timbl 203: #ifdef NOT_ASCII
1.1 timbl 204: {
205: char * p;
206: for(p = command; *p; p++) {
207: *p = TOASCII(*p);
208: }
1.4 timbl 209: }
1.3 timbl 210: #endif
1.1 timbl 211:
212: status = NETWRITE(s, command, (int)strlen(command));
213: free(command);
214: if (status<0) {
215: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
216: return HTInetStatus("send");
217: }
218:
1.2 timbl 219:
1.7 timbl 220: /* Read the first line of the response
221: ** -----------------------------------
1.11 ! timbl 222: **
! 223: ** HTTP0 servers must return ASCII style text, though it can in
! 224: ** principle be just text without any markup at all.
! 225: ** Full HTTP servers must return a response
! 226: ** line and RFC822 style header. The response must therefore in
! 227: ** either case have a CRLF somewhere soon.
! 228: **
! 229: ** This is the theory. In practice, there are (1993) unfortunately
! 230: ** many binary documents just served up with HTTP0.9. This
! 231: ** means we have to preserve the binary buffer (on the assumption that
! 232: ** conversion from ASCII may lose information) in case it turns
! 233: ** out that we want the binary original.
1.2 timbl 234: */
1.3 timbl 235:
1.2 timbl 236: {
237:
238: /* Get numeric status etc */
239:
240: BOOL end_of_file = NO;
241: HTAtom * encoding = HTAtom_for("7bit");
242: int buffer_length = INIT_LINE_SIZE; /* Why not? */
243:
1.11 ! timbl 244: binary_buffer = (char *) malloc(buffer_length * sizeof(char));
! 245: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
! 246: text_buffer = (char *) malloc(buffer_length * sizeof(char));
! 247: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
! 248: length = 0;
1.2 timbl 249:
1.7 timbl 250: do { /* Loop to read in the first line */
1.2 timbl 251:
252: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
253:
254: if (buffer_length - length < LINE_EXTEND_THRESH) {
255: buffer_length = buffer_length + buffer_length;
1.11 ! timbl 256: binary_buffer = (char *) realloc(
! 257: binary_buffer, buffer_length * sizeof(char));
! 258: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
! 259: text_buffer = (char *) realloc(
! 260: text_buffer, buffer_length * sizeof(char));
! 261: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
1.2 timbl 262: }
1.11 ! timbl 263: status = NETREAD(s, binary_buffer + length,
1.2 timbl 264: buffer_length - length -1);
265: if (status < 0) {
266: HTAlert("Unexpected network read error on response");
1.9 timbl 267: NETCLOSE(s);
1.2 timbl 268: return status;
269: }
1.10 timbl 270:
271: if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
272: status);
273:
1.2 timbl 274: if (status == 0) {
275: end_of_file = YES;
276: break;
277: }
1.11 ! timbl 278: binary_buffer[length+status] = 0;
! 279:
! 280:
! 281: /* Make an ASCII *copy* of the buffer
! 282: */
1.2 timbl 283: #ifdef NOT_ASCII
1.10 timbl 284: if (TRACE) fprintf(stderr, "Local codes CR=%d, LF=%d\n", CR, LF);
1.11 ! timbl 285: #endif
1.2 timbl 286: {
287: char * p;
1.11 ! timbl 288: char * q;
! 289: for(p = binary_buffer+length, q=text_buffer+length;
! 290: *p; p++, q++) {
! 291: *q = FROMASCII(*p);
! 292: }
! 293:
! 294: *q++ = 0;
! 295: }
! 296:
! 297: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
! 298: ** First time we have enough, look at the stub in ASCII
! 299: ** and get out of here if it doesn't look right.
! 300: **
! 301: ** We also check for characters above 128 in the first few bytes, and
! 302: ** if we find them we forget the html default.
! 303: **
! 304: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
! 305: ** will be taken as a HTTP 1.0 server. Failure.
! 306: ** An HTTP 0.9 server returning a binary document with
! 307: ** characters < 128 will be read as ASCII.
! 308: */
! 309: #define STUB_LENGTH 20
! 310: if (length < STUB_LENGTH && length+status >= STUB_LENGTH) {
! 311: if(strncmp("HTTP/", text_buffer, 5)!=0) {
! 312: char *p;
! 313: start_of_data = text_buffer; /* reparse whole reply */
! 314: for(p=binary_buffer; p <binary_buffer+STUB_LENGTH;p++) {
! 315: if ((int)p&128) {
! 316: format_in = HTAtom_for("www/unknown");
! 317: }
! 318: }
! 319: break;
1.2 timbl 320: }
321: }
1.11 ! timbl 322: /* end kludge */
! 323:
! 324:
! 325: eol = strchr(text_buffer + length, 10);
! 326: if (eol) {
! 327: *eol = 0; /* Terminate the line */
! 328: if (eol[-1] = CR) eol[-1] = 0; /* Chop trailing CR */
! 329: }
1.2 timbl 330:
331: length = length + status;
332:
1.7 timbl 333: } while (!eol && !end_of_file); /* No LF */
334:
335: } /* Scope of loop variables */
1.2 timbl 336:
1.7 timbl 337:
338: /* We now have a terminated unfolded line. Parse it.
339: ** -------------------------------------------------
1.2 timbl 340: */
341:
1.11 ! timbl 342: if (TRACE)fprintf(stderr, "HTTP: Rx: %.70s\n", text_buffer);
1.7 timbl 343:
344: {
345: int fields;
346: char server_version [VERSION_LENGTH+1];
347: int server_status;
348:
1.2 timbl 349:
350: /* Kludge to work with old buggy servers. They can't handle the third word
351: ** so we try again without it.
352: */
1.7 timbl 353: if (extensions &&
1.11 ! timbl 354: 0==strcmp(text_buffer, /* Old buggy server? */
1.7 timbl 355: "Document address invalid or access not authorised")) {
356: extensions = NO;
1.11 ! timbl 357: if (binary_buffer) free(binary_buffer);
! 358: if (text_buffer) free(text_buffer);
1.7 timbl 359: if (TRACE) fprintf(stderr,
360: "HTTP: close socket %d to retry with HTTP0\n", s);
361: NETCLOSE(s);
362: goto retry; /* @@@@@@@@@@ */
363: }
1.11 ! timbl 364: /* end kludge */
1.2 timbl 365:
1.11 ! timbl 366: fields = sscanf(text_buffer, "%20s%d",
1.7 timbl 367: server_version,
368: &server_status);
369:
1.11 ! timbl 370: if (fields < 2 ||
! 371: strncmp(server_version, "HTTP/", 5)!=0) { /* HTTP0 reply */
1.7 timbl 372: format_in = WWW_HTML;
1.11 ! timbl 373: start_of_data = text_buffer; /* reread whole reply */
1.9 timbl 374: if (eol) *eol = '\n'; /* Reconstitute buffer */
1.2 timbl 375:
1.11 ! timbl 376: } else { /* Full HTTP reply */
1.7 timbl 377:
378: /* Decode full HTTP response */
379:
1.3 timbl 380: format_in = HTAtom_for("www/mime");
1.11 ! timbl 381: start_of_data = eol ? eol + 1 : text_buffer + length;
1.3 timbl 382:
1.2 timbl 383: switch (server_status / 100) {
384:
1.3 timbl 385: default: /* bad number */
386: HTAlert("Unknown status reply from server!");
387: break;
388:
1.2 timbl 389: case 3: /* Various forms of redirection */
1.7 timbl 390: HTAlert(
1.3 timbl 391: "Redirection response from server is not handled by this client");
392: break;
393:
1.2 timbl 394: case 4: /* "I think I goofed" */
395: case 5: /* I think you goofed */
1.6 timbl 396: {
397: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
398: char * message = (char*)malloc(
1.11 ! timbl 399: strlen(text_buffer)+strlen(p1) + 100);
1.6 timbl 400: if (!message) outofmem(__FILE__, "HTTP 5xx status");
401: sprintf(message,
1.11 ! timbl 402: "HTTP server at %s replies:\n%s", p1, text_buffer);
1.8 timbl 403: status = HTLoadError(sink, server_status, message);
1.6 timbl 404: free(message);
405: free(p1);
406: goto clean_up;
407: }
1.3 timbl 408: break;
1.2 timbl 409:
410: case 2: /* Good: Got MIME object */
411: break;
412:
1.7 timbl 413: } /* switch on response code */
414:
415: } /* Full HTTP reply */
416:
417: } /* scope of fields */
1.2 timbl 418:
1.3 timbl 419: /* Set up the stream stack to handle the body of the message
420: */
421:
422: target = HTStreamStack(format_in,
423: format_out,
424: sink , anAnchor);
425:
426: if (!target) {
427: char buffer[1024]; /* @@@@@@@@ */
1.11 ! timbl 428: if (binary_buffer) free(binary_buffer);
! 429: if (text_buffer) free(text_buffer);
1.3 timbl 430: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
431: HTAtom_name(format_in), HTAtom_name(format_out));
432: fprintf(stderr, "HTTP: %s", buffer);
1.6 timbl 433: status = HTLoadError(sink, 501, buffer);
434: goto clean_up;
1.3 timbl 435: }
436:
437:
1.11 ! timbl 438: /* Push the data down the stream
1.3 timbl 439: ** We have to remember the end of the first buffer we just read
1.2 timbl 440: */
1.11 ! timbl 441: if (format_in == WWW_HTML) {
! 442: target = HTNetToText(target); /* Pipe through CR stripper */
! 443: }
! 444:
! 445: (*target->isa->put_block)(target,
! 446: binary_buffer + (start_of_data - text_buffer),
! 447: length - (start_of_data - text_buffer));
! 448: HTCopy(s, target);
1.3 timbl 449:
450: (*target->isa->free)(target);
1.8 timbl 451: status = HT_LOADED;
1.2 timbl 452:
453: /* Clean up
1.1 timbl 454: */
1.3 timbl 455:
1.6 timbl 456: clean_up:
1.11 ! timbl 457: if (binary_buffer) free(binary_buffer);
! 458: if (text_buffer) free(text_buffer);
1.3 timbl 459:
1.1 timbl 460: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
1.6 timbl 461: (void) NETCLOSE(s);
1.1 timbl 462:
1.8 timbl 463: return status; /* Good return */
1.3 timbl 464:
1.1 timbl 465: }
1.7 timbl 466:
1.1 timbl 467:
468: /* Protocol descriptor
469: */
470:
1.2 timbl 471: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 }; /* Pairs the name "http" with HTLoadHTTP; meaning of the third (0) member is set by HTProtocol, not visible here */
Webmaster