Annotation of libwww/Library/src/HTTP.c, revision 1.22
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
1.12 timbl 17: /* MOSAIC_HACK2 is a kludge to guess the file type of a transferred
18: ** file from the URL. It is STRICTLY illegal to do this!
19: */
20:
1.2 timbl 21: /* Implements:
22: */
23: #include "HTTP.h"
24:
/* Protocol version string appended to the request line when the
** extended (post-0.9) request format is in use.
*/
#define HTTP_VERSION "HTTP/1.0"
/* When defined, HTLoadHTTP() adds HTTP_VERSION to the request line. */
#define HTTP2 /* Version is greater than 0.9 */

/* Line-buffer tuning values; not referenced in the part of the module
** reviewed here.
*/
#define INIT_LINE_SIZE 1024 /* Start with line buffer this big */
#define LINE_EXTEND_THRESH 256 /* Minimum read size */
/* Capacity (excluding the terminator) of the buffer that receives the
** protocol version parsed from the server's status line.
*/
#define VERSION_LENGTH 20 /* for returned protocol version */
31:
32: /* Uses:
33: */
1.1 timbl 34: #include "HTParse.h"
35: #include "HTUtils.h"
36: #include "tcp.h"
37: #include "HTTCP.h"
38: #include "HTFormat.h"
1.2 timbl 39: #include <ctype.h>
40: #include "HTAlert.h"
41: #include "HTMIME.h"
1.5 timbl 42: #include "HTML.h" /* SCW */
43: #include "HTInit.h" /* SCW */
1.21 luotonen 44: #include "HTAccess.h" /* HTRequest */
1.14 luotonen 45: #include "HTAABrow.h" /* Access Authorization */
1.20 timbl 46: #include "HTTee.h" /* Tee off a cache stream */
47: #include "HTFWriter.h" /* Write to cache file */
1.1 timbl 48:
/* Stream object as seen from this module.  Only the class pointer is
** declared here; HTLoadHTTP() uses it to reach the stream's put_block
** and free methods.
*/
struct _HTStream {
    HTStreamClass * isa; /* all we need to know */
};
52:
53:
/* Application identity, defined by the application.  Both values are
** put into the User-Agent request header; HTLoadHTTP() substitutes
** "unknown" and "0.0" respectively when a pointer is NULL.
*/
extern char * HTAppName; /* Application name: please supply */
extern char * HTAppVersion; /* Application version: please supply */

/* When true, HTLoadHTTP() tees the incoming document into the stream
** returned by HTCacheWriter() in addition to the normal target stream.
*/
PUBLIC BOOL HTCacheHTTP = YES; /* Enable caching of HTTP-retrieved files */
58:
1.21 luotonen 59:
60: PRIVATE void parse_401_headers ARGS2(HTRequest *, req,
61: HTInputSocket *, isoc)
62: {
63: HTAAScheme scheme;
64: char *line;
65: int num_schemes = 0;
66: HTList *valid_schemes = HTList_new();
67: HTAssocList **scheme_specifics = NULL;
68: char *template = NULL;
69:
70: /* Read server reply header lines */
71:
72: if (TRACE)
73: fprintf(stderr, "Server 401 reply header lines:\n");
74:
75: while (NULL != (line = HTInputSocket_getUnfoldedLine(isoc)) &&
76: *line != 0) {
77:
78: if (TRACE) fprintf(stderr, "%s\n", line);
79:
80: if (strchr(line, ':')) { /* Valid header line */
81:
82: char *p = line;
83: char *fieldname = HTNextField(&p);
84: char *arg1 = HTNextField(&p);
85: char *args = p;
86:
87: if (0==strcasecomp(fieldname, "WWW-Authenticate:")) {
88: if (HTAA_UNKNOWN != (scheme = HTAAScheme_enum(arg1))) {
89: HTList_addObject(valid_schemes, (void*)scheme);
90: if (!scheme_specifics) {
91: int i;
92: scheme_specifics = (HTAssocList**)
93: malloc(HTAA_MAX_SCHEMES * sizeof(HTAssocList*));
94: if (!scheme_specifics)
95: outofmem(__FILE__, "parse_401_headers");
96: for (i=0; i < HTAA_MAX_SCHEMES; i++)
97: scheme_specifics[i] = NULL;
98: }
99: scheme_specifics[scheme] = HTAA_parseArgList(args);
100: num_schemes++;
101: }
102: else if (TRACE) {
103: fprintf(stderr, "Unknown scheme `%s' %s\n",
104: (arg1 ? arg1 : "(null)"),
105: "in WWW-Authenticate: field");
106: }
107: }
108:
109: else if (0==strcasecomp(fieldname, "WWW-Protection-Template:")) {
110: if (TRACE)
111: fprintf(stderr, "Protection template set to `%s'\n", arg1);
112: StrAllocCopy(template, arg1);
113: }
114:
115: } /* if a valid header line */
116: else if (TRACE) {
117: fprintf(stderr, "Invalid header line `%s' ignored\n", line);
118: } /* else invalid header line */
119: } /* while header lines remain */
120:
121: req->valid_schemes = valid_schemes;
122: req->scheme_specifics = scheme_specifics;
123: req->prot_template = template;
124: }
125:
126:
127:
1.1 timbl 128: /* Load Document from HTTP Server HTLoadHTTP()
129: ** ==============================
130: **
131: ** Given a hypertext address, this routine loads a document.
132: **
133: **
134: ** On entry,
135: ** arg is the hypertext reference of the article to be loaded.
136: **
137: ** On exit,
138: ** returns >=0 If no error, a good socket number
139: ** <0 Error.
140: **
141: ** The socket must be closed by the caller after the document has been
142: ** read.
143: **
144: */
1.19 timbl 145: PUBLIC int HTLoadHTTP ARGS1 (HTRequest *, request)
1.1 timbl 146: {
1.22 ! luotonen 147: CONST char * arg = NULL;
1.1 timbl 148: int s; /* Socket number for returned data */
149: int status; /* tcp return */
1.10 timbl 150: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 151: HTStream * target = NULL; /* Unconverted data */
152:
1.2 timbl 153: CONST char* gate = 0; /* disable this feature */
1.1 timbl 154: SockA soc_address; /* Binary network address */
155: SockA * sin = &soc_address;
1.2 timbl 156: BOOL extensions = YES; /* Assume good HTTP server */
1.17 timbl 157:
1.22 ! luotonen 158: if (request->reason == HTAA_OK_GATEWAY) {
! 159: arg = request->translated;
! 160: }
! 161: else {
! 162: arg = HTAnchor_physical(request->anchor);
! 163: StrAllocCopy(request->argument, arg);
! 164: }
! 165:
1.1 timbl 166: if (!arg) return -3; /* Bad if no name sepcified */
167: if (!*arg) return -2; /* Bad if name had zero length */
168:
169: /* Set up defaults:
170: */
171: #ifdef DECNET
1.2 timbl 172: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
173: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 174: #else /* Internet */
1.2 timbl 175: sin->sin_family = AF_INET; /* Family = internet, host order */
176: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 177: #endif
178:
1.10 timbl 179: sprintf(crlf, "%c%c", CR, LF); /* To be corect on Mac, VM, etc */
180:
1.1 timbl 181: if (TRACE) {
182: if (gate) fprintf(stderr,
183: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
184: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
185: }
186:
187: /* Get node name and optional port number:
188: */
189: {
190: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
191: int status = HTParseInet(sin, p1); /* TBL 920622 */
192: free(p1);
193: if (status) return status; /* No such host for example */
194: }
195:
1.15 luotonen 196: /*
197: ** Compose authorization information (this was moved here
198: ** from after the making of the connection so that the connection
199: ** wouldn't have to wait while prompting username and password
200: ** from the user). -- AL 13.10.93
201: */
202: #ifdef ACCESS_AUTH
1.21 luotonen 203: HTAA_composeAuth(request);
204: if (TRACE) {
205: if (request->authorization)
206: fprintf(stderr, "HTTP: Sending Authorization: %s\n",
207: request->authorization);
208: else
209: fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
1.15 luotonen 210: }
211: #endif /* ACCESS_AUTH */
1.1 timbl 212:
1.10 timbl 213: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 214: */
215: #ifdef DECNET
216: s = socket(AF_DECnet, SOCK_STREAM, 0);
217: #else
218: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
219: #endif
220: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
221: if (status < 0) {
222: if (TRACE) fprintf(stderr,
223: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
1.17 timbl 224:
1.1 timbl 225: return HTInetStatus("connect");
226: }
227:
228: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
229:
1.17 timbl 230:
231: /* Compose and send command
232: ** ------------------------
233: */
234: {
235: char *command; /* The whole command */
236:
1.1 timbl 237: /* Ask that node for the document,
238: ** omitting the host name & anchor if not gatewayed.
239: */
1.17 timbl 240: if (gate) {
241: command = malloc(4 + strlen(arg)+ 2 + 31);
242: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
243: strcpy(command, "GET ");
244: strcat(command, arg);
245: } else { /* not gatewayed */
246: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
247: command = malloc(4 + strlen(p1)+ 2 + 31);
248: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
1.22 ! luotonen 249: if (request->method) {
! 250: strcpy(command, HTAtom_name(request->method));
! 251: strcat(command, " ");
! 252: }
! 253: else {
! 254: strcpy(command, "GET ");
! 255: }
1.17 timbl 256: strcat(command, p1);
257: free(p1);
258: }
1.2 timbl 259: #ifdef HTTP2
1.17 timbl 260: if (extensions) {
261: strcat(command, " ");
262: strcat(command, HTTP_VERSION);
263: }
1.2 timbl 264: #endif
1.17 timbl 265:
266: strcat(command, crlf); /* CR LF, as in rfc 977 */
267:
268: if (extensions) {
1.21 luotonen 269:
1.17 timbl 270: int i;
271: HTAtom * present = WWW_PRESENT;
272: char line[256]; /*@@@@ */
1.21 luotonen 273: HTList *conversions[2];
274:
1.22 ! luotonen 275: if (!HTConversions) {
! 276: HTConversions = HTList_new();
! 277: HTFormatInit(HTConversions);
! 278: }
1.21 luotonen 279: conversions[0] = HTConversions;
280: conversions[1] = request->conversions;
281:
282: for (i=0; i<2; i++) {
283: HTList *cur = conversions[i];
284: HTPresentation *pres;
285:
286: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
287: if (pres->rep_out == present) {
288: if (pres->quality != 1.0) {
289: sprintf(line, "Accept: %s q=%.3f%c%c",
290: HTAtom_name(pres->rep),
291: pres->quality, CR, LF);
292: } else {
293: sprintf(line, "Accept: %s%c%c",
294: HTAtom_name(pres->rep), CR, LF);
295: }
296: StrAllocCat(command, line);
1.17 timbl 297: }
298: }
1.2 timbl 299: }
1.22 ! luotonen 300:
! 301: sprintf(line, "User-Agent: %s%s %s/%s libwww/%s%c%c",
! 302: request->user_agent ? request->user_agent : "",
! 303: request->user_agent ? " VIA Gateway" : "",
1.17 timbl 304: HTAppName ? HTAppName : "unknown",
305: HTAppVersion ? HTAppVersion : "0.0",
306: HTLibraryVersion, CR, LF);
307: StrAllocCat(command, line);
308:
1.22 ! luotonen 309: if (request->from) {
! 310: sprintf(line, "From: %s%c%c", request->from, CR, LF);
! 311: StrAllocCat(command, line);
! 312: }
! 313:
1.14 luotonen 314: #ifdef ACCESS_AUTH
1.21 luotonen 315: if (request->authorization != NULL) {
316: sprintf(line, "Authorization: %s%c%c",
317: request->authorization, CR, LF);
1.17 timbl 318: StrAllocCat(command, line);
319: }
320: #endif /* ACCESS_AUTH */
1.22 ! luotonen 321:
! 322: if (request->content_type) {
! 323: sprintf(line, "Content-Type: %s%c%c",
! 324: HTAtom_name(request->content_type), CR, LF);
! 325: StrAllocCat(command, line);
! 326: }
! 327:
! 328: if (request->content_length > 0) {
! 329: sprintf(line, "Content-Length: %d%c%c",
! 330: request->content_length, CR, LF);
! 331: StrAllocCat(command, line);
! 332: }
! 333:
! 334:
1.14 luotonen 335: }
1.17 timbl 336:
337: StrAllocCat(command, crlf); /* Blank line means "end" */
338:
339: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
340:
341: /* Translate into ASCII if necessary
342: */
1.4 timbl 343: #ifdef NOT_ASCII
1.17 timbl 344: {
345: char * p;
346: for(p = command; *p; p++) {
347: *p = TOASCII(*p);
348: }
1.1 timbl 349: }
1.3 timbl 350: #endif
1.17 timbl 351:
352: status = NETWRITE(s, command, (int)strlen(command));
353: free(command);
354: if (status<0) {
355: if (TRACE) fprintf(stderr,
356: "HTTPAccess: Unable to send command.\n");
1.1 timbl 357: return HTInetStatus("send");
1.17 timbl 358: }
359: } /* compose and send command */
360:
1.2 timbl 361:
1.17 timbl 362: /* Read the response
363: ** -----------------
1.11 timbl 364: **
365: ** HTTP0 servers must return ASCII style text, though it can in
366: ** principle be just text without any markup at all.
367: ** Full HTTP servers must return a response
368: ** line and RFC822 style header. The response must therefore in
369: ** either case have a CRLF somewhere soon.
370: **
371: ** This is the theory. In practice, there are (1993) unfortunately
372: ** many binary documents just served up with HTTP0.9. This
373: ** means we have to preserve the binary buffer (on the assumption that
374: ** conversion from ASCII may lose information) in case it turns
375: ** out that we want the binary original.
1.2 timbl 376: */
1.3 timbl 377:
1.22 ! luotonen 378: if (request->reason == HTAA_OK_GATEWAY) {
! 379: /*
! 380: ** Server as a gateway -- send body of the message
! 381: ** received from client (if any).
! 382: */
! 383: if (request->isoc && request->content_length > 0) {
! 384: int remain = request->content_length;
! 385: int i = remain;
! 386: char * buf;
! 387:
! 388: while (remain > 0 &&
! 389: (buf = HTInputSocket_getBlock(request->isoc, &i))) {
! 390: int status = NETWRITE(s, buf, i);
! 391: if (status < 0) {
! 392: CTRACE(stderr, "HTTPAccess: Unable to forward body\n");
! 393: return HTInetStatus("send");
! 394: }
! 395: remain -= i;
! 396: i = remain;
! 397: }
! 398: }
! 399: /*
! 400: ** Load results directly to client
! 401: */
! 402: HTCopy(s, request->output_stream);
! 403: return HT_LOADED;
! 404: }
! 405: else { /* read response */
1.21 luotonen 406:
1.17 timbl 407: HTFormat format_in; /* Format arriving in the message */
1.21 luotonen 408: HTInputSocket *isoc = HTInputSocket_new(s);
409: char * status_line = HTInputSocket_getStatusLine(isoc);
1.2 timbl 410:
1.11 timbl 411: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
412: ** First time we have enough, look at the stub in ASCII
413: ** and get out of here if it doesn't look right.
414: **
415: ** We also check for characters above 128 in the first few bytes, and
416: ** if we find them we forget the html default.
417: **
418: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
419: ** will be taken as a HTTP 1.0 server. Failure.
420: ** An HTTP 0.9 server returning a binary document with
421: ** characters < 128 will be read as ASCII.
422: */
1.21 luotonen 423: if (!status_line) { /* HTTP0 response */
424: if (HTInputSocket_seemsBinary(isoc)) {
425: format_in = HTAtom_for("www/unknown");
426: }
427: else {
428: format_in = WWW_HTML;
429: }
430: goto copy;
431: } /* end kludge */
432:
433: if (status_line) { /* Decode full HTTP response */
434: /*
435: ** We now have a terminated server status line, and we have
436: ** checked that it is most probably a legal one. Parse it.
437: */
438: char server_version[VERSION_LENGTH+1];
439: int server_status;
440:
441: if (TRACE)
442: fprintf(stderr, "HTTP Status Line: Rx: %.70s\n", status_line);
1.17 timbl 443:
1.21 luotonen 444: sscanf(status_line, "%20s%d", server_version, &server_status);
1.2 timbl 445:
1.21 luotonen 446: format_in = HTAtom_for("www/mime");
1.7 timbl 447:
1.21 luotonen 448: switch (server_status / 100) {
1.2 timbl 449:
1.21 luotonen 450: default: /* bad number */
451: HTAlert("Unknown status reply from server!");
452: break;
1.17 timbl 453:
1.21 luotonen 454: case 3: /* Various forms of redirection */
455: HTAlert(
1.17 timbl 456: "Redirection response from server is not handled by this client");
1.21 luotonen 457: break;
1.17 timbl 458:
1.21 luotonen 459: case 4: /* Access Authorization problem */
1.14 luotonen 460: #ifdef ACCESS_AUTH
1.21 luotonen 461: switch (server_status) {
462: case 401:
463: parse_401_headers(request, isoc);
464:
465: if (TRACE) fprintf(stderr, "%s %d %s\n",
466: "HTTP: close socket", s,
467: "to retry with Access Authorization");
468: HTInputSocket_free(isoc);
469: (void)NETCLOSE(s);
470: if (HTAA_retryWithAuth(request, &HTLoadHTTP)) {
471: status = HT_LOADED;/* @@ THIS ONLY WORKS ON LINEMODE */
472: goto clean_up;
473: }
474: /* else falltrough */
475: default:
1.14 luotonen 476: {
1.21 luotonen 477: char *p1 = HTParse(gate ? gate : arg, "",
478: PARSE_HOST);
479: char * message;
480:
481: if (!(message = (char*)malloc(strlen(status_line) +
482: strlen(p1) + 100)))
483: outofmem(__FILE__, "HTTP 4xx status");
1.14 luotonen 484: sprintf(message,
1.21 luotonen 485: "HTTP server at %s replies:\n%s\n\n%s\n",
486: p1, status_line,
487: ((server_status == 401)
488: ? "Access Authorization package giving up.\n"
489: : ""));
1.22 ! luotonen 490: status = HTLoadError(request, server_status, message);
1.14 luotonen 491: free(message);
492: free(p1);
493: goto clean_up;
494: }
1.21 luotonen 495: } /* switch */
496: goto clean_up;
497: break;
498: #else
499: /* case 4 without Access Authorization falls through */
500: /* to case 5 (previously "I think I goofed"). -- AL */
501: #endif /* ACCESS_AUTH */
502:
503: case 5: /* I think you goofed */
504: {
505: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
506: char * message = (char*)malloc(strlen(status_line) +
507: strlen(p1) + 100);
508: if (!message) outofmem(__FILE__, "HTTP 5xx status");
509: sprintf(message,
510: "HTTP server at %s replies:\n%s", p1, status_line);
1.22 ! luotonen 511: status = HTLoadError(request, server_status, message);
1.21 luotonen 512: free(message);
513: free(p1);
514: goto clean_up;
515: }
516: break;
1.17 timbl 517:
1.21 luotonen 518: case 2: /* Good: Got MIME object */
519: break;
1.17 timbl 520:
1.21 luotonen 521: } /* switch on response code */
1.17 timbl 522:
1.21 luotonen 523: } /* Full HTTP reply */
1.17 timbl 524:
525:
1.3 timbl 526: /* Set up the stream stack to handle the body of the message
527: */
1.21 luotonen 528:
1.13 duns 529: copy:
1.21 luotonen 530:
1.18 timbl 531: target = HTStreamStack(format_in, request);
1.21 luotonen 532:
1.17 timbl 533: if (!target) {
534: char buffer[1024]; /* @@@@@@@@ */
535: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
536: HTAtom_name(format_in), HTAtom_name(request->output_format));
537: fprintf(stderr, "HTTP: %s", buffer);
1.22 ! luotonen 538: status = HTLoadError(request, 501, buffer);
1.17 timbl 539: goto clean_up;
540: }
541:
1.19 timbl 542: /* @@ Bug: The decision of whether or not to cache should also be
1.21 luotonen 543: ** made contingent on a IP address match or non match.
544: */
1.19 timbl 545: if (HTCacheHTTP) {
546: target = HTTee(target, HTCacheWriter(request, NULL, format_in,
1.21 luotonen 547: request->output_format,
548: request->output_stream));
1.19 timbl 549: }
550:
1.11 timbl 551: /* Push the data down the stream
1.3 timbl 552: ** We have to remember the end of the first buffer we just read
1.2 timbl 553: */
1.17 timbl 554: if (format_in == WWW_HTML) {
555: target = HTNetToText(target); /* Pipe through CR stripper */
556: }
1.21 luotonen 557:
1.17 timbl 558: (*target->isa->put_block)(target,
1.21 luotonen 559: isoc->input_pointer,
560: isoc->input_limit - isoc->input_pointer);
561: HTInputSocket_free(isoc);
1.17 timbl 562: HTCopy(s, target);
563:
564: (*target->isa->free)(target);
565: status = HT_LOADED;
1.11 timbl 566:
1.2 timbl 567: /* Clean up
1.1 timbl 568: */
1.17 timbl 569:
570: clean_up:
571: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
572: (void) NETCLOSE(s);
573:
574: return status; /* Good return */
1.3 timbl 575:
1.17 timbl 576: } /* read response */
577: } /* load HTTP */
1.1 timbl 578:
/*	Protocol descriptor
**
**	Ties the access scheme name "http" to HTLoadHTTP() as its load
**	routine.  The two remaining members are left zero; their meaning
**	is defined by the HTProtocol declaration, which is not part of
**	this file.
*/

GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };
1.21 luotonen 583:
Webmaster