Annotation of libwww/Library/src/HTTP.c, revision 1.51
1.44 frystyk 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
1.1 timbl 2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
17: /* Implements:
18: */
19: #include "HTTP.h"
20:
21: #define HTTP_VERSION "HTTP/1.0" /* Appended to the request line when extensions are on */
22: #define HTTP2 /* Version is greater than 0.9: enables sending HTTP_VERSION */
23:
24: #define INIT_LINE_SIZE 1024 /* Start with line buffer this big (not referenced by the code shown in this file) */
25: #define LINE_EXTEND_THRESH 256 /* Minimum read size (not referenced by the code shown in this file) */
26: #define VERSION_LENGTH 20 /* Size, less the NUL, of the buffer for the returned protocol version */
27:
28: /* Uses:
29: */
1.1 timbl 30: #include "HTParse.h"
31: #include "HTUtils.h"
32: #include "tcp.h"
33: #include "HTTCP.h"
34: #include "HTFormat.h"
1.2 timbl 35: #include <ctype.h>
36: #include "HTAlert.h"
37: #include "HTMIME.h"
1.5 timbl 38: #include "HTML.h" /* SCW */
39: #include "HTInit.h" /* SCW */
1.21 luotonen 40: #include "HTAccess.h" /* HTRequest */
1.14 luotonen 41: #include "HTAABrow.h" /* Access Authorization */
1.20 timbl 42: #include "HTTee.h" /* Tee off a cache stream */
43: #include "HTFWriter.h" /* Write to cache file */
1.1 timbl 44:
/* Local declaration of the stream object: only the class pointer is
** declared here. HTLoadHTTP reaches the put_block and free methods of a
** stream through this pointer.
*/
1.2 timbl 45: struct _HTStream {
46: HTStreamClass * isa; /* class (method table) pointer; all we need to know */
47: };
48:
1.6 timbl 49: extern char * HTAppName; /* Application name for User-Agent: please supply ("unknown" is sent if NULL) */
50: extern char * HTAppVersion; /* Application version for User-Agent: please supply ("0.0" is sent if NULL) */
51:
1.50 luotonen 52: #ifdef OLD_CODE
1.37 luotonen 53: PUBLIC long HTProxyBytes = 0; /* Number of bytes transferred thru proxy (only compiled with OLD_CODE) */
1.50 luotonen 54: #endif
55:
1.37 luotonen 56: extern BOOL using_proxy; /* are we using a proxy gateway? If so the first char of the parsed path is dropped from the request */
57: PUBLIC char * HTProxyHeaders = NULL; /* Headers to pass as-is: sent instead of the generated ones when HTImProxy is set */
1.23 luotonen 58:
1.21 luotonen 59: PRIVATE void parse_401_headers ARGS2(HTRequest *, req,
60: HTInputSocket *, isoc)
61: {
62: HTAAScheme scheme;
63: char *line;
64: int num_schemes = 0;
65: HTList *valid_schemes = HTList_new();
66: HTAssocList **scheme_specifics = NULL;
67: char *template = NULL;
68:
69: /* Read server reply header lines */
70:
71: if (TRACE)
72: fprintf(stderr, "Server 401 reply header lines:\n");
73:
74: while (NULL != (line = HTInputSocket_getUnfoldedLine(isoc)) &&
75: *line != 0) {
76:
77: if (TRACE) fprintf(stderr, "%s\n", line);
78:
79: if (strchr(line, ':')) { /* Valid header line */
80:
81: char *p = line;
82: char *fieldname = HTNextField(&p);
83: char *arg1 = HTNextField(&p);
84: char *args = p;
85:
86: if (0==strcasecomp(fieldname, "WWW-Authenticate:")) {
87: if (HTAA_UNKNOWN != (scheme = HTAAScheme_enum(arg1))) {
88: HTList_addObject(valid_schemes, (void*)scheme);
89: if (!scheme_specifics) {
90: int i;
91: scheme_specifics = (HTAssocList**)
92: malloc(HTAA_MAX_SCHEMES * sizeof(HTAssocList*));
93: if (!scheme_specifics)
94: outofmem(__FILE__, "parse_401_headers");
95: for (i=0; i < HTAA_MAX_SCHEMES; i++)
96: scheme_specifics[i] = NULL;
97: }
98: scheme_specifics[scheme] = HTAA_parseArgList(args);
99: num_schemes++;
100: }
101: else if (TRACE) {
102: fprintf(stderr, "Unknown scheme `%s' %s\n",
103: (arg1 ? arg1 : "(null)"),
104: "in WWW-Authenticate: field");
105: }
106: }
107:
108: else if (0==strcasecomp(fieldname, "WWW-Protection-Template:")) {
109: if (TRACE)
110: fprintf(stderr, "Protection template set to `%s'\n", arg1);
111: StrAllocCopy(template, arg1);
112: }
113:
114: } /* if a valid header line */
115: else if (TRACE) {
116: fprintf(stderr, "Invalid header line `%s' ignored\n", line);
117: } /* else invalid header line */
1.44 frystyk 118: free(line);
1.21 luotonen 119: } /* while header lines remain */
1.44 frystyk 120: FREE(line);
1.21 luotonen 121: req->valid_schemes = valid_schemes;
122: req->scheme_specifics = scheme_specifics;
123: req->prot_template = template;
124: }
125:
126:
127:
1.1 timbl 128: /* Load Document from HTTP Server HTLoadHTTP()
129: ** ==============================
130: **
131: ** Given a hypertext address, this routine loads a document.
132: **
133: **
134: ** On entry,
135: ** arg is the hypertext reference of the article to be loaded.
136: **
137: ** On exit,
138: ** returns >=0 If no error, a good socket number
139: ** <0 Error.
140: **
141: ** The socket must be closed by the caller after the document has been
142: ** read.
143: **
** NOTE(review): the On entry / On exit text above no longer matches the
** code below. As written now:
** - the only parameter is the request; the address is obtained with
** HTAnchor_physical(request->anchor);
** - the function returns -3 or -2 for a missing or empty address, the
** HTParseInet or HTInetStatus result on host lookup, connect or send
** failure, the HTLoadError result for 4xx/5xx replies and for an
** unconvertible format, and HT_LOADED otherwise;
** - on the read-response path the socket is closed here (NETCLOSE at
** clean_up), not by the caller.
**
144: */
1.19 timbl 145: PUBLIC int HTLoadHTTP ARGS1 (HTRequest *, request)
1.1 timbl 146: {
1.22 luotonen 147: CONST char * arg = NULL;
1.1 timbl 148: int s; /* Socket number for returned data */
149: int status; /* tcp return */
1.10 timbl 150: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 151: HTStream * target = NULL; /* Unconverted data */
1.46 frystyk 152: BOOL cache_http = YES; /* Enable caching of HTTP-retrieved files */
153:
1.2 timbl 154: CONST char* gate = 0; /* disable this feature: always 0, so the gate branches below are never taken */
1.1 timbl 155: SockA soc_address; /* Binary network address */
156: SockA * sin = &soc_address;
1.40 frystyk 157: BOOL extensions = YES; /* Assume good HTTP server */
1.36 frystyk 158:
1.50 luotonen 159: #ifdef OLD_CODE
1.37 luotonen 160: if (HTImProxy) HTProxyBytes = 0;
1.50 luotonen 161: #endif
1.23 luotonen 162:
1.37 luotonen 163: arg = HTAnchor_physical(request->anchor);
1.22 luotonen 164:
1.1 timbl 165: if (!arg) return -3; /* Bad if no name specified */
166: if (!*arg) return -2; /* Bad if name had zero length */
167:
168: /* Set up defaults:
169: */
170: #ifdef DECNET
1.2 timbl 171: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
172: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 173: #else /* Internet */
1.2 timbl 174: sin->sin_family = AF_INET; /* Family = internet, host order */
175: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 176: #endif
177:
1.10 timbl 178: sprintf(crlf, "%c%c", CR, LF); /* To be correct on Mac, VM, etc */
179:
1.1 timbl 180: if (TRACE) {
181: if (gate) fprintf(stderr,
182: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
183: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
184: }
185:
186: /* Get node name and optional port number:
187: */
188: {
189: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
190: int status = HTParseInet(sin, p1); /* TBL 920622 */
1.49 luotonen 191: if (status) {
192: HTAddError2(request,"No such host:",p1);
193: free(p1);
194: return status; /* No such host for example */
195: }
1.1 timbl 196: free(p1);
197: }
198:
1.15 luotonen 199: /*
200: ** Compose authorization information (this was moved here
201: ** from after the making of the connection so that the connection
202: ** wouldn't have to wait while prompting username and password
203: ** from the user). -- AL 13.10.93
204: */
205: #ifdef ACCESS_AUTH
1.21 luotonen 206: HTAA_composeAuth(request);
207: if (TRACE) {
208: if (request->authorization)
209: fprintf(stderr, "HTTP: Sending Authorization: %s\n",
210: request->authorization);
211: else
212: fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
1.15 luotonen 213: }
214: #endif /* ACCESS_AUTH */
1.1 timbl 215:
1.10 timbl 216: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 217: */
/* NOTE(review): the result of socket() is passed to connect() without
** being checked for failure.
*/
218: #ifdef DECNET
219: s = socket(AF_DECnet, SOCK_STREAM, 0);
220: #else
221: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
222: #endif
223: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
224: if (status < 0) {
225: if (TRACE) fprintf(stderr,
1.30 frystyk 226: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n",
227: arg, errno);
1.51 ! luotonen 228: HTAddError2(request,"Unable to connect to remote host:",
! 229: HTErrnoString());
/* NOTE(review): s is not closed on this return path. */
1.1 timbl 230: return HTInetStatus("connect");
231: }
232:
233: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
234:
1.17 timbl 235:
236: /* Compose and send command
237: ** ------------------------
238: */
239: {
240: char *command; /* The whole command */
241:
1.1 timbl 242: /* Ask that node for the document,
243: ** omitting the host name & anchor if not gatewayed.
244: */
/* Both allocations below reserve the address or path length plus a fixed
** 4 + 2 + 31 bytes for the method, the version string and the CR LF that
** are added with strcpy/strcat; later headers are appended with
** StrAllocCat.
** NOTE(review): the method name is copied without checking it against
** that fixed allowance.
*/
1.37 luotonen 245: if (gate) { /* This is no longer used, and could be thrown away */
1.17 timbl 246: command = malloc(4 + strlen(arg)+ 2 + 31);
247: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
248: strcpy(command, "GET ");
249: strcat(command, arg);
250: } else { /* not gatewayed */
251: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
252: command = malloc(4 + strlen(p1)+ 2 + 31);
253: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
1.23 luotonen 254: if (request->method != METHOD_INVALID) {
255: strcpy(command, HTMethod_name(request->method));
1.22 luotonen 256: strcat(command, " ");
257: }
258: else {
259: strcpy(command, "GET ");
260: }
1.37 luotonen 261: /* if we are using a proxy gateway don't copy in the first slash
262: ** of say: /gopher://a;lkdjfl;ajdf;lkj/;aldk/adflj
263: ** so that just gopher://.... is sent.
264: */
265: if (using_proxy)
266: strcat(command, p1+1);
267: else
268: strcat(command, p1);
1.17 timbl 269: free(p1);
270: }
1.2 timbl 271: #ifdef HTTP2
1.17 timbl 272: if (extensions) {
273: strcat(command, " ");
274: strcat(command, HTTP_VERSION);
275: }
1.2 timbl 276: #endif
1.17 timbl 277:
278: strcat(command, crlf); /* CR LF, as in rfc 977 */
279:
/* Header section: when acting as a proxy and HTProxyHeaders is set, those
** headers are appended as they are; otherwise Accept, User-Agent and
** (with ACCESS_AUTH) Authorization lines are generated here.
*/
1.37 luotonen 280: if (extensions && HTImProxy && HTProxyHeaders) {
281: StrAllocCat(command, HTProxyHeaders);
282: }
283: else if (extensions) {
1.21 luotonen 284:
1.17 timbl 285: int i;
286: HTAtom * present = WWW_PRESENT;
/* NOTE(review): every sprintf into line below is unbounded; the content
** type names, application name/version and the authorization string are
** not length-checked against these 256 bytes.
*/
287: char line[256]; /*@@@@ */
1.21 luotonen 288: HTList *conversions[2];
289:
1.22 luotonen 290: if (!HTConversions) {
291: HTConversions = HTList_new();
1.34 frystyk 292: /* HTFormatInit(HTConversions); App may do this not us tbl940210 */
1.22 luotonen 293: }
1.28 timbl 294:
1.21 luotonen 295: conversions[0] = HTConversions;
296: conversions[1] = request->conversions;
297:
1.34 frystyk 298:
/* One Accept line per conversion whose output is WWW_PRESENT, first from
** the global list, then from the request's own list; the q parameter is
** sent only when the quality is not 1.0.
*/
1.21 luotonen 299: for (i=0; i<2; i++) {
300: HTList *cur = conversions[i];
301: HTPresentation *pres;
302:
303: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
304: if (pres->rep_out == present) {
305: if (pres->quality != 1.0) {
1.35 frystyk 306: sprintf(line, "Accept: %s; q=%.3f%c%c",
1.21 luotonen 307: HTAtom_name(pres->rep),
308: pres->quality, CR, LF);
309: } else {
310: sprintf(line, "Accept: %s%c%c",
311: HTAtom_name(pres->rep), CR, LF);
312: }
313: StrAllocCat(command, line);
1.17 timbl 314: }
315: }
1.2 timbl 316: }
1.22 luotonen 317:
1.37 luotonen 318: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
1.17 timbl 319: HTAppName ? HTAppName : "unknown",
320: HTAppVersion ? HTAppVersion : "0.0",
321: HTLibraryVersion, CR, LF);
1.37 luotonen 322: StrAllocCat(command, line);
1.45 luotonen 323:
324: #ifdef ACCESS_AUTH
325: if (request->authorization != NULL) {
326: sprintf(line, "Authorization: %s%c%c",
327: request->authorization, CR, LF);
328: StrAllocCat(command, line);
329: }
330: #endif /* ACCESS_AUTH */
331:
1.37 luotonen 332: }
1.22 luotonen 333:
1.17 timbl 334: StrAllocCat(command, crlf); /* Blank line means "end" */
335:
336: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
337:
338: /* Translate into ASCII if necessary
339: */
1.4 timbl 340: #ifdef NOT_ASCII
1.17 timbl 341: {
342: char * p;
343: for(p = command; *p; p++) {
344: *p = TOASCII(*p);
345: }
1.1 timbl 346: }
1.3 timbl 347: #endif
1.17 timbl 348:
349: status = NETWRITE(s, command, (int)strlen(command));
350: free(command);
351: if (status<0) {
352: if (TRACE) fprintf(stderr,
353: "HTTPAccess: Unable to send command.\n");
1.51 ! luotonen 354: HTAddError2(request,"Couldn't send request:",HTErrnoString());
/* NOTE(review): s is not closed on this return path. */
1.1 timbl 355: return HTInetStatus("send");
1.17 timbl 356: }
357: } /* compose and send command */
358:
1.2 timbl 359:
1.17 timbl 360: /* Read the response
361: ** -----------------
1.11 timbl 362: **
363: ** HTTP0 servers must return ASCII style text, though it can in
364: ** principle be just text without any markup at all.
365: ** Full HTTP servers must return a response
366: ** line and RFC822 style header. The response must therefore in
367: ** either case have a CRLF somewhere soon.
368: **
369: ** This is the theory. In practice, there are (1993) unfortunately
370: ** many binary documents just served up with HTTP0.9. This
371: ** means we have to preserve the binary buffer (on the assumption that
372: ** conversion from ASCII may lose information) in case it turns
373: ** out that we want the binary original.
1.2 timbl 374: */
1.37 luotonen 375: if (HTImProxy) {
1.24 luotonen 376:
1.22 luotonen 377: /*
378: ** Server as a gateway -- send body of the message
379: ** received from client (if any).
380: */
381: if (request->isoc && request->content_length > 0) {
382: int remain = request->content_length;
383: int i = remain;
384: char * buf;
385:
/* i is passed in as the number of bytes still wanted and is used
** afterwards as the length of the block that was returned.
*/
386: while (remain > 0 &&
387: (buf = HTInputSocket_getBlock(request->isoc, &i))) {
388: int status = NETWRITE(s, buf, i);
389: if (status < 0) {
1.27 luotonen 390: CTRACE(stderr, "HTTPAccess.. Unable to forward body\n");
1.51 ! luotonen 391: HTAddError2(request,"Couldn't forward message body:",
! 392: HTErrnoString());
/* NOTE(review): s is not closed on this return path. */
1.22 luotonen 393: return HTInetStatus("send");
394: }
395: remain -= i;
396: i = remain;
397: }
398: }
1.23 luotonen 399:
400: /*
1.22 luotonen 401: ** Load results directly to client
402: */
1.50 luotonen 403: #ifdef OLD_CODE
404: HTProxyBytes =
405: #endif
406: HTCopy(s, request->output_stream);
1.25 luotonen 407: (*request->output_stream->isa->free)(request->output_stream);
1.37 luotonen 408:
/* NOTE(review): this branch returns without calling NETCLOSE(s). */
1.22 luotonen 409: return HT_LOADED;
410: }
411: else { /* read response */
1.21 luotonen 412:
1.17 timbl 413: HTFormat format_in; /* Format arriving in the message */
1.21 luotonen 414: HTInputSocket *isoc = HTInputSocket_new(s);
415: char * status_line = HTInputSocket_getStatusLine(isoc);
1.2 timbl 416:
1.11 timbl 417: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
418: ** First time we have enough, look at the stub in ASCII
419: ** and get out of here if it doesn't look right.
420: **
421: ** We also check for characters above 128 in the first few bytes, and
422: ** if we find them we forget the html default.
423: **
424: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
425: ** will be taken as a HTTP 1.0 server. Failure.
426: ** An HTTP 0.9 server returning a binary document with
427: ** characters < 128 will be read as ASCII.
428: */
1.36 frystyk 429:
430: /* If HTTP 0 response, then DO NOT CACHE (Henrik 14/02-94) */
431: if (!status_line) {
1.21 luotonen 432: if (HTInputSocket_seemsBinary(isoc)) {
433: format_in = HTAtom_for("www/unknown");
434: }
435: else {
436: format_in = WWW_HTML;
437: }
1.46 frystyk 438: cache_http = NO; /* Do not cache */
1.21 luotonen 439: goto copy;
440: } /* end kludge */
441:
442: if (status_line) { /* Decode full HTTP response */
443: /*
444: ** We now have a terminated server status line, and we have
445: ** checked that it is most probably a legal one. Parse it.
446: */
447: char server_version[VERSION_LENGTH+1];
448: int server_status;
449:
450: if (TRACE)
451: fprintf(stderr, "HTTP Status Line: Rx: %.70s\n", status_line);
1.17 timbl 452:
/* NOTE(review): the sscanf result is not checked; if the status code
** cannot be parsed, server_status is used uninitialized in the switch.
** The %20s width is the literal value of VERSION_LENGTH.
*/
1.21 luotonen 453: sscanf(status_line, "%20s%d", server_version, &server_status);
1.2 timbl 454:
1.21 luotonen 455: format_in = HTAtom_for("www/mime");
1.7 timbl 456:
/* Dispatch on the status class. The default and 3xx cases only raise an
** alert and then carry on to load the body like a 2xx reply.
*/
1.21 luotonen 457: switch (server_status / 100) {
1.2 timbl 458:
1.21 luotonen 459: default: /* bad number */
460: HTAlert("Unknown status reply from server!");
461: break;
1.17 timbl 462:
1.21 luotonen 463: case 3: /* Various forms of redirection */
464: HTAlert(
1.17 timbl 465: "Redirection response from server is not handled by this client");
1.21 luotonen 466: break;
1.17 timbl 467:
1.21 luotonen 468: case 4: /* Access Authorization problem */
1.14 luotonen 469: #ifdef ACCESS_AUTH
1.21 luotonen 470: switch (server_status) {
471: case 401:
472: parse_401_headers(request, isoc);
473:
474: if (TRACE) fprintf(stderr, "%s %d %s\n",
475: "HTTP: close socket", s,
476: "to retry with Access Authorization");
1.24 luotonen 477: if (HTAA_retryWithAuth(request, HTLoadHTTP)) {
1.21 luotonen 478: status = HT_LOADED;/* @@ THIS ONLY WORKS ON LINEMODE */
479: goto clean_up;
480: }
481: /* else fallthrough */
482: default:
1.14 luotonen 483: {
1.21 luotonen 484: char *p1 = HTParse(gate ? gate : arg, "",
485: PARSE_HOST);
486: char * message;
487:
488: if (!(message = (char*)malloc(strlen(status_line) +
489: strlen(p1) + 100)))
490: outofmem(__FILE__, "HTTP 4xx status");
1.14 luotonen 491: sprintf(message,
1.21 luotonen 492: "HTTP server at %s replies:\n%s\n\n%s\n",
493: p1, status_line,
494: ((server_status == 401)
495: ? "Access Authorization package giving up.\n"
496: : ""));
1.22 luotonen 497: status = HTLoadError(request, server_status, message);
1.14 luotonen 498: free(message);
499: free(p1);
500: goto clean_up;
501: }
1.21 luotonen 502: } /* switch */
/* Both arms of the inner switch leave through goto clean_up, so the two
** statements below are not reached.
*/
503: goto clean_up;
504: break;
505: #else
506: /* case 4 without Access Authorization falls through */
507: /* to case 5 (previously "I think I goofed"). -- AL */
508: #endif /* ACCESS_AUTH */
509:
510: case 5: /* I think you goofed */
511: {
512: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
513: char * message = (char*)malloc(strlen(status_line) +
514: strlen(p1) + 100);
515: if (!message) outofmem(__FILE__, "HTTP 5xx status");
516: sprintf(message,
517: "HTTP server at %s replies:\n%s", p1, status_line);
1.22 luotonen 518: status = HTLoadError(request, server_status, message);
1.21 luotonen 519: free(message);
520: free(p1);
521: goto clean_up;
522: }
523: break;
1.17 timbl 524:
1.21 luotonen 525: case 2: /* Good: Got MIME object */
526: break;
1.17 timbl 527:
1.21 luotonen 528: } /* switch on response code */
1.17 timbl 529:
1.21 luotonen 530: } /* Full HTTP reply */
1.17 timbl 531:
532:
1.3 timbl 533: /* Set up the stream stack to handle the body of the message
1.33 timbl 534: **
535: ** In the special case of user asking for source and the message
536: ** being in MIME, we force the MIME decoding to occur, as it is really
537: ** HTTP decoding. If the user really wants the HTTP headers, he
538: ** can ask for them as www/mime.
1.3 timbl 539: */
1.21 luotonen 540:
1.13 duns 541: copy:
1.21 luotonen 542:
1.43 frystyk 543: if ((format_in == WWW_MIME)
544: && (request->output_format == WWW_SOURCE)) {
1.33 timbl 545: target = HTMIMEConvert(request, NULL, format_in,
546: request->output_format, request->output_stream);
547: } else {
1.47 luotonen 548: target = HTStreamStack(format_in, request, NO);
1.33 timbl 549: }
550:
/* No converter available: report it as a 501 through HTLoadError.
** The fprintf below is not guarded by TRACE.
*/
1.17 timbl 551: if (!target) {
552: char buffer[1024]; /* @@@@@@@@ */
553: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
554: HTAtom_name(format_in), HTAtom_name(request->output_format));
555: fprintf(stderr, "HTTP: %s", buffer);
1.22 luotonen 556: status = HTLoadError(request, 501, buffer);
1.17 timbl 557: goto clean_up;
558: }
559:
1.19 timbl 560: /* @@ Bug: The decision of whether or not to cache should also be
1.21 luotonen 561: ** made contingent on a IP address match or non match.
562: */
1.48 timbl 563:
564: if (HTCacheDir && cache_http) {
1.19 timbl 565: target = HTTee(target, HTCacheWriter(request, NULL, format_in,
1.21 luotonen 566: request->output_format,
567: request->output_stream));
1.19 timbl 568: }
569:
1.11 timbl 570: /* Push the data down the stream
1.3 timbl 571: ** We have to remember the end of the first buffer we just read
1.2 timbl 572: */
1.30 frystyk 573: if (format_in == WWW_HTML) {
1.17 timbl 574: target = HTNetToText(target); /* Pipe through CR stripper */
575: }
1.21 luotonen 576:
/* First hand over what is still unread in the input socket buffer, then
** copy the rest of the reply from the socket.
*/
1.17 timbl 577: (*target->isa->put_block)(target,
1.21 luotonen 578: isoc->input_pointer,
579: isoc->input_limit - isoc->input_pointer);
1.41 frystyk 580:
1.17 timbl 581: HTCopy(s, target);
582:
583: (*target->isa->free)(target);
584: status = HT_LOADED;
1.11 timbl 585:
1.2 timbl 586: /* Clean up
1.1 timbl 587: */
1.17 timbl 588:
/* Common exit for the read-response path: close the socket and release
** the input socket object and the status line.
*/
589: clean_up:
590: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
591: (void) NETCLOSE(s);
1.41 frystyk 592: if (isoc)
593: HTInputSocket_free(isoc);
594: if (status_line)
1.38 frystyk 595: free(status_line); /* Leak fix Henrik 18/02-94 */
596: return status; /* Good return */
1.3 timbl 597:
1.17 timbl 598: } /* read response */
599: } /* load HTTP */
1.1 timbl 600:
601: /* Protocol descriptor
** Binds the access scheme name "http" to the loader HTLoadHTTP.
** The two remaining members are left as 0; their meaning is defined by
** HTProtocol, which is declared outside this file.
602: */
603:
1.17 timbl 604: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };
1.21 luotonen 605:
Webmaster