Annotation of libwww/Library/src/HTTP.c, revision 1.17
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
1.12 timbl 17: /* MOSAIC_HACK2 is a kludge to guess the file type of a transferred
18: ** file from the URL. It is STRICTLY illegal to do this!
19: */
20:
1.2 timbl 21: /* Implements:
22: */
23: #include "HTTP.h"
24:
/* Protocol version token appended to the request line when extensions are on */
#define HTTP_VERSION            "HTTP/1.0"
#define HTTP2                   /* Version is greater than 0.9 */

#define INIT_LINE_SIZE          1024    /* Start with line buffer this big */
#define LINE_EXTEND_THRESH      256     /* Grow the buffer when less than this is free */
#define VERSION_LENGTH          20      /* Max chars kept of returned protocol version */
31:
32: /* Uses:
33: */
1.1 timbl 34: #include "HTParse.h"
35: #include "HTUtils.h"
36: #include "tcp.h"
37: #include "HTTCP.h"
38: #include "HTFormat.h"
1.2 timbl 39: #include <ctype.h>
40: #include "HTAlert.h"
41: #include "HTMIME.h"
1.5 timbl 42: #include "HTML.h" /* SCW */
43: #include "HTInit.h" /* SCW */
1.14 luotonen 44: #include "HTAABrow.h" /* Access Authorization */
1.1 timbl 45:
1.2 timbl 46: struct _HTStream {
47: HTStreamClass * isa; /* all we need to know */
48: };
49:
50:
/*
** Supplied by the application.  Either may be NULL, in which case the
** User-Agent line falls back to "unknown" and "0.0" respectively.
*/
extern char * HTAppName;        /* Application name: please supply */
extern char * HTAppVersion;     /* Application version: please supply */
53:
1.1 timbl 54: /* Load Document from HTTP Server HTLoadHTTP()
55: ** ==============================
56: **
57: ** Given a hypertext address, this routine loads a document.
58: **
59: **
60: ** On entry,
61: ** arg is the hypertext reference of the article to be loaded.
62: **
63: ** On exit,
64: ** returns >=0 If no error, a good socket number
65: ** <0 Error.
66: **
67: ** The socket must be closed by the caller after the document has been
68: ** read.
69: **
70: */
1.17 ! timbl 71: PUBLIC int HTLoadHTTP ARGS2 (
1.2 timbl 72: CONST char *, arg,
1.17 ! timbl 73: HTRequest *, request)
1.1 timbl 74: {
75: int s; /* Socket number for returned data */
76: int status; /* tcp return */
1.10 timbl 77: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 78: HTStream * target = NULL; /* Unconverted data */
1.15 luotonen 79: char *auth = NULL; /* Authorization information */
1.3 timbl 80:
1.2 timbl 81: CONST char* gate = 0; /* disable this feature */
1.1 timbl 82: SockA soc_address; /* Binary network address */
83: SockA * sin = &soc_address;
1.2 timbl 84: BOOL extensions = YES; /* Assume good HTTP server */
1.17 ! timbl 85:
1.1 timbl 86: if (!arg) return -3; /* Bad if no name sepcified */
87: if (!*arg) return -2; /* Bad if name had zero length */
88:
89: /* Set up defaults:
90: */
91: #ifdef DECNET
1.2 timbl 92: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
93: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 94: #else /* Internet */
1.2 timbl 95: sin->sin_family = AF_INET; /* Family = internet, host order */
96: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 97: #endif
98:
1.10 timbl 99: sprintf(crlf, "%c%c", CR, LF); /* To be corect on Mac, VM, etc */
100:
1.1 timbl 101: if (TRACE) {
102: if (gate) fprintf(stderr,
103: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
104: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
105: }
106:
107: /* Get node name and optional port number:
108: */
109: {
110: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
111: int status = HTParseInet(sin, p1); /* TBL 920622 */
112: free(p1);
113: if (status) return status; /* No such host for example */
114: }
115:
1.2 timbl 116: retry:
1.15 luotonen 117:
118: /*
119: ** Compose authorization information (this was moved here
120: ** from after the making of the connection so that the connection
121: ** wouldn't have to wait while prompting username and password
122: ** from the user). -- AL 13.10.93
123: */
124: #ifdef ACCESS_AUTH
125: #define FREE(x) if (x) {free(x); x=NULL;}
126: {
127: char *docname;
128: char *hostname;
129: char *colon;
130: int portnumber;
131:
132: docname = HTParse(arg, "", PARSE_PATH);
133: hostname = HTParse((gate ? gate : arg), "", PARSE_HOST);
134: if (hostname &&
135: NULL != (colon = strchr(hostname, ':'))) {
1.16 duns 136: *(colon++) = '0円'; /* Chop off port number */
1.15 luotonen 137: portnumber = atoi(colon);
138: }
139: else portnumber = 80;
140:
141: auth = HTAA_composeAuth(hostname, portnumber, docname);
142:
143: if (TRACE) {
144: if (auth)
145: fprintf(stderr, "HTTP: Sending authorization: %s\n", auth);
146: else
147: fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
148: }
149: FREE(hostname);
150: FREE(docname);
151: }
152: #endif /* ACCESS_AUTH */
1.1 timbl 153:
1.10 timbl 154: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 155: */
156: #ifdef DECNET
157: s = socket(AF_DECnet, SOCK_STREAM, 0);
158: #else
159: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
160: #endif
161: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
162: if (status < 0) {
163: if (TRACE) fprintf(stderr,
164: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
1.17 ! timbl 165:
1.1 timbl 166: return HTInetStatus("connect");
167: }
168:
169: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
170:
1.17 ! timbl 171:
! 172: /* Compose and send command
! 173: ** ------------------------
! 174: */
! 175: {
! 176: char *command; /* The whole command */
! 177:
1.1 timbl 178: /* Ask that node for the document,
179: ** omitting the host name & anchor if not gatewayed.
180: */
1.17 ! timbl 181: if (gate) {
! 182: command = malloc(4 + strlen(arg)+ 2 + 31);
! 183: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
! 184: strcpy(command, "GET ");
! 185: strcat(command, arg);
! 186: } else { /* not gatewayed */
! 187: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
! 188: command = malloc(4 + strlen(p1)+ 2 + 31);
! 189: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
! 190: strcpy(command, "GET ");
! 191: strcat(command, p1);
! 192: free(p1);
! 193: }
1.2 timbl 194: #ifdef HTTP2
1.17 ! timbl 195: if (extensions) {
! 196: strcat(command, " ");
! 197: strcat(command, HTTP_VERSION);
! 198: }
1.2 timbl 199: #endif
1.17 ! timbl 200:
! 201: strcat(command, crlf); /* CR LF, as in rfc 977 */
! 202:
! 203: if (extensions) {
! 204:
! 205: int n;
! 206: int i;
! 207: HTAtom * present = WWW_PRESENT;
! 208: char line[256]; /*@@@@ */
! 209:
! 210: /* if (!request->conversions) HTFormatInit(request->conversions); */
! 211: n = HTList_count(request->conversions);
! 212:
! 213: for(i=0; i<n; i++) {
! 214: HTPresentation * pres =
! 215: HTList_objectAt(request->conversions, i);
! 216: if (pres->rep_out == present) {
! 217: if (pres->quality != 1.0) {
! 218: sprintf(line, "Accept: %s q=%.3f%c%c",
! 219: HTAtom_name(pres->rep), pres->quality, CR, LF);
! 220: } else {
! 221: sprintf(line, "Accept: %s%c%c",
! 222: HTAtom_name(pres->rep), CR, LF);
! 223: }
! 224: StrAllocCat(command, line);
! 225:
! 226: }
1.2 timbl 227: }
1.17 ! timbl 228:
! 229: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
! 230: HTAppName ? HTAppName : "unknown",
! 231: HTAppVersion ? HTAppVersion : "0.0",
! 232: HTLibraryVersion, CR, LF);
! 233: StrAllocCat(command, line);
! 234:
1.14 luotonen 235: #ifdef ACCESS_AUTH
1.17 ! timbl 236: if (auth != NULL) {
! 237: sprintf(line, "%s%c%c", auth, CR, LF);
! 238: StrAllocCat(command, line);
! 239: }
! 240: #endif /* ACCESS_AUTH */
1.14 luotonen 241: }
1.17 ! timbl 242:
! 243: StrAllocCat(command, crlf); /* Blank line means "end" */
! 244:
! 245: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
! 246:
! 247: /* Translate into ASCII if necessary
! 248: */
1.4 timbl 249: #ifdef NOT_ASCII
1.17 ! timbl 250: {
! 251: char * p;
! 252: for(p = command; *p; p++) {
! 253: *p = TOASCII(*p);
! 254: }
1.1 timbl 255: }
1.3 timbl 256: #endif
1.17 ! timbl 257:
! 258: status = NETWRITE(s, command, (int)strlen(command));
! 259: free(command);
! 260: if (status<0) {
! 261: if (TRACE) fprintf(stderr,
! 262: "HTTPAccess: Unable to send command.\n");
1.1 timbl 263: return HTInetStatus("send");
1.17 ! timbl 264: }
! 265: } /* compose and send command */
! 266:
1.2 timbl 267:
1.17 ! timbl 268: /* Read the response
! 269: ** -----------------
1.11 timbl 270: **
271: ** HTTP0 servers must return ASCII style text, though it can in
272: ** principle be just text without any markup at all.
273: ** Full HTTP servers must return a response
274: ** line and RFC822 style header. The response must therefore in
275: ** either case have a CRLF somewhere soon.
276: **
277: ** This is the theory. In practice, there are (1993) unfortunately
278: ** many binary documents just served up with HTTP0.9. This
279: ** means we have to preserve the binary buffer (on the assumption that
280: ** conversion from ASCII may lose information) in case it turns
281: ** out that we want the binary original.
1.2 timbl 282: */
1.3 timbl 283:
1.17 ! timbl 284: { /* read response */
! 285:
! 286: char * eol = 0; /* End of line if found */
! 287: char * start_of_data; /* Start of body of reply */
! 288: int length; /* Number of valid bytes in buffer */
! 289: char * text_buffer = NULL;
! 290: char * binary_buffer = NULL;
! 291: HTFormat format_in; /* Format arriving in the message */
! 292:
! 293: { /* local variablees for loop*/
1.2 timbl 294:
295: /* Get numeric status etc */
296:
1.17 ! timbl 297: BOOL end_of_file = NO;
! 298: HTAtom * encoding = HTAtom_for("7bit");
! 299: int buffer_length = INIT_LINE_SIZE; /* Why not? */
! 300:
! 301: binary_buffer = (char *) malloc(buffer_length * sizeof(char));
! 302: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
! 303: text_buffer = (char *) malloc(buffer_length * sizeof(char));
! 304: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
! 305: length = 0;
1.2 timbl 306:
1.17 ! timbl 307: do { /* Loop to read in the first line */
! 308:
! 309: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
! 310:
! 311: if (buffer_length - length < LINE_EXTEND_THRESH) {
! 312: buffer_length = buffer_length + buffer_length;
! 313: binary_buffer = (char *) realloc(
! 314: binary_buffer, buffer_length * sizeof(char));
! 315: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
! 316: text_buffer = (char *) realloc(
! 317: text_buffer, buffer_length * sizeof(char));
! 318: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
! 319: }
! 320: status = NETREAD(s, binary_buffer + length,
! 321: buffer_length - length -1);
! 322: if (status < 0) {
! 323: HTAlert("Unexpected network read error on response");
! 324: NETCLOSE(s);
! 325: return status;
! 326: }
! 327:
! 328: if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
! 329: status);
! 330:
! 331: if (status == 0) {
! 332: end_of_file = YES;
! 333: break;
! 334: }
! 335: binary_buffer[length+status] = 0;
! 336:
! 337:
1.11 timbl 338: /* Make an ASCII *copy* of the buffer
339: */
1.2 timbl 340: #ifdef NOT_ASCII
1.17 ! timbl 341: if (TRACE) fprintf(stderr,
! 342: "Local codes CR=%d, LF=%d\n", CR, LF);
1.11 timbl 343: #endif
1.17 ! timbl 344: {
! 345: char * p;
! 346: char * q;
! 347: for(p = binary_buffer+length, q=text_buffer+length;
! 348: *p; p++, q++) {
! 349: *q = FROMASCII(*p);
! 350: }
! 351:
! 352: *q++ = 0;
1.11 timbl 353: }
1.17 ! timbl 354:
1.11 timbl 355: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
356: ** First time we have enough, look at the stub in ASCII
357: ** and get out of here if it doesn't look right.
358: **
359: ** We also check for characters above 128 in the first few bytes, and
360: ** if we find them we forget the html default.
361: **
362: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
363: ** will be taken as a HTTP 1.0 server. Failure.
364: ** An HTTP 0.9 server returning a binary document with
365: ** characters < 128 will be read as ASCII.
366: */
367: #define STUB_LENGTH 20
1.17 ! timbl 368: if (length < STUB_LENGTH && length+status >= STUB_LENGTH) {
! 369: if(strncmp("HTTP/", text_buffer, 5)!=0) {
! 370: char *p;
! 371: start_of_data = text_buffer; /* reparse whole reply */
! 372: for(p=binary_buffer; p <binary_buffer+STUB_LENGTH;p++) {
! 373: if (((int)*p)&128) {
! 374: format_in = HTAtom_for("www/unknown");
! 375: length = length + status;
! 376: goto copy; /* out of while loop */
! 377: }
1.11 timbl 378: }
379: }
1.2 timbl 380: }
1.17 ! timbl 381: /* end kludge */
! 382:
! 383:
! 384: eol = strchr(text_buffer + length, 10);
! 385: if (eol) {
! 386: *eol = 0; /* Terminate the line */
! 387: if (eol[-1] == CR) eol[-1] = 0; /* Chop trailing CR */
! 388: /* = corrected to == -- AL */
! 389: }
! 390:
! 391: length = length + status;
! 392:
! 393: } while (!eol && !end_of_file); /* No LF */
! 394:
! 395: } /* Scope of loop variables */
1.2 timbl 396:
1.7 timbl 397:
398: /* We now have a terminated unfolded line. Parse it.
399: ** -------------------------------------------------
1.2 timbl 400: */
401:
1.17 ! timbl 402: if (TRACE)fprintf(stderr, "HTTP: Rx: %.70s\n", text_buffer);
! 403:
! 404: {
! 405: int fields;
! 406: char server_version [VERSION_LENGTH+1];
! 407: int server_status;
! 408:
! 409:
! 410: #ifdef OLD_CODE /* old buggy servers should not exist now tbl9311 */
1.2 timbl 411: /* Kludge to work with old buggy servers. They can't handle the third word
412: ** so we try again without it.
413: */
1.17 ! timbl 414: if (extensions &&
! 415: 0==strcmp(text_buffer, /* Old buggy server? */
! 416: "Document address invalid or access not authorised")) {
! 417: extensions = NO;
! 418: if (binary_buffer) free(binary_buffer);
! 419: if (text_buffer) free(text_buffer);
! 420: if (TRACE) fprintf(stderr,
! 421: "HTTP: close socket %d to retry with HTTP0\n", s);
! 422: NETCLOSE(s);
! 423: goto retry; /* @@@@@@@@@@ */
! 424: }
! 425: #endif
! 426:
! 427: fields = sscanf(text_buffer, "%20s%d",
! 428: server_version,
! 429: &server_status);
! 430:
! 431: if (fields < 2 ||
! 432: strncmp(server_version, "HTTP/", 5)!=0) { /* HTTP0 reply */
! 433: format_in = WWW_HTML;
! 434: start_of_data = text_buffer; /* reread whole reply */
! 435: if (eol) *eol = '\n'; /* Reconstitute buffer */
! 436:
! 437: } else { /* Full HTTP reply */
1.2 timbl 438:
1.17 ! timbl 439: /* Decode full HTTP response */
1.2 timbl 440:
1.17 ! timbl 441: format_in = HTAtom_for("www/mime");
! 442: start_of_data = eol ? eol + 1 : text_buffer + length;
! 443:
! 444: switch (server_status / 100) {
1.3 timbl 445:
1.17 ! timbl 446: default: /* bad number */
! 447: HTAlert("Unknown status reply from server!");
! 448: break;
! 449:
! 450: case 3: /* Various forms of redirection */
! 451: HTAlert(
! 452: "Redirection response from server is not handled by this client");
! 453: break;
! 454:
! 455: case 4: /* Access Authorization problem */
1.14 luotonen 456: #ifdef ACCESS_AUTH
1.17 ! timbl 457: switch (server_status) {
! 458: case 401:
! 459: length -= start_of_data - text_buffer;
! 460: if (HTAA_shouldRetryWithAuth(start_of_data, length, s)) {
! 461: /* Clean up before retrying */
! 462: if (binary_buffer) free(binary_buffer);
! 463: if (text_buffer) free(text_buffer);
! 464: if (TRACE)
! 465: fprintf(stderr, "%s %d %s\n",
! 466: "HTTP: close socket", s,
! 467: "to retry with Access Authorization");
! 468: (void)NETCLOSE(s);
! 469: goto retry;
! 470: break;
! 471: }
! 472: else {
! 473: /* FALL THROUGH */
! 474: }
! 475: default:
! 476: {
! 477: char *p1 = HTParse(gate ? gate : arg, "",
! 478: PARSE_HOST);
! 479: char * message;
! 480:
! 481: if (!(message = (char*)malloc(strlen(text_buffer) +
! 482: strlen(p1) + 100)))
! 483: outofmem(__FILE__, "HTTP 4xx status");
! 484: sprintf(message,
! 485: "HTTP server at %s replies:\n%s\n\n%s\n",
! 486: p1, text_buffer,
! 487: ((server_status == 401)
! 488: ? "Access Authorization package giving up.\n"
! 489: : ""));
! 490: status = HTLoadError(request->output_stream,
! 491: server_status, message);
! 492: free(message);
! 493: free(p1);
! 494: goto clean_up;
! 495: }
! 496: } /* switch */
! 497: goto clean_up;
! 498: break;
! 499: #else
! 500: /* case 4 without Access Authorization falls through */
! 501: /* to case 5 (previously "I think I goofed"). -- AL */
! 502: #endif /* ACCESS_AUTH */
! 503:
! 504: case 5: /* I think you goofed */
1.14 luotonen 505: {
506: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
1.17 ! timbl 507: char * message = (char*)malloc(
! 508: strlen(text_buffer)+strlen(p1) + 100);
! 509: if (!message) outofmem(__FILE__, "HTTP 5xx status");
1.14 luotonen 510: sprintf(message,
1.17 ! timbl 511: "HTTP server at %s replies:\n%s", p1, text_buffer);
! 512: status = HTLoadError(request->output_stream, server_status, message);
1.14 luotonen 513: free(message);
514: free(p1);
515: goto clean_up;
516: }
1.17 ! timbl 517: break;
! 518:
! 519: case 2: /* Good: Got MIME object */
! 520: break;
! 521:
! 522: } /* switch on response code */
! 523:
! 524: } /* Full HTTP reply */
! 525:
! 526: } /* scope of fields */
! 527:
1.3 timbl 528: /* Set up the stream stack to handle the body of the message
529: */
1.17 ! timbl 530:
1.13 duns 531: copy:
1.17 ! timbl 532:
! 533: target = HTStreamStack(format_in,
! 534: request->output_format,
! 535: request->output_stream , request->anchor);
! 536:
! 537: if (!target) {
! 538: char buffer[1024]; /* @@@@@@@@ */
! 539: if (binary_buffer) free(binary_buffer);
! 540: if (text_buffer) free(text_buffer);
! 541: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
! 542: HTAtom_name(format_in), HTAtom_name(request->output_format));
! 543: fprintf(stderr, "HTTP: %s", buffer);
! 544: status = HTLoadError(request->output_stream, 501, buffer);
! 545: goto clean_up;
! 546: }
! 547:
1.3 timbl 548:
1.11 timbl 549: /* Push the data down the stream
1.3 timbl 550: ** We have to remember the end of the first buffer we just read
1.2 timbl 551: */
1.17 ! timbl 552: if (format_in == WWW_HTML) {
! 553: target = HTNetToText(target); /* Pipe through CR stripper */
! 554: }
! 555:
! 556: (*target->isa->put_block)(target,
! 557: binary_buffer + (start_of_data - text_buffer),
! 558: length - (start_of_data - text_buffer));
! 559: HTCopy(s, target);
! 560:
! 561: (*target->isa->free)(target);
! 562: status = HT_LOADED;
1.11 timbl 563:
1.2 timbl 564: /* Clean up
1.1 timbl 565: */
1.17 ! timbl 566:
! 567: clean_up:
! 568: if (binary_buffer) free(binary_buffer);
! 569: if (text_buffer) free(text_buffer);
! 570:
! 571: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
! 572: (void) NETCLOSE(s);
! 573:
! 574: return status; /* Good return */
1.3 timbl 575:
1.17 ! timbl 576: } /* read response */
! 577: } /* load HTTP */
1.1 timbl 578:
579: /* Protocol descriptor
580: */
581:
1.17 ! timbl 582: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };
Webmaster