Annotation of libwww/Library/src/HTTP.c, revision 1.10
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
17: /* Implements:
18: */
19: #include "HTTP.h"
20:
21: #define HTTP_VERSION "HTTP/1.0"
22: #define HTTP2 /* Version is greater than 0.9 */
23:
/* Local-character-set equivalents of ASCII CR and LF.  The character
** constants are octal escapes; the values are passed through FROMASCII
** here and must be converted back to ^M / ^J for transmission.
*/
#define CR   FROMASCII('\015')  /* Must be converted to ^M for transmission */
#define LF   FROMASCII('\012')  /* Must be converted to ^J for transmission */
26:
1.2 timbl 27: #define INIT_LINE_SIZE 1024 /* Start with line buffer this big */
28: #define LINE_EXTEND_THRESH 256 /* Minimum read size */
29: #define VERSION_LENGTH 20 /* for returned protocol version */
30:
31: /* Uses:
32: */
1.1 timbl 33: #include "HTParse.h"
34: #include "HTUtils.h"
35: #include "tcp.h"
36: #include "HTTCP.h"
37: #include "HTFormat.h"
1.2 timbl 38: #include <ctype.h>
39: #include "HTAlert.h"
40: #include "HTMIME.h"
1.5 timbl 41: #include "HTML.h" /* SCW */
42: #include "HTInit.h" /* SCW */
1.1 timbl 43:
1.2 timbl 44: struct _HTStream {
45: HTStreamClass * isa; /* Class pointer; the only member this module uses (to call put_string, put_character, end_document and free) */
46: };
47:
48:
1.6 timbl 49: extern char * HTAppName; /* Application name: please supply */
50: extern char * HTAppVersion; /* Application version: please supply */
51:
1.1 timbl 52: /* Load Document from HTTP Server HTLoadHTTP()
53: ** ==============================
54: **
55: ** Given a hypertext address, this routine loads a document.
56: **
57: **
58: ** On entry,
59: ** arg is the hypertext reference of the article to be loaded.
60: ** gate is nill if no gateway, else the gateway address.
61: **
62: ** On exit,
63: ** returns >=0 If no error, a good socket number
64: ** <0 Error.
65: **
66: ** The socket must be closed by the caller after the document has been
67: ** read.
68: **
69: */
1.2 timbl 70: PUBLIC int HTLoadHTTP ARGS4 (
71: CONST char *, arg,
72: /* CONST char *, gate, */
73: HTParentAnchor *, anAnchor,
74: HTFormat, format_out,
75: HTStream*, sink)
1.1 timbl 76: {
77: int s; /* Socket number for returned data */
78: char *command; /* The whole command */
1.3 timbl 79: char * eol = 0; /* End of line if found */
1.7 timbl 80: char * start_of_data; /* Start of body of reply */
1.1 timbl 81: int status; /* tcp return */
1.10 ! timbl 82: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 83: HTStream * target = NULL; /* Unconverted data */
84: HTFormat format_in; /* Format arriving in the message */
85:
1.2 timbl 86: CONST char* gate = 0; /* disable this feature */
1.1 timbl 87: SockA soc_address; /* Binary network address */
88: SockA * sin = &soc_address;
1.2 timbl 89: BOOL had_header = NO; /* Have we had at least one header? */
90: char * line_buffer = NULL;
91: BOOL extensions = YES; /* Assume good HTTP server */
1.1 timbl 92: if (!arg) return -3; /* Bad if no name sepcified */
93: if (!*arg) return -2; /* Bad if name had zero length */
94:
95: /* Set up defaults:
96: */
97: #ifdef DECNET
1.2 timbl 98: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
99: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 100: #else /* Internet */
1.2 timbl 101: sin->sin_family = AF_INET; /* Family = internet, host order */
102: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 103: #endif
104:
1.10 ! timbl 105: sprintf(crlf, "%c%c", CR, LF); /* To be corect on Mac, VM, etc */
! 106:
1.1 timbl 107: if (TRACE) {
108: if (gate) fprintf(stderr,
109: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
110: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
111: }
112:
113: /* Get node name and optional port number:
114: */
115: {
116: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
117: int status = HTParseInet(sin, p1); /* TBL 920622 */
118: free(p1);
119: if (status) return status; /* No such host for example */
120: }
121:
1.2 timbl 122: retry:
1.1 timbl 123:
1.10 ! timbl 124: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 125: */
126: #ifdef DECNET
127: s = socket(AF_DECnet, SOCK_STREAM, 0);
128: #else
129: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
130: #endif
131: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
132: if (status < 0) {
133: if (TRACE) fprintf(stderr,
134: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
135: /* free(command); BUG OUT TBL 921121 */
136: return HTInetStatus("connect");
137: }
138:
139: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
140:
141: /* Ask that node for the document,
142: ** omitting the host name & anchor if not gatewayed.
143: */
144: if (gate) {
1.2 timbl 145: command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 146: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
147: strcpy(command, "GET ");
148: strcat(command, arg);
149: } else { /* not gatewayed */
150: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 151: command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 152: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
153: strcpy(command, "GET ");
154: strcat(command, p1);
155: free(p1);
156: }
1.2 timbl 157: #ifdef HTTP2
158: if (extensions) {
159: strcat(command, " ");
160: strcat(command, HTTP_VERSION);
161: }
162: #endif
1.10 ! timbl 163:
! 164: strcat(command, crlf); /* CR LF, as in rfc 977 */
1.1 timbl 165:
1.2 timbl 166: if (extensions) {
167:
168: int n;
169: int i;
170: HTAtom * present = WWW_PRESENT;
171: char line[256]; /*@@@@ */
172:
173: if (!HTPresentations) HTFormatInit();
174: n = HTList_count(HTPresentations);
175:
176: for(i=0; i<n; i++) {
177: HTPresentation * pres = HTList_objectAt(HTPresentations, i);
178: if (pres->rep_out == present) {
179: if (pres->quality != 1.0) {
1.3 timbl 180: sprintf(line, "Accept: %s q=%.3f%c%c",
181: HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 182: } else {
1.3 timbl 183: sprintf(line, "Accept: %s%c%c",
184: HTAtom_name(pres->rep), CR, LF);
1.2 timbl 185: }
186: StrAllocCat(command, line);
187:
188: }
189: }
1.6 timbl 190:
191: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
192: HTAppName ? HTAppName : "unknown",
193: HTAppVersion ? HTAppVersion : "0.0",
194: HTLibraryVersion, CR, LF);
195: StrAllocCat(command, line);
1.2 timbl 196: }
1.3 timbl 197:
1.10 ! timbl 198: StrAllocCat(command, crlf); /* Blank line means "end" */
! 199:
! 200: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
! 201:
! 202: /* Translate into ASCII if necessary
! 203: */
1.4 timbl 204: #ifdef NOT_ASCII
1.1 timbl 205: {
206: char * p;
207: for(p = command; *p; p++) {
208: *p = TOASCII(*p);
209: }
1.4 timbl 210: }
1.3 timbl 211: #endif
1.1 timbl 212:
213: status = NETWRITE(s, command, (int)strlen(command));
214: free(command);
215: if (status<0) {
216: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
217: return HTInetStatus("send");
218: }
219:
1.2 timbl 220:
1.7 timbl 221: /* Read the first line of the response
222: ** -----------------------------------
1.2 timbl 223: */
1.3 timbl 224:
1.2 timbl 225: {
226:
227: /* Get numeric status etc */
228:
229: int length = 0;
230: BOOL end_of_file = NO;
231: HTAtom * encoding = HTAtom_for("7bit");
232: int buffer_length = INIT_LINE_SIZE; /* Why not? */
233:
234: line_buffer = (char *) malloc(buffer_length * sizeof(char));
235: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
236:
1.7 timbl 237: do { /* Loop to read in the first line */
1.2 timbl 238:
239: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
240:
241: if (buffer_length - length < LINE_EXTEND_THRESH) {
242: buffer_length = buffer_length + buffer_length;
243: line_buffer = (char *) realloc(
244: line_buffer, buffer_length * sizeof(char));
245: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
246: }
247: status = NETREAD(s, line_buffer + length,
248: buffer_length - length -1);
249: if (status < 0) {
250: HTAlert("Unexpected network read error on response");
1.9 timbl 251: NETCLOSE(s);
1.2 timbl 252: return status;
253: }
1.10 ! timbl 254:
! 255: if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
! 256: status);
! 257:
1.2 timbl 258: if (status == 0) {
259: end_of_file = YES;
260: break;
261: }
262: line_buffer[length+status] = 0;
263: #ifdef NOT_ASCII
1.10 ! timbl 264: if (TRACE) fprintf(stderr, "Local codes CR=%d, LF=%d\n", CR, LF);
1.2 timbl 265: {
266: char * p;
267: for(p = line_buffer+length; *p; p++) {
268: *p = FROMASCII(*p);
269: }
270: }
271: #endif
1.3 timbl 272: eol = strchr(line_buffer + length, LF);
273: if (eol && *(eol-1) == CR) *(eol-1) = ' ';
1.2 timbl 274:
275: length = length + status;
276:
1.7 timbl 277: if (eol) *eol = 0; /* Terminate the line */
1.2 timbl 278:
1.7 timbl 279: } while (!eol && !end_of_file); /* No LF */
280:
281: } /* Scope of loop variables */
1.2 timbl 282:
1.7 timbl 283:
284: /* We now have a terminated unfolded line. Parse it.
285: ** -------------------------------------------------
1.2 timbl 286: */
287:
1.7 timbl 288: if (TRACE)fprintf(stderr, "HTTP: Rx: %s\n", line_buffer);
289:
290: {
291: int fields;
292: char server_version [VERSION_LENGTH+1];
293: int server_status;
294:
1.2 timbl 295:
296: /* Kludge to work with old buggy servers. They can't handle the third word
297: ** so we try again without it.
298: */
1.7 timbl 299: if (extensions &&
300: 0==strcmp(line_buffer, /* Old buggy server? */
301: "Document address invalid or access not authorised")) {
302: extensions = NO;
303: if (line_buffer) free(line_buffer);
304: if (TRACE) fprintf(stderr,
305: "HTTP: close socket %d to retry with HTTP0\n", s);
306: NETCLOSE(s);
307: goto retry; /* @@@@@@@@@@ */
308: }
1.2 timbl 309:
1.7 timbl 310: fields = sscanf(line_buffer, "%20s%d",
311: server_version,
312: &server_status);
313:
314: if (fields < 2) { /* HTTP0 reply */
315: format_in = WWW_HTML;
316: start_of_data = line_buffer; /* reread whole reply */
1.9 timbl 317: if (eol) *eol = '\n'; /* Reconstitute buffer */
1.2 timbl 318:
1.7 timbl 319: } else { /* Ful HTTP reply */
320:
321: /* Decode full HTTP response */
322:
1.3 timbl 323: format_in = HTAtom_for("www/mime");
1.7 timbl 324: start_of_data = eol ? eol + 1 : "";
1.3 timbl 325:
1.2 timbl 326: switch (server_status / 100) {
327:
1.3 timbl 328: default: /* bad number */
329: HTAlert("Unknown status reply from server!");
330: break;
331:
1.2 timbl 332: case 3: /* Various forms of redirection */
1.7 timbl 333: HTAlert(
1.3 timbl 334: "Redirection response from server is not handled by this client");
335: break;
336:
1.2 timbl 337: case 4: /* "I think I goofed" */
338: case 5: /* I think you goofed */
1.6 timbl 339: {
340: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
341: char * message = (char*)malloc(
1.7 timbl 342: strlen(line_buffer)+strlen(p1) + 100);
1.6 timbl 343: if (!message) outofmem(__FILE__, "HTTP 5xx status");
344: sprintf(message,
345: "HTTP server at %s replies:\n%s", p1, line_buffer);
1.8 timbl 346: status = HTLoadError(sink, server_status, message);
1.6 timbl 347: free(message);
348: free(p1);
349: goto clean_up;
350: }
1.3 timbl 351: break;
1.2 timbl 352:
353: case 2: /* Good: Got MIME object */
354: break;
355:
1.7 timbl 356: } /* switch on response code */
357:
358: } /* Full HTTP reply */
359:
360: } /* scope of fields */
1.2 timbl 361:
1.3 timbl 362: /* Set up the stream stack to handle the body of the message
363: */
364:
365: target = HTStreamStack(format_in,
366: format_out,
367: sink , anAnchor);
368:
369: if (!target) {
370: char buffer[1024]; /* @@@@@@@@ */
371: if (line_buffer) free(line_buffer);
372: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
373: HTAtom_name(format_in), HTAtom_name(format_out));
374: fprintf(stderr, "HTTP: %s", buffer);
1.6 timbl 375: status = HTLoadError(sink, 501, buffer);
376: goto clean_up;
1.3 timbl 377: }
378:
379:
380: /* Push the data, maybe ignoring CR, down the stream
381: ** We have to remember the end of the first buffer we just read
1.2 timbl 382: */
1.3 timbl 383: if (format_in != WWW_HTML) {
1.7 timbl 384: (*target->isa->put_string)(target, start_of_data);
1.3 timbl 385: HTCopy(s, target);
386:
387: } else { /* ascii text with CRLFs :-( */
1.7 timbl 388: {
1.3 timbl 389: char * p;
1.7 timbl 390: for (p = start_of_data; *p; p++)
1.3 timbl 391: if (*p != '\r') (*target->isa->put_character)(target, *p);
392: }
393: HTCopyNoCR(s, target);
1.2 timbl 394: }
1.3 timbl 395: (*target->isa->end_document)(target);
396: (*target->isa->free)(target);
1.8 timbl 397: status = HT_LOADED;
1.2 timbl 398:
399: /* Clean up
1.1 timbl 400: */
1.3 timbl 401:
1.6 timbl 402: clean_up:
1.2 timbl 403: if (line_buffer) free(line_buffer);
1.3 timbl 404:
1.1 timbl 405: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
1.6 timbl 406: (void) NETCLOSE(s);
1.1 timbl 407:
1.8 timbl 408: return status; /* Good return */
1.3 timbl 409:
1.1 timbl 410: }
1.7 timbl 411:
1.1 timbl 412:
413: /* Protocol descriptor: pairs the name "http" with the HTLoadHTTP routine; the third member is left 0.
414: */
415:
1.2 timbl 416: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };
Webmaster