Annotation of libwww/Library/src/HTTP.c, revision 1.6
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
17: /* Implements:
18: */
19: #include "HTTP.h"
20:
/*  Protocol constants
**
**  HTTP_VERSION is the version token appended to the request line.
**  HTTP2, when defined, enables sending that token at all.
*/
#define HTTP_VERSION "HTTP/1.0"
#define HTTP2                       /* Version is greater than 0.9 */

/*  Line terminators, written as octal ASCII codes and passed through
**  FROMASCII so they are expressed in the local character set.
*/
#define CR FROMASCII('\015')        /* Must be converted to ^M for transmission */
#define LF FROMASCII('\012')        /* Must be converted to ^J for transmission */

/*  Response line buffer tuning
*/
#define INIT_LINE_SIZE 1024         /* Start with line buffer this big */
#define LINE_EXTEND_THRESH 256      /* Minimum read size */
#define VERSION_LENGTH 20           /* for returned protocol version */
30:
31: /* Uses:
32: */
1.1 timbl 33: #include "HTParse.h"
34: #include "HTUtils.h"
35: #include "tcp.h"
36: #include "HTTCP.h"
37: #include "HTFormat.h"
1.2 timbl 38: #include <ctype.h>
39: #include "HTAlert.h"
40: #include "HTMIME.h"
1.5 timbl 41: #include "HTML.h" /* SCW */
42: #include "HTInit.h" /* SCW */
1.1 timbl 43:
1.2 timbl 44: struct _HTStream {
45: HTStreamClass * isa; /* all we need to know */
46: };
47:
48:
1.6 ! timbl 49: extern char * HTAppName; /* Application name: please supply */
! 50: extern char * HTAppVersion; /* Application version: please supply */
! 51:
1.1 timbl 52: /* Load Document from HTTP Server HTLoadHTTP()
53: ** ==============================
54: **
55: ** Given a hypertext address, this routine loads a document.
56: **
57: **
58: ** On entry,
59: ** arg is the hypertext reference of the article to be loaded.
60: ** gate is nill if no gateway, else the gateway address.
61: **
62: ** On exit,
63: ** returns >=0 If no error, a good socket number
64: ** <0 Error.
65: **
66: ** The socket must be closed by the caller after the document has been
67: ** read.
68: **
69: */
1.2 timbl 70: PUBLIC int HTLoadHTTP ARGS4 (
71: CONST char *, arg,
72: /* CONST char *, gate, */
73: HTParentAnchor *, anAnchor,
74: HTFormat, format_out,
75: HTStream*, sink)
1.1 timbl 76: {
77: int s; /* Socket number for returned data */
78: char *command; /* The whole command */
1.3 timbl 79: char * eol = 0; /* End of line if found */
1.1 timbl 80: int status; /* tcp return */
1.3 timbl 81: HTStream * target = NULL; /* Unconverted data */
82: HTFormat format_in; /* Format arriving in the message */
83:
1.2 timbl 84: CONST char* gate = 0; /* disable this feature */
1.1 timbl 85: SockA soc_address; /* Binary network address */
86: SockA * sin = &soc_address;
1.2 timbl 87: BOOL had_header = NO; /* Have we had at least one header? */
88: char * line_buffer = NULL;
89: BOOL extensions = YES; /* Assume good HTTP server */
1.1 timbl 90: if (!arg) return -3; /* Bad if no name sepcified */
91: if (!*arg) return -2; /* Bad if name had zero length */
92:
93: /* Set up defaults:
94: */
95: #ifdef DECNET
1.2 timbl 96: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
97: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 98: #else /* Internet */
1.2 timbl 99: sin->sin_family = AF_INET; /* Family = internet, host order */
100: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 101: #endif
102:
103: if (TRACE) {
104: if (gate) fprintf(stderr,
105: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
106: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
107: }
108:
109: /* Get node name and optional port number:
110: */
111: {
112: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
113: int status = HTParseInet(sin, p1); /* TBL 920622 */
114: free(p1);
115: if (status) return status; /* No such host for example */
116: }
117:
1.2 timbl 118: retry:
1.1 timbl 119:
120: /* Now, let's get a socket set up from the server for the sgml data:
121: */
122: #ifdef DECNET
123: s = socket(AF_DECnet, SOCK_STREAM, 0);
124: #else
125: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
126: #endif
127: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
128: if (status < 0) {
129: #ifndef DECNET
130: /* This code is temporary backward-compatibility. It should
131: go away when no server runs on port 2784 alone */
132: if (sin->sin_port == htons(TCP_PORT)) { /* Try the old one */
133: if (TRACE) printf (
134: "HTTP: Port %d doesn't answer (errno = %d). Trying good old port %d...\n",
135: TCP_PORT, errno, OLD_TCP_PORT);
136: sin->sin_port = htons(OLD_TCP_PORT);
137: /* First close current socket and open a clean one */
138: status = NETCLOSE (s);
139: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
140: status = connect(s, (struct sockaddr*)&soc_address,
141: sizeof(soc_address));
142: }
143: if (status < 0)
144: #endif
145: {
146: if (TRACE) fprintf(stderr,
147: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
148: /* free(command); BUG OUT TBL 921121 */
149: return HTInetStatus("connect");
150: }
151: }
152:
153: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
154:
155: /* Ask that node for the document,
156: ** omitting the host name & anchor if not gatewayed.
157: */
158: if (gate) {
1.2 timbl 159: command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 160: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
161: strcpy(command, "GET ");
162: strcat(command, arg);
163: } else { /* not gatewayed */
164: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 165: command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 166: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
167: strcpy(command, "GET ");
168: strcat(command, p1);
169: free(p1);
170: }
1.2 timbl 171: #ifdef HTTP2
172: if (extensions) {
173: strcat(command, " ");
174: strcat(command, HTTP_VERSION);
175: }
176: #endif
1.3 timbl 177: {
178: char * p = command + strlen(command);
179: *p++ = CR; /* Macros to be correct on Mac */
180: *p++ = LF;
181: *p++ = 0;
182: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
183: }
1.1 timbl 184:
1.2 timbl 185: if (extensions) {
186:
187: int n;
188: int i;
189: HTAtom * present = WWW_PRESENT;
190: char line[256]; /*@@@@ */
191:
192: if (!HTPresentations) HTFormatInit();
193: n = HTList_count(HTPresentations);
194:
195: for(i=0; i<n; i++) {
196: HTPresentation * pres = HTList_objectAt(HTPresentations, i);
197: if (pres->rep_out == present) {
198: if (pres->quality != 1.0) {
1.3 timbl 199: sprintf(line, "Accept: %s q=%.3f%c%c",
200: HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 201: } else {
1.3 timbl 202: sprintf(line, "Accept: %s%c%c",
203: HTAtom_name(pres->rep), CR, LF);
1.2 timbl 204: }
205: StrAllocCat(command, line);
206:
207: }
208: }
1.6 ! timbl 209:
! 210: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
! 211: HTAppName ? HTAppName : "unknown",
! 212: HTAppVersion ? HTAppVersion : "0.0",
! 213: HTLibraryVersion, CR, LF);
! 214: StrAllocCat(command, line);
1.2 timbl 215: }
1.3 timbl 216:
1.4 timbl 217: StrAllocCat(command, "015円012円"); /* Blank line means "end" */
218: #ifdef NOT_ASCII
1.1 timbl 219: {
220: char * p;
221: for(p = command; *p; p++) {
222: *p = TOASCII(*p);
223: }
1.4 timbl 224: }
1.3 timbl 225: #endif
1.1 timbl 226:
1.2 timbl 227: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
1.1 timbl 228: status = NETWRITE(s, command, (int)strlen(command));
229: free(command);
230: if (status<0) {
231: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
232: return HTInetStatus("send");
233: }
234:
1.2 timbl 235:
236: /* Now load the data: HTTP2 response parse
237: */
1.3 timbl 238:
239: format_in = WWW_HTML; /* default */
1.2 timbl 240: {
241:
242: /* Get numeric status etc */
243:
244: int status;
245: int length = 0;
246: BOOL end_of_file = NO;
247: HTAtom * encoding = HTAtom_for("7bit");
248: int buffer_length = INIT_LINE_SIZE; /* Why not? */
249:
250: line_buffer = (char *) malloc(buffer_length * sizeof(char));
251: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
252:
253: for(;;) {
254:
255: int fields;
256: char server_version [VERSION_LENGTH+1];
257: int server_status;
258:
259: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
260:
261: if (buffer_length - length < LINE_EXTEND_THRESH) {
262: buffer_length = buffer_length + buffer_length;
263: line_buffer = (char *) realloc(
264: line_buffer, buffer_length * sizeof(char));
265: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
266: }
267: status = NETREAD(s, line_buffer + length,
268: buffer_length - length -1);
269: if (status < 0) {
270: HTAlert("Unexpected network read error on response");
271: return status;
272: }
273: if (status == 0) {
274: end_of_file = YES;
275: break;
276: }
277: line_buffer[length+status] = 0;
278: #ifdef NOT_ASCII
279: {
280: char * p;
281: for(p = line_buffer+length; *p; p++) {
282: *p = FROMASCII(*p);
283: }
284: }
285: #endif
1.3 timbl 286: eol = strchr(line_buffer + length, LF);
287: if (eol && *(eol-1) == CR) *(eol-1) = ' ';
1.2 timbl 288:
289: length = length + status;
290:
291: if (!eol && !end_of_file) continue; /* No LF */
292:
293: *eol = 0; /* Terminate the line */
294:
295:
296: /* We now have a terminated unfolded line.
297: */
298:
299: if (TRACE)fprintf(stderr, "HTTP: Rx: %s\n", line_buffer);
300:
301: /* Kludge to work with old buggy servers. They can't handle the third word
302: ** so we try again without it.
303: */
304: if (extensions &&
305: 0==strcmp(line_buffer, /* Old buggy server? */
306: "Document address invalid or access not authorised")) {
307: extensions = NO;
308: if (line_buffer) free(line_buffer);
309: if (TRACE) fprintf(stderr,
310: "HTTP: close socket %d to retry with HTTP0\n", s);
311: NETCLOSE(s);
312: goto retry; /* @@@@@@@@@@ */
313: }
314:
315: fields = sscanf(line_buffer, "%20s%d",
316: server_version,
317: &server_status);
318:
319: if (fields < 2) break;
320:
1.3 timbl 321: format_in = HTAtom_for("www/mime");
322:
1.2 timbl 323: switch (server_status / 100) {
324:
1.3 timbl 325: default: /* bad number */
326: HTAlert("Unknown status reply from server!");
327: break;
328:
1.2 timbl 329: case 3: /* Various forms of redirection */
1.3 timbl 330: HTAlert(
331: "Redirection response from server is not handled by this client");
332: break;
333:
1.2 timbl 334: case 4: /* "I think I goofed" */
335: case 5: /* I think you goofed */
1.6 ! timbl 336: {
! 337: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
! 338: char * message = (char*)malloc(
! 339: strlen(line_buffer)+strlen(p1) + 100);
! 340: if (!message) outofmem(__FILE__, "HTTP 5xx status");
! 341: sprintf(message,
! 342: "HTTP server at %s replies:\n%s", p1, line_buffer);
! 343: status = HTLoadError(sink, server_status, message);
! 344: free(message);
! 345: free(p1);
! 346: goto clean_up;
! 347: }
1.3 timbl 348: break;
1.2 timbl 349:
350: case 2: /* Good: Got MIME object */
351: break;
352:
353: }
354:
355: break; /* Get out of for loop */
356:
357: } /* Loop over lines */
358: } /* Scope of HTTP2 handling block */
359:
1.3 timbl 360: /* Set up the stream stack to handle the body of the message
361: */
362:
363: target = HTStreamStack(format_in,
364: format_out,
365: sink , anAnchor);
366:
367: if (!target) {
368: char buffer[1024]; /* @@@@@@@@ */
369: if (line_buffer) free(line_buffer);
370: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
371: HTAtom_name(format_in), HTAtom_name(format_out));
372: fprintf(stderr, "HTTP: %s", buffer);
1.6 ! timbl 373: status = HTLoadError(sink, 501, buffer);
! 374: goto clean_up;
1.3 timbl 375: }
376:
377:
378: /* Push the data, maybe ignoring CR, down the stream
379: ** We have to remember the end of the first buffer we just read
1.2 timbl 380: */
1.3 timbl 381: if (format_in != WWW_HTML) {
382: if (eol) (*target->isa->put_string)(target, eol+1);
383: HTCopy(s, target);
384:
385: } else { /* ascii text with CRLFs :-( */
386: if (eol) {
387: char * p;
388: for (p = eol+1; *p; p++)
389: if (*p != '\r') (*target->isa->put_character)(target, *p);
390: }
391: HTCopyNoCR(s, target);
1.2 timbl 392: }
1.3 timbl 393: (*target->isa->end_document)(target);
394: (*target->isa->free)(target);
395:
1.2 timbl 396:
397: /* Clean up
1.1 timbl 398: */
1.6 ! timbl 399: status = HT_LOADED;
1.3 timbl 400:
1.6 ! timbl 401: clean_up:
1.2 timbl 402: if (line_buffer) free(line_buffer);
1.3 timbl 403:
1.1 timbl 404: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
1.6 ! timbl 405: (void) NETCLOSE(s);
1.1 timbl 406:
1.6 ! timbl 407: return status; /* Good return */
1.3 timbl 408:
1.1 timbl 409: }
410:
411: /* Protocol descriptor
412: */
413:
1.2 timbl 414: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };
Webmaster