Annotation of libwww/Library/src/HTTP.c, revision 1.5
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
17: /* Implements:
18: */
19: #include "HTTP.h"
20:
/* Protocol version token appended to the GET request line. */
#define HTTP_VERSION	"HTTP/1.0"
#define HTTP2				/* Version is greater than 0.9 */

/* Line terminators, written as octal escapes and passed through FROMASCII
** so that the values are in the local character set.
*/
#define CR   FROMASCII('\015')	/* Must be converted to ^M for transmission */
#define LF   FROMASCII('\012')	/* Must be converted to ^J for transmission */

#define INIT_LINE_SIZE		1024	/* Start with line buffer this big */
#define LINE_EXTEND_THRESH	256	/* Minimum read size */
#define VERSION_LENGTH		20	/* for returned protocol version */
30:
31: /* Uses:
32: */
1.1 timbl 33: #include "HTParse.h"
34: #include "HTUtils.h"
35: #include "tcp.h"
36: #include "HTTCP.h"
37: #include "HTFormat.h"
1.2 timbl 38: #include <ctype.h>
39: #include "HTAlert.h"
40: #include "HTMIME.h"
1.5 ! timbl 41: #include "HTML.h" /* SCW */
! 42: #include "HTInit.h" /* SCW */
1.1 timbl 43:
1.2 timbl 44: struct _HTStream {
45: HTStreamClass * isa; /* all we need to know */
46: };
47:
48:
1.1 timbl 49: /* Load Document from HTTP Server HTLoadHTTP()
50: ** ==============================
51: **
52: ** Given a hypertext address, this routine loads a document.
53: **
54: **
55: ** On entry,
56: ** arg is the hypertext reference of the article to be loaded.
57: ** gate is nill if no gateway, else the gateway address.
58: **
59: ** On exit,
60: ** returns >=0 If no error, a good socket number
61: ** <0 Error.
62: **
63: ** The socket must be closed by the caller after the document has been
64: ** read.
65: **
66: */
1.2 timbl 67: PUBLIC int HTLoadHTTP ARGS4 (
68: CONST char *, arg,
69: /* CONST char *, gate, */
70: HTParentAnchor *, anAnchor,
71: HTFormat, format_out,
72: HTStream*, sink)
1.1 timbl 73: {
74: int s; /* Socket number for returned data */
75: char *command; /* The whole command */
1.3 timbl 76: char * eol = 0; /* End of line if found */
1.1 timbl 77: int status; /* tcp return */
1.3 timbl 78: HTStream * target = NULL; /* Unconverted data */
79: HTFormat format_in; /* Format arriving in the message */
80:
1.2 timbl 81: CONST char* gate = 0; /* disable this feature */
1.1 timbl 82: SockA soc_address; /* Binary network address */
83: SockA * sin = &soc_address;
1.2 timbl 84: BOOL had_header = NO; /* Have we had at least one header? */
85: char * line_buffer = NULL;
86: BOOL extensions = YES; /* Assume good HTTP server */
1.1 timbl 87: if (!arg) return -3; /* Bad if no name sepcified */
88: if (!*arg) return -2; /* Bad if name had zero length */
89:
90: /* Set up defaults:
91: */
92: #ifdef DECNET
1.2 timbl 93: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
94: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 95: #else /* Internet */
1.2 timbl 96: sin->sin_family = AF_INET; /* Family = internet, host order */
97: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 98: #endif
99:
100: if (TRACE) {
101: if (gate) fprintf(stderr,
102: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
103: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
104: }
105:
106: /* Get node name and optional port number:
107: */
108: {
109: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
110: int status = HTParseInet(sin, p1); /* TBL 920622 */
111: free(p1);
112: if (status) return status; /* No such host for example */
113: }
114:
1.2 timbl 115: retry:
1.1 timbl 116:
117: /* Now, let's get a socket set up from the server for the sgml data:
118: */
119: #ifdef DECNET
120: s = socket(AF_DECnet, SOCK_STREAM, 0);
121: #else
122: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
123: #endif
124: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
125: if (status < 0) {
126: #ifndef DECNET
127: /* This code is temporary backward-compatibility. It should
128: go away when no server runs on port 2784 alone */
129: if (sin->sin_port == htons(TCP_PORT)) { /* Try the old one */
130: if (TRACE) printf (
131: "HTTP: Port %d doesn't answer (errno = %d). Trying good old port %d...\n",
132: TCP_PORT, errno, OLD_TCP_PORT);
133: sin->sin_port = htons(OLD_TCP_PORT);
134: /* First close current socket and open a clean one */
135: status = NETCLOSE (s);
136: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
137: status = connect(s, (struct sockaddr*)&soc_address,
138: sizeof(soc_address));
139: }
140: if (status < 0)
141: #endif
142: {
143: if (TRACE) fprintf(stderr,
144: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
145: /* free(command); BUG OUT TBL 921121 */
146: return HTInetStatus("connect");
147: }
148: }
149:
150: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
151:
152: /* Ask that node for the document,
153: ** omitting the host name & anchor if not gatewayed.
154: */
155: if (gate) {
1.2 timbl 156: command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 157: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
158: strcpy(command, "GET ");
159: strcat(command, arg);
160: } else { /* not gatewayed */
161: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 162: command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 163: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
164: strcpy(command, "GET ");
165: strcat(command, p1);
166: free(p1);
167: }
1.2 timbl 168: #ifdef HTTP2
169: if (extensions) {
170: strcat(command, " ");
171: strcat(command, HTTP_VERSION);
172: }
173: #endif
1.3 timbl 174: {
175: char * p = command + strlen(command);
176: *p++ = CR; /* Macros to be correct on Mac */
177: *p++ = LF;
178: *p++ = 0;
179: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
180: }
1.1 timbl 181:
1.2 timbl 182: if (extensions) {
183:
184: int n;
185: int i;
186: HTAtom * present = WWW_PRESENT;
187: char line[256]; /*@@@@ */
188:
189: if (!HTPresentations) HTFormatInit();
190: n = HTList_count(HTPresentations);
191:
192: for(i=0; i<n; i++) {
193: HTPresentation * pres = HTList_objectAt(HTPresentations, i);
194: if (pres->rep_out == present) {
195: if (pres->quality != 1.0) {
1.3 timbl 196: sprintf(line, "Accept: %s q=%.3f%c%c",
197: HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 198: } else {
1.3 timbl 199: sprintf(line, "Accept: %s%c%c",
200: HTAtom_name(pres->rep), CR, LF);
1.2 timbl 201: }
202: StrAllocCat(command, line);
203:
204: }
205: }
206: }
1.3 timbl 207:
1.4 timbl 208: StrAllocCat(command, "015円012円"); /* Blank line means "end" */
209: #ifdef NOT_ASCII
1.1 timbl 210: {
211: char * p;
212: for(p = command; *p; p++) {
213: *p = TOASCII(*p);
214: }
1.4 timbl 215: }
1.3 timbl 216: #endif
1.1 timbl 217:
1.2 timbl 218: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
1.1 timbl 219: status = NETWRITE(s, command, (int)strlen(command));
220: free(command);
221: if (status<0) {
222: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
223: return HTInetStatus("send");
224: }
225:
1.2 timbl 226:
227: /* Now load the data: HTTP2 response parse
228: */
1.3 timbl 229:
230: format_in = WWW_HTML; /* default */
1.2 timbl 231: {
232:
233: /* Get numeric status etc */
234:
235: int status;
236: int length = 0;
237: BOOL end_of_file = NO;
238: HTAtom * encoding = HTAtom_for("7bit");
239: int buffer_length = INIT_LINE_SIZE; /* Why not? */
240:
241: line_buffer = (char *) malloc(buffer_length * sizeof(char));
242: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
243:
244: for(;;) {
245:
246: int fields;
247: char server_version [VERSION_LENGTH+1];
248: int server_status;
249:
250: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
251:
252: if (buffer_length - length < LINE_EXTEND_THRESH) {
253: buffer_length = buffer_length + buffer_length;
254: line_buffer = (char *) realloc(
255: line_buffer, buffer_length * sizeof(char));
256: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
257: }
258: status = NETREAD(s, line_buffer + length,
259: buffer_length - length -1);
260: if (status < 0) {
261: HTAlert("Unexpected network read error on response");
262: return status;
263: }
264: if (status == 0) {
265: end_of_file = YES;
266: break;
267: }
268: line_buffer[length+status] = 0;
269: #ifdef NOT_ASCII
270: {
271: char * p;
272: for(p = line_buffer+length; *p; p++) {
273: *p = FROMASCII(*p);
274: }
275: }
276: #endif
1.3 timbl 277: eol = strchr(line_buffer + length, LF);
278: if (eol && *(eol-1) == CR) *(eol-1) = ' ';
1.2 timbl 279:
280: length = length + status;
281:
282: if (!eol && !end_of_file) continue; /* No LF */
283:
284: *eol = 0; /* Terminate the line */
285:
286:
287: /* We now have a terminated unfolded line.
288: */
289:
290: if (TRACE)fprintf(stderr, "HTTP: Rx: %s\n", line_buffer);
291:
292: /* Kludge to work with old buggy servers. They can't handle the third word
293: ** so we try again without it.
294: */
295: if (extensions &&
296: 0==strcmp(line_buffer, /* Old buggy server? */
297: "Document address invalid or access not authorised")) {
298: extensions = NO;
299: if (line_buffer) free(line_buffer);
300: if (TRACE) fprintf(stderr,
301: "HTTP: close socket %d to retry with HTTP0\n", s);
302: NETCLOSE(s);
303: goto retry; /* @@@@@@@@@@ */
304: }
305:
306: fields = sscanf(line_buffer, "%20s%d",
307: server_version,
308: &server_status);
309:
310: if (fields < 2) break;
311:
1.3 timbl 312: format_in = HTAtom_for("www/mime");
313:
1.2 timbl 314: switch (server_status / 100) {
315:
1.3 timbl 316: default: /* bad number */
317: HTAlert("Unknown status reply from server!");
318: break;
319:
1.2 timbl 320: case 3: /* Various forms of redirection */
1.3 timbl 321: HTAlert(
322: "Redirection response from server is not handled by this client");
323: break;
324:
1.2 timbl 325: case 4: /* "I think I goofed" */
326: case 5: /* I think you goofed */
1.3 timbl 327: HTAlert("Error response from server");
328: break;
1.2 timbl 329:
330: case 2: /* Good: Got MIME object */
331: break;
332:
333: }
334:
335: break; /* Get out of for loop */
336:
337: } /* Loop over lines */
338: } /* Scope of HTTP2 handling block */
339:
1.3 timbl 340: /* Set up the stream stack to handle the body of the message
341: */
342:
343: target = HTStreamStack(format_in,
344: format_out,
345: sink , anAnchor);
346:
347: if (!target) {
348: char buffer[1024]; /* @@@@@@@@ */
349: if (line_buffer) free(line_buffer);
350: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
351: HTAtom_name(format_in), HTAtom_name(format_out));
352: fprintf(stderr, "HTTP: %s", buffer);
353: return HTLoadError(sink, 501, buffer);
354: }
355:
356:
357: /* Push the data, maybe ignoring CR, down the stream
358: ** We have to remember the end of the first buffer we just read
1.2 timbl 359: */
1.3 timbl 360: if (format_in != WWW_HTML) {
361: if (eol) (*target->isa->put_string)(target, eol+1);
362: HTCopy(s, target);
363:
364: } else { /* ascii text with CRLFs :-( */
365: if (eol) {
366: char * p;
367: for (p = eol+1; *p; p++)
368: if (*p != '\r') (*target->isa->put_character)(target, *p);
369: }
370: HTCopyNoCR(s, target);
1.2 timbl 371: }
1.3 timbl 372: (*target->isa->end_document)(target);
373: (*target->isa->free)(target);
374:
1.2 timbl 375:
376: /* Clean up
1.1 timbl 377: */
1.3 timbl 378:
1.2 timbl 379: if (line_buffer) free(line_buffer);
1.3 timbl 380:
1.1 timbl 381: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
382: status = NETCLOSE(s);
383:
384: return HT_LOADED; /* Good return */
1.3 timbl 385:
1.1 timbl 386: }
387:
388: /* Protocol descriptor
389: */
390:
1.2 timbl 391: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };
Webmaster