Annotation of libwww/Library/src/HTTP.c, revision 1.4
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
17: /* Implements:
18: */
19: #include "HTTP.h"
20:
/*	Protocol version sent on the request line, and the switch that
**	enables sending it at all.
*/
#define HTTP_VERSION        "HTTP/1.0"
#define HTTP2                       /* Version is greater than 0.9 */

/*	Line terminators, given as ASCII octal codes and mapped into the
**	local character set by FROMASCII so that they come out as ^M and ^J
**	once the request has been converted back for transmission.
*/
#define CR   FROMASCII('\015')      /* Must be converted to ^M for transmission */
#define LF   FROMASCII('\012')      /* Must be converted to ^J for transmission */

/*	Sizes used while reading the first line of the reply.
*/
#define INIT_LINE_SIZE      1024    /* Start with line buffer this big */
#define LINE_EXTEND_THRESH  256     /* Minimum read size */
#define VERSION_LENGTH      20      /* for returned protocol version */
30:
31: /* Uses:
32: */
1.1 timbl 33: #include "HTParse.h"
34: #include "HTUtils.h"
35: #include "tcp.h"
36: #include "HTTCP.h"
37: #include "HTFormat.h"
1.2 timbl 38: #include <ctype.h>
39: #include "HTAlert.h"
40: #include "HTMIME.h"
1.1 timbl 41:
1.2 timbl 42: struct _HTStream {
43: HTStreamClass * isa; /* all we need to know */
44: };
45:
46:
1.1 timbl 47: /* Load Document from HTTP Server HTLoadHTTP()
48: ** ==============================
49: **
50: ** Given a hypertext address, this routine loads a document.
51: **
52: **
53: ** On entry,
54: ** arg is the hypertext reference of the article to be loaded.
55: ** gate is nill if no gateway, else the gateway address.
56: **
57: ** On exit,
58: ** returns >=0 If no error, a good socket number
59: ** <0 Error.
60: **
61: ** The socket must be closed by the caller after the document has been
62: ** read.
63: **
64: */
1.2 timbl 65: PUBLIC int HTLoadHTTP ARGS4 (
66: CONST char *, arg,
67: /* CONST char *, gate, */
68: HTParentAnchor *, anAnchor,
69: HTFormat, format_out,
70: HTStream*, sink)
1.1 timbl 71: {
72: int s; /* Socket number for returned data */
73: char *command; /* The whole command */
1.3 timbl 74: char * eol = 0; /* End of line if found */
1.1 timbl 75: int status; /* tcp return */
1.3 timbl 76: HTStream * target = NULL; /* Unconverted data */
77: HTFormat format_in; /* Format arriving in the message */
78:
1.2 timbl 79: CONST char* gate = 0; /* disable this feature */
1.1 timbl 80: SockA soc_address; /* Binary network address */
81: SockA * sin = &soc_address;
1.2 timbl 82: BOOL had_header = NO; /* Have we had at least one header? */
83: char * line_buffer = NULL;
84: BOOL extensions = YES; /* Assume good HTTP server */
1.1 timbl 85: if (!arg) return -3; /* Bad if no name sepcified */
86: if (!*arg) return -2; /* Bad if name had zero length */
87:
88: /* Set up defaults:
89: */
90: #ifdef DECNET
1.2 timbl 91: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
92: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 93: #else /* Internet */
1.2 timbl 94: sin->sin_family = AF_INET; /* Family = internet, host order */
95: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 96: #endif
97:
98: if (TRACE) {
99: if (gate) fprintf(stderr,
100: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
101: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
102: }
103:
104: /* Get node name and optional port number:
105: */
106: {
107: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
108: int status = HTParseInet(sin, p1); /* TBL 920622 */
109: free(p1);
110: if (status) return status; /* No such host for example */
111: }
112:
1.2 timbl 113: retry:
1.1 timbl 114:
115: /* Now, let's get a socket set up from the server for the sgml data:
116: */
117: #ifdef DECNET
118: s = socket(AF_DECnet, SOCK_STREAM, 0);
119: #else
120: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
121: #endif
122: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
123: if (status < 0) {
124: #ifndef DECNET
125: /* This code is temporary backward-compatibility. It should
126: go away when no server runs on port 2784 alone */
127: if (sin->sin_port == htons(TCP_PORT)) { /* Try the old one */
128: if (TRACE) printf (
129: "HTTP: Port %d doesn't answer (errno = %d). Trying good old port %d...\n",
130: TCP_PORT, errno, OLD_TCP_PORT);
131: sin->sin_port = htons(OLD_TCP_PORT);
132: /* First close current socket and open a clean one */
133: status = NETCLOSE (s);
134: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
135: status = connect(s, (struct sockaddr*)&soc_address,
136: sizeof(soc_address));
137: }
138: if (status < 0)
139: #endif
140: {
141: if (TRACE) fprintf(stderr,
142: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
143: /* free(command); BUG OUT TBL 921121 */
144: return HTInetStatus("connect");
145: }
146: }
147:
148: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
149:
150: /* Ask that node for the document,
151: ** omitting the host name & anchor if not gatewayed.
152: */
153: if (gate) {
1.2 timbl 154: command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 155: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
156: strcpy(command, "GET ");
157: strcat(command, arg);
158: } else { /* not gatewayed */
159: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 160: command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 161: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
162: strcpy(command, "GET ");
163: strcat(command, p1);
164: free(p1);
165: }
1.2 timbl 166: #ifdef HTTP2
167: if (extensions) {
168: strcat(command, " ");
169: strcat(command, HTTP_VERSION);
170: }
171: #endif
1.3 timbl 172: {
173: char * p = command + strlen(command);
174: *p++ = CR; /* Macros to be correct on Mac */
175: *p++ = LF;
176: *p++ = 0;
177: /* strcat(command, "\r\n"); */ /* CR LF, as in rfc 977 */
178: }
1.1 timbl 179:
1.2 timbl 180: if (extensions) {
181:
182: int n;
183: int i;
184: HTAtom * present = WWW_PRESENT;
185: char line[256]; /*@@@@ */
186:
187: if (!HTPresentations) HTFormatInit();
188: n = HTList_count(HTPresentations);
189:
190: for(i=0; i<n; i++) {
191: HTPresentation * pres = HTList_objectAt(HTPresentations, i);
192: if (pres->rep_out == present) {
193: if (pres->quality != 1.0) {
1.3 timbl 194: sprintf(line, "Accept: %s q=%.3f%c%c",
195: HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 196: } else {
1.3 timbl 197: sprintf(line, "Accept: %s%c%c",
198: HTAtom_name(pres->rep), CR, LF);
1.2 timbl 199: }
200: StrAllocCat(command, line);
201:
202: }
203: }
204: }
1.3 timbl 205:
1.4 ! timbl 206: StrAllocCat(command, "015円012円"); /* Blank line means "end" */
! 207: #ifdef NOT_ASCII
1.1 timbl 208: {
209: char * p;
210: for(p = command; *p; p++) {
211: *p = TOASCII(*p);
212: }
1.4 ! timbl 213: }
1.3 timbl 214: #endif
1.1 timbl 215:
1.2 timbl 216: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
1.1 timbl 217: status = NETWRITE(s, command, (int)strlen(command));
218: free(command);
219: if (status<0) {
220: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
221: return HTInetStatus("send");
222: }
223:
1.2 timbl 224:
225: /* Now load the data: HTTP2 response parse
226: */
1.3 timbl 227:
228: format_in = WWW_HTML; /* default */
1.2 timbl 229: {
230:
231: /* Get numeric status etc */
232:
233: int status;
234: int length = 0;
235: BOOL end_of_file = NO;
236: HTAtom * encoding = HTAtom_for("7bit");
237: int buffer_length = INIT_LINE_SIZE; /* Why not? */
238:
239: line_buffer = (char *) malloc(buffer_length * sizeof(char));
240: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
241:
242: for(;;) {
243:
244: int fields;
245: char server_version [VERSION_LENGTH+1];
246: int server_status;
247:
248: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
249:
250: if (buffer_length - length < LINE_EXTEND_THRESH) {
251: buffer_length = buffer_length + buffer_length;
252: line_buffer = (char *) realloc(
253: line_buffer, buffer_length * sizeof(char));
254: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
255: }
256: status = NETREAD(s, line_buffer + length,
257: buffer_length - length -1);
258: if (status < 0) {
259: HTAlert("Unexpected network read error on response");
260: return status;
261: }
262: if (status == 0) {
263: end_of_file = YES;
264: break;
265: }
266: line_buffer[length+status] = 0;
267: #ifdef NOT_ASCII
268: {
269: char * p;
270: for(p = line_buffer+length; *p; p++) {
271: *p = FROMASCII(*p);
272: }
273: }
274: #endif
1.3 timbl 275: eol = strchr(line_buffer + length, LF);
276: if (eol && *(eol-1) == CR) *(eol-1) = ' ';
1.2 timbl 277:
278: length = length + status;
279:
280: if (!eol && !end_of_file) continue; /* No LF */
281:
282: *eol = 0; /* Terminate the line */
283:
284:
285: /* We now have a terminated unfolded line.
286: */
287:
288: if (TRACE)fprintf(stderr, "HTTP: Rx: %s\n", line_buffer);
289:
290: /* Kludge to work with old buggy servers. They can't handle the third word
291: ** so we try again without it.
292: */
293: if (extensions &&
294: 0==strcmp(line_buffer, /* Old buggy server? */
295: "Document address invalid or access not authorised")) {
296: extensions = NO;
297: if (line_buffer) free(line_buffer);
298: if (TRACE) fprintf(stderr,
299: "HTTP: close socket %d to retry with HTTP0\n", s);
300: NETCLOSE(s);
301: goto retry; /* @@@@@@@@@@ */
302: }
303:
304: fields = sscanf(line_buffer, "%20s%d",
305: server_version,
306: &server_status);
307:
308: if (fields < 2) break;
309:
1.3 timbl 310: format_in = HTAtom_for("www/mime");
311:
1.2 timbl 312: switch (server_status / 100) {
313:
1.3 timbl 314: default: /* bad number */
315: HTAlert("Unknown status reply from server!");
316: break;
317:
1.2 timbl 318: case 3: /* Various forms of redirection */
1.3 timbl 319: HTAlert(
320: "Redirection response from server is not handled by this client");
321: break;
322:
1.2 timbl 323: case 4: /* "I think I goofed" */
324: case 5: /* I think you goofed */
1.3 timbl 325: HTAlert("Error response from server");
326: break;
1.2 timbl 327:
328: case 2: /* Good: Got MIME object */
329: break;
330:
331: }
332:
333: break; /* Get out of for loop */
334:
335: } /* Loop over lines */
336: } /* Scope of HTTP2 handling block */
337:
1.3 timbl 338: /* Set up the stream stack to handle the body of the message
339: */
340:
341: target = HTStreamStack(format_in,
342: format_out,
343: sink , anAnchor);
344:
345: if (!target) {
346: char buffer[1024]; /* @@@@@@@@ */
347: if (line_buffer) free(line_buffer);
348: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
349: HTAtom_name(format_in), HTAtom_name(format_out));
350: fprintf(stderr, "HTTP: %s", buffer);
351: return HTLoadError(sink, 501, buffer);
352: }
353:
354:
355: /* Push the data, maybe ignoring CR, down the stream
356: ** We have to remember the end of the first buffer we just read
1.2 timbl 357: */
1.3 timbl 358: if (format_in != WWW_HTML) {
359: if (eol) (*target->isa->put_string)(target, eol+1);
360: HTCopy(s, target);
361:
362: } else { /* ascii text with CRLFs :-( */
363: if (eol) {
364: char * p;
365: for (p = eol+1; *p; p++)
366: if (*p != '\r') (*target->isa->put_character)(target, *p);
367: }
368: HTCopyNoCR(s, target);
1.2 timbl 369: }
1.3 timbl 370: (*target->isa->end_document)(target);
371: (*target->isa->free)(target);
372:
1.2 timbl 373:
374: /* Clean up
1.1 timbl 375: */
1.3 timbl 376:
1.2 timbl 377: if (line_buffer) free(line_buffer);
1.3 timbl 378:
1.1 timbl 379: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
380: status = NETCLOSE(s);
381:
382: return HT_LOADED; /* Good return */
1.3 timbl 383:
1.1 timbl 384: }
385:
386: /* Protocol descriptor
387: */
388:
1.2 timbl 389: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };
Webmaster