[BACK] Return to HTTP.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTTP.c, revision 1.11

1.1 timbl 1: /*   HyperText Transfer Protocol   - Client implementation     HTTP.c
 2: **   ==========================
1.2 timbl 3: **
 4: ** Bugs:
 5: **   Not implemented:
 6: **       Forward
 7: **       Redirection
 8: **       Error handling
1.1 timbl 9: */
 10: 
 11: /*   Module parameters:
 12: **   -----------------
 13: **
 14: ** These may be undefined and redefined by syspec.h
 15: */
1.2 timbl 16: 
 17: /* Implements:
 18: */
 19: #include "HTTP.h"
 20: 
 21: #define HTTP_VERSION  "HTTP/1.0"
 22: #define HTTP2             /* Version is greater than 0.9 */
 23: 
 24: #define INIT_LINE_SIZE     1024  /* Start with line buffer this big */
 25: #define LINE_EXTEND_THRESH   256   /* Minimum read size */
 26: #define VERSION_LENGTH         20   /* for returned protocol version */
 27: 
 28: /* Uses:
 29: */
1.1 timbl 30: #include "HTParse.h"
 31: #include "HTUtils.h"
 32: #include "tcp.h"
 33: #include "HTTCP.h"
 34: #include "HTFormat.h"
1.2 timbl 35: #include <ctype.h>
 36: #include "HTAlert.h"
 37: #include "HTMIME.h"
1.5 timbl 38: #include "HTML.h"       /* SCW */
 39: #include "HTInit.h"      /* SCW */
1.1 timbl 40: 
1.2 timbl 41: struct _HTStream {
 42:    HTStreamClass * isa;      /* stream class (method table); the only member this module dereferences, for put_block and free */
 43: };
 44: 
 45: 
1.6 timbl 46: extern char * HTAppName;    /* Application name, defined outside this file; "unknown" is sent in User-Agent when null */
 47: extern char * HTAppVersion;  /* Application version, defined outside this file; "0.0" is sent in User-Agent when null */
 48: 
1.1 timbl 49: /*       Load Document from HTTP Server         HTLoadHTTP()
 50: **       ==============================
 51: **
 52: **   Given a hypertext address, this routine loads a document.
 53: **
 54: **
 55: ** On entry,
 56: **   arg   is the hypertext reference of the article to be loaded.
 57: **   gate  is nill if no gateway, else the gateway address.
 58: **
 59: ** On exit,
 60: **   returns >=0   If no error, a good socket number
 61: **       <0   Error.
 62: **
 63: **   The socket must be closed by the caller after the document has been
 64: **   read.
 65: **
 66: */
1.2 timbl 67: PUBLIC int HTLoadHTTP ARGS4 (
 68:    CONST char *,      arg,
 69: /*   CONST char *,      gate, */
 70:    HTParentAnchor *,    anAnchor,
 71:    HTFormat,        format_out,
 72:    HTStream*,       sink)
1.1 timbl 73: {
 74:   int s;               /* Socket number for returned data */
 75:   char *command;           /* The whole command */
1.3 timbl 76:   char * eol = 0;          /* End of line if found */
1.7 timbl 77:   char * start_of_data;       /* Start of body of reply */
1.11 ! timbl 78:   int length;                /* Number of valid bytes in buffer */
1.1 timbl 79:   int status;                /* tcp return */
1.10 timbl 80:   char crlf[3];           /* A CR LF equivalent string */
1.3 timbl 81:   HTStream * target = NULL;     /* Unconverted data */
 82:   HTFormat format_in;            /* Format arriving in the message */
 83:   
1.2 timbl 84:   CONST char* gate = 0;       /* disable this feature */
1.1 timbl 85:   SockA soc_address;         /* Binary network address */
 86:   SockA * sin = &soc_address;
1.2 timbl 87:   BOOL had_header = NO;       /* Have we had at least one header? */
1.11 ! timbl 88:   char * text_buffer = NULL;
 ! 89:   char * binary_buffer = NULL;
1.2 timbl 90:   BOOL extensions = YES;       /* Assume good HTTP server */
1.1 timbl 91:   if (!arg) return -3;        /* Bad if no name sepcified   */
 92:   if (!*arg) return -2;       /* Bad if name had zero length */
 93: 
 94: /* Set up defaults:
 95: */
 96: #ifdef DECNET
1.2 timbl 97:   sin->sdn_family = AF_DECnet;      /* Family = DECnet, host order */
 98:   sin->sdn_objnum = DNP_OBJ;     /* Default: http object number */
1.1 timbl 99: #else /* Internet */
1.2 timbl 100:   sin->sin_family = AF_INET;   /* Family = internet, host order */
 101:   sin->sin_port = htons(TCP_PORT);  /* Default: http port  */
1.1 timbl 102: #endif
 103: 
1.10 timbl 104:   sprintf(crlf, "%c%c", CR, LF);   /* To be corect on Mac, VM, etc */
 105:   
1.1 timbl 106:   if (TRACE) {
 107:     if (gate) fprintf(stderr,
 108:        "HTTPAccess: Using gateway %s for %s\n", gate, arg);
 109:     else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
 110:   }
 111:   
 112: /* Get node name and optional port number:
 113: */
 114:   {
 115:    char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 116:    int status = HTParseInet(sin, p1); /* TBL 920622 */
 117:     free(p1);
 118:    if (status) return status;  /* No such host for example */
 119:   }
 120:   
1.2 timbl 121: retry:
1.1 timbl 122:  
1.10 timbl 123: /*   Now, let's get a socket set up from the server for the data:
1.1 timbl 124: */   
 125: #ifdef DECNET
 126:   s = socket(AF_DECnet, SOCK_STREAM, 0);
 127: #else
 128:   s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 129: #endif
 130:   status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
 131:   if (status < 0) {
 132:      if (TRACE) fprintf(stderr, 
 133:       "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
 134:      /* free(command);  BUG OUT TBL 921121 */
 135:      return HTInetStatus("connect");
 136:    }
 137:   
 138:   if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
 139: 
 140: /*   Ask that node for the document,
 141: **   omitting the host name & anchor if not gatewayed.
 142: */    
 143:   if (gate) {
1.2 timbl 144:     command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 145:     if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 146:     strcpy(command, "GET ");
 147:    strcat(command, arg);
 148:   } else { /* not gatewayed */
 149:    char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 150:     command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 151:     if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 152:     strcpy(command, "GET ");
 153:    strcat(command, p1);
 154:    free(p1);
 155:   }
1.2 timbl 156: #ifdef HTTP2
 157:   if (extensions) {
 158:     strcat(command, " ");
 159:     strcat(command, HTTP_VERSION);
 160:   }
 161: #endif
1.10 timbl 162: 
 163:   strcat(command, crlf);   /* CR LF, as in rfc 977 */
1.1 timbl 164: 
1.2 timbl 165:   if (extensions) {
 166: 
 167:    int n;
 168:    int i;
 169:     HTAtom * present = WWW_PRESENT;
 170:    char line[256];  /*@@@@ */
 171: 
 172:    if (!HTPresentations) HTFormatInit();
 173:    n = HTList_count(HTPresentations);
 174: 
 175:    for(i=0; i<n; i++) {
 176:      HTPresentation * pres = HTList_objectAt(HTPresentations, i);
 177:      if (pres->rep_out == present) {
 178:       if (pres->quality != 1.0) {
1.3 timbl 179:         sprintf(line, "Accept: %s q=%.3f%c%c",
 180:             HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 181:       } else {
1.3 timbl 182:         sprintf(line, "Accept: %s%c%c",
 183:             HTAtom_name(pres->rep), CR, LF);
1.2 timbl 184:       }
 185:       StrAllocCat(command, line);
 186: 
 187:      }
 188:    }
1.6 timbl 189:    
 190:    sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
 191:        HTAppName ? HTAppName : "unknown",
 192:        HTAppVersion ? HTAppVersion : "0.0",
 193:        HTLibraryVersion, CR, LF);
 194:       StrAllocCat(command, line);
1.2 timbl 195:   }
1.3 timbl 196:    
1.10 timbl 197:   StrAllocCat(command, crlf);    /* Blank line means "end" */
 198: 
 199:   if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
 200: 
 201: /*   Translate into ASCII if necessary
 202: */
1.4 timbl 203: #ifdef NOT_ASCII
1.1 timbl 204:   {
 205:    char * p;
 206:    for(p = command; *p; p++) {
 207:      *p = TOASCII(*p);
 208:    }
1.4 timbl 209:   }
1.3 timbl 210: #endif
1.1 timbl 211: 
 212:   status = NETWRITE(s, command, (int)strlen(command));
 213:   free(command);
 214:   if (status<0) {
 215:    if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
 216:      return HTInetStatus("send");
 217:   }
 218: 
1.2 timbl 219: 
1.7 timbl 220: /*   Read the first line of the response
 221: **   -----------------------------------
1.11 ! timbl 222: **
 ! 223: **   HTTP0 servers must return ASCII style text, though it can in
 ! 224: **   principle be just text without any markup at all.
 ! 225: **   Full HTTP servers must return a response
 ! 226: **   line and RFC822 style header. The response must therefore in
 ! 227: **   either case have a CRLF somewhere soon.
 ! 228: **
 ! 229: **   This is the theory. In practice, there are (1993) unfortunately
 ! 230: **   many binary documents just served up with HTTP0.9. This
 ! 231: **   means we have to preserve the binary buffer (on the assumption that
 ! 232: **   conversion from ASCII may lose information) in case it turns
 ! 233: **   out that we want the binary original.
1.2 timbl 234: */
1.3 timbl 235: 
1.2 timbl 236:   {
 237:   
 238:   /* Get numeric status etc */
 239: 
 240:    BOOL end_of_file = NO;
 241:    HTAtom * encoding = HTAtom_for("7bit");
 242:    int buffer_length = INIT_LINE_SIZE;   /* Why not? */
 243:    
1.11 ! timbl 244:    binary_buffer = (char *) malloc(buffer_length * sizeof(char));
 ! 245:    if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
 ! 246:    text_buffer = (char *) malloc(buffer_length * sizeof(char));
 ! 247:    if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
 ! 248:    length = 0;
1.2 timbl 249:    
1.7 timbl 250:    do {  /* Loop to read in the first line */
1.2 timbl 251:      
 252:      /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
 253:      
 254:      if (buffer_length - length < LINE_EXTEND_THRESH) {
 255:        buffer_length = buffer_length + buffer_length;
1.11 ! timbl 256:        binary_buffer = (char *) realloc(
 ! 257:            binary_buffer, buffer_length * sizeof(char));
 ! 258:        if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
 ! 259:        text_buffer = (char *) realloc(
 ! 260:            text_buffer, buffer_length * sizeof(char));
 ! 261:        if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
1.2 timbl 262:      }
1.11 ! timbl 263:      status = NETREAD(s, binary_buffer + length,
1.2 timbl 264:                buffer_length - length -1);
 265:      if (status < 0) {
 266:        HTAlert("Unexpected network read error on response");
1.9 timbl 267:        NETCLOSE(s);
1.2 timbl 268:        return status;
 269:      }
1.10 timbl 270: 
 271:      if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
 272:         status);
 273: 
1.2 timbl 274:      if (status == 0) {
 275:        end_of_file = YES;
 276:        break;
 277:      }
1.11 ! timbl 278:      binary_buffer[length+status] = 0;
 ! 279: 
 ! 280: 
 ! 281: /*   Make an ASCII *copy* of the buffer
 ! 282: */
1.2 timbl 283: #ifdef NOT_ASCII
1.10 timbl 284:      if (TRACE) fprintf(stderr, "Local codes CR=%d, LF=%d\n", CR, LF);
1.11 ! timbl 285: #endif
1.2 timbl 286:      {
 287:        char * p;
1.11 ! timbl 288:        char * q;
 ! 289:        for(p = binary_buffer+length, q=text_buffer+length;
 ! 290:            *p; p++, q++) {
 ! 291:          *q = FROMASCII(*p);
 ! 292:        }
 ! 293: 
 ! 294:        *q++ = 0;
 ! 295:      }
 ! 296: 
 ! 297: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
 ! 298: ** First time we have enough, look at the stub in ASCII
 ! 299: ** and get out of here if it doesn't look right.
 ! 300: **
 ! 301: ** We also check for characters above 128 in the first few bytes, and
 ! 302: ** if we find them we forget the html default.
 ! 303: **
 ! 304: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
 ! 305: **   will be taken as a HTTP 1.0 server. Failure.
 ! 306: **   An HTTP 0.9 server returning a binary document with
 ! 307: **   characters < 128 will be read as ASCII.
 ! 308: */
 ! 309: #define STUB_LENGTH 20
 ! 310:      if (length < STUB_LENGTH && length+status >= STUB_LENGTH) {
 ! 311:        if(strncmp("HTTP/", text_buffer, 5)!=0) {
 ! 312:          char *p;
 ! 313:          start_of_data = text_buffer; /* reparse whole reply */
 ! 314:          for(p=binary_buffer; p <binary_buffer+STUB_LENGTH;p++) {
 ! 315:            if ((int)p&128) {
 ! 316:              format_in = HTAtom_for("www/unknown");
 ! 317:            }
 ! 318:          }
 ! 319:          break;
1.2 timbl 320:        }
 321:      }
1.11 ! timbl 322: /* end kludge */
 ! 323: 
 ! 324:      
 ! 325:      eol = strchr(text_buffer + length, 10);     
 ! 326:      if (eol) {
 ! 327:        *eol = 0;        /* Terminate the line */
 ! 328:        if (eol[-1] = CR) eol[-1] = 0; /* Chop trailing CR */
 ! 329:       }
1.2 timbl 330: 
 331:      length = length + status;
 332: 
1.7 timbl 333:    } while (!eol && !end_of_file);     /* No LF */     
 334:        
 335:   } /* Scope of loop variables */
1.2 timbl 336: 
1.7 timbl 337:   
 338: /*   We now have a terminated unfolded line. Parse it.
 339: **   -------------------------------------------------
1.2 timbl 340: */
 341: 
1.11 ! timbl 342:   if (TRACE)fprintf(stderr, "HTTP: Rx: %.70s\n", text_buffer);
1.7 timbl 343: 
 344:   {
 345:    int fields;
 346:    char server_version [VERSION_LENGTH+1];
 347:    int server_status;
 348: 
1.2 timbl 349: 
 350: /* Kludge to work with old buggy servers. They can't handle the third word
 351: ** so we try again without it.
 352: */
1.7 timbl 353:    if (extensions &&
1.11 ! timbl 354:        0==strcmp(text_buffer,     /* Old buggy server? */
1.7 timbl 355:        "Document address invalid or access not authorised")) {
 356:      extensions = NO;
1.11 ! timbl 357:      if (binary_buffer) free(binary_buffer);
 ! 358:      if (text_buffer) free(text_buffer);
1.7 timbl 359:      if (TRACE) fprintf(stderr,
 360:        "HTTP: close socket %d to retry with HTTP0\n", s);
 361:      NETCLOSE(s);
 362:      goto retry;     /* @@@@@@@@@@ */
 363:    }
1.11 ! timbl 364: /* end kludge */
1.2 timbl 365: 
1.11 ! timbl 366:    fields = sscanf(text_buffer, "%20s%d",
1.7 timbl 367:      server_version,
 368:      &server_status);
 369: 
1.11 ! timbl 370:    if (fields < 2 || 
 ! 371:        strncmp(server_version, "HTTP/", 5)!=0) { /* HTTP0 reply */
1.7 timbl 372:      format_in = WWW_HTML;
1.11 ! timbl 373:      start_of_data = text_buffer;    /* reread whole reply */
1.9 timbl 374:      if (eol) *eol = '\n';        /* Reconstitute buffer */
1.2 timbl 375:      
1.11 ! timbl 376:    } else {                /* Full HTTP reply */
1.7 timbl 377:    
 378:    /*   Decode full HTTP response */
 379:    
1.3 timbl 380:      format_in = HTAtom_for("www/mime");
1.11 ! timbl 381:      start_of_data = eol ? eol + 1 : text_buffer + length;
1.3 timbl 382: 
1.2 timbl 383:      switch (server_status / 100) {
 384:      
1.3 timbl 385:      default:      /* bad number */
 386:        HTAlert("Unknown status reply from server!");
 387:        break;
 388:        
1.2 timbl 389:      case 3:       /* Various forms of redirection */
1.7 timbl 390:        HTAlert(
1.3 timbl 391:    "Redirection response from server is not handled by this client");
 392:        break;
 393:        
1.2 timbl 394:      case 4:       /* "I think I goofed" */
 395:      case 5:       /* I think you goofed */
1.6 timbl 396:        {
 397:          char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 398:          char * message = (char*)malloc(
1.11 ! timbl 399:            strlen(text_buffer)+strlen(p1) + 100);
1.6 timbl 400:          if (!message) outofmem(__FILE__, "HTTP 5xx status");
 401:          sprintf(message,
1.11 ! timbl 402:          "HTTP server at %s replies:\n%s", p1, text_buffer);
1.8 timbl 403:          status = HTLoadError(sink, server_status, message);
1.6 timbl 404:          free(message);
 405:          free(p1);
 406:          goto clean_up;
 407:        }
1.3 timbl 408:        break;
1.2 timbl 409:        
 410:      case 2:       /* Good: Got MIME object */
 411:        break;
 412: 
1.7 timbl 413:      } /* switch on response code */
 414:    
 415:    }        /* Full HTTP reply */
 416:    
 417:   } /* scope of fields */
1.2 timbl 418: 
1.3 timbl 419: /*   Set up the stream stack to handle the body of the message
 420: */
 421: 
 422:   target = HTStreamStack(format_in,
 423:            format_out,
 424:            sink , anAnchor);
 425: 
 426:   if (!target) {
 427:    char buffer[1024];   /* @@@@@@@@ */
1.11 ! timbl 428:    if (binary_buffer) free(binary_buffer);
 ! 429:    if (text_buffer) free(text_buffer);
1.3 timbl 430:    sprintf(buffer, "Sorry, no known way of converting %s to %s.",
 431:        HTAtom_name(format_in), HTAtom_name(format_out));
 432:    fprintf(stderr, "HTTP: %s", buffer);
1.6 timbl 433:    status = HTLoadError(sink, 501, buffer);
 434:    goto clean_up;
1.3 timbl 435:   }
 436: 
 437:   
1.11 ! timbl 438: /*   Push the data down the stream
1.3 timbl 439: **   We have to remember the end of the first buffer we just read
1.2 timbl 440: */
1.11 ! timbl 441:   if (format_in == WWW_HTML) {
 ! 442:     target = HTNetToText(target); /* Pipe through CR stripper */
 ! 443:   }
 ! 444:   
 ! 445:   (*target->isa->put_block)(target,
 ! 446:        binary_buffer + (start_of_data - text_buffer),
 ! 447:        length - (start_of_data - text_buffer));
 ! 448:   HTCopy(s, target);
1.3 timbl 449:    
 450:   (*target->isa->free)(target);
1.8 timbl 451:   status = HT_LOADED;
1.2 timbl 452: 
 453: /*   Clean up
1.1 timbl 454: */
1.3 timbl 455:   
1.6 timbl 456: clean_up: 
1.11 ! timbl 457:   if (binary_buffer) free(binary_buffer);
 ! 458:   if (text_buffer) free(text_buffer);
1.3 timbl 459: 
1.1 timbl 460:   if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
1.6 timbl 461:   (void) NETCLOSE(s);
1.1 timbl 462: 
1.8 timbl 463:   return status;           /* Good return */
1.3 timbl 464: 
1.1 timbl 465: }
1.7 timbl 466: 
1.1 timbl 467: 
 468: /*   Protocol descriptor: pairs the access name "http" with the loader HTLoadHTTP.
 469: **   The third member is left 0 -- its meaning is defined by HTProtocol, not shown here (TODO confirm).
 469: */
 470: 
1.2 timbl 471: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };

Webmaster

AltStyle によって変換されたページ (->オリジナル) /