[BACK] Return to HTTP.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTTP.c, revision 1.18

1.1 timbl 1: /*   HyperText Transfer Protocol   - Client implementation     HTTP.c
 2: **   ==========================
1.2 timbl 3: **
 4: ** Bugs:
 5: **   Not implemented:
 6: **       Forward
 7: **       Redirection
 8: **       Error handling
1.1 timbl 9: */
 10: 
 11: /*   Module parameters:
 12: **   -----------------
 13: **
 14: ** These may be undefined and redefined by syspec.h
 15: */
1.2 timbl 16: 
1.12 timbl 17: /*   MOSAIC_HACK2 is a kludge to guess the file type of a transferred
 18: **   file from the URL. It is STRICTLY illegal to do this!
 19: */
 20: 
1.2 timbl 21: /* Implements:
 22: */
 23: #include "HTTP.h"
 24: 
 25: #define HTTP_VERSION  "HTTP/1.0"
 26: #define HTTP2             /* Version is greater than 0.9 */
 27: 
 28: #define INIT_LINE_SIZE     1024  /* Start with line buffer this big */
 29: #define LINE_EXTEND_THRESH   256   /* Minimum read size */
 30: #define VERSION_LENGTH         20   /* for returned protocol version */
 31: 
 32: /* Uses:
 33: */
1.1 timbl 34: #include "HTParse.h"
 35: #include "HTUtils.h"
 36: #include "tcp.h"
 37: #include "HTTCP.h"
 38: #include "HTFormat.h"
1.2 timbl 39: #include <ctype.h>
 40: #include "HTAlert.h"
 41: #include "HTMIME.h"
1.5 timbl 42: #include "HTML.h"       /* SCW */
 43: #include "HTInit.h"      /* SCW */
1.14 luotonen 44: #include "HTAABrow.h"     /* Access Authorization */
1.1 timbl 45: 
1.2 timbl 46: struct _HTStream {
 47:    HTStreamClass * isa;      /* all we need to know */
 48: };
 49: 
 50: 
1.6 timbl 51: extern char * HTAppName;    /* Application name: please supply */
 52: extern char * HTAppVersion;  /* Application version: please supply */
 53: 
1.1 timbl 54: /*       Load Document from HTTP Server         HTLoadHTTP()
 55: **       ==============================
 56: **
 57: **   Given a hypertext address, this routine loads a document.
 58: **
 59: **
 60: ** On entry,
 61: **   arg   is the hypertext reference of the document to be loaded.
 62: **
 63: ** On exit,
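 64: **   returns HT_LOADED   If the reply body was pushed down the stream stack
 65: **       otherwise   A failure status (-3 or -2 for a missing or empty arg).
 66: **
 67: **   The socket is opened and, on the normal exit path, closed inside this
 68: **   routine; its number is not handed back to the caller.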
 69: **
 70: */
1.17 timbl 71: PUBLIC int HTLoadHTTP ARGS2 (
1.2 timbl 72:    CONST char *,      arg,
1.17 timbl 73:    HTRequest *,      request)
1.1 timbl 74: {
 75:   int s;               /* Socket number for returned data */
 76:   int status;                /* tcp return */
1.10 timbl 77:   char crlf[3];           /* A CR LF equivalent string */
1.3 timbl 78:   HTStream * target = NULL;     /* Unconverted data */
1.15 luotonen 79:   char *auth = NULL;         /* Authorization information */
1.3 timbl 80:   
1.2 timbl 81:   CONST char* gate = 0;       /* disable this feature */
1.1 timbl 82:   SockA soc_address;         /* Binary network address */
 83:   SockA * sin = &soc_address;
1.2 timbl 84:   BOOL extensions = YES;       /* Assume good HTTP server */
1.17 timbl 85: 
1.1 timbl 86:   if (!arg) return -3;        /* Bad if no name specified   */
 87:   if (!*arg) return -2;       /* Bad if name had zero length */
 88: 
 89: /* Set up defaults:
 90: */
 91: #ifdef DECNET
1.2 timbl 92:   sin->sdn_family = AF_DECnet;      /* Family = DECnet, host order */
 93:   sin->sdn_objnum = DNP_OBJ;     /* Default: http object number */
1.1 timbl 94: #else /* Internet */
1.2 timbl 95:   sin->sin_family = AF_INET;   /* Family = internet, host order */
 96:   sin->sin_port = htons(TCP_PORT);  /* Default: http port  */
1.1 timbl 97: #endif
 98: 
1.10 timbl 99:   sprintf(crlf, "%c%c", CR, LF);   /* To be correct on Mac, VM, etc */
 100:   
1.1 timbl 101:   if (TRACE) {
 102:     if (gate) fprintf(stderr,
 103:        "HTTPAccess: Using gateway %s for %s\n", gate, arg);
 104:     else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
 105:   }
 106:   
 107: /* Get node name and optional port number:
 108: */
 109:   {
 110:    char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 111:    int status = HTParseInet(sin, p1); /* TBL 920622 */
 112:     free(p1);
 113:    if (status) return status;  /* No such host for example */
 114:   }
 115:   
1.2 timbl 116: retry:
1.15 luotonen 117: 
 118: /*
 119: ** Compose authorization information (this was moved here
 120: ** from after the making of the connection so that the connection
 121: ** wouldn't have to wait while prompting username and password
 122: ** from the user).               -- AL 13.10.93
 123: */
 124: #ifdef ACCESS_AUTH
 125: #define FREE(x)    if (x) {free(x); x=NULL;}
 126:   {
 127:    char *docname;
 128:    char *hostname;
 129:    char *colon;
 130:    int portnumber;
 131: 
 132:    docname = HTParse(arg, "", PARSE_PATH);
 133:    hostname = HTParse((gate ? gate : arg), "", PARSE_HOST);
 134:    if (hostname &&
 135:      NULL != (colon = strchr(hostname, ':'))) {
1.16 duns 136:      *(colon++) = '\0'; /* Chop off port number */
1.15 luotonen 137:      portnumber = atoi(colon);
 138:    }
 139:    else portnumber = 80;
 140:    
 141:    auth = HTAA_composeAuth(hostname, portnumber, docname);
 142: 
 143:    if (TRACE) {
 144:      if (auth)
 145:        fprintf(stderr, "HTTP: Sending authorization: %s\n", auth);
 146:      else
 147:        fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
 148:    }
 149:    FREE(hostname);
 150:    FREE(docname);
 151:   }
 152: #endif /* ACCESS_AUTH */
1.1 timbl 153:  
1.10 timbl 154: /*   Now, let's get a socket set up from the server for the data:
1.1 timbl 155: */   
 156: #ifdef DECNET
 157:   s = socket(AF_DECnet, SOCK_STREAM, 0);
 158: #else
 159:   s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 160: #endif
 161:   status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
 162:   if (status < 0) {
 163:      if (TRACE) fprintf(stderr, 
 164:       "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
1.17 timbl 165: 
1.1 timbl 166:      return HTInetStatus("connect");
 167:    }
 168:   
 169:   if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
 170: 
1.17 timbl 171: 
 172: /*   Compose and send command
 173: **   ------------------------
 174: */
 175:   {
 176:     char *command;         /* The whole command */
 177:    
1.1 timbl 178: /*   Ask that node for the document,
 179: **   omitting the host name & anchor if not gatewayed.
 180: */    
1.17 timbl 181:    if (gate) {
 182:      command = malloc(4 + strlen(arg)+ 2 + 31);
 183:      if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 184:      strcpy(command, "GET ");
 185:      strcat(command, arg);
 186:    } else { /* not gatewayed */
 187:      char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
 188:      command = malloc(4 + strlen(p1)+ 2 + 31);
 189:      if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 190:      strcpy(command, "GET ");
 191:      strcat(command, p1);
 192:      free(p1);
 193:    }
1.2 timbl 194: #ifdef HTTP2
1.17 timbl 195:    if (extensions) {
 196:      strcat(command, " ");
 197:      strcat(command, HTTP_VERSION);
 198:    }
1.2 timbl 199: #endif
1.17 timbl 200:   
 201:    strcat(command, crlf); /* CR LF, as in rfc 977 */
 202:   
 203:    if (extensions) {
 204:   
 205:      int n;
 206:      int i;
 207:      HTAtom * present = WWW_PRESENT;
 208:      char line[256];  /*@@@@ */
 209:   
 210: /*     if (!request->conversions) HTFormatInit(request->conversions); */
 211:      n = HTList_count(request->conversions);
 212:   
 213:      for(i=0; i<n; i++) {
 214:        HTPresentation * pres =
 215:            HTList_objectAt(request->conversions, i);
 216:        if (pres->rep_out == present) {
 217:          if (pres->quality != 1.0) {
 218:            sprintf(line, "Accept: %s q=%.3f%c%c",
 219:                HTAtom_name(pres->rep), pres->quality, CR, LF);
 220:          } else {
 221:            sprintf(line, "Accept: %s%c%c",
 222:                HTAtom_name(pres->rep), CR, LF);
 223:          }
 224:          StrAllocCat(command, line);
 225:   
 226:        }
1.2 timbl 227:      }
1.17 timbl 228:      
 229:      sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
 230:          HTAppName ? HTAppName : "unknown",
 231:          HTAppVersion ? HTAppVersion : "0.0",
 232:          HTLibraryVersion, CR, LF);
 233:          StrAllocCat(command, line);
 234:   
1.14 luotonen 235: #ifdef ACCESS_AUTH
1.17 timbl 236:      if (auth != NULL) {
 237:        sprintf(line, "%s%c%c", auth, CR, LF);
 238:        StrAllocCat(command, line);
 239:      }
 240: #endif /* ACCESS_AUTH */
1.14 luotonen 241:    }
1.17 timbl 242:   
 243:    StrAllocCat(command, crlf);   /* Blank line means "end" */
 244:   
 245:    if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
 246:   
 247:   /* Translate into ASCII if necessary
 248:   */
1.4 timbl 249: #ifdef NOT_ASCII
1.17 timbl 250:    {
 251:      char * p;
 252:      for(p = command; *p; p++) {
 253:        *p = TOASCII(*p);
 254:      }
1.1 timbl 255:    }
1.3 timbl 256: #endif
1.17 timbl 257:   
 258:    status = NETWRITE(s, command, (int)strlen(command));
 259:    free(command);
 260:    if (status<0) {
 261:      if (TRACE) fprintf(stderr,
 262:        "HTTPAccess: Unable to send command.\n");
1.1 timbl 263:      return HTInetStatus("send");
1.17 timbl 264:    }
 265:   } /* compose and send command */
 266:   
1.2 timbl 267: 
1.17 timbl 268: /*   Read the response
 269: **   -----------------
1.11 timbl 270: **
 271: **   HTTP0 servers must return ASCII style text, though it can in
 272: **   principle be just text without any markup at all.
 273: **   Full HTTP servers must return a response
 274: **   line and RFC822 style header. The response must therefore in
 275: **   either case have a CRLF somewhere soon.
 276: **
 277: **   This is the theory. In practice, there are (1993) unfortunately
 278: **   many binary documents just served up with HTTP0.9. This
 279: **   means we have to preserve the binary buffer (on the assumption that
 280: **   conversion from ASCII may lose information) in case it turns
 281: **   out that we want the binary original.
1.2 timbl 282: */
1.3 timbl 283: 
1.17 timbl 284:   {  /* read response */
 285:   
 286:    char * eol = 0;         /* End of line if found */
 287:    char * start_of_data;      /* Start of body of reply */
 288:    int length;           /* Number of valid bytes in buffer */
 289:    char * text_buffer = NULL;
 290:    char * binary_buffer = NULL;
 291:    HTFormat format_in;       /* Format arriving in the message */
 292:    
 293:     { /* local variables for loop */
1.2 timbl 294:   
 295:   /* Get numeric status etc */
 296: 
1.17 timbl 297:      BOOL end_of_file = NO;
 298:      HTAtom * encoding = HTAtom_for("7bit");
 299:      int buffer_length = INIT_LINE_SIZE; /* Why not? */
 300:      
 301:      binary_buffer = (char *) malloc(buffer_length * sizeof(char));
 302:      if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
 303:      text_buffer = (char *) malloc(buffer_length * sizeof(char));
 304:      if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
 305:      length = 0;
1.2 timbl 306:      
1.17 timbl 307:      do {    /* Loop to read in the first line */
 308:        
 309:        /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
 310:        
 311:        if (buffer_length - length < LINE_EXTEND_THRESH) {
 312:          buffer_length = buffer_length + buffer_length;
 313:          binary_buffer = (char *) realloc(
 314:              binary_buffer, buffer_length * sizeof(char));
 315:          if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
 316:          text_buffer = (char *) realloc(
 317:              text_buffer, buffer_length * sizeof(char));
 318:          if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
 319:        }
 320:        status = NETREAD(s, binary_buffer + length,
 321:                  buffer_length - length -1);
 322:        if (status < 0) {
 323:          HTAlert("Unexpected network read error on response");
 324:          NETCLOSE(s);
 325:          return status;
 326:        }
 327:   
 328:        if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
 329:            status);
 330:   
 331:        if (status == 0) {
 332:          end_of_file = YES;
 333:          break;
 334:        }
 335:        binary_buffer[length+status] = 0;
 336:   
 337:   
1.11 timbl 338: /*   Make an ASCII *copy* of the buffer
 339: */
1.2 timbl 340: #ifdef NOT_ASCII
1.17 timbl 341:        if (TRACE) fprintf(stderr, 
 342:            "Local codes CR=%d, LF=%d\n", CR, LF);
1.11 timbl 343: #endif
1.17 timbl 344:        {
 345:          char * p;
 346:          char * q;
 347:          for(p = binary_buffer+length, q=text_buffer+length;
 348:              *p; p++, q++) {
 349:            *q = FROMASCII(*p);
 350:          }
 351:   
 352:          *q++ = 0;
1.11 timbl 353:        }
1.17 timbl 354:   
1.11 timbl 355: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
 356: ** First time we have enough, look at the stub in ASCII
 357: ** and get out of here if it doesn't look right.
 358: **
 359: ** We also check for characters above 128 in the first few bytes, and
 360: ** if we find them we forget the html default.
 361: **
 362: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
 363: **   will be taken as a HTTP 1.0 server. Failure.
 364: **   An HTTP 0.9 server returning a binary document with
 365: **   characters < 128 will be read as ASCII.
 366: */
 367: #define STUB_LENGTH 20
1.17 timbl 368:        if (length < STUB_LENGTH && length+status >= STUB_LENGTH) {
 369:          if(strncmp("HTTP/", text_buffer, 5)!=0) {
 370:            char *p;
 371:            start_of_data = text_buffer; /* reparse whole reply */
 372:            for(p=binary_buffer; p <binary_buffer+STUB_LENGTH;p++) {
 373:              if (((int)*p)&128) {
 374:                format_in = HTAtom_for("www/unknown");
 375:                length = length + status;
 376:                goto copy; /* out of while loop */
 377:              }
1.11 timbl 378:            }
 379:          }
1.2 timbl 380:        }
1.17 timbl 381:   /* end kludge */
 382:   
 383:        
 384:        eol = strchr(text_buffer + length, 10);   
 385:        if (eol) {
 386:          *eol = 0;      /* Terminate the line */
 387:          if (eol[-1] == CR) eol[-1] = 0;   /* Chop trailing CR */
 388:                          /* = corrected to == -- AL */
 389:        }
 390:   
 391:        length = length + status;
 392:   
 393:      } while (!eol && !end_of_file);       /* No LF */     
 394:          
 395:    } /* Scope of loop variables */
1.2 timbl 396: 
1.7 timbl 397:   
 398: /*   We now have a terminated unfolded line. Parse it.
 399: **   -------------------------------------------------
1.2 timbl 400: */
 401: 
1.17 timbl 402:    if (TRACE)fprintf(stderr, "HTTP: Rx: %.70s\n", text_buffer);
 403:   
 404:    {
 405:      int fields;
 406:      char server_version [VERSION_LENGTH+1];
 407:      int server_status;
 408:   
 409:   
 410: #ifdef OLD_CODE                /* old buggy servers should not exist now tbl9311 */
1.2 timbl 411: /* Kludge to work with old buggy servers. They can't handle the third word
 412: ** so we try again without it.
 413: */
1.17 timbl 414:      if (extensions &&
 415:          0==strcmp(text_buffer,       /* Old buggy server? */
 416:          "Document address invalid or access not authorised")) {
 417:        extensions = NO;
 418:        if (binary_buffer) free(binary_buffer);
 419:        if (text_buffer) free(text_buffer);
 420:        if (TRACE) fprintf(stderr,
 421:          "HTTP: close socket %d to retry with HTTP0\n", s);
 422:        NETCLOSE(s);
 423:        goto retry;       /* @@@@@@@@@@ */
 424:      }
 425: #endif
 426:   
 427:      fields = sscanf(text_buffer, "%20s%d",
 428:        server_version,
 429:        &server_status);
 430:   
 431:      if (fields < 2 || 
 432:          strncmp(server_version, "HTTP/", 5)!=0) { /* HTTP0 reply */
 433:        format_in = WWW_HTML;
 434:        start_of_data = text_buffer;  /* reread whole reply */
 435:        if (eol) *eol = '\n';      /* Reconstitute buffer */
 436:        
 437:      } else {              /* Full HTTP reply */
1.2 timbl 438:      
1.17 timbl 439:        /*   Decode full HTTP response */
1.2 timbl 440:      
1.17 timbl 441:        format_in = HTAtom_for("www/mime");
 442:        start_of_data = eol ? eol + 1 : text_buffer + length;
 443:   
 444:        switch (server_status / 100) {
1.3 timbl 445:        
1.17 timbl 446:        default:        /* bad number */
 447:          HTAlert("Unknown status reply from server!");
 448:          break;
 449:          
 450:        case 3:     /* Various forms of redirection */
 451:          HTAlert(
 452:      "Redirection response from server is not handled by this client");
 453:          break;
 454:          
 455:        case 4:     /* Access Authorization problem */
1.14 luotonen 456: #ifdef ACCESS_AUTH
1.17 timbl 457:          switch (server_status) {
 458:            case 401:
 459:            length -= start_of_data - text_buffer;
 460:            if (HTAA_shouldRetryWithAuth(start_of_data, length, s)) {
 461:              /* Clean up before retrying */
 462:              if (binary_buffer) free(binary_buffer);
 463:              if (text_buffer) free(text_buffer);
 464:              if (TRACE) 
 465:                fprintf(stderr, "%s %d %s\n",
 466:                    "HTTP: close socket", s,
 467:                    "to retry with Access Authorization");
 468:              (void)NETCLOSE(s);
 469:              goto retry;
 470:              break;
 471:            }
 472:            else {
 473:              /* FALL THROUGH */
 474:            }
 475:            default:
 476:            {
 477:              char *p1 = HTParse(gate ? gate : arg, "",
 478:                PARSE_HOST);
 479:              char * message;
 480:   
 481:              if (!(message = (char*)malloc(strlen(text_buffer) +
 482:                              strlen(p1) + 100)))
 483:                outofmem(__FILE__, "HTTP 4xx status");
 484:              sprintf(message,
 485:                  "HTTP server at %s replies:\n%s\n\n%s\n",
 486:                  p1, text_buffer,
 487:                  ((server_status == 401) 
 488:                    ? "Access Authorization package giving up.\n"
 489:                    : ""));
 490:              status = HTLoadError(request->output_stream,
 491:                 server_status, message);
 492:              free(message);
 493:              free(p1);
 494:              goto clean_up;
 495:            }
 496:          } /* switch */
 497:          goto clean_up;
 498:          break;
 499: #else
 500:          /* case 4 without Access Authorization falls through */
 501:          /* to case 5 (previously "I think I goofed"). -- AL */
 502: #endif /* ACCESS_AUTH */
 503:   
 504:        case 5:     /* I think you goofed */
1.14 luotonen 505:          {
 506:            char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
1.17 timbl 507:            char * message = (char*)malloc(
 508:              strlen(text_buffer)+strlen(p1) + 100);
 509:            if (!message) outofmem(__FILE__, "HTTP 5xx status");
1.14 luotonen 510:            sprintf(message,
1.17 timbl 511:            "HTTP server at %s replies:\n%s", p1, text_buffer);
 512:            status = HTLoadError(request->output_stream, server_status, message);
1.14 luotonen 513:            free(message);
 514:            free(p1);
 515:            goto clean_up;
 516:          }
1.17 timbl 517:          break;
 518:          
 519:        case 2:     /* Good: Got MIME object */
 520:          break;
 521:   
 522:        } /* switch on response code */
 523:      
 524:      }      /* Full HTTP reply */
 525:      
 526:    } /* scope of fields */
 527:   
1.3 timbl 528: /*   Set up the stream stack to handle the body of the message
 529: */
1.17 timbl 530:   
1.13 duns 531: copy:
1.17 timbl 532:   
1.18 ! timbl 533:    target = HTStreamStack(format_in, request);
1.17 timbl 534:   
 535:    if (!target) {
 536:      char buffer[1024]; /* @@@@@@@@ */
 537:      if (binary_buffer) free(binary_buffer);
 538:      if (text_buffer) free(text_buffer);
 539:      sprintf(buffer, "Sorry, no known way of converting %s to %s.",
 540:          HTAtom_name(format_in), HTAtom_name(request->output_format));
 541:      fprintf(stderr, "HTTP: %s", buffer);
 542:      status = HTLoadError(request->output_stream, 501, buffer);
 543:      goto clean_up;
 544:    }
 545:   
1.3 timbl 546:   
1.11 timbl 547: /*   Push the data down the stream
1.3 timbl 548: **   We have to remember the end of the first buffer we just read
1.2 timbl 549: */
1.17 timbl 550:    if (format_in == WWW_HTML) {
 551:      target = HTNetToText(target);    /* Pipe through CR stripper */
 552:    }
 553:    
 554:    (*target->isa->put_block)(target,
 555:          binary_buffer + (start_of_data - text_buffer),
 556:          length - (start_of_data - text_buffer));
 557:    HTCopy(s, target);
 558:      
 559:    (*target->isa->free)(target);
 560:    status = HT_LOADED;
1.11 timbl 561:   
1.2 timbl 562: /*   Clean up
1.1 timbl 563: */
1.17 timbl 564:    
 565: clean_up: 
 566:    if (binary_buffer) free(binary_buffer);
 567:    if (text_buffer) free(text_buffer);
 568:   
 569:    if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
 570:    (void) NETCLOSE(s);
 571:   
 572:    return status;         /* HT_LOADED, or the error status set before clean_up */
1.3 timbl 573:   
1.17 timbl 574:   } /* read response */
 575: } /* load HTTP */
1.1 timbl 576: 
 577: /*   Protocol descriptor
 578: */
 579: 
1.17 timbl 580: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };

Webmaster

AltStyle によって変換されたページ (->オリジナル) /