[BACK] Return to HTTP.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTTP.c, revision 1.21

1.1 timbl 1: /*   HyperText Transfer Protocol   - Client implementation     HTTP.c
 2: **   ==========================
1.2 timbl 3: **
 4: ** Bugs:
 5: **   Not implemented:
 6: **       Forward
 7: **       Redirection
 8: **       Error handling
1.1 timbl 9: */
 10: 
 11: /*   Module parameters:
 12: **   -----------------
 13: **
 14: ** These may be undefined and redefined by syspec.h
 15: */
1.2 timbl 16: 
1.12 timbl 17: /*   MOSAIC_HACK2 is a kludge to guess the file type of a transferred
 18: **   file from the URL. It is STRICTLY illegal to do this!
 19: */
 20: 
1.2 timbl 21: /* Implements:
 22: */
 23: #include "HTTP.h"
 24: 
 /* Protocol constants used by HTLoadHTTP() when composing the request
 ** and decoding the status line.
 */
 25: #define HTTP_VERSION  "HTTP/1.0"      /* Appended to the GET line inside #ifdef HTTP2 */
 26: #define HTTP2             /* Version is greater than 0.9 */
 27: 
 28: #define INIT_LINE_SIZE     1024  /* Start with line buffer this big */
 29: #define LINE_EXTEND_THRESH   256   /* Minimum read size */
 /* VERSION_LENGTH sizes the server_version buffer in HTLoadHTTP(); the
 ** field width in the sscanf format there is written out as a literal 20,
 ** so the two must be changed together.
 */
 30: #define VERSION_LENGTH         20   /* for returned protocol version */
 31: 
 32: /* Uses:
 33: */
1.1 timbl 34: #include "HTParse.h"
 35: #include "HTUtils.h"
 36: #include "tcp.h"
 37: #include "HTTCP.h"
 38: #include "HTFormat.h"
1.2 timbl 39: #include <ctype.h>
 40: #include "HTAlert.h"
 41: #include "HTMIME.h"
1.5 timbl 42: #include "HTML.h"       /* SCW */
 43: #include "HTInit.h"      /* SCW */
1.21 ! luotonen 44: #include "HTAccess.h"     /* HTRequest */
1.14 luotonen 45: #include "HTAABrow.h"     /* Access Authorization */
1.20 timbl 46: #include "HTTee.h"       /* Tee off a cache stream */
 47: #include "HTFWriter.h"     /* Write to cache file */
1.1 timbl 48: 
/* Local view of a stream object: this file only reaches through the
** class pointer (isa->put_block and isa->free in HTLoadHTTP), so only
** that first member is declared here.
*/
1.2 timbl 49: struct _HTStream {
 50:    HTStreamClass * isa;      /* all we need to know */
 51: };
 52: 
 53: 
/* Application identity, defined by the client program. Both are used in
** the User-Agent: line built by HTLoadHTTP(), which substitutes
** "unknown" and "0.0" when they are NULL.
*/
1.6 timbl 54: extern char * HTAppName;    /* Application name: please supply */
 55: extern char * HTAppVersion;  /* Application version: please supply */
 56: 
/* When YES, HTLoadHTTP() wraps its target stream with HTTee() so that the
** data also goes to the stream returned by HTCacheWriter().
*/
1.19 timbl 57: PUBLIC BOOL HTCacheHTTP = YES; /* Enable caching of HTTP-retrieved files */
 58: 
1.21 ! luotonen 59: 
 /*	Parse the header lines of a 401 reply		parse_401_headers()
 **	-------------------------------------
 **
 **	Reads unfolded header lines from isoc until
 **	HTInputSocket_getUnfoldedLine() returns NULL or an empty line.
 **
 **	WWW-Authenticate: lines whose first argument HTAAScheme_enum()
 **	recognises add the scheme to a list and store the parsed remainder
 **	of the line (HTAA_parseArgList) in an array slot indexed by scheme.
 **	A WWW-Protection-Template: line has its first argument copied.
 **	Lines without a colon are ignored (reported under TRACE).
 **
 ** On entry,
 **	req	request whose authentication fields are to be filled in.
 **	isoc	input socket positioned after the status line.
 **
 ** On exit,
 **	req->valid_schemes	new list, possibly empty.
 **	req->scheme_specifics	array of HTAA_MAX_SCHEMES slots, or NULL
 **				if no known scheme was seen.
 **	req->prot_template	copy of the template, or NULL if none seen.
 */
 ! 60: PRIVATE void parse_401_headers ARGS2(HTRequest *,   req,
 ! 61:                   HTInputSocket *,  isoc)
 ! 62: {
 ! 63:   HTAAScheme scheme;
 ! 64:   char *line;
   /* NOTE(review): num_schemes is incremented below but never read. */
 ! 65:   int num_schemes = 0;
 ! 66:   HTList *valid_schemes = HTList_new();
 ! 67:   HTAssocList **scheme_specifics = NULL;
 ! 68:   char *template = NULL;
 ! 69: 
 ! 70:   /* Read server reply header lines */
 ! 71: 
 ! 72:   if (TRACE)
 ! 73:    fprintf(stderr, "Server 401 reply header lines:\n");
 ! 74: 
 ! 75:   while (NULL != (line = HTInputSocket_getUnfoldedLine(isoc)) &&
 ! 76:      *line != 0) {
 ! 77: 
 ! 78:    if (TRACE) fprintf(stderr, "%s\n", line);
 ! 79: 
 ! 80:    if (strchr(line, ':')) {    /* Valid header line */
 ! 81: 
     /* Split the line: field name, first argument, and the rest
     ** (args is whatever p points at after the two HTNextField calls).
     */
 ! 82:      char *p = line;
 ! 83:      char *fieldname = HTNextField(&p);
 ! 84:      char *arg1 = HTNextField(&p);
 ! 85:      char *args = p;
 ! 86:      
 ! 87:      if (0==strcasecomp(fieldname, "WWW-Authenticate:")) {
 ! 88:        if (HTAA_UNKNOWN != (scheme = HTAAScheme_enum(arg1))) {
         /* The enum value itself is stored in the list, cast to a pointer */
 ! 89:          HTList_addObject(valid_schemes, (void*)scheme);
         /* Allocate and clear the per-scheme table on first use */
 ! 90:          if (!scheme_specifics) {
 ! 91:            int i;
 ! 92:            scheme_specifics = (HTAssocList**)
 ! 93:              malloc(HTAA_MAX_SCHEMES * sizeof(HTAssocList*));
 ! 94:            if (!scheme_specifics)
 ! 95:              outofmem(__FILE__, "parse_401_headers");
 ! 96:            for (i=0; i < HTAA_MAX_SCHEMES; i++)
 ! 97:              scheme_specifics[i] = NULL;
 ! 98:          }
         /* NOTE(review): assumes every known scheme value is below
         ** HTAA_MAX_SCHEMES -- confirm against the HTAAScheme enum.
         ** If the same scheme appears on two header lines the earlier
         ** HTAA_parseArgList result is overwritten here without being
         ** released, and the scheme is added to valid_schemes twice.
         */
 ! 99:          scheme_specifics[scheme] = HTAA_parseArgList(args);
 ! 100:          num_schemes++;
 ! 101:        }
 ! 102:        else if (TRACE) {
 ! 103:          fprintf(stderr, "Unknown scheme `%s' %s\n",
 ! 104:              (arg1 ? arg1 : "(null)"),
 ! 105:              "in WWW-Authenticate: field");
 ! 106:        }
 ! 107:      }
 ! 108: 
 ! 109:      else if (0==strcasecomp(fieldname, "WWW-Protection-Template:")) {
       /* NOTE(review): the branch above guards against arg1 being
       ** NULL, but here arg1 goes to a %s conversion and to
       ** StrAllocCopy unguarded -- confirm whether HTNextField can
       ** return NULL for a header with no argument.
       */
 ! 110:        if (TRACE)
 ! 111:          fprintf(stderr, "Protection template set to `%s'\n", arg1);
 ! 112:        StrAllocCopy(template, arg1);
 ! 113:      }
 ! 114: 
 ! 115:    } /* if a valid header line */
 ! 116:    else if (TRACE) {
 ! 117:      fprintf(stderr, "Invalid header line `%s' ignored\n", line);
 ! 118:    } /* else invalid header line */
 ! 119:   } /* while header lines remain */
 ! 120: 
   /* NOTE(review): any values already held in these three fields are
   ** overwritten without being released here -- confirm who owns them
   ** when a request is retried.
   */
 ! 121:   req->valid_schemes = valid_schemes;
 ! 122:   req->scheme_specifics = scheme_specifics;
 ! 123:   req->prot_template = template;
 ! 124: }
 ! 125: 
 ! 126: 
 ! 127: 
1.1 timbl 128: /*       Load Document from HTTP Server         HTLoadHTTP()
 129: **       ==============================
 130: **
 131: **   Given a request, this routine loads the document whose address is
 **   HTAnchor_physical(request->anchor): it resolves the host, connects,
 **   sends a GET request with Accept: and User-Agent: lines (plus
 **   Authorization: under ACCESS_AUTH), reads the reply and pushes the
 **   body into the stream built by HTStreamStack().
 132: **
 133: **
 134: ** On entry,
 135: **   request   supplies the anchor, the extra conversions list, the
 **             output format and the output stream used below.
 136: **
 137: ** On exit,
 138: **   returns -3    no address, -2 empty address;
 139: **       otherwise the nonzero HTParseInet() result, the HTInetStatus()
 **       result after a failed connect or send, the HTLoadError()
 **       result for 4xx, 5xx and unconvertible replies, or HT_LOADED.
 140: **
 141: **   The socket is opened and closed inside this routine; it is not
 142: **   handed back to the caller.
 143: **
 144: */
1.19 timbl 145: PUBLIC int HTLoadHTTP ARGS1 (HTRequest *, request)
1.1 timbl 146: {
1.19 timbl 147:   CONST char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 148:   int s;               /* Socket number for returned data */
 149:   int status;                /* tcp return, later the value returned */
1.10 timbl 150:   char crlf[3];           /* A CR LF equivalent string */
1.3 timbl 151:   HTStream * target = NULL;     /* Unconverted data */
 152:   
  /* gate is never assigned again, so every gate branch below takes the
  ** direct-access alternative.
  */
1.2 timbl 153:   CONST char* gate = 0;       /* disable this feature */
1.1 timbl 154:   SockA soc_address;         /* Binary network address */
 155:   SockA * sin = &soc_address;
  /* extensions is never cleared in this routine, so the full HTTP/1.0
  ** request form is always sent.
  */
1.2 timbl 156:   BOOL extensions = YES;       /* Assume good HTTP server */
1.17 timbl 157: 
1.1 timbl 158:   if (!arg) return -3;        /* Bad if no name specified   */
 159:   if (!*arg) return -2;       /* Bad if name had zero length */
 160: 
 161: /* Set up defaults:
 162: */
 163: #ifdef DECNET
1.2 timbl 164:   sin->sdn_family = AF_DECnet;      /* Family = DECnet, host order */
 165:   sin->sdn_objnum = DNP_OBJ;     /* Default: http object number */
1.1 timbl 166: #else /* Internet */
1.2 timbl 167:   sin->sin_family = AF_INET;   /* Family = internet, host order */
 168:   sin->sin_port = htons(TCP_PORT);  /* Default: http port  */
1.1 timbl 169: #endif
 170: 
1.10 timbl 171:   sprintf(crlf, "%c%c", CR, LF);   /* To be correct on Mac, VM, etc */
 172:   
1.1 timbl 173:   if (TRACE) {
 174:     if (gate) fprintf(stderr,
 175:        "HTTPAccess: Using gateway %s for %s\n", gate, arg);
 176:     else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
 177:   }
 178:   
 179: /* Get node name and optional port number:
 180: */
 181:   {
   /* This inner status shadows the function-level one; a nonzero
   ** HTParseInet result is returned to the caller unchanged.
   */
 182:    char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 183:    int status = HTParseInet(sin, p1); /* TBL 920622 */
 184:     free(p1);
 185:    if (status) return status;  /* No such host for example */
 186:   }
 187:   
1.15 luotonen 188: /*
 189: ** Compose authorization information (this was moved here
 190: ** from after the making of the connection so that the connection
 191: ** wouldn't have to wait while prompting username and password
 192: ** from the user).               -- AL 13.10.93
 193: */
 194: #ifdef ACCESS_AUTH
1.21 ! luotonen 195:   StrAllocCopy(request->argument, arg);
 ! 196:   HTAA_composeAuth(request);
 ! 197:   if (TRACE) {
 ! 198:    if (request->authorization)
 ! 199:      fprintf(stderr, "HTTP: Sending Authorization: %s\n",
 ! 200:          request->authorization);
 ! 201:    else
 ! 202:      fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
1.15 luotonen 203:   }
 204: #endif /* ACCESS_AUTH */
1.1 timbl 205:  
1.10 timbl 206: /*   Now, let's get a socket set up from the server for the data:
1.1 timbl 207: */   
  /* NOTE(review): the socket() result is passed straight to connect()
  ** without being checked for failure.
  */
 208: #ifdef DECNET
 209:   s = socket(AF_DECnet, SOCK_STREAM, 0);
 210: #else
 211:   s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 212: #endif
 213:   status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
 214:   if (status < 0) {
 215:      if (TRACE) fprintf(stderr, 
 216:       "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
1.17 timbl 217: 
     /* NOTE(review): s is not closed on this return path. */
1.1 timbl 218:      return HTInetStatus("connect");
 219:    }
 220:   
 221:   if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
 222: 
1.17 timbl 223: 
 224: /*   Compose and send command
 225: **   ------------------------
 226: */
 227:   {
 228:     char *command;         /* The whole command */
 229:    
1.1 timbl 230: /*   Ask that node for the document,
 231: **   omitting the host name & anchor if not gatewayed.
 232: */    
   /* Buffer size in both branches: 4 for "GET ", the address, 2 for
   ** CR LF, and 31 spare bytes that have to hold the space, the
   ** HTTP_VERSION token and the terminating NUL added by the strcat
   ** calls below. Header lines are appended later with StrAllocCat.
   */
1.17 timbl 233:    if (gate) {
 234:      command = malloc(4 + strlen(arg)+ 2 + 31);
 235:      if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 236:      strcpy(command, "GET ");
 237:      strcat(command, arg);
 238:    } else { /* not gatewayed */
 239:      char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
 240:      command = malloc(4 + strlen(p1)+ 2 + 31);
 241:      if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 242:      strcpy(command, "GET ");
 243:      strcat(command, p1);
 244:      free(p1);
 245:    }
1.2 timbl 246: #ifdef HTTP2
1.17 timbl 247:    if (extensions) {
 248:      strcat(command, " ");
 249:      strcat(command, HTTP_VERSION);
 250:    }
1.2 timbl 251: #endif
1.17 timbl 252:   
 253:    strcat(command, crlf); /* CR LF, as in rfc 977 */
 254:   
 255:    if (extensions) {
1.21 ! luotonen 256: 
1.17 timbl 257:      int i;
 258:      HTAtom * present = WWW_PRESENT;
     /* NOTE(review): every header line below is formatted into this
     ** fixed buffer with sprintf and no length check; the atom name,
     ** the application name and version, and request->authorization
     ** are all of unchecked length.
     */
 259:      char line[256];  /*@@@@ */
1.21 ! luotonen 260:      HTList *conversions[2];
 ! 261: 
     /* NOTE(review): HTFormatInit is handed the still-NULL
     ** HTConversions by value; presumably it sets up the global
     ** itself -- TODO confirm, otherwise conversions[0] stays NULL.
     */
 ! 262:      if (!HTConversions) HTFormatInit(HTConversions);
 ! 263:      conversions[0] = HTConversions;
 ! 264:      conversions[1] = request->conversions;
 ! 265: 
     /* One Accept: line for every presentation, global list first and
     ** then the per-request list, whose output is WWW_PRESENT. The
     ** q= parameter is omitted when quality is exactly 1.0.
     */
 ! 266:      for (i=0; i<2; i++) {
 ! 267:        HTList *cur = conversions[i];
 ! 268:        HTPresentation *pres;
 ! 269: 
       /* presumably HTList_nextObject advances cur as a side effect,
       ** since cur is not reassigned here -- confirm in HTList.h
       */
 ! 270:        while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
 ! 271:          if (pres->rep_out == present) {
 ! 272:            if (pres->quality != 1.0) {
 ! 273:              sprintf(line, "Accept: %s q=%.3f%c%c",
 ! 274:                  HTAtom_name(pres->rep),
 ! 275:                  pres->quality, CR, LF);
 ! 276:            } else {
 ! 277:              sprintf(line, "Accept: %s%c%c",
 ! 278:                  HTAtom_name(pres->rep), CR, LF);
 ! 279:            }
 ! 280:            StrAllocCat(command, line);
1.17 timbl 281:          }
 282:        }
1.2 timbl 283:      }
1.17 timbl 284:      
 285:      sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
 286:          HTAppName ? HTAppName : "unknown",
 287:          HTAppVersion ? HTAppVersion : "0.0",
 288:          HTLibraryVersion, CR, LF);
 289:          StrAllocCat(command, line);
 290:   
1.14 luotonen 291: #ifdef ACCESS_AUTH
1.21 ! luotonen 292:      if (request->authorization != NULL) {
 ! 293:        sprintf(line, "Authorization: %s%c%c",
 ! 294:            request->authorization, CR, LF);
1.17 timbl 295:        StrAllocCat(command, line);
 296:      }
 297: #endif /* ACCESS_AUTH */
1.14 luotonen 298:    }
1.17 timbl 299:   
 300:    StrAllocCat(command, crlf);   /* Blank line means "end" */
 301:   
 302:    if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
 303:   
 304:   /* Translate into ASCII if necessary
 305:   */
1.4 timbl 306: #ifdef NOT_ASCII
1.17 timbl 307:    {
 308:      char * p;
 309:      for(p = command; *p; p++) {
 310:        *p = TOASCII(*p);
 311:      }
1.1 timbl 312:    }
1.3 timbl 313: #endif
1.17 timbl 314:   
   /* Only a negative result is treated as failure; a short write
   ** would go unnoticed -- NOTE(review): confirm NETWRITE semantics.
   */
 315:    status = NETWRITE(s, command, (int)strlen(command));
 316:    free(command);
 317:    if (status<0) {
 318:      if (TRACE) fprintf(stderr,
 319:        "HTTPAccess: Unable to send command.\n");
     /* NOTE(review): s is not closed on this return path. */
1.1 timbl 320:      return HTInetStatus("send");
1.17 timbl 321:    }
 322:   } /* compose and send command */
 323:   
1.2 timbl 324: 
1.17 timbl 325: /*   Read the response
 326: **   -----------------
1.11 timbl 327: **
 328: **   HTTP0 servers must return ASCII style text, though it can in
 329: **   principle be just text without any markup at all.
 330: **   Full HTTP servers must return a response
 331: **   line and RFC822 style header. The response must therefore in
 332: **   either case have a CRLF somewhere soon.
 333: **
 334: **   This is the theory. In practice, there are (1993) unfortunately
 335: **   many binary documents just served up with HTTP0.9. This
 336: **   means we have to preserve the binary buffer (on the assumption that
 337: **   conversion from ASCII may lose information) in case it turns
 338: **   out that we want the binary original.
1.2 timbl 339: */
1.3 timbl 340: 
1.21 ! luotonen 341:   { /* read response */
 ! 342: 
1.17 timbl 343:    HTFormat format_in;       /* Format arriving in the message */
   /* isoc buffers reads from s; status_line is NULL when the reply
   ** is treated as an HTTP0 response (see the test below).
   */
1.21 ! luotonen 344:    HTInputSocket *isoc = HTInputSocket_new(s);
 ! 345:    char * status_line = HTInputSocket_getStatusLine(isoc);
1.2 timbl 346: 
1.11 timbl 347: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
 348: ** First time we have enough, look at the stub in ASCII
 349: ** and get out of here if it doesn't look right.
 350: **
 351: ** We also check for characters above 128 in the first few bytes, and
 352: ** if we find them we forget the html default.
 353: **
 354: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
 355: **   will be taken as a HTTP 1.0 server. Failure.
 356: **   An HTTP 0.9 server returning a binary document with
 357: **   characters < 128 will be read as ASCII.
 358: */
1.21 ! luotonen 359:    if (!status_line) {   /* HTTP0 response */
 ! 360:      if (HTInputSocket_seemsBinary(isoc)) {
 ! 361:        format_in = HTAtom_for("www/unknown");
 ! 362:      }
 ! 363:      else {
 ! 364:        format_in = WWW_HTML;
 ! 365:      }
 ! 366:      goto copy;
 ! 367:    } /* end kludge */
 ! 368: 
   /* status_line is always non-NULL here: the NULL case left via
   ** goto copy just above.
   */
 ! 369:    if (status_line) {   /* Decode full HTTP response */
 ! 370:      /*
 ! 371:      ** We now have a terminated server status line, and we have
 ! 372:      ** checked that it is most probably a legal one. Parse it.
 ! 373:      */
 ! 374:      char server_version[VERSION_LENGTH+1];
 ! 375:      int server_status;
 ! 376: 
 ! 377:      if (TRACE)
 ! 378:        fprintf(stderr, "HTTP Status Line: Rx: %.70s\n", status_line);
1.17 timbl 379:   
     /* NOTE(review): the sscanf result is not checked; if no
     ** integer follows the version token, server_status is read
     ** uninitialised by the switch below. The width 20 is a literal
     ** copy of VERSION_LENGTH.
     */
1.21 ! luotonen 380:      sscanf(status_line, "%20s%d", server_version, &server_status);
1.2 timbl 381: 
1.21 ! luotonen 382:      format_in = HTAtom_for("www/mime");
1.7 timbl 383:   
     /* Dispatch on the hundreds digit. The default and 3xx arms only
     ** raise an alert and then break, so the reply body is still
     ** piped through the www/mime stream at copy.
     */
1.21 ! luotonen 384:      switch (server_status / 100) {
1.2 timbl 385: 
1.21 ! luotonen 386:       default:     /* bad number */
 ! 387:        HTAlert("Unknown status reply from server!");
 ! 388:        break;
1.17 timbl 389:          
1.21 ! luotonen 390:       case 3:      /* Various forms of redirection */
 ! 391:        HTAlert(
1.17 timbl 392:      "Redirection response from server is not handled by this client");
1.21 ! luotonen 393:        break;
1.17 timbl 394:          
1.21 ! luotonen 395:       case 4:      /* Access Authorization problem */
1.14 luotonen 396: #ifdef ACCESS_AUTH
1.21 ! luotonen 397:        switch (server_status) {
 ! 398:         case 401:
 ! 399:          parse_401_headers(request, isoc);
 ! 400: 
 ! 401:          if (TRACE) fprintf(stderr, "%s %d %s\n",
 ! 402:                    "HTTP: close socket", s,
 ! 403:                    "to retry with Access Authorization");
         /* NOTE(review): isoc is freed and s is closed here, yet
         ** both ways out of this case reach clean_up, which calls
         ** NETCLOSE(s) a second time. On the fall-through path
         ** status_line is also read after isoc has been freed; if
         ** status_line points into storage owned by isoc (not
         ** visible here) that is a use after free -- confirm.
         */
 ! 404:          HTInputSocket_free(isoc);
 ! 405:          (void)NETCLOSE(s);
         /* HTLoadHTTP itself is handed over as the routine to
         ** call again for the retry.
         */
 ! 406:          if (HTAA_retryWithAuth(request, &HTLoadHTTP)) {
 ! 407:            status = HT_LOADED;/* @@ THIS ONLY WORKS ON LINEMODE */
 ! 408:            goto clean_up;
 ! 409:          }
 ! 410:          /* else fall through */
 ! 411:         default:
1.14 luotonen 412:          {
           /* Report the server status line to the user via
           ** HTLoadError. The 100 spare bytes cover the fixed
           ** text of the format and the optional 401 sentence.
           ** NOTE(review): isoc is not freed on this path when
           ** it is entered directly (non-401 4xx).
           */
1.21 ! luotonen 413:            char *p1 = HTParse(gate ? gate : arg, "",
 ! 414:                      PARSE_HOST);
 ! 415:            char * message;
 ! 416: 
 ! 417:            if (!(message = (char*)malloc(strlen(status_line) +
 ! 418:                           strlen(p1) + 100)))
 ! 419:              outofmem(__FILE__, "HTTP 4xx status");
1.14 luotonen 420:            sprintf(message,
1.21 ! luotonen 421:                "HTTP server at %s replies:\n%s\n\n%s\n",
 ! 422:                p1, status_line,
 ! 423:                ((server_status == 401) 
 ! 424:                 ? "Access Authorization package giving up.\n"
 ! 425:                 : ""));
 ! 426:            status = HTLoadError(request->output_stream,
 ! 427:                       server_status, message);
1.14 luotonen 428:            free(message);
 429:            free(p1);
 430:            goto clean_up;
 431:          }
1.21 ! luotonen 432:        } /* switch */
       /* Both arms of the inner switch end in goto clean_up, so
       ** the next two statements are not reached.
       */
 ! 433:        goto clean_up;
 ! 434:        break;
 ! 435: #else
 ! 436:        /* case 4 without Access Authorization falls through */
 ! 437:        /* to case 5 (previously "I think I goofed"). -- AL */
 ! 438: #endif /* ACCESS_AUTH */
 ! 439: 
 ! 440:       case 5:      /* I think you goofed */
 ! 441:        {
         /* NOTE(review): isoc is not freed on this path. */
 ! 442:          char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 ! 443:          char * message = (char*)malloc(strlen(status_line) + 
 ! 444:                          strlen(p1) + 100);
 ! 445:          if (!message) outofmem(__FILE__, "HTTP 5xx status");
 ! 446:          sprintf(message,
 ! 447:              "HTTP server at %s replies:\n%s", p1, status_line);
 ! 448:          status = HTLoadError(request->output_stream,
 ! 449:                     server_status, message);
 ! 450:          free(message);
 ! 451:          free(p1);
 ! 452:          goto clean_up;
 ! 453:        }
       /* not reached: the block above always jumps to clean_up */
 ! 454:        break;
1.17 timbl 455:          
1.21 ! luotonen 456:       case 2:      /* Good: Got MIME object */
 ! 457:        break;
1.17 timbl 458:   
1.21 ! luotonen 459:      } /* switch on response code */
1.17 timbl 460:      
1.21 ! luotonen 461:    } /* Full HTTP reply */
1.17 timbl 462:      
 463:   
1.3 timbl 464: /*   Set up the stream stack to handle the body of the message
 465: */
1.21 ! luotonen 466: 
1.13 duns 467: copy:
1.21 ! luotonen 468: 
1.18 timbl 469:    target = HTStreamStack(format_in, request);
1.21 ! luotonen 470: 
1.17 timbl 471:    if (!target) {
     /* NOTE(review): unbounded sprintf into a fixed buffer; the
     ** message is also written to stderr unconditionally (not
     ** guarded by TRACE) and without a trailing newline. isoc is
     ** not freed on this path.
     */
 472:      char buffer[1024]; /* @@@@@@@@ */
 473:      sprintf(buffer, "Sorry, no known way of converting %s to %s.",
 474:          HTAtom_name(format_in), HTAtom_name(request->output_format));
 475:      fprintf(stderr, "HTTP: %s", buffer);
 476:      status = HTLoadError(request->output_stream, 501, buffer);
 477:      goto clean_up;
 478:    }
 479:   
1.19 timbl 480:     /* @@ Bug: The decision of whether or not to cache should also be
1.21 ! luotonen 481:    ** made contingent on a IP address match or non match.
 ! 482:    */
    /* NOTE(review): the HTCacheWriter result is passed to HTTee
    ** unchecked -- confirm HTTee copes with a NULL second stream.
    */
1.19 timbl 483:     if (HTCacheHTTP) {
 484:      target = HTTee(target, HTCacheWriter(request, NULL, format_in,
1.21 ! luotonen 485:                         request->output_format,
 ! 486:                         request->output_stream));
1.19 timbl 487:    }
 488:    
1.11 timbl 489: /*   Push the data down the stream
1.3 timbl 490: **   We have to remember the end of the first buffer we just read
1.2 timbl 491: */
1.17 timbl 492:    if (format_in == WWW_HTML) {
 493:      target = HTNetToText(target);    /* Pipe through CR stripper */
 494:    }
1.21 ! luotonen 495: 
   /* First hand over the bytes isoc has already buffered but not
   ** consumed (input_pointer up to input_limit), then release isoc
   ** and let HTCopy feed the stream from the socket itself.
   */
1.17 timbl 496:    (*target->isa->put_block)(target,
1.21 ! luotonen 497:                 isoc->input_pointer,
 ! 498:                 isoc->input_limit - isoc->input_pointer);
 ! 499:    HTInputSocket_free(isoc);
1.17 timbl 500:    HTCopy(s, target);
 501:      
 502:    (*target->isa->free)(target);
 503:    status = HT_LOADED;
1.11 timbl 504:   
1.2 timbl 505: /*   Clean up
1.1 timbl 506: */
1.17 timbl 507:    
 508: clean_up: 
 509:    if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
 510:    (void) NETCLOSE(s);
 511:   
 512:    return status;         /* Success and every goto clean_up error path */
1.3 timbl 513:   
1.17 timbl 514:   } /* read response */
 515: } /* load HTTP */
1.1 timbl 516: 
 517: /*   Protocol descriptor
 518: */
 519: 
/* Binds the access scheme name "http" to HTLoadHTTP. The two trailing
** members are left zero; their meaning is defined by HTProtocol, which
** is declared outside this file.
*/
1.17 timbl 520: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };
1.21 ! luotonen 521: 

Webmaster

AltStyle によって変換されたページ (->オリジナル) /