[BACK] Return to HTTP.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTTP.c, revision 1.8

1.1 timbl 1: /*   HyperText Transfer Protocol   - Client implementation     HTTP.c
 2: **   ==========================
1.2 timbl 3: **
 4: ** Bugs:
 5: **   Not implemented:
 6: **       Forward
 7: **       Redirection
 8: **       Error handling
1.1 timbl 9: */
 10: 
 11: /*   Module parameters:
 12: **   -----------------
 13: **
 14: ** These may be undefined and redefined by syspec.h
 15: */
1.2 timbl 16: 
 17: /* Implements:
 18: */
 19: #include "HTTP.h"
 20: 
 21: #define HTTP_VERSION  "HTTP/1.0"
 22: #define HTTP2             /* Version is greater than 0.9 */
 23: 
1.3 timbl 24: #define CR  FROMASCII('015円') /* Must be converted to ^M for transmission */
 25: #define LF  FROMASCII('012円') /* Must be converted to ^J for transmission */
 26: 
1.2 timbl 27: #define INIT_LINE_SIZE     1024  /* Start with line buffer this big */
 28: #define LINE_EXTEND_THRESH   256   /* Minimum read size */
 29: #define VERSION_LENGTH         20   /* for returned protocol version */
 30: 
 31: /* Uses:
 32: */
1.1 timbl 33: #include "HTParse.h"
 34: #include "HTUtils.h"
 35: #include "tcp.h"
 36: #include "HTTCP.h"
 37: #include "HTFormat.h"
1.2 timbl 38: #include <ctype.h>
 39: #include "HTAlert.h"
 40: #include "HTMIME.h"
1.5 timbl 41: #include "HTML.h"       /* SCW */
 42: #include "HTInit.h"      /* SCW */
1.1 timbl 43: 
1.2 timbl 44: struct _HTStream {
 45:    HTStreamClass * isa;      /* all we need to know */
 46: };
 47: 
 48: 
1.6 timbl 49: extern char * HTAppName;    /* Application name: please supply */
 50: extern char * HTAppVersion;  /* Application version: please supply */
 51: 
1.1 timbl 52: /*       Load Document from HTTP Server         HTLoadHTTP()
 53: **       ==============================
 54: **
 55: **   Given a hypertext address, this routine loads a document.
 56: **
 57: **
 58: ** On entry,
 59: **   arg   is the hypertext reference of the article to be loaded.
 60: **   gate  is nill if no gateway, else the gateway address.
 61: **
 62: ** On exit,
 63: **   returns >=0   If no error, a good socket number
 64: **       <0   Error.
 65: **
 66: **   The socket must be closed by the caller after the document has been
 67: **   read.
 68: **
 69: */
1.2 timbl 70: PUBLIC int HTLoadHTTP ARGS4 (
 71:    CONST char *,      arg,
 72: /*   CONST char *,      gate, */
 73:    HTParentAnchor *,    anAnchor,
 74:    HTFormat,        format_out,
 75:    HTStream*,       sink)
1.1 timbl 76: {
 77:   int s;               /* Socket number for returned data */
 78:   char *command;           /* The whole command */
1.3 timbl 79:   char * eol = 0;          /* End of line if found */
1.7 timbl 80:   char * start_of_data;       /* Start of body of reply */
1.1 timbl 81:   int status;                /* tcp return */
1.3 timbl 82:   HTStream * target = NULL;     /* Unconverted data */
 83:   HTFormat format_in;            /* Format arriving in the message */
 84:   
1.2 timbl 85:   CONST char* gate = 0;       /* disable this feature */
1.1 timbl 86:   SockA soc_address;         /* Binary network address */
 87:   SockA * sin = &soc_address;
1.2 timbl 88:   BOOL had_header = NO;       /* Have we had at least one header? */
 89:   char * line_buffer = NULL;
 90:   BOOL extensions = YES;       /* Assume good HTTP server */
1.1 timbl 91:   if (!arg) return -3;        /* Bad if no name sepcified   */
 92:   if (!*arg) return -2;       /* Bad if name had zero length */
 93: 
 94: /* Set up defaults:
 95: */
 96: #ifdef DECNET
1.2 timbl 97:   sin->sdn_family = AF_DECnet;      /* Family = DECnet, host order */
 98:   sin->sdn_objnum = DNP_OBJ;     /* Default: http object number */
1.1 timbl 99: #else /* Internet */
1.2 timbl 100:   sin->sin_family = AF_INET;   /* Family = internet, host order */
 101:   sin->sin_port = htons(TCP_PORT);  /* Default: http port  */
1.1 timbl 102: #endif
 103: 
 104:   if (TRACE) {
 105:     if (gate) fprintf(stderr,
 106:        "HTTPAccess: Using gateway %s for %s\n", gate, arg);
 107:     else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
 108:   }
 109:   
 110: /* Get node name and optional port number:
 111: */
 112:   {
 113:    char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 114:    int status = HTParseInet(sin, p1); /* TBL 920622 */
 115:     free(p1);
 116:    if (status) return status;  /* No such host for example */
 117:   }
 118:   
1.2 timbl 119: retry:
1.1 timbl 120:  
 121: /*   Now, let's get a socket set up from the server for the sgml data:
 122: */   
 123: #ifdef DECNET
 124:   s = socket(AF_DECnet, SOCK_STREAM, 0);
 125: #else
 126:   s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 127: #endif
 128:   status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
 129:   if (status < 0) {
 130: #ifndef DECNET
 131:    /* This code is temporary backward-compatibility. It should
 132:      go away when no server runs on port 2784 alone */
 133:    if (sin->sin_port == htons(TCP_PORT)) { /* Try the old one */
 134:     if (TRACE) printf (
 135:      "HTTP: Port %d doesn't answer (errno = %d). Trying good old port %d...\n",
 136:      TCP_PORT, errno, OLD_TCP_PORT);
 137:     sin->sin_port = htons(OLD_TCP_PORT);
 138:     /* First close current socket and open a clean one */
 139:     status = NETCLOSE (s);
 140:     s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 141:     status = connect(s, (struct sockaddr*)&soc_address,
 142:              sizeof(soc_address));
 143:    }
 144:    if (status < 0)
 145: #endif
 146:     {
 147:      if (TRACE) fprintf(stderr, 
 148:       "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
 149:      /* free(command);  BUG OUT TBL 921121 */
 150:      return HTInetStatus("connect");
 151:     }
 152:    }
 153:   
 154:   if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
 155: 
 156: /*   Ask that node for the document,
 157: **   omitting the host name & anchor if not gatewayed.
 158: */    
 159:   if (gate) {
1.2 timbl 160:     command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 161:     if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 162:     strcpy(command, "GET ");
 163:    strcat(command, arg);
 164:   } else { /* not gatewayed */
 165:    char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 166:     command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 167:     if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 168:     strcpy(command, "GET ");
 169:    strcat(command, p1);
 170:    free(p1);
 171:   }
1.2 timbl 172: #ifdef HTTP2
 173:   if (extensions) {
 174:     strcat(command, " ");
 175:     strcat(command, HTTP_VERSION);
 176:   }
 177: #endif
1.3 timbl 178:   {
 179:    char * p = command + strlen(command);
 180:    *p++ = CR;       /* Macros to be correct on Mac */
 181:    *p++ = LF;
 182:    *p++ = 0;
 183:    /* strcat(command, "\r\n");   */   /* CR LF, as in rfc 977 */
 184:   }
1.1 timbl 185: 
1.2 timbl 186:   if (extensions) {
 187: 
 188:    int n;
 189:    int i;
 190:     HTAtom * present = WWW_PRESENT;
 191:    char line[256];  /*@@@@ */
 192: 
 193:    if (!HTPresentations) HTFormatInit();
 194:    n = HTList_count(HTPresentations);
 195: 
 196:    for(i=0; i<n; i++) {
 197:      HTPresentation * pres = HTList_objectAt(HTPresentations, i);
 198:      if (pres->rep_out == present) {
 199:       if (pres->quality != 1.0) {
1.3 timbl 200:         sprintf(line, "Accept: %s q=%.3f%c%c",
 201:             HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 202:       } else {
1.3 timbl 203:         sprintf(line, "Accept: %s%c%c",
 204:             HTAtom_name(pres->rep), CR, LF);
1.2 timbl 205:       }
 206:       StrAllocCat(command, line);
 207: 
 208:      }
 209:    }
1.6 timbl 210:    
 211:    sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
 212:        HTAppName ? HTAppName : "unknown",
 213:        HTAppVersion ? HTAppVersion : "0.0",
 214:        HTLibraryVersion, CR, LF);
 215:       StrAllocCat(command, line);
1.2 timbl 216:   }
1.3 timbl 217:    
1.4 timbl 218:   StrAllocCat(command, "015円012円"); /* Blank line means "end" */
 219: #ifdef NOT_ASCII
1.1 timbl 220:   {
 221:    char * p;
 222:    for(p = command; *p; p++) {
 223:      *p = TOASCII(*p);
 224:    }
1.4 timbl 225:   }
1.3 timbl 226: #endif
1.1 timbl 227: 
1.2 timbl 228:   if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
1.1 timbl 229:   status = NETWRITE(s, command, (int)strlen(command));
 230:   free(command);
 231:   if (status<0) {
 232:    if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
 233:      return HTInetStatus("send");
 234:   }
 235: 
1.2 timbl 236: 
1.7 timbl 237: /*   Read the first line of the response
 238: **   -----------------------------------
1.2 timbl 239: */
1.3 timbl 240: 
1.2 timbl 241:   {
 242:   
 243:   /* Get numeric status etc */
 244: 
 245:    int status;
 246:    int length = 0;
 247:    BOOL end_of_file = NO;
 248:    HTAtom * encoding = HTAtom_for("7bit");
 249:    int buffer_length = INIT_LINE_SIZE;   /* Why not? */
 250:    
 251:    line_buffer = (char *) malloc(buffer_length * sizeof(char));
 252:    if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
 253:    
1.7 timbl 254:    do {  /* Loop to read in the first line */
1.2 timbl 255:      
 256:      /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
 257:      
 258:      if (buffer_length - length < LINE_EXTEND_THRESH) {
 259:        buffer_length = buffer_length + buffer_length;
 260:        line_buffer = (char *) realloc(
 261:            line_buffer, buffer_length * sizeof(char));
 262:        if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
 263:      }
 264:      status = NETREAD(s, line_buffer + length,
 265:                buffer_length - length -1);
 266:      if (status < 0) {
 267:        HTAlert("Unexpected network read error on response");
 268:        return status;
 269:      }
 270:      if (status == 0) {
 271:        end_of_file = YES;
 272:        break;
 273:      }
 274:      line_buffer[length+status] = 0;
 275: #ifdef NOT_ASCII
 276:      {
 277:        char * p;
 278:        for(p = line_buffer+length; *p; p++) {
 279:          *p = FROMASCII(*p);
 280:        }
 281:      }
 282: #endif
1.3 timbl 283:      eol = strchr(line_buffer + length, LF);
 284:       if (eol && *(eol-1) == CR) *(eol-1) = ' '; 
1.2 timbl 285: 
 286:      length = length + status;
 287:          
1.7 timbl 288:      if (eol) *eol = 0;     /* Terminate the line */
1.2 timbl 289: 
1.7 timbl 290:    } while (!eol && !end_of_file);     /* No LF */     
 291:        
 292:   } /* Scope of loop variables */
1.2 timbl 293: 
1.7 timbl 294:   
 295: /*   We now have a terminated unfolded line. Parse it.
 296: **   -------------------------------------------------
1.2 timbl 297: */
 298: 
1.7 timbl 299:   if (TRACE)fprintf(stderr, "HTTP: Rx: %s\n", line_buffer);
 300: 
 301:   {
 302:    int fields;
 303:    char server_version [VERSION_LENGTH+1];
 304:    int server_status;
 305: 
1.2 timbl 306: 
 307: /* Kludge to work with old buggy servers. They can't handle the third word
 308: ** so we try again without it.
 309: */
1.7 timbl 310:    if (extensions &&
 311:        0==strcmp(line_buffer,     /* Old buggy server? */
 312:        "Document address invalid or access not authorised")) {
 313:      extensions = NO;
 314:      if (line_buffer) free(line_buffer);
 315:      if (TRACE) fprintf(stderr,
 316:        "HTTP: close socket %d to retry with HTTP0\n", s);
 317:      NETCLOSE(s);
 318:      goto retry;     /* @@@@@@@@@@ */
 319:    }
1.2 timbl 320: 
1.7 timbl 321:    fields = sscanf(line_buffer, "%20s%d",
 322:      server_version,
 323:      &server_status);
 324: 
 325:    if (fields < 2) {            /* HTTP0 reply */
 326:      format_in = WWW_HTML;
 327:      start_of_data = line_buffer;    /* reread whole reply */
1.2 timbl 328:      
1.7 timbl 329:    } else {                /* Ful HTTP reply */
 330:    
 331:    /*   Decode full HTTP response */
 332:    
1.3 timbl 333:      format_in = HTAtom_for("www/mime");
1.7 timbl 334:      start_of_data = eol ? eol + 1 : "";
1.3 timbl 335: 
1.2 timbl 336:      switch (server_status / 100) {
 337:      
1.3 timbl 338:      default:      /* bad number */
 339:        HTAlert("Unknown status reply from server!");
 340:        break;
 341:        
1.2 timbl 342:      case 3:       /* Various forms of redirection */
1.7 timbl 343:        HTAlert(
1.3 timbl 344:    "Redirection response from server is not handled by this client");
 345:        break;
 346:        
1.2 timbl 347:      case 4:       /* "I think I goofed" */
 348:      case 5:       /* I think you goofed */
1.6 timbl 349:        {
 350:          char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 351:          char * message = (char*)malloc(
1.7 timbl 352:            strlen(line_buffer)+strlen(p1) + 100);
1.8 ! timbl 353:          int status;
1.6 timbl 354:          if (!message) outofmem(__FILE__, "HTTP 5xx status");
 355:          sprintf(message,
 356:          "HTTP server at %s replies:\n%s", p1, line_buffer);
1.8 ! timbl 357:          status = HTLoadError(sink, server_status, message);
1.6 timbl 358:          free(message);
 359:          free(p1);
 360:          goto clean_up;
 361:        }
1.3 timbl 362:        break;
1.2 timbl 363:        
 364:      case 2:       /* Good: Got MIME object */
 365:        break;
 366: 
1.7 timbl 367:      } /* switch on response code */
 368:    
 369:    }        /* Full HTTP reply */
 370:    
 371:   } /* scope of fields */
1.2 timbl 372: 
1.3 timbl 373: /*   Set up the stream stack to handle the body of the message
 374: */
 375: 
 376:   target = HTStreamStack(format_in,
 377:            format_out,
 378:            sink , anAnchor);
 379: 
 380:   if (!target) {
 381:    char buffer[1024];   /* @@@@@@@@ */
 382:    if (line_buffer) free(line_buffer);
 383:    sprintf(buffer, "Sorry, no known way of converting %s to %s.",
 384:        HTAtom_name(format_in), HTAtom_name(format_out));
 385:    fprintf(stderr, "HTTP: %s", buffer);
1.6 timbl 386:    status = HTLoadError(sink, 501, buffer);
 387:    goto clean_up;
1.3 timbl 388:   }
 389: 
 390:   
 391: /*   Push the data, maybe ignoring CR, down the stream
 392: **   We have to remember the end of the first buffer we just read
1.2 timbl 393: */
1.3 timbl 394:   if (format_in != WWW_HTML) {
1.7 timbl 395:     (*target->isa->put_string)(target, start_of_data);
1.3 timbl 396:    HTCopy(s, target);
 397:    
 398:   } else {  /* ascii text with CRLFs :-( */
1.7 timbl 399:     {
1.3 timbl 400:      char * p;
1.7 timbl 401:      for (p = start_of_data; *p; p++)
1.3 timbl 402:        if (*p != '\r') (*target->isa->put_character)(target, *p);
 403:    }
 404:    HTCopyNoCR(s, target);
1.2 timbl 405:   }
1.3 timbl 406:   (*target->isa->end_document)(target);
 407:   (*target->isa->free)(target);
1.8 ! timbl 408:   status = HT_LOADED;
1.2 timbl 409: 
 410: /*   Clean up
1.1 timbl 411: */
1.3 timbl 412:   
1.6 timbl 413: clean_up: 
1.2 timbl 414:   if (line_buffer) free(line_buffer);
1.3 timbl 415: 
1.1 timbl 416:   if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
1.6 timbl 417:   (void) NETCLOSE(s);
1.1 timbl 418: 
1.8 ! timbl 419:   return status;           /* Good return */
1.3 timbl 420: 
1.1 timbl 421: }
1.7 timbl 422: 
1.1 timbl 423: 
 424: /*   Protocol descriptor
 425: */
 426: 
1.2 timbl 427: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };

Webmaster

AltStyle によって変換されたページ (->オリジナル) /