[BACK] Return to HTTP.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTTP.c, revision 1.10

1.1 timbl 1: /*   HyperText Transfer Protocol   - Client implementation     HTTP.c
 2: **   ==========================
1.2 timbl 3: **
 4: ** Bugs:
 5: **   Not implemented:
 6: **       Forward
 7: **       Redirection
 8: **       Error handling
1.1 timbl 9: */
 10: 
 11: /*   Module parameters:
 12: **   -----------------
 13: **
 14: ** These may be undefined and redefined by syspec.h
 15: */
1.2 timbl 16: 
 17: /* Implements:
 18: */
 19: #include "HTTP.h"
 20: 
 21: #define HTTP_VERSION  "HTTP/1.0"
 22: #define HTTP2             /* Version is greater than 0.9 */
 23: 
1.3 timbl 24: #define CR  FROMASCII('015円') /* Must be converted to ^M for transmission */
 25: #define LF  FROMASCII('012円') /* Must be converted to ^J for transmission */
 26: 
1.2 timbl 27: #define INIT_LINE_SIZE     1024  /* Start with line buffer this big */
 28: #define LINE_EXTEND_THRESH   256   /* Minimum read size */
 29: #define VERSION_LENGTH         20   /* for returned protocol version */
 30: 
 31: /* Uses:
 32: */
1.1 timbl 33: #include "HTParse.h"
 34: #include "HTUtils.h"
 35: #include "tcp.h"
 36: #include "HTTCP.h"
 37: #include "HTFormat.h"
1.2 timbl 38: #include <ctype.h>
 39: #include "HTAlert.h"
 40: #include "HTMIME.h"
1.5 timbl 41: #include "HTML.h"       /* SCW */
 42: #include "HTInit.h"      /* SCW */
1.1 timbl 43: 
1.2 timbl 44: struct _HTStream {
 45:    HTStreamClass * isa;      /* Class record; this file calls the stream's methods only through it */
 46: };
 47: 
 48: 
1.6 timbl 49: extern char * HTAppName;    /* Application name, supplied by the client; sent in User-Agent ("unknown" if null) */
 50: extern char * HTAppVersion;  /* Application version, supplied by the client; sent in User-Agent ("0.0" if null) */
 51: 
1.1 timbl 52: /*       Load Document from HTTP Server         HTLoadHTTP()
 53: **       ==============================
 54: **
 55: **   Given a hypertext address, this routine loads a document.
 56: **
 57: **
 58: ** On entry,
 59: **   arg   is the hypertext reference of the article to be loaded.
 60: **   gate  is nill if no gateway, else the gateway address.
 61: **
 62: ** On exit,
 63: **   returns >=0   If no error, a good socket number
 64: **       <0   Error.
 65: **
 66: **   The socket must be closed by the caller after the document has been
 67: **   read.
 68: **
 69: */
1.2 timbl 70: PUBLIC int HTLoadHTTP ARGS4 (
 71:    CONST char *,      arg,
 72: /*   CONST char *,      gate, */
 73:    HTParentAnchor *,    anAnchor,
 74:    HTFormat,        format_out,
 75:    HTStream*,       sink)
1.1 timbl 76: {
 77:   int s;               /* Socket number for returned data */
 78:   char *command;           /* The whole command */
1.3 timbl 79:   char * eol = 0;          /* End of line if found */
1.7 timbl 80:   char * start_of_data;       /* Start of body of reply */
1.1 timbl 81:   int status;                /* tcp return */
1.10 ! timbl 82:   char crlf[3];           /* A CR LF equivalent string */
1.3 timbl 83:   HTStream * target = NULL;     /* Unconverted data */
 84:   HTFormat format_in;            /* Format arriving in the message */
 85:   
1.2 timbl 86:   CONST char* gate = 0;       /* disable this feature */
1.1 timbl 87:   SockA soc_address;         /* Binary network address */
 88:   SockA * sin = &soc_address;
1.2 timbl 89:   BOOL had_header = NO;       /* Have we had at least one header? */
 90:   char * line_buffer = NULL;
 91:   BOOL extensions = YES;       /* Assume good HTTP server */
1.1 timbl 92:   if (!arg) return -3;        /* Bad if no name sepcified   */
 93:   if (!*arg) return -2;       /* Bad if name had zero length */
 94: 
 95: /* Set up defaults:
 96: */
 97: #ifdef DECNET
1.2 timbl 98:   sin->sdn_family = AF_DECnet;      /* Family = DECnet, host order */
 99:   sin->sdn_objnum = DNP_OBJ;     /* Default: http object number */
1.1 timbl 100: #else /* Internet */
1.2 timbl 101:   sin->sin_family = AF_INET;   /* Family = internet, host order */
 102:   sin->sin_port = htons(TCP_PORT);  /* Default: http port  */
1.1 timbl 103: #endif
 104: 
1.10 ! timbl 105:   sprintf(crlf, "%c%c", CR, LF);   /* To be corect on Mac, VM, etc */
 ! 106:   
1.1 timbl 107:   if (TRACE) {
 108:     if (gate) fprintf(stderr,
 109:        "HTTPAccess: Using gateway %s for %s\n", gate, arg);
 110:     else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
 111:   }
 112:   
 113: /* Get node name and optional port number:
 114: */
 115:   {
 116:    char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 117:    int status = HTParseInet(sin, p1); /* TBL 920622 */
 118:     free(p1);
 119:    if (status) return status;  /* No such host for example */
 120:   }
 121:   
1.2 timbl 122: retry:
1.1 timbl 123:  
1.10 ! timbl 124: /*   Now, let's get a socket set up from the server for the data:
1.1 timbl 125: */   
 126: #ifdef DECNET
 127:   s = socket(AF_DECnet, SOCK_STREAM, 0);
 128: #else
 129:   s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 130: #endif
 131:   status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
 132:   if (status < 0) {
 133:      if (TRACE) fprintf(stderr, 
 134:       "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
 135:      /* free(command);  BUG OUT TBL 921121 */
 136:      return HTInetStatus("connect");
 137:    }
 138:   
 139:   if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
 140: 
 141: /*   Ask that node for the document,
 142: **   omitting the host name & anchor if not gatewayed.
 143: */    
 144:   if (gate) {
1.2 timbl 145:     command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 146:     if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 147:     strcpy(command, "GET ");
 148:    strcat(command, arg);
 149:   } else { /* not gatewayed */
 150:    char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 151:     command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 152:     if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 153:     strcpy(command, "GET ");
 154:    strcat(command, p1);
 155:    free(p1);
 156:   }
1.2 timbl 157: #ifdef HTTP2
 158:   if (extensions) {
 159:     strcat(command, " ");
 160:     strcat(command, HTTP_VERSION);
 161:   }
 162: #endif
1.10 ! timbl 163: 
 ! 164:   strcat(command, crlf);   /* CR LF, as in rfc 977 */
1.1 timbl 165: 
1.2 timbl 166:   if (extensions) {
 167: 
 168:    int n;
 169:    int i;
 170:     HTAtom * present = WWW_PRESENT;
 171:    char line[256];  /*@@@@ */
 172: 
 173:    if (!HTPresentations) HTFormatInit();
 174:    n = HTList_count(HTPresentations);
 175: 
 176:    for(i=0; i<n; i++) {
 177:      HTPresentation * pres = HTList_objectAt(HTPresentations, i);
 178:      if (pres->rep_out == present) {
 179:       if (pres->quality != 1.0) {
1.3 timbl 180:         sprintf(line, "Accept: %s q=%.3f%c%c",
 181:             HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 182:       } else {
1.3 timbl 183:         sprintf(line, "Accept: %s%c%c",
 184:             HTAtom_name(pres->rep), CR, LF);
1.2 timbl 185:       }
 186:       StrAllocCat(command, line);
 187: 
 188:      }
 189:    }
1.6 timbl 190:    
 191:    sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
 192:        HTAppName ? HTAppName : "unknown",
 193:        HTAppVersion ? HTAppVersion : "0.0",
 194:        HTLibraryVersion, CR, LF);
 195:       StrAllocCat(command, line);
1.2 timbl 196:   }
1.3 timbl 197:    
1.10 ! timbl 198:   StrAllocCat(command, crlf);    /* Blank line means "end" */
 ! 199: 
 ! 200:   if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
 ! 201: 
 ! 202: /*   Translate into ASCII if necessary
 ! 203: */
1.4 timbl 204: #ifdef NOT_ASCII
1.1 timbl 205:   {
 206:    char * p;
 207:    for(p = command; *p; p++) {
 208:      *p = TOASCII(*p);
 209:    }
1.4 timbl 210:   }
1.3 timbl 211: #endif
1.1 timbl 212: 
 213:   status = NETWRITE(s, command, (int)strlen(command));
 214:   free(command);
 215:   if (status<0) {
 216:    if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
 217:      return HTInetStatus("send");
 218:   }
 219: 
1.2 timbl 220: 
1.7 timbl 221: /*   Read the first line of the response
 222: **   -----------------------------------
1.2 timbl 223: */
1.3 timbl 224: 
1.2 timbl 225:   {
 226:   
 227:   /* Get numeric status etc */
 228: 
 229:    int length = 0;
 230:    BOOL end_of_file = NO;
 231:    HTAtom * encoding = HTAtom_for("7bit");
 232:    int buffer_length = INIT_LINE_SIZE;   /* Why not? */
 233:    
 234:    line_buffer = (char *) malloc(buffer_length * sizeof(char));
 235:    if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
 236:    
1.7 timbl 237:    do {  /* Loop to read in the first line */
1.2 timbl 238:      
 239:      /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
 240:      
 241:      if (buffer_length - length < LINE_EXTEND_THRESH) {
 242:        buffer_length = buffer_length + buffer_length;
 243:        line_buffer = (char *) realloc(
 244:            line_buffer, buffer_length * sizeof(char));
 245:        if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
 246:      }
 247:      status = NETREAD(s, line_buffer + length,
 248:                buffer_length - length -1);
 249:      if (status < 0) {
 250:        HTAlert("Unexpected network read error on response");
1.9 timbl 251:        NETCLOSE(s);
1.2 timbl 252:        return status;
 253:      }
1.10 ! timbl 254: 
 ! 255:      if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
 ! 256:         status);
 ! 257: 
1.2 timbl 258:      if (status == 0) {
 259:        end_of_file = YES;
 260:        break;
 261:      }
 262:      line_buffer[length+status] = 0;
 263: #ifdef NOT_ASCII
1.10 ! timbl 264:      if (TRACE) fprintf(stderr, "Local codes CR=%d, LF=%d\n", CR, LF);
1.2 timbl 265:      {
 266:        char * p;
 267:        for(p = line_buffer+length; *p; p++) {
 268:          *p = FROMASCII(*p);
 269:        }
 270:      }
 271: #endif
1.3 timbl 272:      eol = strchr(line_buffer + length, LF);
 273:       if (eol && *(eol-1) == CR) *(eol-1) = ' '; 
1.2 timbl 274: 
 275:      length = length + status;
 276:          
1.7 timbl 277:      if (eol) *eol = 0;     /* Terminate the line */
1.2 timbl 278: 
1.7 timbl 279:    } while (!eol && !end_of_file);     /* No LF */     
 280:        
 281:   } /* Scope of loop variables */
1.2 timbl 282: 
1.7 timbl 283:   
 284: /*   We now have a terminated unfolded line. Parse it.
 285: **   -------------------------------------------------
1.2 timbl 286: */
 287: 
1.7 timbl 288:   if (TRACE)fprintf(stderr, "HTTP: Rx: %s\n", line_buffer);
 289: 
 290:   {
 291:    int fields;
 292:    char server_version [VERSION_LENGTH+1];
 293:    int server_status;
 294: 
1.2 timbl 295: 
 296: /* Kludge to work with old buggy servers. They can't handle the third word
 297: ** so we try again without it.
 298: */
1.7 timbl 299:    if (extensions &&
 300:        0==strcmp(line_buffer,     /* Old buggy server? */
 301:        "Document address invalid or access not authorised")) {
 302:      extensions = NO;
 303:      if (line_buffer) free(line_buffer);
 304:      if (TRACE) fprintf(stderr,
 305:        "HTTP: close socket %d to retry with HTTP0\n", s);
 306:      NETCLOSE(s);
 307:      goto retry;     /* @@@@@@@@@@ */
 308:    }
1.2 timbl 309: 
1.7 timbl 310:    fields = sscanf(line_buffer, "%20s%d",
 311:      server_version,
 312:      &server_status);
 313: 
 314:    if (fields < 2) {            /* HTTP0 reply */
 315:      format_in = WWW_HTML;
 316:      start_of_data = line_buffer;    /* reread whole reply */
1.9 timbl 317:      if (eol) *eol = '\n';        /* Reconstitute buffer */
1.2 timbl 318:      
1.7 timbl 319:    } else {                /* Ful HTTP reply */
 320:    
 321:    /*   Decode full HTTP response */
 322:    
1.3 timbl 323:      format_in = HTAtom_for("www/mime");
1.7 timbl 324:      start_of_data = eol ? eol + 1 : "";
1.3 timbl 325: 
1.2 timbl 326:      switch (server_status / 100) {
 327:      
1.3 timbl 328:      default:      /* bad number */
 329:        HTAlert("Unknown status reply from server!");
 330:        break;
 331:        
1.2 timbl 332:      case 3:       /* Various forms of redirection */
1.7 timbl 333:        HTAlert(
1.3 timbl 334:    "Redirection response from server is not handled by this client");
 335:        break;
 336:        
1.2 timbl 337:      case 4:       /* "I think I goofed" */
 338:      case 5:       /* I think you goofed */
1.6 timbl 339:        {
 340:          char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 341:          char * message = (char*)malloc(
1.7 timbl 342:            strlen(line_buffer)+strlen(p1) + 100);
1.6 timbl 343:          if (!message) outofmem(__FILE__, "HTTP 5xx status");
 344:          sprintf(message,
 345:          "HTTP server at %s replies:\n%s", p1, line_buffer);
1.8 timbl 346:          status = HTLoadError(sink, server_status, message);
1.6 timbl 347:          free(message);
 348:          free(p1);
 349:          goto clean_up;
 350:        }
1.3 timbl 351:        break;
1.2 timbl 352:        
 353:      case 2:       /* Good: Got MIME object */
 354:        break;
 355: 
1.7 timbl 356:      } /* switch on response code */
 357:    
 358:    }        /* Full HTTP reply */
 359:    
 360:   } /* scope of fields */
1.2 timbl 361: 
1.3 timbl 362: /*   Set up the stream stack to handle the body of the message
 363: */
 364: 
 365:   target = HTStreamStack(format_in,
 366:            format_out,
 367:            sink , anAnchor);
 368: 
 369:   if (!target) {
 370:    char buffer[1024];   /* @@@@@@@@ */
 371:    if (line_buffer) free(line_buffer);
 372:    sprintf(buffer, "Sorry, no known way of converting %s to %s.",
 373:        HTAtom_name(format_in), HTAtom_name(format_out));
 374:    fprintf(stderr, "HTTP: %s", buffer);
1.6 timbl 375:    status = HTLoadError(sink, 501, buffer);
 376:    goto clean_up;
1.3 timbl 377:   }
 378: 
 379:   
 380: /*   Push the data, maybe ignoring CR, down the stream
 381: **   We have to remember the end of the first buffer we just read
1.2 timbl 382: */
1.3 timbl 383:   if (format_in != WWW_HTML) {
1.7 timbl 384:     (*target->isa->put_string)(target, start_of_data);
1.3 timbl 385:    HTCopy(s, target);
 386:    
 387:   } else {  /* ascii text with CRLFs :-( */
1.7 timbl 388:     {
1.3 timbl 389:      char * p;
1.7 timbl 390:      for (p = start_of_data; *p; p++)
1.3 timbl 391:        if (*p != '\r') (*target->isa->put_character)(target, *p);
 392:    }
 393:    HTCopyNoCR(s, target);
1.2 timbl 394:   }
1.3 timbl 395:   (*target->isa->end_document)(target);
 396:   (*target->isa->free)(target);
1.8 timbl 397:   status = HT_LOADED;
1.2 timbl 398: 
 399: /*   Clean up
1.1 timbl 400: */
1.3 timbl 401:   
1.6 timbl 402: clean_up: 
1.2 timbl 403:   if (line_buffer) free(line_buffer);
1.3 timbl 404: 
1.1 timbl 405:   if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
1.6 timbl 406:   (void) NETCLOSE(s);
1.1 timbl 407: 
1.8 timbl 408:   return status;           /* Good return */
1.3 timbl 409: 
1.1 timbl 410: }
1.7 timbl 411: 
1.1 timbl 412: 
 413: /*   Protocol descriptor
**
**   Associates the access scheme name "http" with the loader HTLoadHTTP().
**   The third member is left as 0; its meaning is defined by HTProtocol,
**   which is declared outside this file.
 414: */
 415: 
1.2 timbl 416: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };

Webmaster

AltStyle によって変換されたページ (->オリジナル) /