[BACK] Return to HTTP.c CVS log [TXT] [DIR] Up to [Public] / libwww / Library / src

Annotation of libwww/Library/src/HTTP.c, revision 1.3

1.1 timbl 1: /*   HyperText Transfer Protocol   - Client implementation     HTTP.c
 2: **   ==========================
1.2 timbl 3: **
 4: ** Bugs:
 5: **   Not implemented:
 6: **       Forward
 7: **       Redirection
 8: **       Error handling
1.1 timbl 9: */
 10: 
 11: /*   Module parameters:
 12: **   -----------------
 13: **
 14: ** These may be undefined and redefined by syspec.h
 15: */
1.2 timbl 16: 
 17: /* Implements:
 18: */
 19: #include "HTTP.h"
 20: 
 21: #define HTTP_VERSION  "HTTP/1.0"
 22: #define HTTP2             /* Version is greater than 0.9 */
 23: 
1.3 ! timbl 24: #define CR  FROMASCII('\015') /* Must be converted to ^M for transmission */
 ! 25: #define LF  FROMASCII('\012') /* Must be converted to ^J for transmission */
 ! 26: 
1.2 timbl 27: #define INIT_LINE_SIZE     1024  /* Start with line buffer this big */
 28: #define LINE_EXTEND_THRESH   256   /* Minimum read size */
 29: #define VERSION_LENGTH         20   /* for returned protocol version */
 30: 
 31: /* Uses:
 32: */
1.1 timbl 33: #include "HTParse.h"
 34: #include "HTUtils.h"
 35: #include "tcp.h"
 36: #include "HTTCP.h"
 37: #include "HTFormat.h"
1.2 timbl 38: #include <ctype.h>
 39: #include "HTAlert.h"
 40: #include "HTMIME.h"
1.1 timbl 41: 
1.2 timbl 42: struct _HTStream {
 43:    HTStreamClass * isa;      /* all we need to know */
 44: };
 45: 
 46: 
1.1 timbl 47: /*       Load Document from HTTP Server         HTLoadHTTP()
 48: **       ==============================
 49: **
 50: **   Given a hypertext address, this routine loads a document.
 51: **
 52: **
 53: ** On entry,
 54: **   arg   is the hypertext reference of the article to be loaded.
 55: **   gate  is nil if no gateway, else the gateway address (feature currently disabled).
 56: **
 57: ** On exit,
 58: **   returns HT_LOADED   If no error: the document was pushed down the stream
 59: **       <0   Error.
 60: **
 61: **   The socket is opened and, on the successful path, closed by this
 62: **   routine; no socket number is handed back to the caller.
 63: **
 64: */
1.2 timbl 65: PUBLIC int HTLoadHTTP ARGS4 (
 66:    CONST char *,      arg,
 67: /*   CONST char *,      gate, */
 68:    HTParentAnchor *,    anAnchor,
 69:    HTFormat,        format_out,
 70:    HTStream*,       sink)
1.1 timbl 71: {
 72:   int s;               /* Socket number for returned data */
 73:   char *command;           /* The whole command */
1.3 ! timbl 74:   char * eol = 0;          /* End of line if found */
1.1 timbl 75:   int status;                /* tcp return */
1.3 ! timbl 76:   HTStream * target = NULL;     /* Unconverted data */
 ! 77:   HTFormat format_in;            /* Format arriving in the message */
 ! 78:   
1.2 timbl 79:   CONST char* gate = 0;       /* disable this feature */
1.1 timbl 80:   SockA soc_address;         /* Binary network address */
 81:   SockA * sin = &soc_address;
1.2 timbl 82:   BOOL had_header = NO;       /* Have we had at least one header? */
 83:   char * line_buffer = NULL;
 84:   BOOL extensions = YES;       /* Assume good HTTP server */
1.1 timbl 85:   if (!arg) return -3;        /* Bad if no name specified   */
 86:   if (!*arg) return -2;       /* Bad if name had zero length */
 87: 
 88: /* Set up defaults:
 89: */
 90: #ifdef DECNET
1.2 timbl 91:   sin->sdn_family = AF_DECnet;      /* Family = DECnet, host order */
 92:   sin->sdn_objnum = DNP_OBJ;     /* Default: http object number */
1.1 timbl 93: #else /* Internet */
1.2 timbl 94:   sin->sin_family = AF_INET;   /* Family = internet, host order */
 95:   sin->sin_port = htons(TCP_PORT);  /* Default: http port  */
1.1 timbl 96: #endif
 97: 
 98:   if (TRACE) {
 99:     if (gate) fprintf(stderr,
 100:        "HTTPAccess: Using gateway %s for %s\n", gate, arg);
 101:     else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
 102:   }
 103:   
 104: /* Get node name and optional port number:
 105: */
 106:   {
 107:    char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
 108:    int status = HTParseInet(sin, p1); /* TBL 920622 */
 109:     free(p1);
 110:    if (status) return status;  /* No such host for example */
 111:   }
 112:   
1.2 timbl 113: retry:
1.1 timbl 114:  
 115: /*   Now, let's get a socket set up from the server for the sgml data:
 116: */   
 117: #ifdef DECNET
 118:   s = socket(AF_DECnet, SOCK_STREAM, 0);
 119: #else
 120:   s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 121: #endif
 122:   status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
 123:   if (status < 0) {
 124: #ifndef DECNET
 125:    /* This code is temporary backward-compatibility. It should
 126:      go away when no server runs on port 2784 alone */
 127:    if (sin->sin_port == htons(TCP_PORT)) { /* Try the old one */
 128:     if (TRACE) printf (
 129:      "HTTP: Port %d doesn't answer (errno = %d). Trying good old port %d...\n",
 130:      TCP_PORT, errno, OLD_TCP_PORT);
 131:     sin->sin_port = htons(OLD_TCP_PORT);
 132:     /* First close current socket and open a clean one */
 133:     status = NETCLOSE (s);
 134:     s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 135:     status = connect(s, (struct sockaddr*)&soc_address,
 136:              sizeof(soc_address));
 137:    }
 138:    if (status < 0)
 139: #endif
 140:     {
 141:      if (TRACE) fprintf(stderr, 
 142:       "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
 143:      /* free(command);  BUG OUT TBL 921121 */
 144:      return HTInetStatus("connect");
 145:     }
 146:    }
 147:   
 148:   if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
 149: 
 150: /*   Ask that node for the document,
 151: **   omitting the host name & anchor if not gatewayed.
 152: */    
 153:   if (gate) {
1.2 timbl 154:     command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 155:     if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 156:     strcpy(command, "GET ");
 157:    strcat(command, arg);
 158:   } else { /* not gatewayed */
 159:    char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 160:     command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 161:     if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
 162:     strcpy(command, "GET ");
 163:    strcat(command, p1);
 164:    free(p1);
 165:   }
1.2 timbl 166: #ifdef HTTP2
 167:   if (extensions) {
 168:     strcat(command, " ");
 169:     strcat(command, HTTP_VERSION);
 170:   }
 171: #endif
1.3 ! timbl 172:   {
 ! 173:    char * p = command + strlen(command);
 ! 174:    *p++ = CR;       /* Macros to be correct on Mac */
 ! 175:    *p++ = LF;
 ! 176:    *p++ = 0;
 ! 177:    /* strcat(command, "\r\n");   */   /* CR LF, as in rfc 977 */
 ! 178:   }
1.1 timbl 179: 
1.2 timbl 180:   if (extensions) {
 181: 
 182:    int n;
 183:    int i;
 184:     HTAtom * present = WWW_PRESENT;
 185:    char line[256];  /*@@@@ */
 186: 
 187:    if (!HTPresentations) HTFormatInit();
 188:    n = HTList_count(HTPresentations);
 189: 
 190:    for(i=0; i<n; i++) {
 191:      HTPresentation * pres = HTList_objectAt(HTPresentations, i);
 192:      if (pres->rep_out == present) {
 193:       if (pres->quality != 1.0) {
1.3 ! timbl 194:         sprintf(line, "Accept: %s q=%.3f%c%c",
 ! 195:             HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 196:       } else {
1.3 ! timbl 197:         sprintf(line, "Accept: %s%c%c",
 ! 198:             HTAtom_name(pres->rep), CR, LF);
1.2 timbl 199:       }
 200:       StrAllocCat(command, line);
 201: 
 202:      }
 203:    }
 204:   }
1.3 ! timbl 205:    
1.1 timbl 206:   {
 207:    char * p;
1.3 ! timbl 208:    char crlf[3] = "\015\012";
 ! 209:    StrAllocCat(command, crlf);   /* Blank line means "end" */
 ! 210: #ifdef NOT_ASCII
1.1 timbl 211:    for(p = command; *p; p++) {
 212:      *p = TOASCII(*p);
 213:    }
1.3 ! timbl 214: #endif
 ! 215: 
1.1 timbl 216:   }
 217: 
1.2 timbl 218:   if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
1.1 timbl 219:   status = NETWRITE(s, command, (int)strlen(command));
 220:   free(command);
 221:   if (status<0) {
 222:    if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
 223:      return HTInetStatus("send");
 224:   }
 225: 
1.2 timbl 226: 
 227: /*   Now load the data:   HTTP2 response parse
 228: */
1.3 ! timbl 229: 
 ! 230:   format_in = WWW_HTML;   /* default */
1.2 timbl 231:   {
 232:   
 233:   /* Get numeric status etc */
 234: 
 235:    int status;
 236:    int length = 0;
 237:    BOOL end_of_file = NO;
 238:    HTAtom * encoding = HTAtom_for("7bit");
 239:    int buffer_length = INIT_LINE_SIZE;   /* Why not? */
 240:    
 241:    line_buffer = (char *) malloc(buffer_length * sizeof(char));
 242:    if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
 243:    
 244:    for(;;) {
 245: 
 246:      int fields;
 247:      char server_version [VERSION_LENGTH+1];
 248:      int server_status;
 249:      
 250:      /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
 251:      
 252:      if (buffer_length - length < LINE_EXTEND_THRESH) {
 253:        buffer_length = buffer_length + buffer_length;
 254:        line_buffer = (char *) realloc(
 255:            line_buffer, buffer_length * sizeof(char));
 256:        if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
 257:      }
 258:      status = NETREAD(s, line_buffer + length,
 259:                buffer_length - length -1);
 260:      if (status < 0) {
 261:        HTAlert("Unexpected network read error on response");
 262:        return status;
 263:      }
 264:      if (status == 0) {
 265:        end_of_file = YES;
 266:        break;
 267:      }
 268:      line_buffer[length+status] = 0;
 269: #ifdef NOT_ASCII
 270:      {
 271:        char * p;
 272:        for(p = line_buffer+length; *p; p++) {
 273:          *p = FROMASCII(*p);
 274:        }
 275:      }
 276: #endif
1.3 ! timbl 277:      eol = strchr(line_buffer + length, LF);
 ! 278:       if (eol && *(eol-1) == CR) *(eol-1) = ' '; 
1.2 timbl 279: 
 280:      length = length + status;
 281:          
 282:      if (!eol && !end_of_file) continue;     /* No LF */     
 283:      
 284:      *eol = 0;      /* Terminate the line */
 285: 
 286: 
 287: /*   We now have a terminated unfolded line.
 288: */
 289: 
 290:      if (TRACE)fprintf(stderr, "HTTP: Rx: %s\n", line_buffer);
 291: 
 292: /* Kludge to work with old buggy servers. They can't handle the third word
 293: ** so we try again without it.
 294: */
 295:      if (extensions &&
 296:         0==strcmp(line_buffer,     /* Old buggy server? */
 297:          "Document address invalid or access not authorised")) {
 298:        extensions = NO;
 299:        if (line_buffer) free(line_buffer);
 300:        if (TRACE) fprintf(stderr,
 301:          "HTTP: close socket %d to retry with HTTP0\n", s);
 302:        NETCLOSE(s);
 303:        goto retry;       /* @@@@@@@@@@ */
 304:      }
 305: 
 306:      fields = sscanf(line_buffer, "%20s%d",
 307:        server_version,
 308:        &server_status);
 309: 
 310:      if (fields < 2) break;
 311:      
1.3 ! timbl 312:      format_in = HTAtom_for("www/mime");
 ! 313: 
1.2 timbl 314:      switch (server_status / 100) {
 315:      
1.3 ! timbl 316:      default:      /* bad number */
 ! 317:        HTAlert("Unknown status reply from server!");
 ! 318:        break;
 ! 319:        
1.2 timbl 320:      case 3:       /* Various forms of redirection */
1.3 ! timbl 321:        HTAlert(
 ! 322:    "Redirection response from server is not handled by this client");
 ! 323:        break;
 ! 324:        
1.2 timbl 325:      case 4:       /* "I think I goofed" */
 326:      case 5:       /* I think you goofed */
1.3 ! timbl 327:        HTAlert("Error response from server");
 ! 328:        break;
1.2 timbl 329:        
 330:      case 2:       /* Good: Got MIME object */
 331:        break;
 332: 
 333:      }
 334:      
 335:      break;       /* Get out of for loop */
 336:      
 337:    } /* Loop over lines */
 338:   }     /* Scope of HTTP2 handling block */
 339: 
1.3 ! timbl 340: /*   Set up the stream stack to handle the body of the message
 ! 341: */
 ! 342: 
 ! 343:   target = HTStreamStack(format_in,
 ! 344:            format_out,
 ! 345:            sink , anAnchor);
 ! 346: 
 ! 347:   if (!target) {
 ! 348:    char buffer[1024];   /* @@@@@@@@ */
 ! 349:    if (line_buffer) free(line_buffer);
 ! 350:    sprintf(buffer, "Sorry, no known way of converting %s to %s.",
 ! 351:        HTAtom_name(format_in), HTAtom_name(format_out));
 ! 352:    fprintf(stderr, "HTTP: %s", buffer);
 ! 353:    return HTLoadError(sink, 501, buffer);
 ! 354:   }
 ! 355: 
 ! 356:   
 ! 357: /*   Push the data, maybe ignoring CR, down the stream
 ! 358: **   We have to remember the end of the first buffer we just read
1.2 timbl 359: */
1.3 ! timbl 360:   if (format_in != WWW_HTML) {
 ! 361:     if (eol) (*target->isa->put_string)(target, eol+1);
 ! 362:    HTCopy(s, target);
 ! 363:    
 ! 364:   } else {  /* ascii text with CRLFs :-( */
 ! 365:     if (eol) {
 ! 366:      char * p;
 ! 367:      for (p = eol+1; *p; p++)
 ! 368:        if (*p != '\r') (*target->isa->put_character)(target, *p);
 ! 369:    }
 ! 370:    HTCopyNoCR(s, target);
1.2 timbl 371:   }
1.3 ! timbl 372:   (*target->isa->end_document)(target);
 ! 373:   (*target->isa->free)(target);
 ! 374:    
1.2 timbl 375: 
 376: /*   Clean up
1.1 timbl 377: */
1.3 ! timbl 378:   
1.2 timbl 379:   if (line_buffer) free(line_buffer);
1.3 ! timbl 380: 
1.1 timbl 381:   if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
 382:   status = NETCLOSE(s);
 383: 
 384:   return HT_LOADED;         /* Good return */
1.3 ! timbl 385: 
1.1 timbl 386: }
 387: 
 388: /*   Protocol descriptor
 389: */
 390: 
1.2 timbl 391: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };

Webmaster

AltStyle によって変換されたページ (->オリジナル) /