#include #include #ifndef DEBUG #define DEBUG 0 #endif enum { BUF_SIZE = 0x1FFF, BUF_BYTES = 2, }; static void print_char(unsigned char c) { if((' '<=c) && (0x7F>c)) putchar(c); else printf("%02x",c); } int main(int argc,char** args) { if(4!=argc) { fprintf(stderr,"usage: bpe [c|d] src dest\n"); return -1; } FILE* srcf = fopen(args[2],"rb"); FILE* destf = fopen(args[3],"wb"); unsigned char buf[BUF_SIZE], tmp[BUF_SIZE]; // what to do? if('c'==*args[1]) { // compress; lets be greedy // but if this was competition, we could just do a proper search and doubtless trim a few bytes off :) int digraphs[256*256]; char used[256]; int buf_num = 0; size_t read = 0, written = 0; while(true) { size_t buf_len = fread(buf,1,BUF_SIZE,srcf); if(!buf_len) break; read += buf_len; buf_num++; size_t cycle; for(cycle=0; cycle<256; cycle++) { memset(digraphs,0,sizeof(digraphs)); memset(used,0,sizeof(used)); used[buf[0]] = 1; for(int i=1; i best_count) { best_count = digraphs[i]; best = i; } if(best_count < 4) // nothing to improve break; const unsigned char one = best/256, two = best%256; char replace, replace_found = 0; for(int i=0; i<256; i++) if(!used[i]) { replace = i; replace_found = true; break; } if(!replace_found) break; #if DEBUG>=2 printf("%3d %2zu [",buf_num,cycle); print_char(one); putchar(':'); print_char(two); printf("] %d = ",digraphs[best]); print_char(replace); #endif size_t out = 0; tmp[out++] = one; tmp[out++] = two; tmp[out++] = replace; size_t i; for(int i=0; i=2 printf(" = %zu saved, %zu left\n",buf_len-out,out); #endif buf_len = out; memcpy(buf,tmp,buf_len); } for(int i=0; i>((BUF_BYTES-i-1)*8))&0xff,destf); written++; } fputc(cycle,destf); written++; fwrite(buf,1,buf_len,destf); written += buf_len; #if DEBUG>=1 printf("%d = %zu, %zu\n",buf_num,buf_len,cycle); #endif } printf("compressed %zu to %zu in %d blocks (%0.0f%%)\n",read,written,buf_num,((double)written/(double)read)*100.0); } else { // decompress size_t read = 0, written = 0; int buf_num; bool eof = false; for(; !eof; buf_num++) { size_t buf_len = 0; for(int i=0; i BUF_SIZE) { fprintf(stderr,"illegal buf_len %zu\n",buf_len); return 1; } const size_t cycles = fgetc(srcf); read ++; #if DEBUG>=1 printf("%d = %zu, %zu\n",buf_num,buf_len,cycles); #endif read += fread(buf,1,buf_len,srcf); for(size_t cycle=0; cycle=2 printf("%3d %2zu [",buf_num,cycle); print_char(one); putchar(':'); print_char(two); printf("] = "); print_char(replace); #endif size_t out = 0; for(; in < buf_len; in++) if(buf[in]==replace) { tmp[out++] = one; tmp[out++] = two; } else tmp[out++] = buf[in]; #if DEBUG>=2 printf(" = %zu saved, %zu left\n",out-buf_len,out); #endif buf_len = out; memcpy(buf,tmp,buf_len); } written += fwrite(buf,1,buf_len,destf); } printf("decompressed %zu to %zu in %d blocks (%0.0f%%)\n",read,written,buf_num,((double)read/(double)written)*100.0); } return 0; }

AltStyle によって変換されたページ (->オリジナル) /