Jump to content
Wikimedia Meta-Wiki

Wikix

From Meta, a Wikimedia project coordination wiki
This is an archived version of this page, as edited by Jmerkey (talk | contribs) at 20:16, 24 April 2007 (Source Code ). It may differ significantly from the current version .

Wikix is a C program that reads any XML dump provided by the foundation, extracts the names of all images the dump references, and then generates a series of Bash or Bourne-style Unix shell scripts which can be invoked to download all of those images from Wikimedia Commons and Wikipedia.

The program relies on cURL, an automated web transfer tool, to download referenced images. The program will also convert text-based UTF-8 character references into actual UTF-8 strings for those dumps which may contain improperly formatted names for specific images. The program can be configured to generate 16 parallel scripts which will download all images from Wikipedia.

On a cable modem, the entire set of Wikipedia images can be downloaded in about 36 hours using this program.


Source Code

Makefile

# Build rules for wikix and the bundled UTF-8 helper library (libcutf8).
#CFLAGS = -g
#CFLAGS_LIB = -g -c
CFLAGS = -Wno-pointer-sign -g
CFLAGS_LIB = -Wno-pointer-sign -g -c
CC = gcc
LD = ld
AR = ar
# These targets name actions, not files.
.PHONY: all clean install
all: wikix 
# utf8.o comes from make's built-in %.o: %.c rule, which uses $(CC) $(CFLAGS).
libcutf8.so: utf8.o
	$(LD) -shared -lc -o libcutf8.so utf8.o /usr/lib/libc.a
libcutf8.a: utf8.o
	$(AR) r libcutf8.a utf8.o
# MD5() is provided by libcrypto; link it explicitly rather than relying on
# libssl dragging it in.
wikix: wikix.c libcutf8.a 
	$(CC) $(CFLAGS) wikix.c -o wikix libcutf8.a -lssl -lcrypto
# Remove every artifact this Makefile can produce, including the libraries.
clean:
	rm -f *.o wikix libcutf8.a libcutf8.so
# install copies libcutf8.so, which "all" does not build, so depend on it here.
install: all libcutf8.so
	install -m 755 wikix /usr/bin
	install -m 644 libcutf8.so /usr/lib
	install -m 644 libcutf8.a /usr/lib

platform.h

/* Platform selector: wikix.c pulls in its POSIX and OpenSSL headers under
   #ifdef LINUX (and its Win32 headers under #ifdef WINDOWS). */
#define LINUX

utf8.h

/* Public interface of the UTF-8 helper routines implemented in utf8.c. */
#ifndef UTF8_H
#define UTF8_H

#include <stdarg.h>
#include <stddef.h>    /* size_t */
#include <sys/types.h> /* u_int32_t */

/* is c the start of a utf8 sequence? */
#define isutf(c) (((c)&0xC0)!=0x80)
/* convert UTF-8 data to wide character */
int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz);
/* the opposite conversion */
int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz);
/* single character to UTF-8 */
int u8_wc_toutf8(char *dest, u_int32_t ch);
/* character number to byte offset */
int u8_offset(char *str, int charnum);
/* byte offset to character number */
int u8_charnum(char *s, int offset);
/* return next character, updating an index variable */
u_int32_t u8_nextchar(char *s, int *i);
/* move to next character */
void u8_inc(char *s, int *i);
/* move to previous character */
void u8_dec(char *s, int *i);
/* returns length of next utf-8 sequence */
int u8_seqlen(char *s);
/* assuming src points to the character after a backslash, read an
 escape sequence, storing the result in dest and returning the number of
 input characters processed */
int u8_read_escape_sequence(char *src, u_int32_t *dest);
/* given a wide character, convert it to an ASCII escape sequence stored in
 buf, where buf is "sz" bytes. returns the number of characters output. */
int u8_escape_wchar(char *buf, int sz, u_int32_t ch);
/* convert a string "src" containing escape sequences to UTF-8 */
int u8_unescape(char *buf, int sz, char *src);
/* convert UTF-8 "src" to ASCII with escape sequences.
 if escape_quotes is nonzero, quote characters will be preceded by
 backslashes as well. */
int u8_escape(char *buf, int sz, char *src, int escape_quotes);
/* utility predicates used by the above */
int octal_digit(char c);
int hex_digit(char c);
/* return a pointer to the first occurrence of ch in s, or NULL if not
 found. character index of found character returned in *charn. */
char *u8_strchr(char *s, u_int32_t ch, int *charn);
/* same as the above, but searches a buffer of a given size instead of
 a NUL-terminated string. */
char *u8_memchr(char *s, u_int32_t ch, size_t sz, int *charn);
/* count the number of characters in a UTF-8 string */
int u8_strlen(char *s);
/* returns 1 if the locale name carries a UTF-8 / utf8 encoding suffix */
int u8_is_locale_utf8(char *locale);
/* printf where the format string and arguments may be in UTF-8.
 you can avoid this function and just use ordinary printf() if the current
 locale is UTF-8. */
int u8_vprintf(char *fmt, va_list ap);
int u8_printf(char *fmt, ...);

#endif /* UTF8_H */

utf8.c public domain

/*
 Basic UTF-8 manipulation routines
 by Jeff Bezanson
 placed in the public domain Fall 2005
 This code is designed to provide the utilities you need to manipulate
 UTF-8 as an internal string encoding. These functions do not perform the
 error checking normally needed when handling UTF-8 data, so if you happen
 to be from the Unicode Consortium you will want to flay me alive.
 I do this because error checking can be performed at the boundaries (I/O),
 with these routines reserved for higher performance on data known to be
 valid.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#ifdef WIN32
#include <malloc.h>
#else
#include <alloca.h>
#endif
#include "utf8.h"
/* Correction subtracted from the raw sum of a sequence's bytes, indexed by
   the number of trailing bytes.  The decoders add every byte unmasked
   (shifting left by 6 between bytes), so the lead-byte and continuation-byte
   marker bits end up in the sum; e.g. entry 1 is (0xC0 << 6) + 0x80. */
static const u_int32_t offsetsFromUTF8[6] = {
 0x00000000UL, 0x00003080UL, 0x000E2080UL,
 0x03C82080UL, 0xFA082080UL, 0x82082080UL
};
/* Number of bytes that follow a given lead byte, indexed by the byte value:
   0 for 0x00-0xBF, 1 for 0xC0-0xDF, 2 for 0xE0-0xEF, 3 for 0xF0-0xF7,
   4 for 0xF8-0xFB and 5 for 0xFC-0xFF. */
static const char trailingBytesForUTF8[256] = {
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
/* Byte length of the UTF-8 sequence that starts at s[0]: the number of
   trailing bytes announced by the lead byte, plus the lead byte itself. */
int u8_seqlen(char *s)
{
    unsigned char lead = (unsigned char)s[0];
    int trailing = trailingBytesForUTF8[(unsigned int)lead];

    return trailing + 1;
}
/* Convert UTF-8 bytes to 32-bit code points, without validation.
   Only meaningful for well-formed UTF-8.

   dest  = output array of wide characters
   sz    = capacity of dest in wide characters (including the terminator)
   src   = UTF-8 input
   srcsz = input size in bytes, or -1 if src is 0-terminated

   Returns the number of characters converted.  dest is always
   L'\0'-terminated when sz > 0, even if there isn't enough room for all the
   characters; when sz <= 0 nothing is written and 0 is returned.
   If sz = srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space.
*/
int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz)
{
    u_int32_t ch;
    int nb;
    int i = 0;
    int left = srcsz;   /* input bytes still unread (sized mode only) */

    if (sz <= 0)
        return 0;       /* no room even for the terminator */

    while (i < sz - 1) {
        if (srcsz == -1) {
            if (*src == 0)
                break;
            nb = trailingBytesForUTF8[(unsigned char)*src];
        }
        else {
            /* check the bound BEFORE looking at *src */
            if (left <= 0)
                break;
            nb = trailingBytesForUTF8[(unsigned char)*src];
            /* the sequence needs nb+1 bytes; stop if it would not leave at
               least one byte to spare (same cut-off as src+nb >= src_end) */
            if (nb >= left)
                break;
            left -= nb + 1;
        }
        ch = 0;
        switch (nb) {
        /* these fall through deliberately; cases 5 and 4 exist only so that
           obsolete 5/6-byte lead bytes are consumed instead of stalling */
        case 5: ch += (unsigned char)*src++; ch <<= 6; /* fallthrough */
        case 4: ch += (unsigned char)*src++; ch <<= 6; /* fallthrough */
        case 3: ch += (unsigned char)*src++; ch <<= 6; /* fallthrough */
        case 2: ch += (unsigned char)*src++; ch <<= 6; /* fallthrough */
        case 1: ch += (unsigned char)*src++; ch <<= 6; /* fallthrough */
        case 0: ch += (unsigned char)*src++;
        }
        ch -= offsetsFromUTF8[nb];
        dest[i++] = ch;
    }
    dest[i] = 0;
    return i;
}
/* Convert 32-bit code points to UTF-8.

   dest  = output byte buffer
   sz    = size of dest in bytes
   src   = wide characters to encode
   srcsz = number of source characters, or negative if src is 0-terminated

   Returns the number of source characters consumed.  Values of 0x110000 and
   above produce no output but are still counted.

   dest will only be '\0'-terminated if there is enough space. this is
   for consistency; imagine there are 2 bytes of space left, but the next
   character requires 3 bytes. in this case we could NUL-terminate, but in
   general we can't when there's insufficient space. therefore this function
   only NUL-terminates if all the characters fit, and there's space for
   the NUL as well.
   the destination string will never be bigger than the source string.
*/
int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz)
{
    u_int32_t ch;
    int i = 0;
    char *dest_end = dest + sz;

    while (srcsz < 0 ? src[i] != 0 : i < srcsz) {
        ch = src[i];
        if (ch < 0x80) {
            if (dest >= dest_end)
                return i;
            *dest++ = (char)ch;
        }
        else if (ch < 0x800) {
            if (dest >= dest_end - 1)
                return i;
            *dest++ = (ch >> 6) | 0xC0;
            *dest++ = (ch & 0x3F) | 0x80;
        }
        else if (ch < 0x10000) {
            if (dest >= dest_end - 2)
                return i;
            *dest++ = (ch >> 12) | 0xE0;
            *dest++ = ((ch >> 6) & 0x3F) | 0x80;
            *dest++ = (ch & 0x3F) | 0x80;
        }
        else if (ch < 0x110000) {
            if (dest >= dest_end - 3)
                return i;
            *dest++ = (ch >> 18) | 0xF0;
            *dest++ = ((ch >> 12) & 0x3F) | 0x80;
            *dest++ = ((ch >> 6) & 0x3F) | 0x80;
            *dest++ = (ch & 0x3F) | 0x80;
        }
        i++;
    }
    /* terminate with a real NUL byte, and only when it fits */
    if (dest < dest_end)
        *dest = '\0';
    return i;
}
/* Encode one code point as UTF-8 into dest (no terminator is written).
   Returns the number of bytes stored (1-4), or 0 when ch is 0x110000 or
   larger, in which case dest is left untouched. */
int u8_wc_toutf8(char *dest, u_int32_t ch)
{
    /* lead-byte marker bits, indexed by sequence length */
    static const unsigned char lead_bits[5] = { 0, 0, 0xC0, 0xE0, 0xF0 };
    int len;
    int k;

    if (ch < 0x80) {
        dest[0] = (char)ch;
        return 1;
    }
    if (ch >= 0x110000)
        return 0;

    if (ch < 0x800)
        len = 2;
    else if (ch < 0x10000)
        len = 3;
    else
        len = 4;

    /* emit continuation bytes back to front, six payload bits at a time */
    for (k = len - 1; k > 0; k--) {
        dest[k] = (ch & 0x3F) | 0x80;
        ch >>= 6;
    }
    dest[0] = ch | lead_bits[len];
    return len;
}
/* Translate a character index into a byte offset within str.
   Walks at most charnum characters and stops early at the terminator. */
int u8_offset(char *str, int charnum)
{
    int offs = 0;
    int left;

    for (left = charnum; left > 0 && str[offs]; left--) {
        int k;

        /* step over the lead byte, then over up to three continuation
           bytes; the fourth byte of a sequence is skipped unchecked */
        offs++;
        for (k = 0; k < 3 && !isutf(str[offs]); k++)
            offs++;
    }
    return offs;
}
/* Translate a byte offset into a character index: counts the sequences that
   start before `offset`, stopping early at the terminator. */
int u8_charnum(char *s, int offset)
{
    int offs = 0;
    int charnum = 0;

    while (offs < offset && s[offs]) {
        int k;

        /* advance past one whole sequence (lead byte plus up to three
           continuation bytes) */
        offs++;
        for (k = 0; k < 3 && !isutf(s[offs]); k++)
            offs++;
        charnum++;
    }
    return charnum;
}
/* Number of characters (not bytes) in the 0-terminated UTF-8 string s.
   The terminator is tested before each decode, so u8_nextchar() is never
   asked to step over it (it would then peek at the byte after the NUL,
   which for an empty string lies outside the buffer).
   Note: a sequence that merely decodes to 0 (e.g. the overlong C0 80) is
   counted as a character rather than ending the scan. */
int u8_strlen(char *s)
{
    int count = 0;
    int i = 0;

    while (s[i] != 0) {
        (void)u8_nextchar(s, &i);
        count++;
    }
    return count;
}
/* Decode the sequence starting at s[*i] and advance *i past it.
   The first byte is always consumed; following bytes are consumed while
   they are non-zero continuation bytes.  No validation is performed. */
u_int32_t u8_nextchar(char *s, int *i)
{
    int first = *i;
    u_int32_t ch = (unsigned char)s[(*i)++];

    while (s[*i] && !isutf(s[*i]))
        ch = (ch << 6) + (unsigned char)s[(*i)++];

    /* strip the marker bits that were summed in along with the payload;
       the table is indexed by the number of trailing bytes consumed */
    ch -= offsetsFromUTF8[(*i - first) - 1];
    return ch;
}
/* Advance *i to the start of the next character in s: step once, then keep
   stepping (at most three more times) while on a continuation byte. */
void u8_inc(char *s, int *i)
{
    int steps;

    ++(*i);
    for (steps = 0; steps < 3 && !isutf(s[*i]); steps++)
        ++(*i);
}
/* Move *i back to the start of the previous character in s: step back once,
   then keep stepping back (at most three more times) while on a
   continuation byte. */
void u8_dec(char *s, int *i)
{
    int steps;

    --(*i);
    for (steps = 0; steps < 3 && !isutf(s[*i]); steps++)
        --(*i);
}
/* Returns 1 if c is one of '0'..'7', otherwise 0. */
int octal_digit(char c)
{
    if (c < '0')
        return 0;
    return c <= '7';
}
/* Returns 1 if c is an ASCII hexadecimal digit (0-9, A-F, a-f), else 0. */
int hex_digit(char c)
{
    if (c >= '0' && c <= '9')
        return 1;
    if (c >= 'A' && c <= 'F')
        return 1;
    return c >= 'a' && c <= 'f';
}
/* Parse one escape sequence.  str points at the character AFTER the
   backslash.  Recognised forms: n t r b f v a, up to three octal digits,
   xHH, uHHHH and UHHHHHHHH.  Anything else (including x/u/U with no hex
   digits) yields the literal character itself.
   The decoded value is stored in *dest; the return value is the number of
   input characters consumed. */
int u8_read_escape_sequence(char *str, u_int32_t *dest)
{
    u_int32_t ch;
    char digs[9] = {0};     /* at most 8 digits, always NUL-terminated */
    int dno = 0, i = 1;
    int max_hex = 0;        /* hex digits allowed after x/u/U, else 0 */

    ch = (u_int32_t)str[0]; /* take literal character */
    switch (str[0]) {
    case 'n': ch = '\n'; break;
    case 't': ch = '\t'; break;
    case 'r': ch = '\r'; break;
    case 'b': ch = '\b'; break;
    case 'f': ch = '\f'; break;
    case 'v': ch = '\v'; break;
    case 'a': ch = '\a'; break;
    case 'x': max_hex = 2; break;
    case 'u': max_hex = 4; break;
    case 'U': max_hex = 8; break;
    default:
        if (octal_digit(str[0])) {
            /* the first digit is part of the number, so restart at 0 */
            i = 0;
            do {
                digs[dno++] = str[i++];
            } while (octal_digit(str[i]) && dno < 3);
            ch = (u_int32_t)strtoul(digs, NULL, 8);
        }
        break;
    }
    if (max_hex) {
        while (hex_digit(str[i]) && dno < max_hex)
            digs[dno++] = str[i++];
        /* strtoul, not strtol: 8 hex digits must not saturate at LONG_MAX
           on platforms where long is 32 bits */
        if (dno > 0)
            ch = (u_int32_t)strtoul(digs, NULL, 16);
    }
    *dest = ch;
    return i;
}
/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8
   example: u8_unescape(mybuf, 256, "hello\\u220e")
   note the double backslash is needed if called on a C string literal

   buf/sz = output buffer and its size in bytes
   Returns the number of bytes written (excluding the terminator).  buf is
   NUL-terminated only if there is room left for the terminator.  A lone
   backslash at the very end of src is copied through literally. */
int u8_unescape(char *buf, int sz, char *src)
{
    int c = 0, amt;
    u_int32_t ch;
    char temp[4];

    while (*src && c < sz) {
        /* only treat '\' as an escape when something follows it; otherwise
           the escape reader would consume the terminator and the scan
           would continue past the end of src */
        if (*src == '\\' && src[1] != '\0') {
            src++;
            amt = u8_read_escape_sequence(src, &ch);
        }
        else {
            ch = (u_int32_t)*src;
            amt = 1;
        }
        src += amt;
        amt = u8_wc_toutf8(temp, ch);
        if (amt > sz - c)
            break;
        memcpy(&buf[c], temp, amt);
        c += amt;
    }
    if (c < sz)
        buf[c] = '\0';
    return c;
}
/* Write the ASCII escape form of one code point into buf (sz bytes) using
   snprintf, and return snprintf's result.  Named C escapes are used where
   they exist; other control characters become \xH.., 0x80-0xFFFF become
   \uHHHH, larger values \UHHHHHHHH, and everything else is copied as is. */
int u8_escape_wchar(char *buf, int sz, u_int32_t ch)
{
    switch (ch) {
    case '\n': return snprintf(buf, sz, "\\n");
    case '\t': return snprintf(buf, sz, "\\t");
    case '\r': return snprintf(buf, sz, "\\r");
    case '\b': return snprintf(buf, sz, "\\b");
    case '\f': return snprintf(buf, sz, "\\f");
    case '\v': return snprintf(buf, sz, "\\v");
    case '\a': return snprintf(buf, sz, "\\a");
    case '\\': return snprintf(buf, sz, "\\\\");
    default:
        break;
    }

    if (ch < 32 || ch == 0x7f)
        return snprintf(buf, sz, "\\x%hhX", (unsigned char)ch);
    if (ch > 0xFFFF)
        return snprintf(buf, sz, "\\U%.8X", (u_int32_t)ch);
    if (ch >= 0x80)
        return snprintf(buf, sz, "\\u%.4hX", (unsigned short)ch);

    return snprintf(buf, sz, "%c", (char)ch);
}
/* Convert UTF-8 src to ASCII with escape sequences, writing into buf.

   buf/sz        = output buffer and its size in bytes
   escape_quotes = if nonzero, '"' is written as \"

   Returns the number of bytes stored in buf, excluding the terminator.
   When the output does not fit, conversion stops at the first escape that
   was truncated; snprintf has NUL-terminated buf at that point and sz-1 is
   returned, so neither the cursor nor the count ever runs past the buffer. */
int u8_escape(char *buf, int sz, char *src, int escape_quotes)
{
    int c = 0, i = 0, amt;

    while (src[i] && c < sz) {
        if (escape_quotes && src[i] == '"') {
            amt = snprintf(buf, sz - c, "\\\"");
            i++;
        }
        else {
            amt = u8_escape_wchar(buf, sz - c, u8_nextchar(src, &i));
        }
        if (amt < 0)
            break;              /* formatting error: keep what we have */
        if (amt >= sz - c)
            return sz - 1;      /* truncated: sz-c-1 bytes plus NUL stored */
        c += amt;
        buf += amt;
    }
    if (c < sz)
        *buf = '\0';
    return c;
}
/* Find the first occurrence of code point ch in the 0-terminated UTF-8
   string s.  Returns a pointer to the start of its sequence, or NULL.
   *charn receives the character index of the match (or the number of
   characters scanned when there is none). */
char *u8_strchr(char *s, u_int32_t ch, int *charn)
{
    int next = 0;

    *charn = 0;
    while (s[next]) {
        int start = next;

        if (u8_nextchar(s, &next) == ch)
            return s + start;
        ++*charn;
    }
    return NULL;
}
/* Like u8_strchr(), but scans exactly sz bytes of s instead of stopping at
   a terminator.  Returns a pointer to the start of the matching sequence or
   NULL; *charn receives the character index reached. */
char *u8_memchr(char *s, u_int32_t ch, size_t sz, int *charn)
{
    int pos = 0;

    *charn = 0;
    while (pos < sz) {
        int start = pos;
        u_int32_t cur = (unsigned char)s[pos++];

        /* fold in continuation bytes that are still inside the buffer */
        while (pos < sz && !isutf(s[pos]))
            cur = (cur << 6) + (unsigned char)s[pos++];
        cur -= offsetsFromUTF8[(pos - start) - 1];

        if (cur == ch)
            return &s[start];
        (*charn)++;
    }
    return NULL;
}
/* Returns 1 if the locale name (e.g. "en_US.UTF-8" or "de_DE.utf8@euro")
   names a UTF-8 encoding, 0 otherwise.  The encoding is the text between
   the first '.' and the next '@', '+', ',' or the end of the string, and
   must be exactly "UTF-8" or "utf8". */
int u8_is_locale_utf8(char *locale)
{
    /* this code based on libutf8 */
    const char *cp = locale;

    for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++) {
        if (*cp == '.') {
            const char *encoding = ++cp;

            /* find the end of the encoding part */
            for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++)
                ;
            if ((cp - encoding == 5 && !strncmp(encoding, "UTF-8", 5))
                || (cp - encoding == 4 && !strncmp(encoding, "utf8", 4)))
                return 1; /* it's UTF-8 */
            break;
        }
    }
    return 0;
}
/* printf-style output where fmt and the arguments may contain UTF-8.
   The text is formatted into a byte buffer, converted to 32-bit code points
   and printed with "%ls".
   Returns the number of characters converted, or -1 if formatting or
   memory allocation fails.
   NOTE(review): the "%ls" cast assumes wchar_t is 32 bits wide - confirm
   for the target platform. */
int u8_vprintf(char *fmt, va_list ap)
{
    char stackbuf[512];
    char *buf = stackbuf;
    u_int32_t *wcs;
    va_list retry;
    int cnt;

    /* a va_list may only be traversed once, so keep a copy for the second
       formatting pass */
    va_copy(retry, ap);
    cnt = vsnprintf(stackbuf, sizeof stackbuf, fmt, ap);
    if (cnt >= (int)sizeof stackbuf) {
        /* output was truncated: format again into a buffer of the full size */
        buf = malloc((size_t)cnt + 1);
        if (buf != NULL)
            cnt = vsnprintf(buf, (size_t)cnt + 1, fmt, retry);
        else
            cnt = -1;
    }
    va_end(retry);

    if (cnt < 0) {
        if (buf != stackbuf)
            free(buf);
        return -1;
    }

    /* one wide character per byte is always enough, plus the terminator */
    wcs = malloc(((size_t)cnt + 1) * sizeof *wcs);
    if (wcs == NULL) {
        if (buf != stackbuf)
            free(buf);
        return -1;
    }

    cnt = u8_toucs(wcs, cnt + 1, buf, cnt);
    printf("%ls", (wchar_t *)wcs);

    free(wcs);
    if (buf != stackbuf)
        free(buf);
    return cnt;
}
/* Variadic front end for u8_vprintf(): packages the arguments into a
   va_list, forwards them, and returns u8_vprintf()'s result. */
int u8_printf(char *fmt, ...)
{
    va_list ap;
    int written;

    va_start(ap, fmt);
    written = u8_vprintf(fmt, ap);
    va_end(ap);

    return written;
}


wikix.c

#include "platform.h"
#ifdef WINDOWS
#define strncasecmp strnicmp
#include "windows.h"
#include "winioctl.h"
#include "winuser.h"
#include "stdarg.h"
typedef UCHAR BYTE;
typedef USHORT WORD;
#include "stdio.h"
#include "stdlib.h"
#include "ctype.h"
#include "conio.h"
#endif
#ifdef LINUX
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <ctype.h>
#include <string.h>
//#include <ncurses.h>
#include <termios.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <net/if.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
#include <ctype.h>
#include <openssl/md5.h>
#endif
/* Number of buckets in each hash table; also the modulus passed to shash(). */
#define NAME_HASH_SIZE 8192
/* One stored name: a node in a bucket's doubly linked list. */
typedef struct _hash
{
 struct _hash *next;   /* following node in the bucket, or 0 at the tail */
 struct _hash *prior;  /* preceding node in the bucket, or 0 at the head */
 unsigned long len;    /* length of text in bytes */
 char *text;           /* name bytes; learn()/imagename() point this just past the struct */
} hash;
/* One bucket: head and tail of its node list (both 0 when empty). */
typedef struct _hash_list {
 hash *head;
 hash *tail;
} hash_list;
/* NOTE(review): no use of this file-scope buffer is visible in this chunk;
   main() declares a local variable with the same name. */
unsigned char buffer[8192 * 4];
/* Values of the -i and -o options; main() prints them as IMAGE= and OUTPUT=. */
unsigned char ImagePath[512];
unsigned char OutputPath[512];
/* Remote base URLs (-r and -c options); printed as IMAGEPATH= and COMMONSPATH=. */
unsigned char iPath[512];
unsigned char cPath[512];
/* MD5 digests of the image name and of its underscore variant. */
unsigned char md5_out[1024];
unsigned char md5_ulout[1024];
/* Scratch buffers used by strip_image_info(): */
unsigned char wk[8192];      /* candidate name copied out of the input */
unsigned char final1[4096];  /* shell-escaped image name */
unsigned char final2[4096];  /* shell-escaped underscore variant */
unsigned char ulwk[4096];    /* image name with spaces turned into '_' */
unsigned char fwk[4096];     /* "image..."/"map..." keyword fragment */
unsigned char expand[4096];  /* entity-expanded name (UNICODE_EXPANSION builds) */
/* NOTE(review): not referenced anywhere in the visible part of the file. */
unsigned char html[4096];
/* Output script streams for parallel mode, selected by the top four bits of
   the first MD5 byte. */
FILE *fpl[16];
/* pmode: -p given (parallel scripts); tree: -t given (copy from local tree). */
int pmode = 0, tree = 0;
/* Hash tables allocated by init_hash_list(); learn() uses the first and
   imagename() the second. */
hash_list *learn_list_head = NULL;
hash_list *name_list_head = NULL;
/* Count of stored names: incremented by learn()/imagename(), decremented by
   remove_from_hash(). */
int lobj = 0;
/* Log streams written by strip_image_info(). */
FILE *imagelog = NULL, *imagereject = NULL, *fragmentlog = NULL;
/* Case-insensitive string hash.
   v   = bytes to hash; hashing stops at the first NUL or after len bytes
   len = maximum number of bytes to consume
   M   = modulus (number of buckets); must be non-zero
   Returns a value in [0, M). */
unsigned long shash(char *v, unsigned long len, unsigned long M)
{
    unsigned long h = 0, a = 127, i;

    /* tolower() is only defined for unsigned char values and EOF, so bytes
       >= 0x80 must not be passed as negative chars */
    for (i = 0; i < len && *v; v++, i++)
        h = ((a * h) + (unsigned long)tolower((unsigned char)*v)) % M;
    return h;
}
/* Append `name` to the tail of its bucket in table `top`.
   Returns 0 on success, or (unsigned long)-1 when the table pointer is
   null or the hash value is the -1 sentinel. */
unsigned long add_to_hash(hash_list *top, hash *name)
{
    unsigned long slot = shash(name->text, name->len, NAME_HASH_SIZE);
    hash_list *bucket;

    if (slot == (unsigned long) -1 || !top)
        return -1;

    bucket = &top[slot];
    if (bucket->head) {
        /* non-empty bucket: link behind the current tail */
        bucket->tail->next = name;
        name->next = 0;
        name->prior = bucket->tail;
        bucket->tail = name;
    } else {
        /* empty bucket: the node is both head and tail */
        bucket->head = name;
        bucket->tail = name;
        name->next = name->prior = 0;
    }
    return 0;
}
/* Unlink `name` from its bucket in table `top` (the node is not freed) and
   decrement the global object count if it is non-zero.
   Returns 0 on success, or (unsigned long)-1 when the table pointer is
   null or the hash value is the -1 sentinel. */
unsigned long remove_from_hash(hash_list *top, hash *name)
{
    unsigned long slot = shash(name->text, name->len, NAME_HASH_SIZE);
    hash_list *bucket;

    if (slot == (unsigned long) -1 || !top)
        return -1;

    bucket = &top[slot];
    if (bucket->head == name) {
        /* removing the head: promote its successor, if any */
        bucket->head = name->next;
        if (bucket->head)
            bucket->head->prior = NULL;
        else
            bucket->tail = NULL;
    } else {
        /* interior or tail node: bridge over it */
        name->prior->next = name->next;
        if (name == bucket->tail)
            bucket->tail = name->prior;
        else
            name->next->prior = name->prior;
    }

    if (lobj)
        lobj--;
    return 0;
}
/* Free every node of one hash table, then the bucket array itself, and
   reset the caller's table pointer to NULL.  Does nothing for a null table. */
static void free_hash_table(hash_list **table)
{
    int slot;

    if (!*table)
        return;

    for (slot = 0; slot < NAME_HASH_SIZE; slot++) {
        hash *node = (*table)[slot].head;

        (*table)[slot].head = (*table)[slot].tail = 0;
        while (node) {
            hash *dead = node;

            node = node->next;
            free((void *)dead);
        }
    }
    free(*table);
    *table = NULL;
}

/* Release both global hash tables (learn_list_head and name_list_head). */
void free_hash(void)
{
    free_hash_table(&learn_list_head);
    free_hash_table(&name_list_head);
}
/* Allocate and zero both global hash tables (learn_list_head and
   name_list_head), each with NAME_HASH_SIZE empty buckets.
   Returns learn_list_head on success.  If either allocation fails, nothing
   is leaked, the globals are left unchanged and NULL is returned. */
hash_list *init_hash_list(void)
{
    hash_list *learned = calloc(NAME_HASH_SIZE, sizeof *learned);
    hash_list *names = calloc(NAME_HASH_SIZE, sizeof *names);

    if (!learned || !names) {
        /* free(NULL) is a no-op, so this releases whichever one succeeded */
        free(learned);
        free(names);
        return NULL;
    }

    learn_list_head = learned;
    name_list_head = names;
    return learn_list_head;
}
/* Look up `text` (len bytes, compared case-insensitively) in table `top`.
   Returns the matching node, or NULL if the name is not stored. */
hash *search_name_hash(hash_list *top, char *text, unsigned long len)
{
    hash *node;

    for (node = top[shash(text, len, NAME_HASH_SIZE)].head;
         node;
         node = node->next) {
        /* the length must match exactly before the bytes are compared */
        if (node->len == len && !strncasecmp(node->text, text, len))
            return node;
    }
    return NULL;
}
/* Record the name s (len bytes) in the learn table.
   Returns 0 if the name was newly added.  Returns 1 if it was already
   present, or if it could not be stored (allocation or insertion failure),
   so callers treat 1 as "do not process this name". */
int learn(char *s, int len)
{
    hash *name;

    if (search_name_hash(learn_list_head, s, len))
        return 1;

    /* node and text share one zeroed allocation, so the stored text is
       always NUL-terminated */
    name = calloc(1, sizeof(hash) + len + 2);
    if (!name)
        return 1;

    /* plain pointer arithmetic: casting through unsigned long would
       truncate the address where long is narrower than a pointer */
    name->text = (char *)name + sizeof(hash);
    name->len = len;
    /* copies at most len bytes and stops at an embedded NUL */
    strncpy(name->text, s, len);

    if (add_to_hash(learn_list_head, name) == -1)
    {
        free(name);
        return 1;
    }
    lobj++;
    return 0;
}
/* Record the name s (len bytes) in the image-name table.
   Returns 0 if the name was newly added.  Returns 1 if it was already
   present, or if it could not be stored (allocation or insertion failure). */
int imagename(char *s, int len)
{
    hash *name;

    if (search_name_hash(name_list_head, s, len))
        return 1;

    /* node and text share one zeroed allocation, so the stored text is
       always NUL-terminated */
    name = calloc(1, sizeof(hash) + len + 2);
    if (!name)
        return 1;

    /* plain pointer arithmetic: casting through unsigned long would
       truncate the address where long is narrower than a pointer */
    name->text = (char *)name + sizeof(hash);
    name->len = len;
    /* copies at most len bytes and stops at an embedded NUL */
    strncpy(name->text, s, len);

    if (add_to_hash(name_list_head, name) == -1)
    {
        free(name);
        return 1;
    }
    lobj++;
    return 0;
}
/* Write at most len characters of s to fp, stopping early at the NUL.
   Returns a pointer to the first character that was not written; a null or
   empty s is returned unchanged. */
unsigned char *nprintf(char *s, int len, FILE *fp)
{
    int remaining = len;

    if (s == NULL || *s == '\0')
        return (unsigned char *)s;

    while (*s != '\0' && remaining > 0) {
        putc(*s, fp);
        s++;
        remaining--;
    }
    return (unsigned char *)s;
}
/* Find the LAST position in s holding any of the eight characters c1..c8.
   The scan starts at the terminating NUL (so a 0 argument matches it) and
   walks back to s[0].  Returns a pointer to the match, or NULL. */
unsigned char *str8rchr(const char * s, int c1, int c2, int c3, int c4, 
                        int c5, int c6, int c7, int c8)
{
    /* index-based so that no pointer below s is ever formed */
    size_t n = strlen(s) + 1;

    while (n-- > 0) {
        char ch = s[n];

        if ((ch == (char)c1) || (ch == (char)c2) || (ch == (char)c3) ||
            (ch == (char)c4) || (ch == (char)c5) || (ch == (char)c6) ||
            (ch == (char)c7) || (ch == (char)c8))
            return (unsigned char *)(s + n);
    }
    return NULL;
}
/* Find the LAST position in s holding any of the five characters c1..c5.
   The scan starts at the terminating NUL (so a 0 argument matches it) and
   walks back to s[0].  Returns a pointer to the match, or NULL. */
unsigned char *str5rchr(const char * s, int c1, int c2, int c3, int c4, 
                        int c5)
{
    /* index-based so that no pointer below s is ever formed */
    size_t n = strlen(s) + 1;

    while (n-- > 0) {
        char ch = s[n];

        if ((ch == (char)c1) || (ch == (char)c2) || (ch == (char)c3) ||
            (ch == (char)c4) || (ch == (char)c5))
            return (unsigned char *)(s + n);
    }
    return NULL;
}
/* Case-insensitive substring search: returns a pointer to the first place
   in s1 where s2 occurs (ignoring case), s1 itself when s2 is empty, or
   NULL when there is no match. */
char *strnstr(const char * s1,const char * s2)
{
    int needle_len = strlen(s2);
    int remaining;

    if (needle_len == 0)
        return (char *) s1;

    /* slide along s1 while enough characters remain to hold s2 */
    for (remaining = strlen(s1); remaining >= needle_len; remaining--, s1++) {
        if (strncasecmp(s1, s2, needle_len) == 0)
            return (char *) s1;
    }
    return NULL;
}
/* File extensions recognised as image/media names.  strip_image_info()
   tries them in array order and stops at the first one found, so the order
   (longer extensions first) is part of the behaviour. */
unsigned char *imagetypes[]=
{
    /* longest compound extensions */
    ".svg+xml", ".xcf.bz2",
    ".bitmap", ".xcfbz2",
    ".xcfgz", ".alpha", ".dicom", ".matte", ".xjtgz",
    /* four letters */
    ".mask", ".aifc", ".aiff", ".fits", ".icon", ".im24", ".im32",
    ".jpeg", ".midi", ".mpeg", ".xwav", ".mpga", ".tiff",
    /* three letters */
    ".aif", ".als", ".apm", ".bmp", ".bz2", ".cel", ".dcm", ".eps",
    ".fit", ".flc", ".fli", ".gbr", ".gif", ".gih", ".gpb", ".ico",
    ".im1", ".im8", ".jpe", ".jpg", ".kar", ".mid", ".mov", ".mp2",
    ".mp3", ".mp4", ".mpa", ".mpg", ".ogg", ".ogm", ".pcc", ".pcx",
    ".pdf", ".pdm", ".pgm", ".pix", ".png", ".pnm", ".ppm", ".psd",
    ".psp", ".ras", ".rgb", ".sgi", ".svg", ".swf", ".tga", ".tif",
    ".tub", ".wav", ".wmf", ".xbm", ".xcf", ".xjt", ".xpm", ".xwd",
    ".pov", ".wma", ".dia", ".fig", ".jif", ".pgn", ".art",
    /* two letters */
    ".bw", ".ps", ".g3", ".js", ".rs",
};
/* Copy src to dst, putting a backslash in front of every character the
   generated shell script would otherwise interpret.  *quoted is set when a
   quote or backtick was escaped.  dst must have room for
   2 * strlen(src) + 1 bytes. */
static void escape_for_shell(unsigned char *dst, const unsigned char *src,
                             int *quoted)
{
    while (*src)
    {
        if ((*src == '\"') || (*src == '\'') || (*src == '`'))
        {
            *quoted = 1;
            *dst++ = '\\';
        }
        else
        if ((*src == ' ') || (*src == '(') || (*src == ')') ||
            (*src == '{') || (*src == '}') || (*src == '[') ||
            (*src == ']') || (*src == '&') || (*src == '-') ||
            (*src == ';'))
            *dst++ = '\\';
        *dst++ = *src++;
    }
    *dst = '\0';
}

/* Scan wiki text starting at s for one image reference and emit the shell
   commands that fetch (or, in tree mode, copy) that image.

   s     = text to scan; "imageXXX =" / "mapXXX :" style keywords are logged
           to fragmentlog and the text after them is scanned recursively
   title = page title used to prefix log lines (may be an empty string)

   The candidate name runs from the start of the scanned text to the first
   '|', ']' or newline.  XML entities are decoded, the name is cut after the
   first known extension from imagetypes[], and any namespace/path prefix is
   dropped.  Names without a known extension are written to imagereject.
   Accepted names are de-duplicated with learn(), logged to imagelog, and
   turned into script text on stdout (or on one of fpl[] in parallel mode).

   Returns a pointer into s at which the caller should resume scanning.

   NOTE(review): the entity literals ("&quot;", "&amp;", "&lt;", "&gt;",
   "&apos;", "&nbsp;", "&ndash;") were reconstructed from the lengths used
   in the comparisons; confirm them against the upstream source. */
unsigned char *strip_image_info(unsigned char *s, char *title)
{
    unsigned char *p, *j;
    FILE *fp = stdout;
    unsigned char ch = '\0';    /* previous character, for word boundaries */

    while (*s && (isspace(*s))) s++;
    if (!strncasecmp(s, "no image", 8))
        return s;
    p = s;
    while (*s)
    {
        if ((!strncasecmp(s, "image", 5) ||
             !strncasecmp(s, "map", 3)) && !isalnum(ch))
        {
            unsigned char *fragment, *end;
            fragment = s;
            if (!strncasecmp(s, "image", 5))
                s += 5;
            else
            if (!strncasecmp(s, "map", 3))
                s += 3;
            if (*s)
            {
                while (*s && isalnum(*s)) s++;
                end = s;
                while (*s && isspace(*s)) s++;
                if ((*s && *s == '=') || *s == ':')
                {
                    /* bound the keyword so it cannot overrun fwk and is
                       always NUL-terminated */
                    size_t flen = (size_t)(end - fragment);
                    if (flen > sizeof(fwk) - 1)
                        flen = sizeof(fwk) - 1;
                    memset(&fwk[0], 0, 256);
                    memmove(&fwk[0], fragment, flen);
                    fwk[flen] = '\0';
                    if (!learn(&fwk[0], flen))
                    {
                        if (*title)
                            fprintf(fragmentlog, "[%s] %s\n", title, &fwk[0]);
                        else
                            fprintf(fragmentlog, "%s\n", &fwk[0]);
                        fflush(fragmentlog);
                    }
                    s++;
                    s = strip_image_info(s, title);
                    ch = '\0';
                }
            }
            continue;
        }
        if ((*s == '|') || (*s == ']') || (*s == '\n'))
        {
            register int y;
            unsigned char *l;
            unsigned char dir1[32], dir2[32];
            unsigned char *lp, *blp;
            unsigned char *ulp, *bulp;
            int cnvt = 0, bcnvt = 0, unicnvt = 0, invl = 0;

            /* copy the candidate name into wk, decoding entities; up to
               three bytes are stored per pass, hence the margin */
            lp = &wk[0];
            j = lp;
            while (*p && (p < s) && (j < &wk[sizeof(wk) - 3]))
            {
                // skip self referencing images
                if (!strncasecmp(p, "{{", 2))
                    return s;
                /* strncmp stops at the terminator, unlike memcmp */
                if (!strncmp(p, "&quot;", 6))
                {
                    p += 6;
                    *j++ = '\x22';
                }
                if (!strncmp(p, "&amp;", 5))
                {
                    p += 5;
                    *j++ = '&';
                }
                if (!strncmp(p, "&lt;", 4))
                {
                    /* drop an escaped tag: everything through "&gt;" */
                    p += 4;
                    while (*p)
                    {
                        if (!strncmp(p, "&gt;", 4))
                        {
                            p += 4;
                            break;
                        }
                        p++;
                    }
                }
                if (*p == '\n')
                    p++;
                /* never copy (and step over) the terminator */
                if (!*p)
                    break;
                if (!strncmp(p, "[[", 2))
                    break;
                *j++ = *p++;
            }
            *j = '\0';
            s++;

            /* cut the name right after the first known extension */
            for (j=NULL, y=0; y < (sizeof(imagetypes) / sizeof (char *)); y++)
            {
                j = strnstr(lp, imagetypes[y]);
                if (j)
                {
                    register int ilen = strlen(imagetypes[y]);
                    j += ilen;
                    *j = '\0';
                    break;
                }
            }
            if (!j)
            {
                /* no known extension: log names that still look like
                   "something.ext" with a 3-5 letter extension */
                if (*lp && isalpha(*lp))
                {
                    unsigned char *sp = strchr(lp, '.');
                    unsigned char *sj, *slp = lp;
                    if (sp)
                    {
                        unsigned char *sllp = sp, *meter;
                        sllp++;
                        if ((*sllp != ' ') && (isalpha(*sllp)))
                        {
                            sj = str8rchr(slp, ':', '/', '\\', '{', '\n', '&',
                                          '=', '>');
                            if (sj)
                                slp = ++sj;
                            meter = sllp;
                            while (*sllp)
                            {
                                if (!isalpha(*sllp))
                                {
                                    *sllp = '\0';
                                    break;
                                }
                                sllp++;
                            }
                            if (*slp &&
                                (((sllp - meter) >= 3) && ((sllp - meter) <= 5)))
                            {
                                if (*title)
                                    fprintf(imagereject, "[%s] %s\n", title, slp);
                                else
                                    fprintf(imagereject, "%s\n", slp);
                                fflush(imagereject);
                            }
                        }
                    }
                }
                return s;
            }
            /* drop any namespace or path prefix */
            j = str5rchr(lp, ':', '/', '\\', '{', '\n');
            if (j)
                lp = ++j;

            if (!*lp)
                return s;
            /* ulwk, expand, final1 and final2 are fixed-size; escaping can
               double the length, so refuse names that could overflow them */
            if (strlen(lp) >= sizeof(final1) / 2)
                return s;
#ifdef UNICODE_EXPANSION
            // filename string extracted. convert xml control character tags
            l = &expand[0];
            ulp = lp;
            while (*ulp)
            {
                if (!strncasecmp(ulp, "&amp;", 5))
                {
                    ulp += 5;
                    *l++ = '&';
                    continue;
                }
                if (!strncasecmp(ulp, "&lt;", 4))
                {
                    ulp += 4;
                    *l++ = '<';
                    continue;
                }
                if (!strncasecmp(ulp, "&gt;", 4))
                {
                    ulp += 4;
                    *l++ = '>';
                    continue;
                }
                if (!strncasecmp(ulp, "&quot;", 6))
                {
                    ulp += 6;
                    *l++ = '\"';
                    continue;
                }
                if (!strncasecmp(ulp, "&apos;", 6))
                {
                    ulp += 6;
                    *l++ = '\'';
                    continue;
                }
                if (!strncasecmp(ulp, "&nbsp;", 6))
                {
                    ulp += 6;
                    *l++ = ' ';
                    continue;
                }
                if (!strncasecmp(ulp, "&ndash;", 7))
                {
                    ulp += 7;
                    *l++ = '-';
                    continue;
                }
                if ((ulp[0] == '&') && (ulp[1] != '&'))
                {
                    /* numeric character reference: &#NNN; */
                    unsigned char *sc = strchr(ulp, ';'), *slp;
                    unsigned char unicode[32];
                    unsigned short uni;

                    if (sc)
                    {
                        slp = ulp;
                        slp++;
                        while (*slp != ';')
                        {
                            if ((*slp == '#') || (*slp == '-') ||
                                (*slp == 'x') || (*slp == 'X') ||
                                isxdigit(*slp))
                                slp++;
                            else
                            {
                                invl = 1;
                                break;
                            }
                        }
                        if (!invl)
                        {
                            int unilen = sc - ulp;
                            int slen = sc - ulp;
                            slp = ulp;
                            slp++;
                            unilen--;
                            if (*slp == '#')
                            {
                                unilen--;
                                slp++;
                            }
                            if (unilen < 31)
                            {
                                memset(unicode, 0, 32);
                                strncpy(unicode, slp, unilen);
                                uni = atoi(unicode);
                                fprintf(imagelog, "UNI1: %s (#%d) %s \n",
                                        unicode, (int)uni,
                                        lp);
                                unicode[0] = '\0';
                                sprintf(unicode, "\\u%04X", uni);
                                unilen = u8_unescape(l, 32, unicode);

                                fprintf(imagelog, "UNI2: %s unilen %d slen %d\n",
                                        unicode, (int)unilen, (int)slen);
                                ulp += slen;
                                l += unilen;
                                ulp++;
                                unicnvt = 1;
                                continue;
                            }
                        }
                    }
                }
                *l++ = *ulp++;
            }
            *l = '\0';
            lp = &expand[0];
#endif
            // convert spaces to underline characters in image names
            ulp = &ulwk[0];
            memmove(ulp, lp, strlen(lp) + 1);
            ulp[0] = toupper(ulp[0]);
            for (l = ulp; *l; l++)
            {
                if (*l == ' ')
                {
                    *l = '_';
                    cnvt = 1;
                }
            }
            /* learn() returns non-zero for names already handled */
            if (learn(lp, strlen(lp)))
                return s;
            if (cnvt && learn(ulp, strlen(ulp)))
                return s;
            memset(md5_out, 0, 16);
            lp[0] = toupper(lp[0]);
#ifdef UNICODE_EXPANSION
            if (unicnvt || invl)
            {
                if (invl)
                    fprintf(imagelog, "INVL: %s -> %s\n", wk, lp);
                else
                    fprintf(imagelog, "%s -> %s\n", wk, lp);
                fflush(imagelog);
                if (invl)
                    return s;
            }
            else
                return s;
#else
            fprintf(imagelog, "%s\n", lp);
            fflush(imagelog);
#endif
            /* the upload directory is derived from the first MD5 byte:
               "<high nibble>/<byte>/" */
            MD5(lp, strlen(lp), md5_out);
            dir1[0] = '\0';
            snprintf(dir1, sizeof(dir1), "%x/%02x/", (md5_out[0] >> 4),
                     md5_out[0]);
            if (cnvt)
            {
                memset(md5_ulout, 0, 16);
                ulp[0] = toupper(ulp[0]);
                MD5(ulp, strlen(ulp), md5_ulout);
                dir2[0] = '\0';
                snprintf(dir2, sizeof(dir2), "%x/%02x/", (md5_ulout[0] >> 4),
                         md5_ulout[0]);
            }
            // put a backslash in front of bash control chars
            escape_for_shell(&final1[0], lp, &bcnvt);
            blp = &final1[0];
            escape_for_shell(&final2[0], ulp, &bcnvt);
            bulp = &final2[0];
            // debug of control characters
//          if (!bcnvt)
//              return s;
            (void)bcnvt;
            (void)unicnvt;
            (void)invl;
            if (pmode)
                fp = fpl[(md5_out[0] >> 4) % 16];
            if (tree)
            {
                fprintf(fp, "if [ -a $IMAGE./%s%s ]; then\n",
                        dir1, blp);
                fprintf(fp, "\t/bin/mkdir -p $OUTPUT./%s\n", dir1);
                fprintf(fp, "\tcp -f $IMAGE./%s%s $OUTPUT./%s%s\n",
                        dir1, blp, dir1, blp);
                fprintf(fp, "\techo ./%s%s copied to $OUTPUT./%s%s >> "
                        "copied.log\n", dir1, blp, dir1, blp);
                if (cnvt)
                {
                    fprintf(fp, "elif [ -a $IMAGE./%s%s ]; then\n",
                            dir2, bulp);
                    fprintf(fp, "\t/bin/mkdir -p $OUTPUT./%s\n", dir2);
                    fprintf(fp, "\tcp -f $IMAGE./%s%s $OUTPUT./%s%s\n",
                            dir2, bulp, dir2, bulp);
                    fprintf(fp, "\techo ./%s%s copied to $OUTPUT./%s%s >> "
                            "copied.log\n", dir2, bulp, dir2, bulp);
                }
                fprintf(fp, "else\n");
                fprintf(fp,
                        "\techo ./%s%s file not found >> failed.log\n", dir1,
                        blp);
                fprintf(fp, "fi\n\n");
            }
            else
            {
                fprintf(fp, "if [ -a $IMAGE./%s%s ]; then\n",
                        dir1, blp);
                fprintf(fp, "\techo %s%s already exists >> exists.log\n",
                        dir1, blp);
                if (cnvt)
                {
                    fprintf(fp, "elif [ -a $IMAGE./%s%s ]; then\n",
                            dir2, bulp);
                    fprintf(fp, "\techo %s%s already exists >> exists.log\n",
                            dir2, bulp);
                }
                fprintf(fp, "else\n");
                fprintf(fp, "\tcurl --retry 7 -f -O $IMAGEPATH./%s%s\n",
                        dir1, blp);
                fprintf(fp, "\tif [ -a $IMAGE./%s ]; then\n", blp);
                fprintf(fp, "\t\t/bin/mkdir -p $OUTPUT./%s\n", dir1);
                fprintf(fp, "\t\t/bin/mv ./%s $OUTPUT./%s\n",
                        blp, dir1);
                fprintf(fp, "\t\techo ./%s%s downloaded >> download.log\n",
                        dir1, blp);
                fprintf(fp, "\telse\n");
                fprintf(fp, "\t\tcurl --retry 7 -f -O $COMMONSPATH./%s%s\n",
                        dir1, blp);
                fprintf(fp, "\t\tif [ -a $IMAGE./%s ]; then\n",
                        blp);
                fprintf(fp, "\t\t\t/bin/mkdir -p $OUTPUT./%s\n", dir1);
                fprintf(fp, "\t\t\t/bin/mv ./%s $OUTPUT./%s\n",
                        blp, dir1);
                fprintf(fp, "\t\t\techo ./%s%s downloaded >> download.log\n",
                        dir1, blp);
                fprintf(fp, "\t\telse\n");
                if (cnvt)
                {
                    fprintf(fp, "\t\t\tcurl --retry 7 -f -O $IMAGEPATH./%s%s\n",
                            dir2, bulp);
                    fprintf(fp, "\t\t\tif [ -a $IMAGE./%s ]; then\n",
                            bulp);
                    fprintf(fp, "\t\t\t\t/bin/mkdir -p $OUTPUT./%s\n",
                            dir2);
                    fprintf(fp, "\t\t\t\t/bin/mv ./%s $OUTPUT./%s\n",
                            bulp, dir2);
                    fprintf(fp, "\t\t\t\techo ./%s%s downloaded >> "
                            "download.log\n", dir2, bulp);
                    fprintf(fp, "\t\t\telse\n");
                    fprintf(fp, "\t\t\t\tcurl --retry 7 -f -O $COMMONSPATH./%s%s\n",
                            dir2, bulp);
                    fprintf(fp, "\t\t\t\tif [ -a $IMAGE./%s ]; then\n",
                            bulp);
                    fprintf(fp, "\t\t\t\t\t/bin/mkdir -p $OUTPUT./%s\n",
                            dir2);
                    fprintf(fp, "\t\t\t\t\t/bin/mv ./%s $OUTPUT./%s\n",
                            bulp, dir2);
                    fprintf(fp, "\t\t\t\t\techo ./%s%s downloaded >> "
                            "download.log\n", dir2, bulp);
                    fprintf(fp, "\t\t\t\telse\n");
                    fprintf(fp, "\t\t\t\t\techo ./%s%s failed >> failed.log\n",
                            dir1, blp);
                    fprintf(fp, "\t\t\t\t\techo ./%s%s failed >> failed.log\n",
                            dir2, bulp);
                    fprintf(fp, "\t\t\t\tfi\n");
                    fprintf(fp, "\t\t\tfi\n");
                }
                else
                {
                    fprintf(fp,
                            "\t\t\techo ./%s%s failed >> failed.log\n", dir1,
                            blp);
                }
                fprintf(fp, "\t\tfi\n");
                fprintf(fp, "\tfi\n");
                fprintf(fp, "fi\n\n");
            }
            return s;
        }
        ch = *s;
        s++;
    }
    return s;
}
/* Number of bytes the original code assumed for each path / work buffer. */
enum { WIKIX_FIXED_BUF = 256 };

/*
 * Copy a command-line path value into one of the global path buffers and
 * always NUL-terminate it.
 *
 * NOTE(review): the buffers are declared outside this block; the original
 * code copied up to 256 bytes into them, so they are assumed to hold at
 * least 256 bytes -- confirm against their declarations.
 */
static void wikix_set_path(void *dest, const char *src)
{
    char *d = dest;

    strncpy(d, src, WIKIX_FIXED_BUF - 1);
    d[WIKIX_FIXED_BUF - 1] = '\0';
}

/*
 * Write the shell preamble shared by the single-script output (stdout) and
 * the "image_sh" launcher: shebang, path variables and the scratch
 * directories.
 */
static void wikix_write_master_preamble(FILE *out)
{
    fprintf(out, "#!/bin/sh\n\n");
    fprintf(out, "IMAGE=%s\n", ImagePath);
    fprintf(out, "OUTPUT=%s\n", OutputPath);
    fprintf(out, "IMAGEPATH=%s\n", iPath);
    fprintf(out, "COMMONSPATH=%s\n\n", cPath);
    fprintf(out, "/bin/mkdir -p $OUTPUT./thumb\n");
    fprintf(out, "/bin/chmod 777 $OUTPUT./thumb\n");
    fprintf(out, "/bin/mkdir -p $OUTPUT./temp\n");
    fprintf(out, "/bin/chmod 777 $OUTPUT./temp\n");
    fprintf(out, "/bin/mkdir -p $OUTPUT./tmp\n");
    fprintf(out, "/bin/chmod 777 $OUTPUT./tmp\n\n");
}

/*
 * Given the text that follows a "<title>" tag, copy everything up to the
 * matching "</title>" into `title`.  `title` is left untouched when the
 * closing tag is missing or the title is empty.
 */
static void wikix_capture_title(unsigned char *title, const char *text)
{
    const char *close_tag = strstr(text, "</title>");

    if (close_tag && close_tag != text)
    {
        size_t len = (size_t)(close_tag - text);

        memcpy(title, text, len);
        title[len] = '\0';
    }
}

/*
 * Program entry point.
 *
 * Parses the command line (-h -t -r -c -i -o -p), emits the shell script
 * preamble (to stdout, or to "image_sh" plus sixteen "imageNN" scripts in
 * parallel mode), then reads the XML dump from stdin line by line.  For each
 * "image"/"map" keyword followed by '=' or ':' the remainder is handed to
 * strip_image_info(); keywords rejected by imagename() are written to
 * fragment.log together with the current page title.
 *
 * Returns 0 on completion; exits with status 1 on -h, on allocation failure
 * or when a script file cannot be created.
 */
int main(int argc, char *argv[])
{
    int i, r, inpage = 0;
    unsigned char *s, *buffer, *title;
    char fname[32];
    FILE *fl;

    ImagePath[0] = '\0';
    OutputPath[0] = '\0';

    // default remote locations:
    // http://upload.wikimedia.org/wikipedia/en/
    // http://upload.wikimedia.org/wikipedia/commons/
    strcpy(iPath, "http://upload.wikimedia.org/wikipedia/en/");
    strcpy(cPath, "http://upload.wikimedia.org/wikipedia/commons/");

    // argv[0] is the program name, so option scanning starts at 1
    for (i = 1; i < argc; i++)
    {
        const char *arg = argv[i];
        void *dest = NULL;

        // options are matched on their two-character prefix, as before;
        // strncmp stops at the terminator so short arguments are not over-read
        if (!strncmp(arg, "-h", 2))
        {
            printf("USAGE: wikix -htrciop < file.xml [ > script.out ]\n");
            printf(" -h this help screen\n");
            printf(" -t use xml dump to strip from tree\n");
            printf(" -r wikipedia path\n");
            printf(" -c commons path\n");
            printf(" -i image path\n");
            printf(" -o output path\n");
            printf(" -p parallel (16 process) mode\n");
            exit(1);
        }
        else if (!strncmp(arg, "-t", 2))
        {
            tree = 1;
        }
        else if (!strncmp(arg, "-p", 2))
        {
            // parallel mode (16 processes)
            pmode = 1;
        }
        else if (!strncmp(arg, "-r", 2))
        {
            dest = iPath;       // remote wikipedia path
        }
        else if (!strncmp(arg, "-c", 2))
        {
            dest = cPath;       // remote commons path
        }
        else if (!strncmp(arg, "-i", 2))
        {
            dest = ImagePath;   // local image tree
        }
        else if (!strncmp(arg, "-o", 2))
        {
            dest = OutputPath;  // local output image tree
        }

        // a path option consumes the following argument, if there is one;
        // a trailing option without a value is ignored instead of
        // dereferencing the NULL that terminates argv
        if (dest && i + 1 < argc)
            wikix_set_path(dest, argv[++i]);
    }

    memset(&fwk[0], 0xFF, WIKIX_FIXED_BUF);

    if (!init_hash_list())
    {
        printf("wikix: could not allocate workspace\n");
        exit(1);
    }

    buffer = malloc(0x10000);
    if (!buffer)
    {
        printf("gfdl-wikititle: could not allocate buffer workspace\n");
        exit(1);
    }
    buffer[0] = '\0';

    title = malloc(0x10000);
    if (!title)
    {
        printf("gfdl-wikititle: could not allocate namespace\n");
        exit(1);
    }
    title[0] = '\0';

    if (!pmode)
    {
        // single script mode: everything goes to stdout
        wikix_write_master_preamble(stdout);
    }
    else
    {
        // parallel mode: "image_sh" launches sixteen worker scripts
        fl = fopen("image_sh", "w");
        if (!fl)
        {
            printf("FILE error could not create image_sh\n");
            exit(1);
        }
        chmod("image_sh", 0755);
        wikix_write_master_preamble(fl);

        for (r = 0; r < 16; r++)
        {
            snprintf(fname, sizeof fname, "image%02d", r);
            fpl[r] = fopen(fname, "w");
            if (!fpl[r])
            {
                printf("FILE error could not create [%s]\n", fname);
                exit(1);
            }
            chmod(fname, 0755);
            fprintf(fpl[r], "#!/bin/sh\n\n");
            fprintf(fpl[r], "\nIMAGE=%s\n", ImagePath);
            fprintf(fpl[r], "OUTPUT=%s\n", OutputPath);
            fprintf(fpl[r], "IMAGEPATH=%s\n", iPath);
            fprintf(fpl[r], "COMMONSPATH=%s\n\n", cPath);
            // NOTE(review): ">&" is a bash/csh style redirection; a strict
            // POSIX /bin/sh may reject it.  Emitted text left unchanged.
            fprintf(fl, "./%s >& imagelog.%02d &\n", fname, r);
        }
        fclose(fl);
    }

    // the log files are best effort: a failure is reported but not fatal
    imagelog = fopen("image.log", "wb");
    if (!imagelog)
    {
        printf("FILE error could not create image log\n");
    }
    imagereject = fopen("reject.log", "wb");
    if (!imagereject)
    {
        printf("FILE error could not create reject log\n");
    }
    fragmentlog = fopen("fragment.log", "wb");
    if (!fragmentlog)
    {
        printf("FILE error could not create image name fragment log\n");
    }

    while ((s = (unsigned char *)fgets((char *)buffer, 8192 * 4, stdin)) != NULL)
    {
        // previous character seen on this line; a keyword only counts when
        // it is not glued to a preceding alphanumeric character
        unsigned char ch = '\0';
        char *title_tag;

        if (strstr((char *)s, "<page>"))
        {
            inpage++;
            *title = '\0';
            continue;
        }
        if (strstr((char *)s, "</page>"))
        {
            if (inpage)
                inpage--;
            *title = '\0';
            continue;
        }

        title_tag = strstr((char *)s, "<title>");
        if (inpage && title_tag)
            wikix_capture_title(title, title_tag + 7);

        while (*s)
        {
            if (inpage && !strncasecmp((char *)s, "<title>", 7))
            {
                s += 7;
                wikix_capture_title(title, (char *)s);
                // "<title>" was the last thing on the line: stop here rather
                // than stepping past the string terminator below
                if (!*s)
                    break;
            }

            if ((!strncasecmp((char *)s, "image", 5) ||
                 !strncasecmp((char *)s, "map", 3)) &&
                !isalnum(ch))
            {
                unsigned char *fragment = s, *end;

                s += !strncasecmp((char *)s, "image", 5) ? 5 : 3;
                if (*s)
                {
                    while (*s && isalnum(*s))
                        s++;
                    end = s;
                    while (*s && isspace(*s))
                        s++;

                    if (*s == '=' || *s == ':')
                    {
                        size_t len = (size_t)(end - fragment);

                        // keep the keyword inside the work buffer and leave
                        // room for the terminating NUL
                        if (len > WIKIX_FIXED_BUF - 1)
                            len = WIKIX_FIXED_BUF - 1;

                        memset(&fwk[0], 0, WIKIX_FIXED_BUF);
                        memcpy(&fwk[0], fragment, len);
                        if (!imagename(&fwk[0], len) && fragmentlog)
                        {
                            if (*title)
                                fprintf(fragmentlog, "[%s] %s\n", title, &fwk[0]);
                            else
                                fprintf(fragmentlog, "%s\n", &fwk[0]);
                            fflush(fragmentlog);
                        }
                        s++;
                        s = strip_image_info(s, title);
                        ch = '\0';
                    }
                }
                continue;
            }

            ch = *s;
            s++;
        }
    }

    if (pmode)
    {
        for (r = 0; r < 16; r++)
        {
            // close the scripts that were actually opened
            if (fpl[r])
                fclose(fpl[r]);
            fpl[r] = NULL;
        }
    }

    // fclose(NULL) is undefined, and these opens are allowed to fail
    if (fragmentlog)
        fclose(fragmentlog);
    if (imagelog)
        fclose(imagelog);
    if (imagereject)
        fclose(imagereject);

    free(title);
    free(buffer);
    free_hash();
    return 0;
}

AltStyle によって変換されたページ (->オリジナル) /