00001 /* SVN FILE INFO 00002 * $Revision: 174 $ : Last Committed Revision 00003 * $Date: 2008年06月24日 10:50:29 -0700 (2008年6月24日) $ : Last Committed Date */ 00004 /* 00005 * "$Id: mxml-entity.c,v 1.1 2007年05月23日 20:43:27 david_ko Exp $" 00006 * 00007 * Character entity support code for Mini-XML, a small XML-like 00008 * file parsing library. 00009 * 00010 * Copyright 2003-2005 by Michael Sweet. 00011 * 00012 * This program is free software; you can redistribute it and/or 00013 * modify it under the terms of the GNU Library General Public 00014 * License as published by the Free Software Foundation; either 00015 * version 2, or (at your option) any later version. 00016 * 00017 * This program is distributed in the hope that it will be useful, 00018 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00020 * GNU General Public License for more details. 00021 * 00022 * Contents: 00023 * 00024 * mxmlEntityAddCallback() - Add a callback to convert entities to 00025 * Unicode. 00026 * mxmlEntityGetName() - Get the name that corresponds to the 00027 * character value. 00028 * mxmlEntityGetValue() - Get the character corresponding to a named 00029 * entity. 00030 * mxmlEntityRemoveCallback() - Remove a callback. 00031 * default_callback() - Lookup standard (X)HTML entities. 00032 */ 00033 00034 /* 00035 * Include necessary headers... 00036 */ 00037 00038 #include "config.h" 00039 #include "mxml.h" 00040 00041 00042 /* 00043 * Local functions... 00044 */ 00045 00046 static int default_callback(const char *name); 00047 00048 00049 /* 00050 * Callback array... 00051 */ 00052 00053 static int num_callbacks = 1; 00054 static int (*callbacks[100])(const char *name) = 00055 { 00056 default_callback 00057 }; 00058 00059 00060 /* 00061 * 'mxmlEntityAddCallback()' - Add a callback to convert entities to Unicode. 00062 */ 00063 00064 int /* O - 0 on success, -1 on failure */ 00065 mxmlEntityAddCallback(int (*cb)(const char *name)) 00066 /* I - Callback function to add */ 00067 { 00068 if (num_callbacks < (int)(sizeof(callbacks) / sizeof(callbacks[0]))) 00069 { 00070 callbacks[num_callbacks] = cb; 00071 num_callbacks ++; 00072 00073 return (0); 00074 } 00075 else 00076 { 00077 mxml_error("Unable to add entity callback!"); 00078 00079 return (-1); 00080 } 00081 } 00082 00083 00084 /* 00085 * 'mxmlEntityGetName()' - Get the name that corresponds to the character value. 00086 * 00087 * If val does not need to be represented by a named entity, NULL is returned. 00088 */ 00089 00090 const char * /* O - Entity name or NULL */ 00091 mxmlEntityGetName(int val) /* I - Character value */ 00092 { 00093 switch (val) 00094 { 00095 case '&' : 00096 return ("amp"); 00097 00098 case '<' : 00099 return ("lt"); 00100 00101 case '>' : 00102 return ("gt"); 00103 00104 case '\"' : 00105 return ("quot"); 00106 00107 default : 00108 return (NULL); 00109 } 00110 } 00111 00112 00113 /* 00114 * 'mxmlEntityGetValue()' - Get the character corresponding to a named entity. 00115 * 00116 * The entity name can also be a numeric constant. -1 is returned if the 00117 * name is not known. 00118 */ 00119 00120 int /* O - Character value or -1 on error */ 00121 mxmlEntityGetValue(const char *name) /* I - Entity name */ 00122 { 00123 int i; /* Looping var */ 00124 int ch; /* Character value */ 00125 00126 00127 for (i = 0; i < num_callbacks; i ++) 00128 if ((ch = (callbacks[i])(name)) >= 0) 00129 return (ch); 00130 00131 return (-1); 00132 } 00133 00134 00135 /* 00136 * 'mxmlEntityRemoveCallback()' - Remove a callback. 00137 */ 00138 00139 void 00140 mxmlEntityRemoveCallback(int (*cb)(const char *name)) 00141 /* I - Callback function to remove */ 00142 { 00143 int i; /* Looping var */ 00144 00145 00146 for (i = 0; i < num_callbacks; i ++) 00147 if (cb == callbacks[i]) 00148 { 00149 /* 00150 * Remove the callback... 00151 */ 00152 00153 num_callbacks --; 00154 00155 if (i < num_callbacks) 00156 memmove(callbacks + i, callbacks + i + 1, 00157 (num_callbacks - i) * sizeof(callbacks[0])); 00158 00159 return; 00160 } 00161 } 00162 00163 00164 /* 00165 * 'default_callback()' - Lookup standard (X)HTML entities. 00166 */ 00167 00168 static int /* O - Unicode value or -1 */ 00169 default_callback(const char *name) /* I - Entity name */ 00170 { 00171 int diff, /* Difference between names */ 00172 current, /* Current entity in search */ 00173 first, /* First entity in search */ 00174 last; /* Last entity in search */ 00175 static const struct 00176 { 00177 const char *name; /* Entity name */ 00178 int val; /* Character value */ 00179 } entities[] = 00180 { 00181 { "AElig", 198 }, 00182 { "Aacute", 193 }, 00183 { "Acirc", 194 }, 00184 { "Agrave", 192 }, 00185 { "Alpha", 913 }, 00186 { "Aring", 197 }, 00187 { "Atilde", 195 }, 00188 { "Auml", 196 }, 00189 { "Beta", 914 }, 00190 { "Ccedil", 199 }, 00191 { "Chi", 935 }, 00192 { "Dagger", 8225 }, 00193 { "Delta", 916 }, 00194 { "Dstrok", 208 }, 00195 { "ETH", 208 }, 00196 { "Eacute", 201 }, 00197 { "Ecirc", 202 }, 00198 { "Egrave", 200 }, 00199 { "Epsilon", 917 }, 00200 { "Eta", 919 }, 00201 { "Euml", 203 }, 00202 { "Gamma", 915 }, 00203 { "Iacute", 205 }, 00204 { "Icirc", 206 }, 00205 { "Igrave", 204 }, 00206 { "Iota", 921 }, 00207 { "Iuml", 207 }, 00208 { "Kappa", 922 }, 00209 { "Lambda", 923 }, 00210 { "Mu", 924 }, 00211 { "Ntilde", 209 }, 00212 { "Nu", 925 }, 00213 { "OElig", 338 }, 00214 { "Oacute", 211 }, 00215 { "Ocirc", 212 }, 00216 { "Ograve", 210 }, 00217 { "Omega", 937 }, 00218 { "Omicron", 927 }, 00219 { "Oslash", 216 }, 00220 { "Otilde", 213 }, 00221 { "Ouml", 214 }, 00222 { "Phi", 934 }, 00223 { "Pi", 928 }, 00224 { "Prime", 8243 }, 00225 { "Psi", 936 }, 00226 { "Rho", 929 }, 00227 { "Scaron", 352 }, 00228 { "Sigma", 931 }, 00229 { "THORN", 222 }, 00230 { "Tau", 932 }, 00231 { "Theta", 920 }, 00232 { "Uacute", 218 }, 00233 { "Ucirc", 219 }, 00234 { "Ugrave", 217 }, 00235 { "Upsilon", 933 }, 00236 { "Uuml", 220 }, 00237 { "Xi", 926 }, 00238 { "Yacute", 221 }, 00239 { "Yuml", 376 }, 00240 { "Zeta", 918 }, 00241 { "aacute", 225 }, 00242 { "acirc", 226 }, 00243 { "acute", 180 }, 00244 { "aelig", 230 }, 00245 { "agrave", 224 }, 00246 { "alefsym", 8501 }, 00247 { "alpha", 945 }, 00248 { "amp", '&' }, 00249 { "and", 8743 }, 00250 { "ang", 8736 }, 00251 { "aring", 229 }, 00252 { "asymp", 8776 }, 00253 { "atilde", 227 }, 00254 { "auml", 228 }, 00255 { "bdquo", 8222 }, 00256 { "beta", 946 }, 00257 { "brkbar", 166 }, 00258 { "brvbar", 166 }, 00259 { "bull", 8226 }, 00260 { "cap", 8745 }, 00261 { "ccedil", 231 }, 00262 { "cedil", 184 }, 00263 { "cent", 162 }, 00264 { "chi", 967 }, 00265 { "circ", 710 }, 00266 { "clubs", 9827 }, 00267 { "cong", 8773 }, 00268 { "copy", 169 }, 00269 { "crarr", 8629 }, 00270 { "cup", 8746 }, 00271 { "curren", 164 }, 00272 { "dArr", 8659 }, 00273 { "dagger", 8224 }, 00274 { "darr", 8595 }, 00275 { "deg", 176 }, 00276 { "delta", 948 }, 00277 { "diams", 9830 }, 00278 { "die", 168 }, 00279 { "divide", 247 }, 00280 { "eacute", 233 }, 00281 { "ecirc", 234 }, 00282 { "egrave", 232 }, 00283 { "empty", 8709 }, 00284 { "emsp", 8195 }, 00285 { "ensp", 8194 }, 00286 { "epsilon", 949 }, 00287 { "equiv", 8801 }, 00288 { "eta", 951 }, 00289 { "eth", 240 }, 00290 { "euml", 235 }, 00291 { "euro", 8364 }, 00292 { "exist", 8707 }, 00293 { "fnof", 402 }, 00294 { "forall", 8704 }, 00295 { "frac12", 189 }, 00296 { "frac14", 188 }, 00297 { "frac34", 190 }, 00298 { "frasl", 8260 }, 00299 { "gamma", 947 }, 00300 { "ge", 8805 }, 00301 { "gt", '>' }, 00302 { "hArr", 8660 }, 00303 { "harr", 8596 }, 00304 { "hearts", 9829 }, 00305 { "hellip", 8230 }, 00306 { "hibar", 175 }, 00307 { "iacute", 237 }, 00308 { "icirc", 238 }, 00309 { "iexcl", 161 }, 00310 { "igrave", 236 }, 00311 { "image", 8465 }, 00312 { "infin", 8734 }, 00313 { "int", 8747 }, 00314 { "iota", 953 }, 00315 { "iquest", 191 }, 00316 { "isin", 8712 }, 00317 { "iuml", 239 }, 00318 { "kappa", 954 }, 00319 { "lArr", 8656 }, 00320 { "lambda", 955 }, 00321 { "lang", 9001 }, 00322 { "laquo", 171 }, 00323 { "larr", 8592 }, 00324 { "lceil", 8968 }, 00325 { "ldquo", 8220 }, 00326 { "le", 8804 }, 00327 { "lfloor", 8970 }, 00328 { "lowast", 8727 }, 00329 { "loz", 9674 }, 00330 { "lrm", 8206 }, 00331 { "lsaquo", 8249 }, 00332 { "lsquo", 8216 }, 00333 { "lt", '<' }, 00334 { "macr", 175 }, 00335 { "mdash", 8212 }, 00336 { "micro", 181 }, 00337 { "middot", 183 }, 00338 { "minus", 8722 }, 00339 { "mu", 956 }, 00340 { "nabla", 8711 }, 00341 { "nbsp", 160 }, 00342 { "ndash", 8211 }, 00343 { "ne", 8800 }, 00344 { "ni", 8715 }, 00345 { "not", 172 }, 00346 { "notin", 8713 }, 00347 { "nsub", 8836 }, 00348 { "ntilde", 241 }, 00349 { "nu", 957 }, 00350 { "oacute", 243 }, 00351 { "ocirc", 244 }, 00352 { "oelig", 339 }, 00353 { "ograve", 242 }, 00354 { "oline", 8254 }, 00355 { "omega", 969 }, 00356 { "omicron", 959 }, 00357 { "oplus", 8853 }, 00358 { "or", 8744 }, 00359 { "ordf", 170 }, 00360 { "ordm", 186 }, 00361 { "oslash", 248 }, 00362 { "otilde", 245 }, 00363 { "otimes", 8855 }, 00364 { "ouml", 246 }, 00365 { "para", 182 }, 00366 { "part", 8706 }, 00367 { "permil", 8240 }, 00368 { "perp", 8869 }, 00369 { "phi", 966 }, 00370 { "pi", 960 }, 00371 { "piv", 982 }, 00372 { "plusmn", 177 }, 00373 { "pound", 163 }, 00374 { "prime", 8242 }, 00375 { "prod", 8719 }, 00376 { "prop", 8733 }, 00377 { "psi", 968 }, 00378 { "quot", '\"' }, 00379 { "rArr", 8658 }, 00380 { "radic", 8730 }, 00381 { "rang", 9002 }, 00382 { "raquo", 187 }, 00383 { "rarr", 8594 }, 00384 { "rceil", 8969 }, 00385 { "rdquo", 8221 }, 00386 { "real", 8476 }, 00387 { "reg", 174 }, 00388 { "rfloor", 8971 }, 00389 { "rho", 961 }, 00390 { "rlm", 8207 }, 00391 { "rsaquo", 8250 }, 00392 { "rsquo", 8217 }, 00393 { "sbquo", 8218 }, 00394 { "scaron", 353 }, 00395 { "sdot", 8901 }, 00396 { "sect", 167 }, 00397 { "shy", 173 }, 00398 { "sigma", 963 }, 00399 { "sigmaf", 962 }, 00400 { "sim", 8764 }, 00401 { "spades", 9824 }, 00402 { "sub", 8834 }, 00403 { "sube", 8838 }, 00404 { "sum", 8721 }, 00405 { "sup", 8835 }, 00406 { "sup1", 185 }, 00407 { "sup2", 178 }, 00408 { "sup3", 179 }, 00409 { "supe", 8839 }, 00410 { "szlig", 223 }, 00411 { "tau", 964 }, 00412 { "there4", 8756 }, 00413 { "theta", 952 }, 00414 { "thetasym", 977 }, 00415 { "thinsp", 8201 }, 00416 { "thorn", 254 }, 00417 { "tilde", 732 }, 00418 { "times", 215 }, 00419 { "trade", 8482 }, 00420 { "uArr", 8657 }, 00421 { "uacute", 250 }, 00422 { "uarr", 8593 }, 00423 { "ucirc", 251 }, 00424 { "ugrave", 249 }, 00425 { "uml", 168 }, 00426 { "upsih", 978 }, 00427 { "upsilon", 965 }, 00428 { "uuml", 252 }, 00429 { "weierp", 8472 }, 00430 { "xi", 958 }, 00431 { "yacute", 253 }, 00432 { "yen", 165 }, 00433 { "yuml", 255 }, 00434 { "zeta", 950 }, 00435 { "zwj", 8205 }, 00436 { "zwnj", 8204 } 00437 }; 00438 00439 00440 /* 00441 * Do a binary search for the named entity... 00442 */ 00443 00444 first = 0; 00445 last = (int)(sizeof(entities) / sizeof(entities[0]) - 1); 00446 00447 while ((last - first) > 1) 00448 { 00449 current = (first + last) / 2; 00450 00451 if ((diff = strcmp(name, entities[current].name)) == 0) 00452 return (entities[current].val); 00453 else if (diff < 0) 00454 last = current; 00455 else 00456 first = current; 00457 } 00458 00459 /* 00460 * If we get here, there is a small chance that there is still 00461 * a match; check first and last... 00462 */ 00463 00464 if (!strcmp(name, entities[first].name)) 00465 return (entities[first].val); 00466 else if (!strcmp(name, entities[last].name)) 00467 return (entities[last].val); 00468 else 00469 return (-1); 00470 } 00471 00472 00473 /* 00474 * End of "$Id: mxml-entity.c,v 1.1 2007年05月23日 20:43:27 david_ko Exp $". 00475 */