Overview Package Class Use Source Tree Index Deprecated About
Source for java.lang.Character

 1:  /* java.lang.Character -- Wrapper class for char, and Unicode subsets
 2:  Copyright (C) 1998, 1999, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
 3: 
 4: This file is part of GNU Classpath.
 5: 
 6: GNU Classpath is free software; you can redistribute it and/or modify
 7: it under the terms of the GNU General Public License as published by
 8: the Free Software Foundation; either version 2, or (at your option)
 9: any later version.
 10: 
 11: GNU Classpath is distributed in the hope that it will be useful, but
 12: WITHOUT ANY WARRANTY; without even the implied warranty of
 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 14: General Public License for more details.
 15: 
 16: You should have received a copy of the GNU General Public License
 17: along with GNU Classpath; see the file COPYING. If not, write to the
 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 19: 02110-1301 USA.
 20: 
 21: Linking this library statically or dynamically with other modules is
 22: making a combined work based on this library. Thus, the terms and
 23: conditions of the GNU General Public License cover the whole
 24: combination.
 25: 
 26: As a special exception, the copyright holders of this library give you
 27: permission to link this library with independent modules to produce an
 28: executable, regardless of the license terms of these independent
 29: modules, and to copy and distribute the resulting executable under
 30: terms of your choice, provided that you also meet, for each linked
 31: independent module, the terms and conditions of the license of that
 32: module. An independent module is a module which is not derived from
 33: or based on this library. If you modify this library, you may extend
 34: this exception to your version of the library, but you are not
 35: obligated to do so. If you do not wish to do so, delete this
 36: exception statement from your version. */
 37: 
 38: 
 39:  package java.lang;
 40: 
 41:  import gnu.java.lang.CharData;
 42: 
 43:  import java.io.Serializable;
 44:  import java.text.Collator;
 45:  import java.util.Locale;
 46: 
 47:  /**
 48:  * Wrapper class for the primitive char data type. In addition, this class
 49:  * allows one to retrieve property information and perform transformations
 50:  * on the defined characters in the Unicode Standard, Version 4.0.0.
 51:  * java.lang.Character is designed to be very dynamic, and as such, it
 52:  * retrieves information on the Unicode character set from a separate
 53:  * database, gnu.java.lang.CharData, which can be easily upgraded.
 54:  *
 55:  * <p>For predicates, boundaries are used to describe
 56:  * the set of characters for which the method will return true.
 57:  * This syntax uses fairly normal regular expression notation.
 58:  * See 5.13 of the Unicode Standard, Version 4.0, for the
 59:  * boundary specification.
 60:  *
 61:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
 62:  * for more information on the Unicode Standard.
 63:  *
 64:  * @author Tom Tromey (tromey@cygnus.com)
 65:  * @author Paul N. Fisher
 66:  * @author Jochen Hoenicke
 67:  * @author Eric Blake (ebb9@email.byu.edu)
 68:  * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
 69:  * @see CharData
 70:  * @since 1.0
 71:  * @status partly updated to 1.5; some things still missing
 72:  */
 73:  public final class Character implements Serializable, Comparable<Character>
 74: {
 75:  /**
 76:  * A subset of Unicode blocks.
 77:  *
 78:  * @author Paul N. Fisher
 79:  * @author Eric Blake (ebb9@email.byu.edu)
 80:  * @since 1.2
 81:  */
 82:  public static class Subset
 83:  {
 84:  /** The name of the subset. */
 85:  private final String name;
 86: 
 87:  /**
 88:  * Construct a new subset of characters.
 89:  *
 90:  * @param name the name of the subset
 91:  * @throws NullPointerException if name is null
 92:  */
 93:  protected Subset(String name)
 94:  {
 95:  // Note that name.toString() is name, unless name was null.
 96:  this.name = name.toString();
 97:  }
 98: 
 99:  /**
 100:  * Compares two Subsets for equality. This is <code>final</code>, and
 101:  * restricts the comparison on the <code>==</code> operator, so it returns
 102:  * true only for the same object.
 103:  *
 104:  * @param o the object to compare
 105:  * @return true if o is this
 106:  */
 107:  public final boolean equals(Object o)
 108:  {
 109:  return o == this;
 110:  }
 111: 
 112:  /**
 113:  * Makes the original hashCode of Object final, to be consistent with
 114:  * equals.
 115:  *
 116:  * @return the hash code for this object
 117:  */
 118:  public final int hashCode()
 119:  {
 120:  return super.hashCode();
 121:  }
 122: 
 123:  /**
 124:  * Returns the name of the subset.
 125:  *
 126:  * @return the name
 127:  */
 128:  public final String toString()
 129:  {
 130:  return name;
 131:  }
 132:  } // class Subset
 133: 
 134:  /**
 135:  * A family of character subsets in the Unicode specification. A character
 136:  * is in at most one of these blocks.
 137:  *
 138:  * This inner class was generated automatically from
 139:  * <code>doc/unicode/Blocks-4.0.0.txt</code>, by some perl scripts.
 140:  * This Unicode definition file can be found on the
 141:  * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 142:  * JDK 1.5 uses Unicode version 4.0.0.
 143:  *
 144:  * @author scripts/unicode-blocks.pl (written by Eric Blake)
 145:  * @since 1.2
 146:  */
 147:  public static final class UnicodeBlock extends Subset
 148:  {
 /** First code point of the block (inclusive). */
 private final int start;

 /** Last code point of the block (inclusive). */
 private final int end;

 /** The block's name as spelled by the Unicode standard. */
 private final String canonicalName;

 /**
  * The three spellings of a block name that <code>forName()</code>
  * distinguishes.
  */
 private enum NameType { CANONICAL, NO_SPACES, CONSTANT }

 /**
  * Creates a block covering a fixed range of code points.
  *
  * @param start the first character of the range
  * @param end the last character of the range
  * @param name the block name, passed on to <code>Subset</code>
  * @param canonicalName the name of the block as defined in the Unicode
  *        standard
  */
 private UnicodeBlock(int start, int end, String name,
                      String canonicalName)
 {
   super(name);
   this.canonicalName = canonicalName;
   this.end = end;
   this.start = start;
 }
 178: 
 179:  /**
 180:  * Returns the Unicode character block which a character belongs to.
 181:  * <strong>Note</strong>: This method does not support the use of
 182:  * supplementary characters. For such support, <code>of(int)</code>
 183:  * should be used instead.
 184:  *
 185:  * @param ch the character to look up
 186:  * @return the set it belongs to, or null if it is not in one
 187:  */
 188:  public static UnicodeBlock of(char ch)
 189:  {
 190:  return of((int) ch);
 191:  }
 192: 
 193:  /**
 194:  * Returns the Unicode character block which a code point belongs to.
 195:  *
 196:  * @param codePoint the character to look up
 197:  * @return the set it belongs to, or null if it is not in one.
 198:  * @throws IllegalArgumentException if the specified code point is
 199:  * invalid.
 200:  * @since 1.5
 201:  */
 202:  public static UnicodeBlock of(int codePoint)
 203:  {
 204:  if (codePoint > MAX_CODE_POINT)
 205:  throw new IllegalArgumentException("The supplied integer value is " +
 206:  "too large to be a codepoint.");
 207:  // Simple binary search for the correct block.
 208:  int low = 0;
 209:  int hi = sets.length - 1;
 210:  while (low <= hi)
 211:  {
 212:  int mid = (low + hi) >> 1;
 213:  UnicodeBlock b = sets[mid];
 214:  if (codePoint < b.start)
 215:  hi = mid - 1;
 216:  else if (codePoint > b.end)
 217:  low = mid + 1;
 218:  else
 219:  return b;
 220:  }
 221:  return null;
 222:  }
 223: 
 224:  /**
 225:  * <p>
 226:  * Returns the <code>UnicodeBlock</code> with the given name, as defined
 227:  * by the Unicode standard. The version of Unicode in use is defined by
 228:  * the <code>Character</code> class, and the names are given in the
 229:  * <code>Blocks-<version>.txt</code> file corresponding to that version.
 230:  * The name may be specified in one of three ways:
 231:  * </p>
 232:  * <ol>
 233:  * <li>The canonical, human-readable name used by the Unicode standard.
 234:  * This is the name with all spaces and hyphens retained. For example,
 235:  * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li>
 236:  * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li>
 237:  * <li>The name used for the constants specified by this class, which
 238:  * is the canonical name with all spaces and hyphens replaced with
 239:  * underscores e.g. `BASIC_LATIN'</li>
 240:  * </ol>
 241:  * <p>
 242:  * The names are compared case-insensitively using the case comparison
 243:  * associated with the U.S. English locale. The method recognises the
 244:  * previous names used for blocks as well as the current ones. At
 245:  * present, this simply means that the deprecated `SURROGATES_AREA'
 246:  * will be recognised by this method (the <code>of()</code> methods
 247:  * only return one of the three new surrogate blocks).
 248:  * </p>
 249:  *
 250:  * @param blockName the name of the block to look up.
 251:  * @return the specified block.
 252:  * @throws NullPointerException if the <code>blockName</code> is
 253:  * <code>null</code>.
 254:  * @throws IllegalArgumentException if the name does not match any Unicode
 255:  * block.
 256:  * @since 1.5
 257:  */
 258:  public static final UnicodeBlock forName(String blockName)
 259:  {
 260:  NameType type;
 261:  if (blockName.indexOf(' ') != -1)
 262:  type = NameType.CANONICAL;
 263:  else if (blockName.indexOf('_') != -1)
 264:  type = NameType.CONSTANT;
 265:  else
 266:  type = NameType.NO_SPACES;
 267:  Collator usCollator = Collator.getInstance(Locale.US);
 268:  usCollator.setStrength(Collator.PRIMARY);
 269:  /* Special case for deprecated blocks not in sets */
 270:  switch (type)
 271:  {
 272:  case CANONICAL:
 273:  if (usCollator.compare(blockName, "Surrogates Area") == 0)
 274:  return SURROGATES_AREA;
 275:  break;
 276:  case NO_SPACES:
 277:  if (usCollator.compare(blockName, "SurrogatesArea") == 0)
 278:  return SURROGATES_AREA;
 279:  break;
 280:  case CONSTANT:
 281:  if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) 
 282:  return SURROGATES_AREA;
 283:  break;
 284:  }
 285:  /* Other cases */
 286:  switch (type)
 287:  {
 288:  case CANONICAL:
 289:  for (UnicodeBlock block : sets)
 290:  if (usCollator.compare(blockName, block.canonicalName) == 0)
 291:  return block;
 292:  break;
 293:  case NO_SPACES:
 294:  for (UnicodeBlock block : sets)
 295:  {
 296:  String nsName = block.canonicalName.replaceAll(" ","");
 297:  if (usCollator.compare(blockName, nsName) == 0)
 298:  return block;
 299:  }
 300:  break;
 301:  case CONSTANT:
 302:  for (UnicodeBlock block : sets)
 303:  if (usCollator.compare(blockName, block.toString()) == 0)
 304:  return block;
 305:  break;
 306:  }
 307:  throw new IllegalArgumentException("No Unicode block found for " +
 308:  blockName + ".");
 309:  }
 310: 
 311:  /**
 312:  * Basic Latin.
 313:  * 0x0000 - 0x007F.
 314:  */
 315:  public static final UnicodeBlock BASIC_LATIN
 316:  = new UnicodeBlock(0x0000, 0x007F,
 317:  "BASIC_LATIN", 
 318:  "Basic Latin");
 319: 
 320:  /**
 321:  * Latin-1 Supplement.
 322:  * 0x0080 - 0x00FF.
 323:  */
 324:  public static final UnicodeBlock LATIN_1_SUPPLEMENT
 325:  = new UnicodeBlock(0x0080, 0x00FF,
 326:  "LATIN_1_SUPPLEMENT", 
 327:  "Latin-1 Supplement");
 328: 
 329:  /**
 330:  * Latin Extended-A.
 331:  * 0x0100 - 0x017F.
 332:  */
 333:  public static final UnicodeBlock LATIN_EXTENDED_A
 334:  = new UnicodeBlock(0x0100, 0x017F,
 335:  "LATIN_EXTENDED_A", 
 336:  "Latin Extended-A");
 337: 
 338:  /**
 339:  * Latin Extended-B.
 340:  * 0x0180 - 0x024F.
 341:  */
 342:  public static final UnicodeBlock LATIN_EXTENDED_B
 343:  = new UnicodeBlock(0x0180, 0x024F,
 344:  "LATIN_EXTENDED_B", 
 345:  "Latin Extended-B");
 346: 
 347:  /**
 348:  * IPA Extensions.
 349:  * 0x0250 - 0x02AF.
 350:  */
 351:  public static final UnicodeBlock IPA_EXTENSIONS
 352:  = new UnicodeBlock(0x0250, 0x02AF,
 353:  "IPA_EXTENSIONS", 
 354:  "IPA Extensions");
 355: 
 356:  /**
 357:  * Spacing Modifier Letters.
 358:  * 0x02B0 - 0x02FF.
 359:  */
 360:  public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 361:  = new UnicodeBlock(0x02B0, 0x02FF,
 362:  "SPACING_MODIFIER_LETTERS", 
 363:  "Spacing Modifier Letters");
 364: 
 365:  /**
 366:  * Combining Diacritical Marks.
 367:  * 0x0300 - 0x036F.
 368:  */
 369:  public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 370:  = new UnicodeBlock(0x0300, 0x036F,
 371:  "COMBINING_DIACRITICAL_MARKS", 
 372:  "Combining Diacritical Marks");
 373: 
 374:  /**
 375:  * Greek.
 376:  * 0x0370 - 0x03FF.
 377:  */
 378:  public static final UnicodeBlock GREEK
 379:  = new UnicodeBlock(0x0370, 0x03FF,
 380:  "GREEK", 
 381:  "Greek");
 382: 
 383:  /**
 384:  * Cyrillic.
 385:  * 0x0400 - 0x04FF.
 386:  */
 387:  public static final UnicodeBlock CYRILLIC
 388:  = new UnicodeBlock(0x0400, 0x04FF,
 389:  "CYRILLIC", 
 390:  "Cyrillic");
 391: 
 392:  /**
 393:  * Cyrillic Supplementary.
 394:  * 0x0500 - 0x052F.
 395:  * @since 1.5
 396:  */
 397:  public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
 398:  = new UnicodeBlock(0x0500, 0x052F,
 399:  "CYRILLIC_SUPPLEMENTARY", 
 400:  "Cyrillic Supplementary");
 401: 
 402:  /**
 403:  * Armenian.
 404:  * 0x0530 - 0x058F.
 405:  */
 406:  public static final UnicodeBlock ARMENIAN
 407:  = new UnicodeBlock(0x0530, 0x058F,
 408:  "ARMENIAN", 
 409:  "Armenian");
 410: 
 411:  /**
 412:  * Hebrew.
 413:  * 0x0590 - 0x05FF.
 414:  */
 415:  public static final UnicodeBlock HEBREW
 416:  = new UnicodeBlock(0x0590, 0x05FF,
 417:  "HEBREW", 
 418:  "Hebrew");
 419: 
 420:  /**
 421:  * Arabic.
 422:  * 0x0600 - 0x06FF.
 423:  */
 424:  public static final UnicodeBlock ARABIC
 425:  = new UnicodeBlock(0x0600, 0x06FF,
 426:  "ARABIC", 
 427:  "Arabic");
 428: 
 429:  /**
 430:  * Syriac.
 431:  * 0x0700 - 0x074F.
 432:  * @since 1.4
 433:  */
 434:  public static final UnicodeBlock SYRIAC
 435:  = new UnicodeBlock(0x0700, 0x074F,
 436:  "SYRIAC", 
 437:  "Syriac");
 438: 
 439:  /**
 440:  * Thaana.
 441:  * 0x0780 - 0x07BF.
 442:  * @since 1.4
 443:  */
 444:  public static final UnicodeBlock THAANA
 445:  = new UnicodeBlock(0x0780, 0x07BF,
 446:  "THAANA", 
 447:  "Thaana");
 448: 
 449:  /**
 450:  * Devanagari.
 451:  * 0x0900 - 0x097F.
 452:  */
 453:  public static final UnicodeBlock DEVANAGARI
 454:  = new UnicodeBlock(0x0900, 0x097F,
 455:  "DEVANAGARI", 
 456:  "Devanagari");
 457: 
 458:  /**
 459:  * Bengali.
 460:  * 0x0980 - 0x09FF.
 461:  */
 462:  public static final UnicodeBlock BENGALI
 463:  = new UnicodeBlock(0x0980, 0x09FF,
 464:  "BENGALI", 
 465:  "Bengali");
 466: 
 467:  /**
 468:  * Gurmukhi.
 469:  * 0x0A00 - 0x0A7F.
 470:  */
 471:  public static final UnicodeBlock GURMUKHI
 472:  = new UnicodeBlock(0x0A00, 0x0A7F,
 473:  "GURMUKHI", 
 474:  "Gurmukhi");
 475: 
 476:  /**
 477:  * Gujarati.
 478:  * 0x0A80 - 0x0AFF.
 479:  */
 480:  public static final UnicodeBlock GUJARATI
 481:  = new UnicodeBlock(0x0A80, 0x0AFF,
 482:  "GUJARATI", 
 483:  "Gujarati");
 484: 
 485:  /**
 486:  * Oriya.
 487:  * 0x0B00 - 0x0B7F.
 488:  */
 489:  public static final UnicodeBlock ORIYA
 490:  = new UnicodeBlock(0x0B00, 0x0B7F,
 491:  "ORIYA", 
 492:  "Oriya");
 493: 
 494:  /**
 495:  * Tamil.
 496:  * 0x0B80 - 0x0BFF.
 497:  */
 498:  public static final UnicodeBlock TAMIL
 499:  = new UnicodeBlock(0x0B80, 0x0BFF,
 500:  "TAMIL", 
 501:  "Tamil");
 502: 
 503:  /**
 504:  * Telugu.
 505:  * 0x0C00 - 0x0C7F.
 506:  */
 507:  public static final UnicodeBlock TELUGU
 508:  = new UnicodeBlock(0x0C00, 0x0C7F,
 509:  "TELUGU", 
 510:  "Telugu");
 511: 
 512:  /**
 513:  * Kannada.
 514:  * 0x0C80 - 0x0CFF.
 515:  */
 516:  public static final UnicodeBlock KANNADA
 517:  = new UnicodeBlock(0x0C80, 0x0CFF,
 518:  "KANNADA", 
 519:  "Kannada");
 520: 
 521:  /**
 522:  * Malayalam.
 523:  * 0x0D00 - 0x0D7F.
 524:  */
 525:  public static final UnicodeBlock MALAYALAM
 526:  = new UnicodeBlock(0x0D00, 0x0D7F,
 527:  "MALAYALAM", 
 528:  "Malayalam");
 529: 
 530:  /**
 531:  * Sinhala.
 532:  * 0x0D80 - 0x0DFF.
 533:  * @since 1.4
 534:  */
 535:  public static final UnicodeBlock SINHALA
 536:  = new UnicodeBlock(0x0D80, 0x0DFF,
 537:  "SINHALA", 
 538:  "Sinhala");
 539: 
 540:  /**
 541:  * Thai.
 542:  * 0x0E00 - 0x0E7F.
 543:  */
 544:  public static final UnicodeBlock THAI
 545:  = new UnicodeBlock(0x0E00, 0x0E7F,
 546:  "THAI", 
 547:  "Thai");
 548: 
 549:  /**
 550:  * Lao.
 551:  * 0x0E80 - 0x0EFF.
 552:  */
 553:  public static final UnicodeBlock LAO
 554:  = new UnicodeBlock(0x0E80, 0x0EFF,
 555:  "LAO", 
 556:  "Lao");
 557: 
 558:  /**
 559:  * Tibetan.
 560:  * 0x0F00 - 0x0FFF.
 561:  */
 562:  public static final UnicodeBlock TIBETAN
 563:  = new UnicodeBlock(0x0F00, 0x0FFF,
 564:  "TIBETAN", 
 565:  "Tibetan");
 566: 
 567:  /**
 568:  * Myanmar.
 569:  * 0x1000 - 0x109F.
 570:  * @since 1.4
 571:  */
 572:  public static final UnicodeBlock MYANMAR
 573:  = new UnicodeBlock(0x1000, 0x109F,
 574:  "MYANMAR", 
 575:  "Myanmar");
 576: 
 577:  /**
 578:  * Georgian.
 579:  * 0x10A0 - 0x10FF.
 580:  */
 581:  public static final UnicodeBlock GEORGIAN
 582:  = new UnicodeBlock(0x10A0, 0x10FF,
 583:  "GEORGIAN", 
 584:  "Georgian");
 585: 
 586:  /**
 587:  * Hangul Jamo.
 588:  * 0x1100 - 0x11FF.
 589:  */
 590:  public static final UnicodeBlock HANGUL_JAMO
 591:  = new UnicodeBlock(0x1100, 0x11FF,
 592:  "HANGUL_JAMO", 
 593:  "Hangul Jamo");
 594: 
 595:  /**
 596:  * Ethiopic.
 597:  * 0x1200 - 0x137F.
 598:  * @since 1.4
 599:  */
 600:  public static final UnicodeBlock ETHIOPIC
 601:  = new UnicodeBlock(0x1200, 0x137F,
 602:  "ETHIOPIC", 
 603:  "Ethiopic");
 604: 
 605:  /**
 606:  * Cherokee.
 607:  * 0x13A0 - 0x13FF.
 608:  * @since 1.4
 609:  */
 610:  public static final UnicodeBlock CHEROKEE
 611:  = new UnicodeBlock(0x13A0, 0x13FF,
 612:  "CHEROKEE", 
 613:  "Cherokee");
 614: 
 615:  /**
 616:  * Unified Canadian Aboriginal Syllabics.
 617:  * 0x1400 - 0x167F.
 618:  * @since 1.4
 619:  */
 620:  public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 621:  = new UnicodeBlock(0x1400, 0x167F,
 622:  "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 
 623:  "Unified Canadian Aboriginal Syllabics");
 624: 
 625:  /**
 626:  * Ogham.
 627:  * 0x1680 - 0x169F.
 628:  * @since 1.4
 629:  */
 630:  public static final UnicodeBlock OGHAM
 631:  = new UnicodeBlock(0x1680, 0x169F,
 632:  "OGHAM", 
 633:  "Ogham");
 634: 
 635:  /**
 636:  * Runic.
 637:  * 0x16A0 - 0x16FF.
 638:  * @since 1.4
 639:  */
 640:  public static final UnicodeBlock RUNIC
 641:  = new UnicodeBlock(0x16A0, 0x16FF,
 642:  "RUNIC", 
 643:  "Runic");
 644: 
 645:  /**
 646:  * Tagalog.
 647:  * 0x1700 - 0x171F.
 648:  * @since 1.5
 649:  */
 650:  public static final UnicodeBlock TAGALOG
 651:  = new UnicodeBlock(0x1700, 0x171F,
 652:  "TAGALOG", 
 653:  "Tagalog");
 654: 
 655:  /**
 656:  * Hanunoo.
 657:  * 0x1720 - 0x173F.
 658:  * @since 1.5
 659:  */
 660:  public static final UnicodeBlock HANUNOO
 661:  = new UnicodeBlock(0x1720, 0x173F,
 662:  "HANUNOO", 
 663:  "Hanunoo");
 664: 
 665:  /**
 666:  * Buhid.
 667:  * 0x1740 - 0x175F.
 668:  * @since 1.5
 669:  */
 670:  public static final UnicodeBlock BUHID
 671:  = new UnicodeBlock(0x1740, 0x175F,
 672:  "BUHID", 
 673:  "Buhid");
 674: 
 675:  /**
 676:  * Tagbanwa.
 677:  * 0x1760 - 0x177F.
 678:  * @since 1.5
 679:  */
 680:  public static final UnicodeBlock TAGBANWA
 681:  = new UnicodeBlock(0x1760, 0x177F,
 682:  "TAGBANWA", 
 683:  "Tagbanwa");
 684: 
 685:  /**
 686:  * Khmer.
 687:  * 0x1780 - 0x17FF.
 688:  * @since 1.4
 689:  */
 690:  public static final UnicodeBlock KHMER
 691:  = new UnicodeBlock(0x1780, 0x17FF,
 692:  "KHMER", 
 693:  "Khmer");
 694: 
 695:  /**
 696:  * Mongolian.
 697:  * 0x1800 - 0x18AF.
 698:  * @since 1.4
 699:  */
 700:  public static final UnicodeBlock MONGOLIAN
 701:  = new UnicodeBlock(0x1800, 0x18AF,
 702:  "MONGOLIAN", 
 703:  "Mongolian");
 704: 
 705:  /**
 706:  * Limbu.
 707:  * 0x1900 - 0x194F.
 708:  * @since 1.5
 709:  */
 710:  public static final UnicodeBlock LIMBU
 711:  = new UnicodeBlock(0x1900, 0x194F,
 712:  "LIMBU", 
 713:  "Limbu");
 714: 
 715:  /**
 716:  * Tai Le.
 717:  * 0x1950 - 0x197F.
 718:  * @since 1.5
 719:  */
 720:  public static final UnicodeBlock TAI_LE
 721:  = new UnicodeBlock(0x1950, 0x197F,
 722:  "TAI_LE", 
 723:  "Tai Le");
 724: 
 725:  /**
 726:  * Khmer Symbols.
 727:  * 0x19E0 - 0x19FF.
 728:  * @since 1.5
 729:  */
 730:  public static final UnicodeBlock KHMER_SYMBOLS
 731:  = new UnicodeBlock(0x19E0, 0x19FF,
 732:  "KHMER_SYMBOLS", 
 733:  "Khmer Symbols");
 734: 
 735:  /**
 736:  * Phonetic Extensions.
 737:  * 0x1D00 - 0x1D7F.
 738:  * @since 1.5
 739:  */
 740:  public static final UnicodeBlock PHONETIC_EXTENSIONS
 741:  = new UnicodeBlock(0x1D00, 0x1D7F,
 742:  "PHONETIC_EXTENSIONS", 
 743:  "Phonetic Extensions");
 744: 
 745:  /**
 746:  * Latin Extended Additional.
 747:  * 0x1E00 - 0x1EFF.
 748:  */
 749:  public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 750:  = new UnicodeBlock(0x1E00, 0x1EFF,
 751:  "LATIN_EXTENDED_ADDITIONAL", 
 752:  "Latin Extended Additional");
 753: 
 754:  /**
 755:  * Greek Extended.
 756:  * 0x1F00 - 0x1FFF.
 757:  */
 758:  public static final UnicodeBlock GREEK_EXTENDED
 759:  = new UnicodeBlock(0x1F00, 0x1FFF,
 760:  "GREEK_EXTENDED", 
 761:  "Greek Extended");
 762: 
 763:  /**
 764:  * General Punctuation.
 765:  * 0x2000 - 0x206F.
 766:  */
 767:  public static final UnicodeBlock GENERAL_PUNCTUATION
 768:  = new UnicodeBlock(0x2000, 0x206F,
 769:  "GENERAL_PUNCTUATION", 
 770:  "General Punctuation");
 771: 
 772:  /**
 773:  * Superscripts and Subscripts.
 774:  * 0x2070 - 0x209F.
 775:  */
 776:  public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 777:  = new UnicodeBlock(0x2070, 0x209F,
 778:  "SUPERSCRIPTS_AND_SUBSCRIPTS", 
 779:  "Superscripts and Subscripts");
 780: 
 781:  /**
 782:  * Currency Symbols.
 783:  * 0x20A0 - 0x20CF.
 784:  */
 785:  public static final UnicodeBlock CURRENCY_SYMBOLS
 786:  = new UnicodeBlock(0x20A0, 0x20CF,
 787:  "CURRENCY_SYMBOLS", 
 788:  "Currency Symbols");
 789: 
 790:  /**
 791:  * Combining Marks for Symbols.
 792:  * 0x20D0 - 0x20FF.
 793:  */
 794:  public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 795:  = new UnicodeBlock(0x20D0, 0x20FF,
 796:  "COMBINING_MARKS_FOR_SYMBOLS", 
 797:  "Combining Marks for Symbols");
 798: 
 799:  /**
 800:  * Letterlike Symbols.
 801:  * 0x2100 - 0x214F.
 802:  */
 803:  public static final UnicodeBlock LETTERLIKE_SYMBOLS
 804:  = new UnicodeBlock(0x2100, 0x214F,
 805:  "LETTERLIKE_SYMBOLS", 
 806:  "Letterlike Symbols");
 807: 
 808:  /**
 809:  * Number Forms.
 810:  * 0x2150 - 0x218F.
 811:  */
 812:  public static final UnicodeBlock NUMBER_FORMS
 813:  = new UnicodeBlock(0x2150, 0x218F,
 814:  "NUMBER_FORMS", 
 815:  "Number Forms");
 816: 
 817:  /**
 818:  * Arrows.
 819:  * 0x2190 - 0x21FF.
 820:  */
 821:  public static final UnicodeBlock ARROWS
 822:  = new UnicodeBlock(0x2190, 0x21FF,
 823:  "ARROWS", 
 824:  "Arrows");
 825: 
 826:  /**
 827:  * Mathematical Operators.
 828:  * 0x2200 - 0x22FF.
 829:  */
 830:  public static final UnicodeBlock MATHEMATICAL_OPERATORS
 831:  = new UnicodeBlock(0x2200, 0x22FF,
 832:  "MATHEMATICAL_OPERATORS", 
 833:  "Mathematical Operators");
 834: 
 835:  /**
 836:  * Miscellaneous Technical.
 837:  * 0x2300 - 0x23FF.
 838:  */
 839:  public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 840:  = new UnicodeBlock(0x2300, 0x23FF,
 841:  "MISCELLANEOUS_TECHNICAL", 
 842:  "Miscellaneous Technical");
 843: 
 844:  /**
 845:  * Control Pictures.
 846:  * 0x2400 - 0x243F.
 847:  */
 848:  public static final UnicodeBlock CONTROL_PICTURES
 849:  = new UnicodeBlock(0x2400, 0x243F,
 850:  "CONTROL_PICTURES", 
 851:  "Control Pictures");
 852: 
 853:  /**
 854:  * Optical Character Recognition.
 855:  * 0x2440 - 0x245F.
 856:  */
 857:  public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 858:  = new UnicodeBlock(0x2440, 0x245F,
 859:  "OPTICAL_CHARACTER_RECOGNITION", 
 860:  "Optical Character Recognition");
 861: 
 862:  /**
 863:  * Enclosed Alphanumerics.
 864:  * 0x2460 - 0x24FF.
 865:  */
 866:  public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 867:  = new UnicodeBlock(0x2460, 0x24FF,
 868:  "ENCLOSED_ALPHANUMERICS", 
 869:  "Enclosed Alphanumerics");
 870: 
 871:  /**
 872:  * Box Drawing.
 873:  * 0x2500 - 0x257F.
 874:  */
 875:  public static final UnicodeBlock BOX_DRAWING
 876:  = new UnicodeBlock(0x2500, 0x257F,
 877:  "BOX_DRAWING", 
 878:  "Box Drawing");
 879: 
 880:  /**
 881:  * Block Elements.
 882:  * 0x2580 - 0x259F.
 883:  */
 884:  public static final UnicodeBlock BLOCK_ELEMENTS
 885:  = new UnicodeBlock(0x2580, 0x259F,
 886:  "BLOCK_ELEMENTS", 
 887:  "Block Elements");
 888: 
 889:  /**
 890:  * Geometric Shapes.
 891:  * 0x25A0 - 0x25FF.
 892:  */
 893:  public static final UnicodeBlock GEOMETRIC_SHAPES
 894:  = new UnicodeBlock(0x25A0, 0x25FF,
 895:  "GEOMETRIC_SHAPES", 
 896:  "Geometric Shapes");
 897: 
 898:  /**
 899:  * Miscellaneous Symbols.
 900:  * 0x2600 - 0x26FF.
 901:  */
 902:  public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 903:  = new UnicodeBlock(0x2600, 0x26FF,
 904:  "MISCELLANEOUS_SYMBOLS", 
 905:  "Miscellaneous Symbols");
 906: 
 907:  /**
 908:  * Dingbats.
 909:  * 0x2700 - 0x27BF.
 910:  */
 911:  public static final UnicodeBlock DINGBATS
 912:  = new UnicodeBlock(0x2700, 0x27BF,
 913:  "DINGBATS", 
 914:  "Dingbats");
 915: 
 916:  /**
 917:  * Miscellaneous Mathematical Symbols-A.
 918:  * 0x27C0 - 0x27EF.
 919:  * @since 1.5
 920:  */
 921:  public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
 922:  = new UnicodeBlock(0x27C0, 0x27EF,
 923:  "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 
 924:  "Miscellaneous Mathematical Symbols-A");
 925: 
 926:  /**
 927:  * Supplemental Arrows-A.
 928:  * 0x27F0 - 0x27FF.
 929:  * @since 1.5
 930:  */
 931:  public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
 932:  = new UnicodeBlock(0x27F0, 0x27FF,
 933:  "SUPPLEMENTAL_ARROWS_A", 
 934:  "Supplemental Arrows-A");
 935: 
 936:  /**
 937:  * Braille Patterns.
 938:  * 0x2800 - 0x28FF.
 939:  * @since 1.4
 940:  */
 941:  public static final UnicodeBlock BRAILLE_PATTERNS
 942:  = new UnicodeBlock(0x2800, 0x28FF,
 943:  "BRAILLE_PATTERNS", 
 944:  "Braille Patterns");
 945: 
 946:  /**
 947:  * Supplemental Arrows-B.
 948:  * 0x2900 - 0x297F.
 949:  * @since 1.5
 950:  */
 951:  public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
 952:  = new UnicodeBlock(0x2900, 0x297F,
 953:  "SUPPLEMENTAL_ARROWS_B", 
 954:  "Supplemental Arrows-B");
 955: 
 956:  /**
 957:  * Miscellaneous Mathematical Symbols-B.
 958:  * 0x2980 - 0x29FF.
 959:  * @since 1.5
 960:  */
 961:  public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
 962:  = new UnicodeBlock(0x2980, 0x29FF,
 963:  "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 
 964:  "Miscellaneous Mathematical Symbols-B");
 965: 
 966:  /**
 967:  * Supplemental Mathematical Operators.
 968:  * 0x2A00 - 0x2AFF.
 969:  * @since 1.5
 970:  */
 971:  public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
 972:  = new UnicodeBlock(0x2A00, 0x2AFF,
 973:  "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 
 974:  "Supplemental Mathematical Operators");
 975: 
 976:  /**
 977:  * Miscellaneous Symbols and Arrows.
 978:  * 0x2B00 - 0x2BFF.
 979:  * @since 1.5
 980:  */
 981:  public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
 982:  = new UnicodeBlock(0x2B00, 0x2BFF,
 983:  "MISCELLANEOUS_SYMBOLS_AND_ARROWS", 
 984:  "Miscellaneous Symbols and Arrows");
 985: 
 986:  /**
 987:  * CJK Radicals Supplement.
 988:  * 0x2E80 - 0x2EFF.
 989:  * @since 1.4
 990:  */
 991:  public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
 992:  = new UnicodeBlock(0x2E80, 0x2EFF,
 993:  "CJK_RADICALS_SUPPLEMENT", 
 994:  "CJK Radicals Supplement");
 995: 
 996:  /**
 997:  * Kangxi Radicals.
 998:  * 0x2F00 - 0x2FDF.
 999:  * @since 1.4
1000:  */
1001:  public static final UnicodeBlock KANGXI_RADICALS
1002:  = new UnicodeBlock(0x2F00, 0x2FDF,
1003:  "KANGXI_RADICALS", 
1004:  "Kangxi Radicals");
1005: 
1006:  /**
1007:  * Ideographic Description Characters.
1008:  * 0x2FF0 - 0x2FFF.
1009:  * @since 1.4
1010:  */
1011:  public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1012:  = new UnicodeBlock(0x2FF0, 0x2FFF,
1013:  "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 
1014:  "Ideographic Description Characters");
1015: 
1016:  /**
1017:  * CJK Symbols and Punctuation.
1018:  * 0x3000 - 0x303F.
1019:  */
1020:  public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1021:  = new UnicodeBlock(0x3000, 0x303F,
1022:  "CJK_SYMBOLS_AND_PUNCTUATION", 
1023:  "CJK Symbols and Punctuation");
1024: 
1025:  /**
1026:  * Hiragana.
1027:  * 0x3040 - 0x309F.
1028:  */
1029:  public static final UnicodeBlock HIRAGANA
1030:  = new UnicodeBlock(0x3040, 0x309F,
1031:  "HIRAGANA", 
1032:  "Hiragana");
1033: 
1034:  /**
1035:  * Katakana.
1036:  * 0x30A0 - 0x30FF.
1037:  */
1038:  public static final UnicodeBlock KATAKANA
1039:  = new UnicodeBlock(0x30A0, 0x30FF,
1040:  "KATAKANA", 
1041:  "Katakana");
1042: 
1043:  /**
1044:  * Bopomofo.
1045:  * 0x3100 - 0x312F.
1046:  */
1047:  public static final UnicodeBlock BOPOMOFO
1048:  = new UnicodeBlock(0x3100, 0x312F,
1049:  "BOPOMOFO", 
1050:  "Bopomofo");
1051: 
1052:  /**
1053:  * Hangul Compatibility Jamo.
1054:  * 0x3130 - 0x318F.
1055:  */
1056:  public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1057:  = new UnicodeBlock(0x3130, 0x318F,
1058:  "HANGUL_COMPATIBILITY_JAMO", 
1059:  "Hangul Compatibility Jamo");
1060: 
1061:  /**
1062:  * Kanbun.
1063:  * 0x3190 - 0x319F.
1064:  */
1065:  public static final UnicodeBlock KANBUN
1066:  = new UnicodeBlock(0x3190, 0x319F,
1067:  "KANBUN", 
1068:  "Kanbun");
1069: 
1070:  /**
1071:  * Bopomofo Extended.
1072:  * 0x31A0 - 0x31BF.
1073:  * @since 1.4
1074:  */
1075:  public static final UnicodeBlock BOPOMOFO_EXTENDED
1076:  = new UnicodeBlock(0x31A0, 0x31BF,
1077:  "BOPOMOFO_EXTENDED", 
1078:  "Bopomofo Extended");
1079: 
1080:  /**
1081:  * Katakana Phonetic Extensions.
1082:  * 0x31F0 - 0x31FF.
1083:  * @since 1.5
1084:  */
1085:  public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1086:  = new UnicodeBlock(0x31F0, 0x31FF,
1087:  "KATAKANA_PHONETIC_EXTENSIONS", 
1088:  "Katakana Phonetic Extensions");
1089: 
1090:  /**
1091:  * Enclosed CJK Letters and Months.
1092:  * 0x3200 - 0x32FF.
1093:  */
1094:  public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1095:  = new UnicodeBlock(0x3200, 0x32FF,
1096:  "ENCLOSED_CJK_LETTERS_AND_MONTHS", 
1097:  "Enclosed CJK Letters and Months");
1098: 
1099:  /**
1100:  * CJK Compatibility.
1101:  * 0x3300 - 0x33FF.
1102:  */
1103:  public static final UnicodeBlock CJK_COMPATIBILITY
1104:  = new UnicodeBlock(0x3300, 0x33FF,
1105:  "CJK_COMPATIBILITY", 
1106:  "CJK Compatibility");
1107: 
1108:  /**
1109:  * CJK Unified Ideographs Extension A.
1110:  * 0x3400 - 0x4DBF.
1111:  * @since 1.4
1112:  */
1113:  public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1114:  = new UnicodeBlock(0x3400, 0x4DBF,
1115:  "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 
1116:  "CJK Unified Ideographs Extension A");
1117: 
1118:  /**
1119:  * Yijing Hexagram Symbols.
1120:  * 0x4DC0 - 0x4DFF.
1121:  * @since 1.5
1122:  */
1123:  public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1124:  = new UnicodeBlock(0x4DC0, 0x4DFF,
1125:  "YIJING_HEXAGRAM_SYMBOLS", 
1126:  "Yijing Hexagram Symbols");
1127: 
1128:  /**
1129:  * CJK Unified Ideographs.
1130:  * 0x4E00 - 0x9FFF.
1131:  */
1132:  public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1133:  = new UnicodeBlock(0x4E00, 0x9FFF,
1134:  "CJK_UNIFIED_IDEOGRAPHS", 
1135:  "CJK Unified Ideographs");
1136: 
1137:  /**
1138:  * Yi Syllables.
1139:  * 0xA000 - 0xA48F.
1140:  * @since 1.4
1141:  */
1142:  public static final UnicodeBlock YI_SYLLABLES
1143:  = new UnicodeBlock(0xA000, 0xA48F,
1144:  "YI_SYLLABLES", 
1145:  "Yi Syllables");
1146: 
1147:  /**
1148:  * Yi Radicals.
1149:  * 0xA490 - 0xA4CF.
1150:  * @since 1.4
1151:  */
1152:  public static final UnicodeBlock YI_RADICALS
1153:  = new UnicodeBlock(0xA490, 0xA4CF,
1154:  "YI_RADICALS", 
1155:  "Yi Radicals");
1156: 
1157:  /**
1158:  * Hangul Syllables.
1159:  * 0xAC00 - 0xD7AF.
1160:  */
1161:  public static final UnicodeBlock HANGUL_SYLLABLES
1162:  = new UnicodeBlock(0xAC00, 0xD7AF,
1163:  "HANGUL_SYLLABLES", 
1164:  "Hangul Syllables");
1165: 
1166:  /**
1167:  * High Surrogates.
1168:  * 0xD800 - 0xDB7F.
1169:  * @since 1.5
1170:  */
1171:  public static final UnicodeBlock HIGH_SURROGATES
1172:  = new UnicodeBlock(0xD800, 0xDB7F,
1173:  "HIGH_SURROGATES", 
1174:  "High Surrogates");
1175: 
1176:  /**
1177:  * High Private Use Surrogates.
1178:  * 0xDB80 - 0xDBFF.
1179:  * @since 1.5
1180:  */
1181:  public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1182:  = new UnicodeBlock(0xDB80, 0xDBFF,
1183:  "HIGH_PRIVATE_USE_SURROGATES", 
1184:  "High Private Use Surrogates");
1185: 
1186:  /**
1187:  * Low Surrogates.
1188:  * 0xDC00 - 0xDFFF.
1189:  * @since 1.5
1190:  */
1191:  public static final UnicodeBlock LOW_SURROGATES
1192:  = new UnicodeBlock(0xDC00, 0xDFFF,
1193:  "LOW_SURROGATES", 
1194:  "Low Surrogates");
1195: 
1196:  /**
1197:  * Private Use Area.
1198:  * 0xE000 - 0xF8FF.
1199:  */
1200:  public static final UnicodeBlock PRIVATE_USE_AREA
1201:  = new UnicodeBlock(0xE000, 0xF8FF,
1202:  "PRIVATE_USE_AREA", 
1203:  "Private Use Area");
1204: 
1205:  /**
1206:  * CJK Compatibility Ideographs.
1207:  * 0xF900 - 0xFAFF.
1208:  */
1209:  public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1210:  = new UnicodeBlock(0xF900, 0xFAFF,
1211:  "CJK_COMPATIBILITY_IDEOGRAPHS", 
1212:  "CJK Compatibility Ideographs");
1213: 
1214:  /**
1215:  * Alphabetic Presentation Forms.
1216:  * 0xFB00 - 0xFB4F.
1217:  */
1218:  public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1219:  = new UnicodeBlock(0xFB00, 0xFB4F,
1220:  "ALPHABETIC_PRESENTATION_FORMS", 
1221:  "Alphabetic Presentation Forms");
1222: 
1223:  /**
1224:  * Arabic Presentation Forms-A.
1225:  * 0xFB50 - 0xFDFF.
1226:  */
1227:  public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1228:  = new UnicodeBlock(0xFB50, 0xFDFF,
1229:  "ARABIC_PRESENTATION_FORMS_A", 
1230:  "Arabic Presentation Forms-A");
1231: 
1232:  /**
1233:  * Variation Selectors.
1234:  * 0xFE00 - 0xFE0F.
1235:  * @since 1.5
1236:  */
1237:  public static final UnicodeBlock VARIATION_SELECTORS
1238:  = new UnicodeBlock(0xFE00, 0xFE0F,
1239:  "VARIATION_SELECTORS", 
1240:  "Variation Selectors");
1241: 
1242:  /**
1243:  * Combining Half Marks.
1244:  * 0xFE20 - 0xFE2F.
1245:  */
1246:  public static final UnicodeBlock COMBINING_HALF_MARKS
1247:  = new UnicodeBlock(0xFE20, 0xFE2F,
1248:  "COMBINING_HALF_MARKS", 
1249:  "Combining Half Marks");
1250: 
1251:  /**
1252:  * CJK Compatibility Forms.
1253:  * 0xFE30 - 0xFE4F.
1254:  */
1255:  public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1256:  = new UnicodeBlock(0xFE30, 0xFE4F,
1257:  "CJK_COMPATIBILITY_FORMS", 
1258:  "CJK Compatibility Forms");
1259: 
1260:  /**
1261:  * Small Form Variants.
1262:  * 0xFE50 - 0xFE6F.
1263:  */
1264:  public static final UnicodeBlock SMALL_FORM_VARIANTS
1265:  = new UnicodeBlock(0xFE50, 0xFE6F,
1266:  "SMALL_FORM_VARIANTS", 
1267:  "Small Form Variants");
1268: 
1269:  /**
1270:  * Arabic Presentation Forms-B.
1271:  * 0xFE70 - 0xFEFF.
1272:  */
1273:  public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1274:  = new UnicodeBlock(0xFE70, 0xFEFF,
1275:  "ARABIC_PRESENTATION_FORMS_B", 
1276:  "Arabic Presentation Forms-B");
1277: 
1278:  /**
1279:  * Halfwidth and Fullwidth Forms.
1280:  * 0xFF00 - 0xFFEF.
1281:  */
1282:  public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1283:  = new UnicodeBlock(0xFF00, 0xFFEF,
1284:  "HALFWIDTH_AND_FULLWIDTH_FORMS", 
1285:  "Halfwidth and Fullwidth Forms");
1286: 
1287:  /**
1288:  * Specials.
1289:  * 0xFFF0 - 0xFFFF.
1290:  */
1291:  public static final UnicodeBlock SPECIALS
1292:  = new UnicodeBlock(0xFFF0, 0xFFFF,
1293:  "SPECIALS", 
1294:  "Specials");
1295: 
1296:  /**
1297:  * Linear B Syllabary.
1298:  * 0x10000 - 0x1007F.
1299:  * @since 1.5
1300:  */
1301:  public static final UnicodeBlock LINEAR_B_SYLLABARY
1302:  = new UnicodeBlock(0x10000, 0x1007F,
1303:  "LINEAR_B_SYLLABARY", 
1304:  "Linear B Syllabary");
1305: 
1306:  /**
1307:  * Linear B Ideograms.
1308:  * 0x10080 - 0x100FF.
1309:  * @since 1.5
1310:  */
1311:  public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1312:  = new UnicodeBlock(0x10080, 0x100FF,
1313:  "LINEAR_B_IDEOGRAMS", 
1314:  "Linear B Ideograms");
1315: 
1316:  /**
1317:  * Aegean Numbers.
1318:  * 0x10100 - 0x1013F.
1319:  * @since 1.5
1320:  */
1321:  public static final UnicodeBlock AEGEAN_NUMBERS
1322:  = new UnicodeBlock(0x10100, 0x1013F,
1323:  "AEGEAN_NUMBERS", 
1324:  "Aegean Numbers");
1325: 
1326:  /**
1327:  * Old Italic.
1328:  * 0x10300 - 0x1032F.
1329:  * @since 1.5
1330:  */
1331:  public static final UnicodeBlock OLD_ITALIC
1332:  = new UnicodeBlock(0x10300, 0x1032F,
1333:  "OLD_ITALIC", 
1334:  "Old Italic");
1335: 
1336:  /**
1337:  * Gothic.
1338:  * 0x10330 - 0x1034F.
1339:  * @since 1.5
1340:  */
1341:  public static final UnicodeBlock GOTHIC
1342:  = new UnicodeBlock(0x10330, 0x1034F,
1343:  "GOTHIC", 
1344:  "Gothic");
1345: 
1346:  /**
1347:  * Ugaritic.
1348:  * 0x10380 - 0x1039F.
1349:  * @since 1.5
1350:  */
1351:  public static final UnicodeBlock UGARITIC
1352:  = new UnicodeBlock(0x10380, 0x1039F,
1353:  "UGARITIC", 
1354:  "Ugaritic");
1355: 
1356:  /**
1357:  * Deseret.
1358:  * 0x10400 - 0x1044F.
1359:  * @since 1.5
1360:  */
1361:  public static final UnicodeBlock DESERET
1362:  = new UnicodeBlock(0x10400, 0x1044F,
1363:  "DESERET", 
1364:  "Deseret");
1365: 
1366:  /**
1367:  * Shavian.
1368:  * 0x10450 - 0x1047F.
1369:  * @since 1.5
1370:  */
1371:  public static final UnicodeBlock SHAVIAN
1372:  = new UnicodeBlock(0x10450, 0x1047F,
1373:  "SHAVIAN", 
1374:  "Shavian");
1375: 
1376:  /**
1377:  * Osmanya.
1378:  * 0x10480 - 0x104AF.
1379:  * @since 1.5
1380:  */
1381:  public static final UnicodeBlock OSMANYA
1382:  = new UnicodeBlock(0x10480, 0x104AF,
1383:  "OSMANYA", 
1384:  "Osmanya");
1385: 
1386:  /**
1387:  * Cypriot Syllabary.
1388:  * 0x10800 - 0x1083F.
1389:  * @since 1.5
1390:  */
1391:  public static final UnicodeBlock CYPRIOT_SYLLABARY
1392:  = new UnicodeBlock(0x10800, 0x1083F,
1393:  "CYPRIOT_SYLLABARY", 
1394:  "Cypriot Syllabary");
1395: 
1396:  /**
1397:  * Byzantine Musical Symbols.
1398:  * 0x1D000 - 0x1D0FF.
1399:  * @since 1.5
1400:  */
1401:  public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1402:  = new UnicodeBlock(0x1D000, 0x1D0FF,
1403:  "BYZANTINE_MUSICAL_SYMBOLS", 
1404:  "Byzantine Musical Symbols");
1405: 
1406:  /**
1407:  * Musical Symbols.
1408:  * 0x1D100 - 0x1D1FF.
1409:  * @since 1.5
1410:  */
1411:  public static final UnicodeBlock MUSICAL_SYMBOLS
1412:  = new UnicodeBlock(0x1D100, 0x1D1FF,
1413:  "MUSICAL_SYMBOLS", 
1414:  "Musical Symbols");
1415: 
1416:  /**
1417:  * Tai Xuan Jing Symbols.
1418:  * 0x1D300 - 0x1D35F.
1419:  * @since 1.5
1420:  */
1421:  public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1422:  = new UnicodeBlock(0x1D300, 0x1D35F,
1423:  "TAI_XUAN_JING_SYMBOLS", 
1424:  "Tai Xuan Jing Symbols");
1425: 
1426:  /**
1427:  * Mathematical Alphanumeric Symbols.
1428:  * 0x1D400 - 0x1D7FF.
1429:  * @since 1.5
1430:  */
1431:  public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1432:  = new UnicodeBlock(0x1D400, 0x1D7FF,
1433:  "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 
1434:  "Mathematical Alphanumeric Symbols");
1435: 
1436:  /**
1437:  * CJK Unified Ideographs Extension B.
1438:  * 0x20000 - 0x2A6DF.
1439:  * @since 1.5
1440:  */
1441:  public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1442:  = new UnicodeBlock(0x20000, 0x2A6DF,
1443:  "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 
1444:  "CJK Unified Ideographs Extension B");
1445: 
1446:  /**
1447:  * CJK Compatibility Ideographs Supplement.
1448:  * 0x2F800 - 0x2FA1F.
1449:  * @since 1.5
1450:  */
1451:  public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1452:  = new UnicodeBlock(0x2F800, 0x2FA1F,
1453:  "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 
1454:  "CJK Compatibility Ideographs Supplement");
1455: 
1456:  /**
1457:  * Tags.
1458:  * 0xE0000 - 0xE007F.
1459:  * @since 1.5
1460:  */
1461:  public static final UnicodeBlock TAGS
1462:  = new UnicodeBlock(0xE0000, 0xE007F,
1463:  "TAGS", 
1464:  "Tags");
1465: 
1466:  /**
1467:  * Variation Selectors Supplement.
1468:  * 0xE0100 - 0xE01EF.
1469:  * @since 1.5
1470:  */
1471:  public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1472:  = new UnicodeBlock(0xE0100, 0xE01EF,
1473:  "VARIATION_SELECTORS_SUPPLEMENT", 
1474:  "Variation Selectors Supplement");
1475: 
1476:  /**
1477:  * Supplementary Private Use Area-A.
1478:  * 0xF0000 - 0xFFFFF.
1479:  * @since 1.5
1480:  */
1481:  public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1482:  = new UnicodeBlock(0xF0000, 0xFFFFF,
1483:  "SUPPLEMENTARY_PRIVATE_USE_AREA_A", 
1484:  "Supplementary Private Use Area-A");
1485: 
1486:  /**
1487:  * Supplementary Private Use Area-B.
1488:  * 0x100000 - 0x10FFFF.
1489:  * @since 1.5
1490:  */
1491:  public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1492:  = new UnicodeBlock(0x100000, 0x10FFFF,
1493:  "SUPPLEMENTARY_PRIVATE_USE_AREA_B", 
1494:  "Supplementary Private Use Area-B");
1495: 
1496:  /**
1497:  * Surrogates Area.
1498:  * 0xD800 - 0xDFFF.
1499:  * @deprecated As of 1.5, the three areas, 
1500:  * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>,
1501:  * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a>
1502:  * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined
1503:  * by the Unicode standard, should be used in preference to
1504:  * this. These are also returned from calls to <code>of(int)</code>
1505:  * and <code>of(char)</code>.
1506:  */
1507:  @Deprecated
1508:  public static final UnicodeBlock SURROGATES_AREA
1509:  = new UnicodeBlock(0xD800, 0xDFFF,
1510:  "SURROGATES_AREA",
1511:  "Surrogates Area");
1512: 
1513:  /**
1514:  * The defined subsets; the deprecated SURROGATES_AREA is not listed.
1515:  */
1516:  private static final UnicodeBlock sets[] = {
1517:  BASIC_LATIN,
1518:  LATIN_1_SUPPLEMENT,
1519:  LATIN_EXTENDED_A,
1520:  LATIN_EXTENDED_B,
1521:  IPA_EXTENSIONS,
1522:  SPACING_MODIFIER_LETTERS,
1523:  COMBINING_DIACRITICAL_MARKS,
1524:  GREEK,
1525:  CYRILLIC,
1526:  CYRILLIC_SUPPLEMENTARY,
1527:  ARMENIAN,
1528:  HEBREW,
1529:  ARABIC,
1530:  SYRIAC,
1531:  THAANA,
1532:  DEVANAGARI,
1533:  BENGALI,
1534:  GURMUKHI,
1535:  GUJARATI,
1536:  ORIYA,
1537:  TAMIL,
1538:  TELUGU,
1539:  KANNADA,
1540:  MALAYALAM,
1541:  SINHALA,
1542:  THAI,
1543:  LAO,
1544:  TIBETAN,
1545:  MYANMAR,
1546:  GEORGIAN,
1547:  HANGUL_JAMO,
1548:  ETHIOPIC,
1549:  CHEROKEE,
1550:  UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1551:  OGHAM,
1552:  RUNIC,
1553:  TAGALOG,
1554:  HANUNOO,
1555:  BUHID,
1556:  TAGBANWA,
1557:  KHMER,
1558:  MONGOLIAN,
1559:  LIMBU,
1560:  TAI_LE,
1561:  KHMER_SYMBOLS,
1562:  PHONETIC_EXTENSIONS,
1563:  LATIN_EXTENDED_ADDITIONAL,
1564:  GREEK_EXTENDED,
1565:  GENERAL_PUNCTUATION,
1566:  SUPERSCRIPTS_AND_SUBSCRIPTS,
1567:  CURRENCY_SYMBOLS,
1568:  COMBINING_MARKS_FOR_SYMBOLS,
1569:  LETTERLIKE_SYMBOLS,
1570:  NUMBER_FORMS,
1571:  ARROWS,
1572:  MATHEMATICAL_OPERATORS,
1573:  MISCELLANEOUS_TECHNICAL,
1574:  CONTROL_PICTURES,
1575:  OPTICAL_CHARACTER_RECOGNITION,
1576:  ENCLOSED_ALPHANUMERICS,
1577:  BOX_DRAWING,
1578:  BLOCK_ELEMENTS,
1579:  GEOMETRIC_SHAPES,
1580:  MISCELLANEOUS_SYMBOLS,
1581:  DINGBATS,
1582:  MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1583:  SUPPLEMENTAL_ARROWS_A,
1584:  BRAILLE_PATTERNS,
1585:  SUPPLEMENTAL_ARROWS_B,
1586:  MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1587:  SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1588:  MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1589:  CJK_RADICALS_SUPPLEMENT,
1590:  KANGXI_RADICALS,
1591:  IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1592:  CJK_SYMBOLS_AND_PUNCTUATION,
1593:  HIRAGANA,
1594:  KATAKANA,
1595:  BOPOMOFO,
1596:  HANGUL_COMPATIBILITY_JAMO,
1597:  KANBUN,
1598:  BOPOMOFO_EXTENDED,
1599:  KATAKANA_PHONETIC_EXTENSIONS,
1600:  ENCLOSED_CJK_LETTERS_AND_MONTHS,
1601:  CJK_COMPATIBILITY,
1602:  CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1603:  YIJING_HEXAGRAM_SYMBOLS,
1604:  CJK_UNIFIED_IDEOGRAPHS,
1605:  YI_SYLLABLES,
1606:  YI_RADICALS,
1607:  HANGUL_SYLLABLES,
1608:  HIGH_SURROGATES,
1609:  HIGH_PRIVATE_USE_SURROGATES,
1610:  LOW_SURROGATES,
1611:  PRIVATE_USE_AREA,
1612:  CJK_COMPATIBILITY_IDEOGRAPHS,
1613:  ALPHABETIC_PRESENTATION_FORMS,
1614:  ARABIC_PRESENTATION_FORMS_A,
1615:  VARIATION_SELECTORS,
1616:  COMBINING_HALF_MARKS,
1617:  CJK_COMPATIBILITY_FORMS,
1618:  SMALL_FORM_VARIANTS,
1619:  ARABIC_PRESENTATION_FORMS_B,
1620:  HALFWIDTH_AND_FULLWIDTH_FORMS,
1621:  SPECIALS,
1622:  LINEAR_B_SYLLABARY,
1623:  LINEAR_B_IDEOGRAMS,
1624:  AEGEAN_NUMBERS,
1625:  OLD_ITALIC,
1626:  GOTHIC,
1627:  UGARITIC,
1628:  DESERET,
1629:  SHAVIAN,
1630:  OSMANYA,
1631:  CYPRIOT_SYLLABARY,
1632:  BYZANTINE_MUSICAL_SYMBOLS,
1633:  MUSICAL_SYMBOLS,
1634:  TAI_XUAN_JING_SYMBOLS,
1635:  MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1636:  CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1637:  CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1638:  TAGS,
1639:  VARIATION_SELECTORS_SUPPLEMENT,
1640:  SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1641:  SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1642:  };
1643:  } // class UnicodeBlock
1644: 
1645:  /**
1646:  * A class to encompass all the properties of characters in the 
1647:  * private use blocks in the Unicode standard. This class extends
1648:  * UnassignedCharacters because only getType(), isDefined() and
1649:  * getDirectionality() return different values; the rest is inherited.
1650:  * @author Anthony Balkissoon abalkiss at redhat dot com
1651:  *
1652:  */
1653:  private static class PrivateUseCharacters extends UnassignedCharacters
1654:  {
1655:  /**
1656:  * Returns PRIVATE_USE as the type of cp, or UNASSIGNED for the last two code points of a plane.
1657:  */
1658:  static int getType(int cp)
1659:  {
1660:  // The upper 2 code points in any plane (xxFFFE and xxFFFF) are
1661:  // considered unassigned, even in the private-use planes.
1662:  if ((cp & 0xffff) >= 0xfffe)
1663:  return UnassignedCharacters.getType(cp);
1664:  return PRIVATE_USE;
1665:  }
1666:  
1667:  /**
1668:  * Returns true for cp, or false for the last two code points of a plane.
1669:  */
1670:  static boolean isDefined(int cp)
1671:  {
1672:  // The upper 2 code points in any plane (xxFFFE and xxFFFF) are
1673:  // considered unassigned, even in the private-use planes.
1674:  if ((cp & 0xffff) >= 0xfffe)
1675:  return UnassignedCharacters.isDefined(cp);
1676:  return true;
1677:  }
1678:  
1679:  /**
1680:  * Returns DIRECTIONALITY_LEFT_TO_RIGHT for cp, or DIRECTIONALITY_UNDEFINED for the last two code points of a plane.
1681:  */
1682:  static byte getDirectionality(int cp)
1683:  {
1684:  if ((cp & 0xffff) >= 0xfffe)
1685:  return UnassignedCharacters.getDirectionality(cp);
1686:  return DIRECTIONALITY_LEFT_TO_RIGHT;
1687:  }
1688:  }
1689:  
1690:  /**
1691:  * A class to encompass all the properties of code points that are 
1692:  * currently undefined in the Unicode standard.
1693:  * @author Anthony Balkissoon abalkiss at redhat dot com
1694:  *
1695:  */
1696:  private static class UnassignedCharacters
1697:  {
1698:  /**
1699:  * Returns -1, as unassigned characters have no digit value.
1700:  * @param cp the character
1701:  * @param radix the radix (not used)
1702:  * @return -1, regardless of the character and the radix
1703:  */
1704:  static int digit(int cp, int radix)
1705:  {
1706:  return -1;
1707:  }
1708: 
1709:  /**
1710:  * Returns the Unicode directionality property for unassigned 
1711:  * characters.
1712:  * @param cp the character
1713:  * @return DIRECTIONALITY_UNDEFINED
1714:  */
1715:  static byte getDirectionality(int cp)
1716:  {
1717:  return DIRECTIONALITY_UNDEFINED;
1718:  }
1719: 
1720:  /**
1721:  * Returns -1, the numeric value for unassigned Unicode characters.
1722:  * @param cp the character
1723:  * @return -1
1724:  */
1725:  static int getNumericValue(int cp)
1726:  {
1727:  return -1;
1728:  }
1729: 
1730:  /**
1731:  * Returns UNASSIGNED, the type of unassigned Unicode characters.
1732:  * @param cp the character
1733:  * @return UNASSIGNED
1734:  */
1735:  static int getType(int cp)
1736:  {
1737:  return UNASSIGNED;
1738:  }
1739:  
1740:  /**
1741:  * Returns false to indicate that the character is not defined in the 
1742:  * Unicode standard.
1743:  * @param cp the character
1744:  * @return false
1745:  */
1746:  static boolean isDefined(int cp)
1747:  {
1748:  return false;
1749:  }
1750: 
1751:  /**
1752:  * Returns false to indicate that the character is not a digit.
1753:  * @param cp the character
1754:  * @return false
1755:  */
1756:  static boolean isDigit(int cp)
1757:  {
1758:  return false;
1759:  }
1760: 
1761:  /**
1762:  * Returns false to indicate that the character cannot be ignored 
1763:  * within an identifier.
1764:  * @param cp the character
1765:  * @return false
1766:  */
1767:  static boolean isIdentifierIgnorable(int cp)
1768:  {
1769:  return false;
1770:  }
1771: 
1772:  /**
1773:  * Returns false to indicate that the character cannot be part of a 
1774:  * Java identifier.
1775:  * @param cp the character
1776:  * @return false
1777:  */
1778:  static boolean isJavaIdentifierPart(int cp)
1779:  {
1780:  return false;
1781:  }
1782: 
1783:  /**
1784:  * Returns false to indicate that the character cannot start a 
1785:  * Java identifier. NOTE(review): the method name is spelled "Identifer"; renaming it would require updating any callers.
1786:  * @param cp the character
1787:  * @return false
1788:  */
1789:  static boolean isJavaIdentiferStart(int cp)
1790:  {
1791:  return false;
1792:  }
1793: 
1794:  /**
1795:  * Returns false to indicate that the character is not a letter.
1796:  * @param cp the character
1797:  * @return false
1798:  */
1799:  static boolean isLetter(int cp)
1800:  {
1801:  return false;
1802:  }
1803: 
1804:  /**
1805:  * Returns false to indicate that the character is neither a letter
1806:  * nor a digit.
1807:  * @param cp the character
1808:  * @return false
1809:  */
1810:  static boolean isLetterOrDigit(int cp)
1811:  {
1812:  return false;
1813:  }
1814: 
1815:  /**
1816:  * Returns false to indicate that the character is not a lowercase letter.
1817:  * @param cp the character
1818:  * @return false
1819:  */
1820:  static boolean isLowerCase(int cp)
1821:  {
1822:  return false;
1823:  }
1824:  
1825:  /**
1826:  * Returns false to indicate that the character is not mirrored.
1827:  * @param cp the character
1828:  * @return false
1829:  */
1830:  static boolean isMirrored(int cp)
1831:  {
1832:  return false;
1833:  }
1834: 
1835:  /**
1836:  * Returns false to indicate that the character is not a space character.
1837:  * @param cp the character
1838:  * @return false
1839:  */
1840:  static boolean isSpaceChar(int cp)
1841:  {
1842:  return false;
1843:  }
1844:  
1845:  /**
1846:  * Returns false to indicate that the character is not a titlecase letter.
1847:  * @param cp the character
1848:  * @return false
1849:  */
1850:  static boolean isTitleCase(int cp)
1851:  {
1852:  return false;
1853:  }
1854:  
1855:  /**
1856:  * Returns false to indicate that the character cannot be part of a 
1857:  * Unicode identifier.
1858:  * @param cp the character
1859:  * @return false
1860:  */
1861:  static boolean isUnicodeIdentifierPart(int cp)
1862:  {
1863:  return false;
1864:  }
1865: 
1866:  /**
1867:  * Returns false to indicate that the character cannot start a 
1868:  * Unicode identifier.
1869:  * @param cp the character
1870:  * @return false
1871:  */
1872:  static boolean isUnicodeIdentifierStart(int cp)
1873:  {
1874:  return false;
1875:  }
1876: 
1877:  /**
1878:  * Returns false to indicate that the character is not an uppercase letter.
1879:  * @param cp the character
1880:  * @return false
1881:  */
1882:  static boolean isUpperCase(int cp)
1883:  {
1884:  return false;
1885:  }
1886: 
1887:  /**
1888:  * Returns false to indicate that the character is not a whitespace
1889:  * character.
1890:  * @param cp the character
1891:  * @return false
1892:  */
1893:  static boolean isWhiteSpace(int cp)
1894:  {
1895:  return false;
1896:  }
1897: 
1898:  /**
1899:  * Returns cp to indicate this character has no lowercase conversion.
1900:  * @param cp the character
1901:  * @return cp
1902:  */
1903:  static int toLowerCase(int cp)
1904:  {
1905:  return cp;
1906:  }
1907:  
1908:  /**
1909:  * Returns cp to indicate this character has no titlecase conversion.
1910:  * @param cp the character
1911:  * @return cp
1912:  */
1913:  static int toTitleCase(int cp)
1914:  {
1915:  return cp;
1916:  }
1917: 
1918:  /**
1919:  * Returns cp to indicate this character has no uppercase conversion.
1920:  * @param cp the character
1921:  * @return cp
1922:  */
1923:  static int toUpperCase(int cp)
1924:  {
1925:  return cp;
1926:  } 
1927:  }
1928: 
1929:  /**
1930:  * The immutable value of this Character.
1931:  *
1932:  * @serial the value of this Character
1933:  */
1934:  private final char value;
1935: 
1936:  /**
1937:  * Compatible with JDK 1.0+.
1938:  */
1939:  private static final long serialVersionUID = 3786198910865385080L;
1940: 
1941:  /**
1942:  * Smallest value allowed for radix arguments in Java. This value is 2.
1943:  *
1944:  * @see #digit(char, int)
1945:  * @see #forDigit(int, int)
1946:  * @see Integer#toString(int, int)
1947:  * @see Integer#valueOf(String)
1948:  */
1949:  public static final int MIN_RADIX = 2;
1950: 
1951:  /**
1952:  * Largest value allowed for radix arguments in Java. This value is 36.
1953:  *
1954:  * @see #digit(char, int)
1955:  * @see #forDigit(int, int)
1956:  * @see Integer#toString(int, int)
1957:  * @see Integer#valueOf(String)
1958:  */
1959:  public static final int MAX_RADIX = 36;
1960: 
1961:  /**
1962:  * The minimum value the char data type can hold.
1963:  * This value is <code>'\\u0000'</code>.
1964:  */
1965:  public static final char MIN_VALUE = '\u0000';
1966: 
1967:  /**
1968:  * The maximum value the char data type can hold.
1969:  * This value is <code>'\\uFFFF'</code>.
1970:  */
1971:  public static final char MAX_VALUE = '\uFFFF';
1972: 
1973:  /**
1974:  * The minimum Unicode 4.0 code point. This value is <code>0</code>.
1975:  * @since 1.5
1976:  */
1977:  public static final int MIN_CODE_POINT = 0;
1978: 
1979:  /**
1980:  * The maximum Unicode 4.0 code point, which is greater than the range
1981:  * of the char data type.
1982:  * This value is <code>0x10FFFF</code>.
1983:  * @since 1.5
1984:  */
1985:  public static final int MAX_CODE_POINT = 0x10FFFF;
1986: 
1987:  /**
1988:  * The minimum Unicode high surrogate code unit, or
1989:  * <em>leading-surrogate</em>, in the UTF-16 character encoding.
1990:  * This value is <code>'\\uD800'</code>.
1991:  * @since 1.5
1992:  */
1993:  public static final char MIN_HIGH_SURROGATE = '\uD800';
1994: 
1995:  /**
1996:  * The maximum Unicode high surrogate code unit, or
1997:  * <em>leading-surrogate</em>, in the UTF-16 character encoding.
1998:  * This value is <code>'\\uDBFF'</code>.
1999:  * @since 1.5
2000:  */
2001:  public static final char MAX_HIGH_SURROGATE = '\uDBFF';
2002: 
2003:  /**
2004:  * The minimum Unicode low surrogate code unit, or
2005:  * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
2006:  * This value is <code>'\\uDC00'</code>.
2007:  * @since 1.5
2008:  */
2009:  public static final char MIN_LOW_SURROGATE = '\uDC00';
2010: 
2011:  /**
2012:  * The maximum Unicode low surrogate code unit, or
2013:  * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
2014:  * This value is <code>'\\uDFFF'</code>.
2015:  * @since 1.5
2016:  */
2017:  public static final char MAX_LOW_SURROGATE = '\uDFFF'; 
2018: 
2019:  /**
2020:  * The minimum Unicode surrogate code unit in the UTF-16 character encoding.
2021:  * This value is <code>'\\uD800'</code>.
2022:  * @since 1.5
2023:  */
2024:  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
2025: 
2026:  /**
2027:  * The maximum Unicode surrogate code unit in the UTF-16 character encoding.
2028:  * This value is <code>'\\uDFFF'</code>.
2029:  * @since 1.5
2030:  */
2031:  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
2032: 
2033:  /**
2034:  * The lowest possible supplementary Unicode code point (the first code
2035:  * point outside the basic multilingual plane (BMP)).
2036:  * This value is <code>0x10000</code>.
2037:  */ 
2038:  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
2039: 
2040:  /**
2041:  * Class object representing the primitive char data type.
2042:  *
2043:  * @since 1.1
2044:  */
2045:  public static final Class<Character> TYPE = (Class<Character>) VMClassLoader.getPrimitiveClass('C');
2046: 
2047:  /**
2048:  * The number of bits needed to represent a <code>char</code>.
2049:  * @since 1.5
2050:  */
2051:  public static final int SIZE = 16;
2052: 
2053:  // This caches some Character values, and is used by boxing
2054:  // conversions via valueOf(). We must cache at least 0..127;
2055:  // this constant controls how much we actually cache.
2056:  private static final int MAX_CACHE = 127;
2057:  private static Character[] charCache = new Character[MAX_CACHE + 1];
2058: 
2059:  /**
2060:  * Lu = Letter, Uppercase (Informative).
2061:  *
2062:  * @since 1.1
2063:  */
2064:  public static final byte UPPERCASE_LETTER = 1;
2065: 
2066:  /**
2067:  * Ll = Letter, Lowercase (Informative).
2068:  *
2069:  * @since 1.1
2070:  */
2071:  public static final byte LOWERCASE_LETTER = 2;
2072: 
2073:  /**
2074:  * Lt = Letter, Titlecase (Informative).
2075:  *
2076:  * @since 1.1
2077:  */
2078:  public static final byte TITLECASE_LETTER = 3;
2079: 
2080:  /**
2081:  * Mn = Mark, Non-Spacing (Normative).
2082:  *
2083:  * @since 1.1
2084:  */
2085:  public static final byte NON_SPACING_MARK = 6;
2086: 
2087:  /**
2088:  * Mc = Mark, Spacing Combining (Normative).
2089:  *
2090:  * @since 1.1
2091:  */
2092:  public static final byte COMBINING_SPACING_MARK = 8;
2093: 
2094:  /**
2095:  * Me = Mark, Enclosing (Normative).
2096:  *
2097:  * @since 1.1
2098:  */
2099:  public static final byte ENCLOSING_MARK = 7;
2100: 
2101:  /**
2102:  * Nd = Number, Decimal Digit (Normative).
2103:  *
2104:  * @since 1.1
2105:  */
2106:  public static final byte DECIMAL_DIGIT_NUMBER = 9;
2107: 
2108:  /**
2109:  * Nl = Number, Letter (Normative).
2110:  *
2111:  * @since 1.1
2112:  */
2113:  public static final byte LETTER_NUMBER = 10;
2114: 
2115:  /**
2116:  * No = Number, Other (Normative).
2117:  *
2118:  * @since 1.1
2119:  */
2120:  public static final byte OTHER_NUMBER = 11;
2121: 
2122:  /**
2123:  * Zs = Separator, Space (Normative).
2124:  *
2125:  * @since 1.1
2126:  */
2127:  public static final byte SPACE_SEPARATOR = 12;
2128: 
2129:  /**
2130:  * Zl = Separator, Line (Normative).
2131:  *
2132:  * @since 1.1
2133:  */
2134:  public static final byte LINE_SEPARATOR = 13;
2135: 
2136:  /**
2137:  * Zp = Separator, Paragraph (Normative).
2138:  *
2139:  * @since 1.1
2140:  */
2141:  public static final byte PARAGRAPH_SEPARATOR = 14;
2142: 
2143:  /**
2144:  * Cc = Other, Control (Normative).
2145:  *
2146:  * @since 1.1
2147:  */
2148:  public static final byte CONTROL = 15;
2149: 
2150:  /**
2151:  * Cf = Other, Format (Normative).
2152:  *
2153:  * @since 1.1
2154:  */
2155:  public static final byte FORMAT = 16;
2156: 
2157:  /**
2158:  * Cs = Other, Surrogate (Normative).
2159:  *
2160:  * @since 1.1
2161:  */
2162:  public static final byte SURROGATE = 19;
2163: 
2164:  /**
2165:  * Co = Other, Private Use (Normative).
2166:  *
2167:  * @since 1.1
2168:  */
2169:  public static final byte PRIVATE_USE = 18;
2170: 
2171:  /**
2172:  * Cn = Other, Not Assigned (Normative).
2173:  *
2174:  * @since 1.1
2175:  */
2176:  public static final byte UNASSIGNED = 0;
2177: 
2178:  /**
2179:  * Lm = Letter, Modifier (Informative).
2180:  *
2181:  * @since 1.1
2182:  */
2183:  public static final byte MODIFIER_LETTER = 4;
2184: 
2185:  /**
2186:  * Lo = Letter, Other (Informative).
2187:  *
2188:  * @since 1.1
2189:  */
2190:  public static final byte OTHER_LETTER = 5;
2191: 
2192:  /**
2193:  * Pc = Punctuation, Connector (Informative).
2194:  *
2195:  * @since 1.1
2196:  */
2197:  public static final byte CONNECTOR_PUNCTUATION = 23;
2198: 
2199:  /**
2200:  * Pd = Punctuation, Dash (Informative).
2201:  *
2202:  * @since 1.1
2203:  */
2204:  public static final byte DASH_PUNCTUATION = 20;
2205: 
2206:  /**
2207:  * Ps = Punctuation, Open (Informative).
2208:  *
2209:  * @since 1.1
2210:  */
2211:  public static final byte START_PUNCTUATION = 21;
2212: 
2213:  /**
2214:  * Pe = Punctuation, Close (Informative).
2215:  *
2216:  * @since 1.1
2217:  */
2218:  public static final byte END_PUNCTUATION = 22;
2219: 
2220:  /**
2221:  * Pi = Punctuation, Initial Quote (Informative).
2222:  *
2223:  * @since 1.4
2224:  */
2225:  public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
2226: 
2227:  /**
2228:  * Pf = Punctuation, Final Quote (Informative).
2229:  *
2230:  * @since 1.4
2231:  */
2232:  public static final byte FINAL_QUOTE_PUNCTUATION = 30;
2233: 
2234:  /**
2235:  * Po = Punctuation, Other (Informative).
2236:  *
2237:  * @since 1.1
2238:  */
2239:  public static final byte OTHER_PUNCTUATION = 24;
2240: 
2241:  /**
2242:  * Sm = Symbol, Math (Informative).
2243:  *
2244:  * @since 1.1
2245:  */
2246:  public static final byte MATH_SYMBOL = 25;
2247: 
2248:  /**
2249:  * Sc = Symbol, Currency (Informative).
2250:  *
2251:  * @since 1.1
2252:  */
2253:  public static final byte CURRENCY_SYMBOL = 26;
2254: 
2255:  /**
2256:  * Sk = Symbol, Modifier (Informative).
2257:  *
2258:  * @since 1.1
2259:  */
2260:  public static final byte MODIFIER_SYMBOL = 27;
2261: 
2262:  /**
2263:  * So = Symbol, Other (Informative).
2264:  *
2265:  * @since 1.1
2266:  */
2267:  public static final byte OTHER_SYMBOL = 28;
2268: 
2269:  /**
2270:  * Undefined bidirectional character type. Undefined char values have
2271:  * undefined directionality in the Unicode specification.
2272:  *
2273:  * @since 1.4
2274:  */
2275:  public static final byte DIRECTIONALITY_UNDEFINED = -1;
2276: 
2277:  /**
2278:  * Strong bidirectional character type "L".
2279:  *
2280:  * @since 1.4
2281:  */
2282:  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
2283: 
2284:  /**
2285:  * Strong bidirectional character type "R".
2286:  *
2287:  * @since 1.4
2288:  */
2289:  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
2290: 
2291:  /**
2292:  * Strong bidirectional character type "AL".
2293:  *
2294:  * @since 1.4
2295:  */
2296:  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
2297: 
2298:  /**
2299:  * Weak bidirectional character type "EN".
2300:  *
2301:  * @since 1.4
2302:  */
2303:  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
2304: 
2305:  /**
2306:  * Weak bidirectional character type "ES".
2307:  *
2308:  * @since 1.4
2309:  */
2310:  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
2311: 
2312:  /**
2313:  * Weak bidirectional character type "ET".
2314:  *
2315:  * @since 1.4
2316:  */
2317:  public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
2318: 
2319:  /**
2320:  * Weak bidirectional character type "AN".
2321:  *
2322:  * @since 1.4
2323:  */
2324:  public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
2325: 
2326:  /**
2327:  * Weak bidirectional character type "CS".
2328:  *
2329:  * @since 1.4
2330:  */
2331:  public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
2332: 
2333:  /**
2334:  * Weak bidirectional character type "NSM".
2335:  *
2336:  * @since 1.4
2337:  */
2338:  public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
2339: 
2340:  /**
2341:  * Weak bidirectional character type "BN".
2342:  *
2343:  * @since 1.4
2344:  */
2345:  public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
2346: 
2347:  /**
2348:  * Neutral bidirectional character type "B".
2349:  *
2350:  * @since 1.4
2351:  */
2352:  public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
2353: 
2354:  /**
2355:  * Neutral bidirectional character type "S".
2356:  *
2357:  * @since 1.4
2358:  */
2359:  public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
2360: 
2361:  /**
2362:  * Neutral bidirectional character type "WS".
2363:  *
2364:  * @since 1.4
2365:  */
2366:  public static final byte DIRECTIONALITY_WHITESPACE = 12;
2367: 
2368:  /**
2369:  * Neutral bidirectional character type "ON".
2370:  *
2371:  * @since 1.4
2372:  */
2373:  public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
2374: 
2375:  /**
2376:  * Strong bidirectional character type "LRE".
2377:  *
2378:  * @since 1.4
2379:  */
2380:  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
2381: 
2382:  /**
2383:  * Strong bidirectional character type "LRO".
2384:  *
2385:  * @since 1.4
2386:  */
2387:  public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
2388: 
2389:  /**
2390:  * Strong bidirectional character type "RLE".
2391:  *
2392:  * @since 1.4
2393:  */
2394:  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
2395: 
2396:  /**
2397:  * Strong bidirectional character type "RLO".
2398:  *
2399:  * @since 1.4
2400:  */
2401:  public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
2402: 
2403:  /**
2404:  * Weak bidirectional character type "PDF".
2405:  *
2406:  * @since 1.4
2407:  */
2408:  public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
2409: 
2410:  /**
2411:  * Stores unicode block offset lookup table. Exploit package visibility of
2412:  * String.value to avoid copying the array.
2413:  * @see #readCodePoint(int)
2414:  * @see CharData#BLOCKS
2415:  */
2416:  private static final char[][] blocks = 
2417:  new char[][]{
2418:  String.zeroBasedStringValue(CharData.BLOCKS[0]),
2419:  String.zeroBasedStringValue(CharData.BLOCKS[1]),
2420:  String.zeroBasedStringValue(CharData.BLOCKS[2]),
2421:  String.zeroBasedStringValue(CharData.BLOCKS[3]),
2422:  String.zeroBasedStringValue(CharData.BLOCKS[4]),
2423:  String.zeroBasedStringValue(CharData.BLOCKS[5]),
2424:  String.zeroBasedStringValue(CharData.BLOCKS[6]),
2425:  String.zeroBasedStringValue(CharData.BLOCKS[7]),
2426:  String.zeroBasedStringValue(CharData.BLOCKS[8]),
2427:  String.zeroBasedStringValue(CharData.BLOCKS[9]),
2428:  String.zeroBasedStringValue(CharData.BLOCKS[10]),
2429:  String.zeroBasedStringValue(CharData.BLOCKS[11]),
2430:  String.zeroBasedStringValue(CharData.BLOCKS[12]),
2431:  String.zeroBasedStringValue(CharData.BLOCKS[13]),
2432:  String.zeroBasedStringValue(CharData.BLOCKS[14]),
2433:  String.zeroBasedStringValue(CharData.BLOCKS[15]),
2434:  String.zeroBasedStringValue(CharData.BLOCKS[16])};
2435: 
2436:  /**
2437:  * Stores unicode attribute offset lookup table. Exploit package visibility
2438:  * of String.value to avoid copying the array.
2439:  * @see CharData#DATA
2440:  */
2441:  private static final char[][] data = 
2442:  new char[][]{
2443:  String.zeroBasedStringValue(CharData.DATA[0]),
2444:  String.zeroBasedStringValue(CharData.DATA[1]),
2445:  String.zeroBasedStringValue(CharData.DATA[2]),
2446:  String.zeroBasedStringValue(CharData.DATA[3]),
2447:  String.zeroBasedStringValue(CharData.DATA[4]),
2448:  String.zeroBasedStringValue(CharData.DATA[5]),
2449:  String.zeroBasedStringValue(CharData.DATA[6]),
2450:  String.zeroBasedStringValue(CharData.DATA[7]),
2451:  String.zeroBasedStringValue(CharData.DATA[8]),
2452:  String.zeroBasedStringValue(CharData.DATA[9]),
2453:  String.zeroBasedStringValue(CharData.DATA[10]),
2454:  String.zeroBasedStringValue(CharData.DATA[11]),
2455:  String.zeroBasedStringValue(CharData.DATA[12]),
2456:  String.zeroBasedStringValue(CharData.DATA[13]),
2457:  String.zeroBasedStringValue(CharData.DATA[14]),
2458:  String.zeroBasedStringValue(CharData.DATA[15]),
2459:  String.zeroBasedStringValue(CharData.DATA[16])};
2460: 
2461:  /**
2462:  * Stores unicode numeric value attribute table. Exploit package visibility
2463:  * of String.value to avoid copying the array.
2464:  * @see CharData#NUM_VALUE
2465:  */
2466:  private static final char[][] numValue = 
2467:  new char[][]{
2468:  String.zeroBasedStringValue(CharData.NUM_VALUE[0]),
2469:  String.zeroBasedStringValue(CharData.NUM_VALUE[1]),
2470:  String.zeroBasedStringValue(CharData.NUM_VALUE[2]),
2471:  String.zeroBasedStringValue(CharData.NUM_VALUE[3]),
2472:  String.zeroBasedStringValue(CharData.NUM_VALUE[4]),
2473:  String.zeroBasedStringValue(CharData.NUM_VALUE[5]),
2474:  String.zeroBasedStringValue(CharData.NUM_VALUE[6]),
2475:  String.zeroBasedStringValue(CharData.NUM_VALUE[7]),
2476:  String.zeroBasedStringValue(CharData.NUM_VALUE[8]),
2477:  String.zeroBasedStringValue(CharData.NUM_VALUE[9]),
2478:  String.zeroBasedStringValue(CharData.NUM_VALUE[10]),
2479:  String.zeroBasedStringValue(CharData.NUM_VALUE[11]),
2480:  String.zeroBasedStringValue(CharData.NUM_VALUE[12]),
2481:  String.zeroBasedStringValue(CharData.NUM_VALUE[13]),
2482:  String.zeroBasedStringValue(CharData.NUM_VALUE[14]),
2483:  String.zeroBasedStringValue(CharData.NUM_VALUE[15]),
2484:  String.zeroBasedStringValue(CharData.NUM_VALUE[16])};
2485: 
2486:  /**
2487:  * Stores unicode uppercase attribute table. Exploit package visibility
2488:  * of String.value to avoid copying the array.
2489:  * @see CharData#UPPER
2490:  */ 
2491:  private static final char[][] upper = 
2492:  new char[][]{
2493:  String.zeroBasedStringValue(CharData.UPPER[0]),
2494:  String.zeroBasedStringValue(CharData.UPPER[1]),
2495:  String.zeroBasedStringValue(CharData.UPPER[2]),
2496:  String.zeroBasedStringValue(CharData.UPPER[3]),
2497:  String.zeroBasedStringValue(CharData.UPPER[4]),
2498:  String.zeroBasedStringValue(CharData.UPPER[5]),
2499:  String.zeroBasedStringValue(CharData.UPPER[6]),
2500:  String.zeroBasedStringValue(CharData.UPPER[7]),
2501:  String.zeroBasedStringValue(CharData.UPPER[8]),
2502:  String.zeroBasedStringValue(CharData.UPPER[9]),
2503:  String.zeroBasedStringValue(CharData.UPPER[10]),
2504:  String.zeroBasedStringValue(CharData.UPPER[11]),
2505:  String.zeroBasedStringValue(CharData.UPPER[12]),
2506:  String.zeroBasedStringValue(CharData.UPPER[13]),
2507:  String.zeroBasedStringValue(CharData.UPPER[14]),
2508:  String.zeroBasedStringValue(CharData.UPPER[15]),
2509:  String.zeroBasedStringValue(CharData.UPPER[16])};
2510: 
2511:  /**
2512:  * Stores unicode lowercase attribute table. Exploit package visibility
2513:  * of String.value to avoid copying the array.
2514:  * @see CharData#LOWER
2515:  */
2516:  private static final char[][] lower = 
2517:  new char[][]{
2518:  String.zeroBasedStringValue(CharData.LOWER[0]),
2519:  String.zeroBasedStringValue(CharData.LOWER[1]),
2520:  String.zeroBasedStringValue(CharData.LOWER[2]),
2521:  String.zeroBasedStringValue(CharData.LOWER[3]),
2522:  String.zeroBasedStringValue(CharData.LOWER[4]),
2523:  String.zeroBasedStringValue(CharData.LOWER[5]),
2524:  String.zeroBasedStringValue(CharData.LOWER[6]),
2525:  String.zeroBasedStringValue(CharData.LOWER[7]),
2526:  String.zeroBasedStringValue(CharData.LOWER[8]),
2527:  String.zeroBasedStringValue(CharData.LOWER[9]),
2528:  String.zeroBasedStringValue(CharData.LOWER[10]),
2529:  String.zeroBasedStringValue(CharData.LOWER[11]),
2530:  String.zeroBasedStringValue(CharData.LOWER[12]),
2531:  String.zeroBasedStringValue(CharData.LOWER[13]),
2532:  String.zeroBasedStringValue(CharData.LOWER[14]),
2533:  String.zeroBasedStringValue(CharData.LOWER[15]),
2534:  String.zeroBasedStringValue(CharData.LOWER[16])};
2535: 
2536:  /**
2537:  * Stores unicode direction attribute table. Exploit package visibility
2538:  * of String.value to avoid copying the array.
2539:  * @see CharData#DIRECTION
2540:  */
2541:  // Package visible for use by String.
2542:  static final char[][] direction = 
2543:  new char[][]{
2544:  String.zeroBasedStringValue(CharData.DIRECTION[0]),
2545:  String.zeroBasedStringValue(CharData.DIRECTION[1]),
2546:  String.zeroBasedStringValue(CharData.DIRECTION[2]),
2547:  String.zeroBasedStringValue(CharData.DIRECTION[3]),
2548:  String.zeroBasedStringValue(CharData.DIRECTION[4]),
2549:  String.zeroBasedStringValue(CharData.DIRECTION[5]),
2550:  String.zeroBasedStringValue(CharData.DIRECTION[6]),
2551:  String.zeroBasedStringValue(CharData.DIRECTION[7]),
2552:  String.zeroBasedStringValue(CharData.DIRECTION[8]),
2553:  String.zeroBasedStringValue(CharData.DIRECTION[9]),
2554:  String.zeroBasedStringValue(CharData.DIRECTION[10]),
2555:  String.zeroBasedStringValue(CharData.DIRECTION[11]),
2556:  String.zeroBasedStringValue(CharData.DIRECTION[12]),
2557:  String.zeroBasedStringValue(CharData.DIRECTION[13]),
2558:  String.zeroBasedStringValue(CharData.DIRECTION[14]),
2559:  String.zeroBasedStringValue(CharData.DIRECTION[15]),
2560:  String.zeroBasedStringValue(CharData.DIRECTION[16])};
2561: 
2562:  /**
2563:  * Stores unicode titlecase table. Exploit package visibility of
2564:  * String.value to avoid copying the array.
2565:  * @see CharData#TITLE
2566:  */
2567:  private static final char[] title = String.zeroBasedStringValue(CharData.TITLE); 
2568: 
2569:  /**
2570:  * Mask for grabbing the type out of the contents of data.
2571:  * @see CharData#DATA
2572:  */
2573:  private static final int TYPE_MASK = 0x1F;
2574: 
2575:  /**
2576:  * Mask for grabbing the non-breaking space flag out of the contents of
2577:  * data.
2578:  * @see CharData#DATA
2579:  */
2580:  private static final int NO_BREAK_MASK = 0x20;
2581: 
2582:  /**
2583:  * Mask for grabbing the mirrored directionality flag out of the contents
2584:  * of data.
2585:  * @see CharData#DATA
2586:  */
2587:  private static final int MIRROR_MASK = 0x40;
2588: 
2589:  /**
2590:  * Grabs an attribute offset from the Unicode attribute database. The lower
2591:  * 5 bits are the character type, the next 2 bits are flags, and the top
2592:  * 9 bits are the offset into the attribute tables.
2593:  *
2594:  * @param codePoint the character to look up
2595:  * @return the character's attribute offset and type
2596:  * @see #TYPE_MASK
2597:  * @see #NO_BREAK_MASK
2598:  * @see #MIRROR_MASK
2599:  * @see CharData#DATA
2600:  * @see CharData#SHIFT
2601:  */
2602:  // Package visible for use in String.
2603:  static char readCodePoint(int codePoint)
2604:  {
2605:  int plane = codePoint >>> 16;
2606:  char offset = (char) (codePoint & 0xffff);
2607:  return data[plane][(char) (blocks[plane][offset >> CharData.SHIFT[plane]] + offset)];
2608:  }
2609: 
2610:  /**
2611:  * Wraps up a character.
2612:  *
2613:  * @param value the character to wrap
2614:  */
public Character(char value)
{
  // Remember the primitive this instance wraps.
  this.value = value;
}
2619: 
2620:  /**
2621:  * Returns the character which has been wrapped by this class.
2622:  *
2623:  * @return the character wrapped
2624:  */
2625:  public char charValue()
2626:  {
2627:  return value;
2628:  }
2629: 
2630:  /**
2631:  * Returns the numerical value (unsigned) of the wrapped character.
2632:  * Range of returned values: 0x0000-0xFFFF.
2633:  *
2634:  * @return the value of the wrapped character
2635:  */
2636:  public int hashCode()
2637:  {
2638:  return value;
2639:  }
2640: 
2641:  /**
2642:  * Determines if an object is equal to this object. This is only true for
2643:  * another Character object wrapping the same value.
2644:  *
2645:  * @param o object to compare
2646:  * @return true if o is a Character with the same value
2647:  */
2648:  public boolean equals(Object o)
2649:  {
2650:  return o instanceof Character && value == ((Character) o).value;
2651:  }
2652: 
2653:  /**
2654:  * Converts the wrapped character into a String.
2655:  *
2656:  * @return a String containing one character -- the wrapped character
2657:  * of this instance
2658:  */
2659:  public String toString()
2660:  {
2661:  // Package constructor avoids an array copy.
2662:  return new String(new char[] { value }, 0, 1, true);
2663:  }
2664: 
2665:  /**
2666:  * Returns a String of length 1 representing the specified character.
2667:  *
2668:  * @param ch the character to convert
2669:  * @return a String containing the character
2670:  * @since 1.4
2671:  */
2672:  public static String toString(char ch)
2673:  {
2674:  // Package constructor avoids an array copy.
2675:  return new String(new char[] { ch }, 0, 1, true);
2676:  }
2677: 
2678:  /**
2679:  * Determines if a character is a Unicode lowercase letter. For example,
2680:  * <code>'a'</code> is lowercase. Returns true if getType() returns
2681:  * LOWERCASE_LETTER.
2682:  * <br>
2683:  * lowercase = [Ll]
2684:  *
2685:  * @param ch character to test
2686:  * @return true if ch is a Unicode lowercase letter, else false
2687:  * @see #isUpperCase(char)
2688:  * @see #isTitleCase(char)
2689:  * @see #toLowerCase(char)
2690:  * @see #getType(char)
2691:  */
2692:  public static boolean isLowerCase(char ch)
2693:  {
2694:  return isLowerCase((int)ch);
2695:  }
2696:  
2697:  /**
2698:  * Determines if a character is a Unicode lowercase letter. For example,
2699:  * <code>'a'</code> is lowercase. Returns true if getType() returns
2700:  * LOWERCASE_LETTER.
2701:  * <br>
2702:  * lowercase = [Ll]
2703:  *
2704:  * @param codePoint character to test
2705:  * @return true if codePoint is a Unicode lowercase letter, else false
2706:  * @see #isUpperCase(char)
2707:  * @see #isTitleCase(char)
2708:  * @see #toLowerCase(char)
2709:  * @see #getType(char)
2710:  * 
2711:  * @since 1.5
2712:  */
2713:  public static boolean isLowerCase(int codePoint)
2714:  {
2715:  return getType(codePoint) == LOWERCASE_LETTER;
2716:  }
2717: 
2718:  /**
2719:  * Determines if a character is a Unicode uppercase letter. For example,
2720:  * <code>'A'</code> is uppercase. Returns true if getType() returns
2721:  * UPPERCASE_LETTER.
2722:  * <br>
2723:  * uppercase = [Lu]
2724:  *
2725:  * @param ch character to test
2726:  * @return true if ch is a Unicode uppercase letter, else false
2727:  * @see #isLowerCase(char)
2728:  * @see #isTitleCase(char)
2729:  * @see #toUpperCase(char)
2730:  * @see #getType(char)
2731:  */
2732:  public static boolean isUpperCase(char ch)
2733:  {
2734:  return isUpperCase((int)ch);
2735:  }
2736:  
2737:  /**
2738:  * Determines if a character is a Unicode uppercase letter. For example,
2739:  * <code>'A'</code> is uppercase. Returns true if getType() returns
2740:  * UPPERCASE_LETTER.
2741:  * <br>
2742:  * uppercase = [Lu]
2743:  *
2744:  * @param codePoint character to test
2745:  * @return true if codePoint is a Unicode uppercase letter, else false
2746:  * @see #isLowerCase(char)
2747:  * @see #isTitleCase(char)
2748:  * @see #toUpperCase(char)
2749:  * @see #getType(char)
2750:  * 
2751:  * @since 1.5
2752:  */
2753:  public static boolean isUpperCase(int codePoint)
2754:  {
2755:  return getType(codePoint) == UPPERCASE_LETTER;
2756:  }
2757: 
2758:  /**
2759:  * Determines if a character is a Unicode titlecase letter. For example,
2760:  * the character "Lj" (Latin capital L with small letter j) is titlecase.
2761:  * True if getType() returns TITLECASE_LETTER.
2762:  * <br>
2763:  * titlecase = [Lt]
2764:  *
2765:  * @param ch character to test
2766:  * @return true if ch is a Unicode titlecase letter, else false
2767:  * @see #isLowerCase(char)
2768:  * @see #isUpperCase(char)
2769:  * @see #toTitleCase(char)
2770:  * @see #getType(char)
2771:  */
2772:  public static boolean isTitleCase(char ch)
2773:  {
2774:  return isTitleCase((int)ch);
2775:  }
2776: 
2777:  /**
2778:  * Determines if a character is a Unicode titlecase letter. For example,
2779:  * the character "Lj" (Latin capital L with small letter j) is titlecase.
2780:  * True if getType() returns TITLECASE_LETTER.
2781:  * <br>
2782:  * titlecase = [Lt]
2783:  *
2784:  * @param codePoint character to test
2785:  * @return true if codePoint is a Unicode titlecase letter, else false
2786:  * @see #isLowerCase(char)
2787:  * @see #isUpperCase(char)
2788:  * @see #toTitleCase(char)
2789:  * @see #getType(char)
2790:  * 
2791:  * @since 1.5
2792:  */
2793:  public static boolean isTitleCase(int codePoint)
2794:  {
2795:  return getType(codePoint) == TITLECASE_LETTER;
2796:  }
2797:  
2798: 
2799:  /**
2800:  * Determines if a character is a Unicode decimal digit. For example,
2801:  * <code>'0'</code> is a digit. A character is a Unicode digit if
2802:  * getType() returns DECIMAL_DIGIT_NUMBER.
2803:  * <br>
2804:  * Unicode decimal digit = [Nd]
2805:  *
2806:  * @param ch character to test
2807:  * @return true if ch is a Unicode decimal digit, else false
2808:  * @see #digit(char, int)
2809:  * @see #forDigit(int, int)
2810:  * @see #getType(char)
2811:  */
2812:  public static boolean isDigit(char ch)
2813:  {
2814:  return isDigit((int)ch);
2815:  }
2816:  
2817:  /**
2818:  * Determines if a character is a Unicode decimal digit. For example,
2819:  * <code>'0'</code> is a digit. A character is a Unicode digit if
2820:  * getType() returns DECIMAL_DIGIT_NUMBER.
2821:  * <br>
2822:  * Unicode decimal digit = [Nd]
2823:  *
2824:  * @param codePoint character to test
2825:  * @return true if codePoint is a Unicode decimal digit, else false
2826:  * @see #digit(char, int)
2827:  * @see #forDigit(int, int)
2828:  * @see #getType(char)
2829:  * 
2830:  * @since 1.5
2831:  */
2832: 
2833:  public static boolean isDigit(int codePoint)
2834:  {
2835:  return getType(codePoint) == DECIMAL_DIGIT_NUMBER;
2836:  }
2837: 
2838:  /**
2839:  * Determines if a character is part of the Unicode Standard. This is an
2840:  * evolving standard, but covers every character in the data file.
2841:  * <br>
2842:  * defined = not [Cn]
2843:  *
2844:  * @param ch character to test
2845:  * @return true if ch is a Unicode character, else false
2846:  * @see #isDigit(char)
2847:  * @see #isLetter(char)
2848:  * @see #isLetterOrDigit(char)
2849:  * @see #isLowerCase(char)
2850:  * @see #isTitleCase(char)
2851:  * @see #isUpperCase(char)
2852:  */
2853:  public static boolean isDefined(char ch)
2854:  {
2855:  return isDefined((int)ch);
2856:  }
2857:  
2858:  /**
2859:  * Determines if a character is part of the Unicode Standard. This is an
2860:  * evolving standard, but covers every character in the data file.
2861:  * <br>
2862:  * defined = not [Cn]
2863:  *
2864:  * @param codePoint character to test
2865:  * @return true if codePoint is a Unicode character, else false
2866:  * @see #isDigit(char)
2867:  * @see #isLetter(char)
2868:  * @see #isLetterOrDigit(char)
2869:  * @see #isLowerCase(char)
2870:  * @see #isTitleCase(char)
2871:  * @see #isUpperCase(char)
2872:  * 
2873:  * @since 1.5
2874:  */
2875:  public static boolean isDefined(int codePoint)
2876:  {
2877:  return getType(codePoint) != UNASSIGNED;
2878:  }
2879: 
2880:  /**
2881:  * Determines if a character is a Unicode letter. Not all letters have case,
2882:  * so this may return true when isLowerCase and isUpperCase return false.
2883:  * A character is a Unicode letter if getType() returns one of 
2884:  * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2885:  * or OTHER_LETTER.
2886:  * <br>
2887:  * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2888:  *
2889:  * @param ch character to test
2890:  * @return true if ch is a Unicode letter, else false
2891:  * @see #isDigit(char)
2892:  * @see #isJavaIdentifierStart(char)
2893:  * @see #isJavaLetter(char)
2894:  * @see #isJavaLetterOrDigit(char)
2895:  * @see #isLetterOrDigit(char)
2896:  * @see #isLowerCase(char)
2897:  * @see #isTitleCase(char)
2898:  * @see #isUnicodeIdentifierStart(char)
2899:  * @see #isUpperCase(char)
2900:  */
2901:  public static boolean isLetter(char ch)
2902:  {
2903:  return isLetter((int)ch);
2904:  }
2905:  
2906:  /**
2907:  * Determines if a character is a Unicode letter. Not all letters have case,
2908:  * so this may return true when isLowerCase and isUpperCase return false.
2909:  * A character is a Unicode letter if getType() returns one of 
2910:  * UPPERCASE_LETTER, LOWERCASE_LETTER, TITLECASE_LETTER, MODIFIER_LETTER,
2911:  * or OTHER_LETTER.
2912:  * <br>
2913:  * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2914:  *
2915:  * @param codePoint character to test
2916:  * @return true if codePoint is a Unicode letter, else false
2917:  * @see #isDigit(char)
2918:  * @see #isJavaIdentifierStart(char)
2919:  * @see #isJavaLetter(char)
2920:  * @see #isJavaLetterOrDigit(char)
2921:  * @see #isLetterOrDigit(char)
2922:  * @see #isLowerCase(char)
2923:  * @see #isTitleCase(char)
2924:  * @see #isUnicodeIdentifierStart(char)
2925:  * @see #isUpperCase(char)
2926:  * 
2927:  * @since 1.5
2928:  */
2929:  public static boolean isLetter(int codePoint)
2930:  {
2931:  return ((1 << getType(codePoint))
2932:  & ((1 << UPPERCASE_LETTER)
2933:  | (1 << LOWERCASE_LETTER)
2934:  | (1 << TITLECASE_LETTER)
2935:  | (1 << MODIFIER_LETTER)
2936:  | (1 << OTHER_LETTER))) != 0;
2937:  }
2938:  /**
2939:  * Returns the index into the given CharSequence that is offset
2940:  * <code>codePointOffset</code> code points from <code>index</code>.
2941:  * @param seq the CharSequence
2942:  * @param index the start position in the CharSequence
2943:  * @param codePointOffset the number of code points offset from the start
2944:  * position
2945:  * @return the index into the CharSequence that is codePointOffset code 
2946:  * points offset from index
2947:  * 
2948:  * @throws NullPointerException if seq is null
2949:  * @throws IndexOutOfBoundsException if index is negative or greater than the
2950:  * length of the sequence.
2951:  * @throws IndexOutOfBoundsException if codePointOffset is positive and the 
2952:  * subsequence from index to the end of seq has fewer than codePointOffset
2953:  * code points
2954:  * @throws IndexOutOfBoundsException if codePointOffset is negative and the
2955:  * subsequence from the start of seq to index has fewer than 
2956:  * (-codePointOffset) code points
2957:  * @since 1.5
2958:  */
2959:  public static int offsetByCodePoints(CharSequence seq,
2960:  int index,
2961:  int codePointOffset)
2962:  {
2963:  int len = seq.length();
2964:  if (index < 0 || index > len)
2965:  throw new IndexOutOfBoundsException();
2966:  
2967:  int numToGo = codePointOffset;
2968:  int offset = index;
2969:  int adjust = 1;
2970:  if (numToGo >= 0)
2971:  {
2972:  for (; numToGo > 0; offset++)
2973:  {
2974:  numToGo--;
2975:  if (Character.isHighSurrogate(seq.charAt(offset))
2976:  && (offset + 1) < len
2977:  && Character.isLowSurrogate(seq.charAt(offset + 1)))
2978:  offset++;
2979:  }
2980:  return offset;
2981:  }
2982:  else
2983:  {
2984:  numToGo *= -1;
2985:  for (; numToGo > 0;)
2986:  {
2987:  numToGo--;
2988:  offset--;
2989:  if (Character.isLowSurrogate(seq.charAt(offset))
2990:  && (offset - 1) >= 0
2991:  && Character.isHighSurrogate(seq.charAt(offset - 1)))
2992:  offset--;
2993:  }
2994:  return offset;
2995:  }
2996:  }
2997:  
2998:  /**
2999:  * Returns the index into the given char subarray that is offset
3000:  * <code>codePointOffset</code> code points from <code>index</code>.
3001:  * @param a the char array
3002:  * @param start the start index of the subarray
3003:  * @param count the length of the subarray
3004:  * @param index the index to be offset
3005:  * @param codePointOffset the number of code points offset from <code>index
3006:  * </code>
3007:  * @return the index into the char array
3008:  * 
3009:  * @throws NullPointerException if a is null
3010:  * @throws IndexOutOfBoundsException if start or count is negative or if
3011:  * start + count is greater than the length of the array
3012:  * @throws IndexOutOfBoundsException if index is less than start or larger 
3013:  * than start + count
3014:  * @throws IndexOutOfBoundsException if codePointOffset is positive and the
3015:  * subarray from index to start + count - 1 has fewer than codePointOffset
3016:  * code points.
3017:  * @throws IndexOutOfBoundsException if codePointOffset is negative and the
3018:  * subarray from start to index - 1 has fewer than (-codePointOffset) code
3019:  * points
3020:  * 
3021:  * @since 1.5
3022:  */
3023:  public static int offsetByCodePoints(char[] a,
3024:  int start,
3025:  int count,
3026:  int index,
3027:  int codePointOffset)
3028:  {
3029:  int len = a.length;
3030:  int end = start + count;
3031:  if (start < 0 || count < 0 || end > len || index < start || index > end)
3032:  throw new IndexOutOfBoundsException();
3033:  
3034:  int numToGo = codePointOffset;
3035:  int offset = index;
3036:  int adjust = 1;
3037:  if (numToGo >= 0)
3038:  {
3039:  for (; numToGo > 0; offset++)
3040:  {
3041:  numToGo--;
3042:  if (Character.isHighSurrogate(a[offset])
3043:  && (offset + 1) < len
3044:  && Character.isLowSurrogate(a[offset + 1]))
3045:  offset++;
3046:  }
3047:  return offset;
3048:  }
3049:  else
3050:  {
3051:  numToGo *= -1;
3052:  for (; numToGo > 0;)
3053:  {
3054:  numToGo--;
3055:  offset--;
3056:  if (Character.isLowSurrogate(a[offset])
3057:  && (offset - 1) >= 0
3058:  && Character.isHighSurrogate(a[offset - 1]))
3059:  offset--;
3060:  if (offset < start)
3061:  throw new IndexOutOfBoundsException();
3062:  }
3063:  return offset;
3064:  }
3065: 
3066:  }
3067:  
3068:  /**
3069:  * Returns the number of Unicode code points in the specified range of the
3070:  * given CharSequence. The first char in the range is at position
3071:  * beginIndex and the last one is at position endIndex - 1. Paired 
3072:  * surrogates (supplementary characters are represented by a pair of chars - 
3073:  * one from the high surrogates and one from the low surrogates) 
3074:  * count as just one code point.
3075:  * @param seq the CharSequence to inspect
3076:  * @param beginIndex the beginning of the range
3077:  * @param endIndex the end of the range
3078:  * @return the number of Unicode code points in the given range of the 
3079:  * sequence
3080:  * @throws NullPointerException if seq is null
3081:  * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
3082:  * larger than the length of seq, or if beginIndex is greater than endIndex.
3083:  * @since 1.5
3084:  */
3085:  public static int codePointCount(CharSequence seq, int beginIndex,
3086:  int endIndex)
3087:  {
3088:  int len = seq.length();
3089:  if (beginIndex < 0 || endIndex > len || beginIndex > endIndex)
3090:  throw new IndexOutOfBoundsException();
3091:  
3092:  int count = 0;
3093:  for (int i = beginIndex; i < endIndex; i++)
3094:  {
3095:  count++;
3096:  // If there is a pairing, count it only once.
3097:  if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex
3098:  && isLowSurrogate(seq.charAt(i + 1)))
3099:  i ++;
3100:  } 
3101:  return count;
3102:  }
3103:  
3104:  /**
3105:  * Returns the number of Unicode code points in the specified range of the
3106:  * given char array. The first char in the range is at position
3107:  * offset and the length of the range is count. Paired surrogates
3108:  * (supplementary characters are represented by a pair of chars - 
3109:  * one from the high surrogates and one from the low surrogates) 
3110:  * count as just one code point.
3111:  * @param a the char array to inspect
3112:  * @param offset the beginning of the range
3113:  * @param count the length of the range
3114:  * @return the number of Unicode code points in the given range of the 
3115:  * array
3116:  * @throws NullPointerException if a is null
3117:  * @throws IndexOutOfBoundsException if offset or count is negative or if
3118:  * offset + count is larger than the length of a.
3119:  * @since 1.5
3120:  */
3121:  public static int codePointCount(char[] a, int offset,
3122:  int count)
3123:  {
3124:  int len = a.length;
3125:  int end = offset + count;
3126:  if (offset < 0 || count < 0 || end > len)
3127:  throw new IndexOutOfBoundsException();
3128:  
3129:  int counter = 0;
3130:  for (int i = offset; i < end; i++)
3131:  {
3132:  counter++;
3133:  // If there is a pairing, count it only once.
3134:  if (isHighSurrogate(a[i]) && (i + 1) < end
3135:  && isLowSurrogate(a[i + 1]))
3136:  i ++;
3137:  } 
3138:  return counter;
3139:  }
3140: 
3141:  /**
3142:  * Determines if a character is a Unicode letter or a Unicode digit. This
3143:  * is the combination of isLetter and isDigit.
3144:  * <br>
3145:  * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3146:  *
3147:  * @param ch character to test
3148:  * @return true if ch is a Unicode letter or a Unicode digit, else false
3149:  * @see #isDigit(char)
3150:  * @see #isJavaIdentifierPart(char)
3151:  * @see #isJavaLetter(char)
3152:  * @see #isJavaLetterOrDigit(char)
3153:  * @see #isLetter(char)
3154:  * @see #isUnicodeIdentifierPart(char)
3155:  */
3156:  public static boolean isLetterOrDigit(char ch)
3157:  {
3158:  return isLetterOrDigit((int)ch);
3159:  }
3160: 
3161:  /**
3162:  * Determines if a character is a Unicode letter or a Unicode digit. This
3163:  * is the combination of isLetter and isDigit.
3164:  * <br>
3165:  * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
3166:  *
3167:  * @param codePoint character to test
3168:  * @return true if codePoint is a Unicode letter or a Unicode digit, else false
3169:  * @see #isDigit(char)
3170:  * @see #isJavaIdentifierPart(char)
3171:  * @see #isJavaLetter(char)
3172:  * @see #isJavaLetterOrDigit(char)
3173:  * @see #isLetter(char)
3174:  * @see #isUnicodeIdentifierPart(char)
3175:  * 
3176:  * @since 1.5
3177:  */
3178:  public static boolean isLetterOrDigit(int codePoint)
3179:  {
3180:  return ((1 << getType(codePoint))
3181:  & ((1 << UPPERCASE_LETTER)
3182:  | (1 << LOWERCASE_LETTER)
3183:  | (1 << TITLECASE_LETTER)
3184:  | (1 << MODIFIER_LETTER)
3185:  | (1 << OTHER_LETTER)
3186:  | (1 << DECIMAL_DIGIT_NUMBER))) != 0;
3187:  }
3188:  
3189:  /**
3190:  * Determines if a character can start a Java identifier. This is the
3191:  * combination of isLetter, any character where getType returns
3192:  * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3193:  * (like '_').
3194:  *
3195:  * @param ch character to test
3196:  * @return true if ch can start a Java identifier, else false
3197:  * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
3198:  * @see #isJavaLetterOrDigit(char)
3199:  * @see #isJavaIdentifierStart(char)
3200:  * @see #isJavaIdentifierPart(char)
3201:  * @see #isLetter(char)
3202:  * @see #isLetterOrDigit(char)
3203:  * @see #isUnicodeIdentifierStart(char)
3204:  */
3205:  public static boolean isJavaLetter(char ch)
3206:  {
3207:  return isJavaIdentifierStart(ch);
3208:  }
3209: 
3210:  /**
3211:  * Determines if a character can follow the first letter in
3212:  * a Java identifier. This is the combination of isJavaLetter (isLetter,
3213:  * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3214:  * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3215:  * or isIdentifierIgnorable.
3216:  *
3217:  * @param ch character to test
3218:  * @return true if ch can follow the first letter in a Java identifier
3219:  * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
3220:  * @see #isJavaLetter(char)
3221:  * @see #isJavaIdentifierStart(char)
3222:  * @see #isJavaIdentifierPart(char)
3223:  * @see #isLetter(char)
3224:  * @see #isLetterOrDigit(char)
3225:  * @see #isUnicodeIdentifierPart(char)
3226:  * @see #isIdentifierIgnorable(char)
3227:  */
3228:  public static boolean isJavaLetterOrDigit(char ch)
3229:  {
3230:  return isJavaIdentifierPart(ch);
3231:  }
3232: 
3233:  /**
3234:  * Determines if a character can start a Java identifier. This is the
3235:  * combination of isLetter, any character where getType returns
3236:  * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3237:  * (like '_').
3238:  * <br>
3239:  * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3240:  *
3241:  * @param ch character to test
3242:  * @return true if ch can start a Java identifier, else false
3243:  * @see #isJavaIdentifierPart(char)
3244:  * @see #isLetter(char)
3245:  * @see #isUnicodeIdentifierStart(char)
3246:  * @since 1.1
3247:  */
3248:  public static boolean isJavaIdentifierStart(char ch)
3249:  {
3250:  return isJavaIdentifierStart((int)ch);
3251:  }
3252: 
3253:  /**
3254:  * Determines if a character can start a Java identifier. This is the
3255:  * combination of isLetter, any character where getType returns
3256:  * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
3257:  * (like '_').
3258:  * <br>
3259:  * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
3260:  *
3261:  * @param codePoint character to test
3262:  * @return true if ch can start a Java identifier, else false
3263:  * @see #isJavaIdentifierPart(char)
3264:  * @see #isLetter(char)
3265:  * @see #isUnicodeIdentifierStart(char)
3266:  * @since 1.5
3267:  */
3268:  public static boolean isJavaIdentifierStart(int codePoint)
3269:  {
3270:  return ((1 << getType(codePoint))
3271:  & ((1 << UPPERCASE_LETTER)
3272:  | (1 << LOWERCASE_LETTER)
3273:  | (1 << TITLECASE_LETTER)
3274:  | (1 << MODIFIER_LETTER)
3275:  | (1 << OTHER_LETTER)
3276:  | (1 << LETTER_NUMBER)
3277:  | (1 << CURRENCY_SYMBOL)
3278:  | (1 << CONNECTOR_PUNCTUATION))) != 0;
3279:  }
3280: 
3281:  /**
3282:  * Determines if a character can follow the first letter in
3283:  * a Java identifier. This is the combination of isJavaLetter (isLetter,
3284:  * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3285:  * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3286:  * or isIdentifierIgnorable.
3287:  * <br>
3288:  * Java identifier extender =
3289:  * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3290:  * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3291:  *
3292:  * @param ch character to test
3293:  * @return true if ch can follow the first letter in a Java identifier
3294:  * @see #isIdentifierIgnorable(char)
3295:  * @see #isJavaIdentifierStart(char)
3296:  * @see #isLetterOrDigit(char)
3297:  * @see #isUnicodeIdentifierPart(char)
3298:  * @since 1.1
3299:  */
3300:  public static boolean isJavaIdentifierPart(char ch)
3301:  {
3302:  return isJavaIdentifierPart((int)ch);
3303:  }
3304:  
3305:  /**
3306:  * Determines if a character can follow the first letter in
3307:  * a Java identifier. This is the combination of isJavaLetter (isLetter,
3308:  * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
3309:  * numeric letter (like Roman numerals), combining marks, non-spacing marks,
3310:  * or isIdentifierIgnorable.
3311:  * <br>
3312:  * Java identifier extender =
3313:  * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
3314:  * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3315:  *
3316:  * @param codePoint character to test
3317:  * @return true if ch can follow the first letter in a Java identifier
3318:  * @see #isIdentifierIgnorable(char)
3319:  * @see #isJavaIdentifierStart(char)
3320:  * @see #isLetterOrDigit(char)
3321:  * @see #isUnicodeIdentifierPart(char)
3322:  * @since 1.5
3323:  */
3324:  public static boolean isJavaIdentifierPart(int codePoint)
3325:  {
3326:  int category = getType(codePoint);
3327:  return ((1 << category)
3328:  & ((1 << UPPERCASE_LETTER)
3329:  | (1 << LOWERCASE_LETTER)
3330:  | (1 << TITLECASE_LETTER)
3331:  | (1 << MODIFIER_LETTER)
3332:  | (1 << OTHER_LETTER)
3333:  | (1 << NON_SPACING_MARK)
3334:  | (1 << COMBINING_SPACING_MARK)
3335:  | (1 << DECIMAL_DIGIT_NUMBER)
3336:  | (1 << LETTER_NUMBER)
3337:  | (1 << CURRENCY_SYMBOL)
3338:  | (1 << CONNECTOR_PUNCTUATION)
3339:  | (1 << FORMAT))) != 0
3340:  || (category == CONTROL && isIdentifierIgnorable(codePoint));
3341:  }
3342: 
3343:  /**
3344:  * Determines if a character can start a Unicode identifier. Only
3345:  * letters can start a Unicode identifier, but this includes characters
3346:  * in LETTER_NUMBER.
3347:  * <br>
3348:  * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3349:  *
3350:  * @param ch character to test
3351:  * @return true if ch can start a Unicode identifier, else false
3352:  * @see #isJavaIdentifierStart(char)
3353:  * @see #isLetter(char)
3354:  * @see #isUnicodeIdentifierPart(char)
3355:  * @since 1.1
3356:  */
3357:  public static boolean isUnicodeIdentifierStart(char ch)
3358:  {
3359:  return isUnicodeIdentifierStart((int)ch);
3360:  }
3361: 
3362:  /**
3363:  * Determines if a character can start a Unicode identifier. Only
3364:  * letters can start a Unicode identifier, but this includes characters
3365:  * in LETTER_NUMBER.
3366:  * <br>
3367:  * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
3368:  *
3369:  * @param codePoint character to test
3370:  * @return true if ch can start a Unicode identifier, else false
3371:  * @see #isJavaIdentifierStart(char)
3372:  * @see #isLetter(char)
3373:  * @see #isUnicodeIdentifierPart(char)
3374:  * @since 1.5
3375:  */
3376:  public static boolean isUnicodeIdentifierStart(int codePoint)
3377:  {
3378:  return ((1 << getType(codePoint))
3379:  & ((1 << UPPERCASE_LETTER)
3380:  | (1 << LOWERCASE_LETTER)
3381:  | (1 << TITLECASE_LETTER)
3382:  | (1 << MODIFIER_LETTER)
3383:  | (1 << OTHER_LETTER)
3384:  | (1 << LETTER_NUMBER))) != 0;
3385:  }
3386: 
3387:  /**
3388:  * Determines if a character can follow the first letter in
3389:  * a Unicode identifier. This includes letters, connecting punctuation,
3390:  * digits, numeric letters, combining marks, non-spacing marks, and
3391:  * isIdentifierIgnorable.
3392:  * <br>
3393:  * Unicode identifier extender =
3394:  * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
3395:  * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3396:  *
3397:  * @param ch character to test
3398:  * @return true if ch can follow the first letter in a Unicode identifier
3399:  * @see #isIdentifierIgnorable(char)
3400:  * @see #isJavaIdentifierPart(char)
3401:  * @see #isLetterOrDigit(char)
3402:  * @see #isUnicodeIdentifierStart(char)
3403:  * @since 1.1
3404:  */
3405:  public static boolean isUnicodeIdentifierPart(char ch)
3406:  {
3407:  return isUnicodeIdentifierPart((int)ch);
3408:  }
3409:  
3410:  /**
3411:  * Determines if a character can follow the first letter in
3412:  * a Unicode identifier. This includes letters, connecting punctuation,
3413:  * digits, numeric letters, combining marks, non-spacing marks, and
3414:  * isIdentifierIgnorable.
3415:  * <br>
3416:  * Unicode identifier extender =
3417:  * [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
3418:  * |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
3419:  *
3420:  * @param codePoint character to test
3421:  * @return true if ch can follow the first letter in a Unicode identifier
3422:  * @see #isIdentifierIgnorable(char)
3423:  * @see #isJavaIdentifierPart(char)
3424:  * @see #isLetterOrDigit(char)
3425:  * @see #isUnicodeIdentifierStart(char)
3426:  * @since 1.5
3427:  */
3428:  public static boolean isUnicodeIdentifierPart(int codePoint)
3429:  {
3430:  int category = getType(codePoint);
3431:  return ((1 << category)
3432:  & ((1 << UPPERCASE_LETTER)
3433:  | (1 << LOWERCASE_LETTER)
3434:  | (1 << TITLECASE_LETTER)
3435:  | (1 << MODIFIER_LETTER)
3436:  | (1 << OTHER_LETTER)
3437:  | (1 << NON_SPACING_MARK)
3438:  | (1 << COMBINING_SPACING_MARK)
3439:  | (1 << DECIMAL_DIGIT_NUMBER)
3440:  | (1 << LETTER_NUMBER)
3441:  | (1 << CONNECTOR_PUNCTUATION)
3442:  | (1 << FORMAT))) != 0
3443:  || (category == CONTROL && isIdentifierIgnorable(codePoint));
3444:  }
3445: 
3446:  /**
3447:  * Determines if a character is ignorable in a Unicode identifier. This
3448:  * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3449:  * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3450:  * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3451:  * <code>'\u009F'</code>), and FORMAT characters.
3452:  * <br>
3453:  * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3454:  * |U+007F-U+009F
3455:  *
3456:  * @param ch character to test
3457:  * @return true if ch is ignorable in a Unicode or Java identifier
3458:  * @see #isJavaIdentifierPart(char)
3459:  * @see #isUnicodeIdentifierPart(char)
3460:  * @since 1.1
3461:  */
3462:  public static boolean isIdentifierIgnorable(char ch)
3463:  {
3464:  return isIdentifierIgnorable((int)ch);
3465:  }
3466: 
3467:  /**
3468:  * Determines if a character is ignorable in a Unicode identifier. This
3469:  * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
3470:  * through <code>'\u0008'</code>, <code>'\u000E'</code> through
3471:  * <code>'\u001B'</code>, and <code>'\u007F'</code> through
3472:  * <code>'\u009F'</code>), and FORMAT characters.
3473:  * <br>
3474:  * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
3475:  * |U+007F-U+009F
3476:  *
3477:  * @param codePoint character to test
3478:  * @return true if ch is ignorable in a Unicode or Java identifier
3479:  * @see #isJavaIdentifierPart(char)
3480:  * @see #isUnicodeIdentifierPart(char)
3481:  * @since 1.5
3482:  */
3483:  public static boolean isIdentifierIgnorable(int codePoint)
3484:  {
3485:  if ((codePoint >= 0 && codePoint <= 0x0008)
3486:  || (codePoint >= 0x000E && codePoint <= 0x001B)
3487:  || (codePoint >= 0x007F && codePoint <= 0x009F)
3488:  || getType(codePoint) == FORMAT)
3489:  return true;
3490:  return false;
3491:  }
3492: 
3493:  /**
3494:  * Converts a Unicode character into its lowercase equivalent mapping.
3495:  * If a mapping does not exist, then the character passed is returned.
3496:  * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3497:  *
3498:  * @param ch character to convert to lowercase
3499:  * @return lowercase mapping of ch, or ch if lowercase mapping does
3500:  * not exist
3501:  * @see #isLowerCase(char)
3502:  * @see #isUpperCase(char)
3503:  * @see #toTitleCase(char)
3504:  * @see #toUpperCase(char)
3505:  */
3506:  public static char toLowerCase(char ch)
3507:  {
3508:  return (char) (lower[0][readCodePoint((int)ch) >>> 7] + ch);
3509:  }
3510:  
3511:  /**
3512:  * Converts a Unicode character into its lowercase equivalent mapping.
3513:  * If a mapping does not exist, then the character passed is returned.
3514:  * Note that isLowerCase(toLowerCase(ch)) does not always return true.
3515:  *
3516:  * @param codePoint character to convert to lowercase
3517:  * @return lowercase mapping of ch, or ch if lowercase mapping does
3518:  * not exist
3519:  * @see #isLowerCase(char)
3520:  * @see #isUpperCase(char)
3521:  * @see #toTitleCase(char)
3522:  * @see #toUpperCase(char)
3523:  * 
3524:  * @since 1.5
3525:  */
3526:  public static int toLowerCase(int codePoint)
3527:  {
3528:  // If the code point is unassigned or in one of the private use areas
3529:  // then we delegate the call to the appropriate private static inner class.
3530:  int plane = codePoint >>> 16;
3531:  if (plane > 2 && plane < 14)
3532:  return UnassignedCharacters.toLowerCase(codePoint);
3533:  if (plane > 14)
3534:  return PrivateUseCharacters.toLowerCase(codePoint);
3535:  
3536:  // The short value stored in lower[plane] is the signed difference between
3537:  // codePoint and its lowercase conversion.
3538:  return ((short)lower[plane][readCodePoint(codePoint) >>> 7]) + codePoint;
3539:  }
3540: 
3541:  /**
3542:  * Converts a Unicode character into its uppercase equivalent mapping.
3543:  * If a mapping does not exist, then the character passed is returned.
3544:  * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3545:  *
3546:  * @param ch character to convert to uppercase
3547:  * @return uppercase mapping of ch, or ch if uppercase mapping does
3548:  * not exist
3549:  * @see #isLowerCase(char)
3550:  * @see #isUpperCase(char)
3551:  * @see #toLowerCase(char)
3552:  * @see #toTitleCase(char)
3553:  */
3554:  public static char toUpperCase(char ch)
3555:  {
3556:  return (char) (upper[0][readCodePoint((int)ch) >>> 7] + ch);
3557:  }
3558:  
3559:  /**
3560:  * Converts a Unicode character into its uppercase equivalent mapping.
3561:  * If a mapping does not exist, then the character passed is returned.
3562:  * Note that isUpperCase(toUpperCase(ch)) does not always return true.
3563:  *
3564:  * @param codePoint character to convert to uppercase
3565:  * @return uppercase mapping of ch, or ch if uppercase mapping does
3566:  * not exist
3567:  * @see #isLowerCase(char)
3568:  * @see #isUpperCase(char)
3569:  * @see #toLowerCase(char)
3570:  * @see #toTitleCase(char)
3571:  * 
3572:  * @since 1.5
3573:  */
3574:  public static int toUpperCase(int codePoint)
3575:  {
3576:  // If the code point is unassigned or in one of the private use areas
3577:  // then we delegate the call to the appropriate private static inner class.
3578:  int plane = codePoint >>> 16;
3579:  if (plane > 2 && plane < 14)
3580:  return UnassignedCharacters.toUpperCase(codePoint);
3581:  if (plane > 14)
3582:  return PrivateUseCharacters.toUpperCase(codePoint);
3583:  
3584:  // The short value stored in upper[plane] is the signed difference between
3585:  // codePoint and its uppercase conversion.
3586:  return ((short)upper[plane][readCodePoint(codePoint) >>> 7]) + codePoint;
3587:  }
3588: 
3589:  /**
3590:  * Converts a Unicode character into its titlecase equivalent mapping.
3591:  * If a mapping does not exist, then the character passed is returned.
3592:  * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3593:  *
3594:  * @param ch character to convert to titlecase
3595:  * @return titlecase mapping of ch, or ch if titlecase mapping does
3596:  * not exist
3597:  * @see #isTitleCase(char)
3598:  * @see #toLowerCase(char)
3599:  * @see #toUpperCase(char)
3600:  */
3601:  public static char toTitleCase(char ch)
3602:  {
3603:  // As title is short, it doesn't hurt to exhaustively iterate over it.
3604:  for (int i = title.length - 2; i >= 0; i -= 2)
3605:  if (title[i] == ch)
3606:  return title[i + 1];
3607:  return toUpperCase(ch);
3608:  }
3609: 
3610:  /**
3611:  * Converts a Unicode character into its titlecase equivalent mapping.
3612:  * If a mapping does not exist, then the character passed is returned.
3613:  * Note that isTitleCase(toTitleCase(ch)) does not always return true.
3614:  *
3615:  * @param codePoint character to convert to titlecase
3616:  * @return titlecase mapping of ch, or ch if titlecase mapping does
3617:  * not exist
3618:  * @see #isTitleCase(char)
3619:  * @see #toLowerCase(char)
3620:  * @see #toUpperCase(char)
3621:  * 
3622:  * @since 1.5
3623:  */
3624:  public static int toTitleCase(int codePoint)
3625:  {
3626:  // As of Unicode 4.0.0 no characters outside of plane 0 have
3627:  // titlecase mappings that are different from their uppercase
3628:  // mapping.
3629:  if (codePoint < 0x10000)
3630:  return (int) toTitleCase((char)codePoint);
3631:  return toUpperCase(codePoint);
3632:  }
3633: 
3634:  /**
3635:  * Converts a character into a digit of the specified radix. If the radix
3636:  * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch)
3637:  * exceeds the radix, or if ch is not a decimal digit or in the case
3638:  * insensitive set of 'a'-'z', the result is -1.
3639:  * <br>
3640:  * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3641:  * |U+FF21-U+FF3A|U+FF41-U+FF5A
3642:  *
3643:  * @param ch character to convert into a digit
3644:  * @param radix radix in which ch is a digit
3645:  * @return digit which ch represents in radix, or -1 not a valid digit
3646:  * @see #MIN_RADIX
3647:  * @see #MAX_RADIX
3648:  * @see #forDigit(int, int)
3649:  * @see #isDigit(char)
3650:  * @see #getNumericValue(char)
3651:  */
3652:  public static int digit(char ch, int radix)
3653:  {
3654:  if (radix < MIN_RADIX || radix > MAX_RADIX)
3655:  return -1;
3656:  char attr = readCodePoint((int)ch);
3657:  if (((1 << (attr & TYPE_MASK))
3658:  & ((1 << UPPERCASE_LETTER)
3659:  | (1 << LOWERCASE_LETTER)
3660:  | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
3661:  {
3662:  // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
3663:  int digit = numValue[0][attr >> 7];
3664:  return (digit < radix) ? digit : -1;
3665:  }
3666:  return -1;
3667:  }
3668: 
3669:  /**
3670:  * Converts a character into a digit of the specified radix. If the radix
3671:  * exceeds MIN_RADIX or MAX_RADIX, or if the result of getNumericValue(ch)
3672:  * exceeds the radix, or if ch is not a decimal digit or in the case
3673:  * insensitive set of 'a'-'z', the result is -1.
3674:  * <br>
3675:  * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3676:  * |U+FF21-U+FF3A|U+FF41-U+FF5A
3677:  *
3678:  * @param codePoint character to convert into a digit
3679:  * @param radix radix in which ch is a digit
3680:  * @return digit which ch represents in radix, or -1 not a valid digit
3681:  * @see #MIN_RADIX
3682:  * @see #MAX_RADIX
3683:  * @see #forDigit(int, int)
3684:  * @see #isDigit(char)
3685:  * @see #getNumericValue(char)
3686:  */
3687:  public static int digit(int codePoint, int radix)
3688:  {
3689:  if (radix < MIN_RADIX || radix > MAX_RADIX)
3690:  return -1;
3691:  
3692:  // If the code point is unassigned or in one of the private use areas
3693:  // then we delegate the call to the appropriate private static inner class.
3694:  int plane = codePoint >>> 16;
3695:  if (plane > 2 && plane < 14)
3696:  return UnassignedCharacters.digit(codePoint, radix);
3697:  if (plane > 14)
3698:  return PrivateUseCharacters.digit(codePoint, radix);
3699:  char attr = readCodePoint(codePoint);
3700:  if (((1 << (attr & TYPE_MASK))
3701:  & ((1 << UPPERCASE_LETTER)
3702:  | (1 << LOWERCASE_LETTER)
3703:  | (1 << DECIMAL_DIGIT_NUMBER))) != 0)
3704:  {
3705:  // Signedness doesn't matter; 0xffff vs. -1 are both rejected.
3706:  int digit = numValue[plane][attr >> 7];
3707:  
3708:  // If digit is less than or equal to -3 then the numerical value was 
3709:  // too large to fit into numValue and is stored in CharData.LARGENUMS.
3710:  if (digit <= -3)
3711:  digit = CharData.LARGENUMS[-digit - 3];
3712:  return (digit < radix) ? digit : -1;
3713:  }
3714:  return -1;
3715:  }
3716:  
3717:  /**
3718:  * Returns the Unicode numeric value property of a character. For example,
3719:  * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3720:  *
3721:  * <p>This method also returns values for the letters A through Z, (not
3722:  * specified by Unicode), in these ranges: <code>'\u0041'</code>
3723:  * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3724:  * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3725:  * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3726:  * <code>'\uFF5A'</code> (full width variants).
3727:  *
3728:  * <p>If the character lacks a numeric value property, -1 is returned.
3729:  * If the character has a numeric value property which is not representable
3730:  * as a nonnegative integer, such as a fraction, -2 is returned.
3731:  *
3732:  * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3733:  * |U+FF21-U+FF3A|U+FF41-U+FF5A
3734:  *
3735:  * @param ch character from which the numeric value property will
3736:  * be retrieved
3737:  * @return the numeric value property of ch, or -1 if it does not exist, or
3738:  * -2 if it is not representable as a nonnegative integer
3739:  * @see #forDigit(int, int)
3740:  * @see #digit(char, int)
3741:  * @see #isDigit(char)
3742:  * @since 1.1
3743:  */
3744:  public static int getNumericValue(char ch)
3745:  {
3746:  // Treat numValue as signed.
3747:  return (short) numValue[0][readCodePoint((int)ch) >> 7];
3748:  }
3749:  
3750:  /**
3751:  * Returns the Unicode numeric value property of a character. For example,
3752:  * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3753:  *
3754:  * <p>This method also returns values for the letters A through Z, (not
3755:  * specified by Unicode), in these ranges: <code>'\u0041'</code>
3756:  * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3757:  * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3758:  * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3759:  * <code>'\uFF5A'</code> (full width variants).
3760:  *
3761:  * <p>If the character lacks a numeric value property, -1 is returned.
3762:  * If the character has a numeric value property which is not representable
3763:  * as a nonnegative integer, such as a fraction, -2 is returned.
3764:  *
3765:  * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3766:  * |U+FF21-U+FF3A|U+FF41-U+FF5A
3767:  *
3768:  * @param codePoint character from which the numeric value property will
3769:  * be retrieved
3770:  * @return the numeric value property of ch, or -1 if it does not exist, or
3771:  * -2 if it is not representable as a nonnegative integer
3772:  * @see #forDigit(int, int)
3773:  * @see #digit(char, int)
3774:  * @see #isDigit(char)
3775:  * @since 1.5
3776:  */
3777:  public static int getNumericValue(int codePoint)
3778:  {
3779:  // If the code point is unassigned or in one of the private use areas
3780:  // then we delegate the call to the appropriate private static inner class.
3781:  int plane = codePoint >>> 16;
3782:  if (plane > 2 && plane < 14)
3783:  return UnassignedCharacters.getNumericValue(codePoint);
3784:  if (plane > 14)
3785:  return PrivateUseCharacters.getNumericValue(codePoint);
3786:  
3787:  // If the value N found in numValue[plane] is less than or equal to -3
3788:  // then the numeric value was too big to fit into 16 bits and is 
3789:  // stored in CharData.LARGENUMS at offset (-N - 3).
3790:  short num = (short)numValue[plane][readCodePoint(codePoint) >> 7];
3791:  if (num <= -3)
3792:  return CharData.LARGENUMS[-num - 3];
3793:  return num;
3794:  }
3795: 
3796:  /**
3797:  * Determines if a character is a ISO-LATIN-1 space. This is only the five
3798:  * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
3799:  * <code>'\r'</code>, and <code>' '</code>.
3800:  * <br>
3801:  * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
3802:  *
3803:  * @param ch character to test
3804:  * @return true if ch is a space, else false
3805:  * @deprecated Replaced by {@link #isWhitespace(char)}
3806:  * @see #isSpaceChar(char)
3807:  * @see #isWhitespace(char)
3808:  */
3809:  public static boolean isSpace(char ch)
3810:  {
3811:  // Performing the subtraction up front alleviates need to compare longs.
3812:  return ch-- <= ' ' && ((1 << ch)
3813:  & ((1 << (' ' - 1))
3814:  | (1 << ('\t' - 1))
3815:  | (1 << ('\n' - 1))
3816:  | (1 << ('\r' - 1))
3817:  | (1 << ('\f' - 1)))) != 0;
3818:  }
3819: 
3820:  /**
3821:  * Determines if a character is a Unicode space character. This includes
3822:  * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3823:  * <br>
3824:  * Unicode space = [Zs]|[Zp]|[Zl]
3825:  *
3826:  * @param ch character to test
3827:  * @return true if ch is a Unicode space, else false
3828:  * @see #isWhitespace(char)
3829:  * @since 1.1
3830:  */
3831:  public static boolean isSpaceChar(char ch)
3832:  {
3833:  return isSpaceChar((int)ch);
3834:  }
3835:  
3836:  /**
3837:  * Determines if a character is a Unicode space character. This includes
3838:  * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3839:  * <br>
3840:  * Unicode space = [Zs]|[Zp]|[Zl]
3841:  *
3842:  * @param codePoint character to test
3843:  * @return true if ch is a Unicode space, else false
3844:  * @see #isWhitespace(char)
3845:  * @since 1.5
3846:  */
3847:  public static boolean isSpaceChar(int codePoint)
3848:  {
3849:  return ((1 << getType(codePoint))
3850:  & ((1 << SPACE_SEPARATOR)
3851:  | (1 << LINE_SEPARATOR)
3852:  | (1 << PARAGRAPH_SEPARATOR))) != 0;
3853:  }
3854: 
3855:  /**
3856:  * Determines if a character is Java whitespace. This includes Unicode
3857:  * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3858:  * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3859:  * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3860:  * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3861:  * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3862:  * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3863:  * and <code>'\u001F'</code>.
3864:  * <br>
3865:  * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3866:  *
3867:  * @param ch character to test
3868:  * @return true if ch is Java whitespace, else false
3869:  * @see #isSpaceChar(char)
3870:  * @since 1.1
3871:  */
3872:  public static boolean isWhitespace(char ch)
3873:  {
3874:  return isWhitespace((int) ch);
3875:  }
3876:  
3877:  /**
3878:  * Determines if a character is Java whitespace. This includes Unicode
3879:  * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3880:  * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3881:  * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3882:  * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3883:  * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3884:  * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3885:  * and <code>'\u001F'</code>.
3886:  * <br>
3887:  * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3888:  *
3889:  * @param codePoint character to test
3890:  * @return true if ch is Java whitespace, else false
3891:  * @see #isSpaceChar(char)
3892:  * @since 1.5
3893:  */
3894:  public static boolean isWhitespace(int codePoint)
3895:  {
3896:  int plane = codePoint >>> 16;
3897:  if (plane > 2 && plane < 14)
3898:  return UnassignedCharacters.isWhiteSpace(codePoint);
3899:  if (plane > 14)
3900:  return PrivateUseCharacters.isWhiteSpace(codePoint);
3901:  
3902:  int attr = readCodePoint(codePoint);
3903:  return ((((1 << (attr & TYPE_MASK))
3904:  & ((1 << SPACE_SEPARATOR)
3905:  | (1 << LINE_SEPARATOR)
3906:  | (1 << PARAGRAPH_SEPARATOR))) != 0)
3907:  && (attr & NO_BREAK_MASK) == 0)
3908:  || (codePoint <= '\u001F' && ((1 << codePoint)
3909:  & ((1 << '\t')
3910:  | (1 << '\n')
3911:  | (1 << '\u000B')
3912:  | (1 << '\u000C')
3913:  | (1 << '\r')
3914:  | (1 << '\u001C')
3915:  | (1 << '\u001D')
3916:  | (1 << '\u001E')
3917:  | (1 << '\u001F'))) != 0);
3918:  }
3919: 
3920:  /**
3921:  * Determines if a character has the ISO Control property.
3922:  * <br>
3923:  * ISO Control = [Cc]
3924:  *
3925:  * @param ch character to test
3926:  * @return true if ch is an ISO Control character, else false
3927:  * @see #isSpaceChar(char)
3928:  * @see #isWhitespace(char)
3929:  * @since 1.1
3930:  */
3931:  public static boolean isISOControl(char ch)
3932:  {
3933:  return isISOControl((int)ch);
3934:  }
3935:  
3936:  /**
3937:  * Determines if the character is an ISO Control character. This is true
3938:  * if the code point is in the range [0, 0x001F] or if it is in the range
3939:  * [0x007F, 0x009F].
3940:  * @param codePoint the character to check
3941:  * @return true if the character is in one of the above ranges
3942:  * 
3943:  * @since 1.5
3944:  */
3945:  public static boolean isISOControl(int codePoint)
3946:  {
3947:  if ((codePoint >= 0 && codePoint <= 0x001F)
3948:  || (codePoint >= 0x007F && codePoint <= 0x009F))
3949:  return true;
3950:  return false; 
3951:  }
3952: 
3953:  /**
3954:  * Returns the Unicode general category property of a character.
3955:  *
3956:  * @param ch character from which the general category property will
3957:  * be retrieved
3958:  * @return the character category property of ch as an integer
3959:  * @see #UNASSIGNED
3960:  * @see #UPPERCASE_LETTER
3961:  * @see #LOWERCASE_LETTER
3962:  * @see #TITLECASE_LETTER
3963:  * @see #MODIFIER_LETTER
3964:  * @see #OTHER_LETTER
3965:  * @see #NON_SPACING_MARK
3966:  * @see #ENCLOSING_MARK
3967:  * @see #COMBINING_SPACING_MARK
3968:  * @see #DECIMAL_DIGIT_NUMBER
3969:  * @see #LETTER_NUMBER
3970:  * @see #OTHER_NUMBER
3971:  * @see #SPACE_SEPARATOR
3972:  * @see #LINE_SEPARATOR
3973:  * @see #PARAGRAPH_SEPARATOR
3974:  * @see #CONTROL
3975:  * @see #FORMAT
3976:  * @see #PRIVATE_USE
3977:  * @see #SURROGATE
3978:  * @see #DASH_PUNCTUATION
3979:  * @see #START_PUNCTUATION
3980:  * @see #END_PUNCTUATION
3981:  * @see #CONNECTOR_PUNCTUATION
3982:  * @see #OTHER_PUNCTUATION
3983:  * @see #MATH_SYMBOL
3984:  * @see #CURRENCY_SYMBOL
3985:  * @see #MODIFIER_SYMBOL
3986:  * @see #INITIAL_QUOTE_PUNCTUATION
3987:  * @see #FINAL_QUOTE_PUNCTUATION
3988:  * @since 1.1
3989:  */
3990:  public static int getType(char ch)
3991:  {
3992:  return getType((int)ch);
3993:  }
3994:  
3995:  /**
3996:  * Returns the Unicode general category property of a character.
3997:  *
3998:  * @param codePoint character from which the general category property will
3999:  * be retrieved
4000:  * @return the character category property of ch as an integer
4001:  * @see #UNASSIGNED
4002:  * @see #UPPERCASE_LETTER
4003:  * @see #LOWERCASE_LETTER
4004:  * @see #TITLECASE_LETTER
4005:  * @see #MODIFIER_LETTER
4006:  * @see #OTHER_LETTER
4007:  * @see #NON_SPACING_MARK
4008:  * @see #ENCLOSING_MARK
4009:  * @see #COMBINING_SPACING_MARK
4010:  * @see #DECIMAL_DIGIT_NUMBER
4011:  * @see #LETTER_NUMBER
4012:  * @see #OTHER_NUMBER
4013:  * @see #SPACE_SEPARATOR
4014:  * @see #LINE_SEPARATOR
4015:  * @see #PARAGRAPH_SEPARATOR
4016:  * @see #CONTROL
4017:  * @see #FORMAT
4018:  * @see #PRIVATE_USE
4019:  * @see #SURROGATE
4020:  * @see #DASH_PUNCTUATION
4021:  * @see #START_PUNCTUATION
4022:  * @see #END_PUNCTUATION
4023:  * @see #CONNECTOR_PUNCTUATION
4024:  * @see #OTHER_PUNCTUATION
4025:  * @see #MATH_SYMBOL
4026:  * @see #CURRENCY_SYMBOL
4027:  * @see #MODIFIER_SYMBOL
4028:  * @see #INITIAL_QUOTE_PUNCTUATION
4029:  * @see #FINAL_QUOTE_PUNCTUATION
4030:  * 
4031:  * @since 1.5
4032:  */
4033:  public static int getType(int codePoint)
4034:  {
4035:  // If the codePoint is unassigned or in one of the private use areas
4036:  // then we delegate the call to the appropriate private static inner class.
4037:  int plane = codePoint >>> 16;
4038:  if (plane > 2 && plane < 14)
4039:  return UnassignedCharacters.getType(codePoint);
4040:  if (plane > 14)
4041:  return PrivateUseCharacters.getType(codePoint);
4042:  
4043:  return readCodePoint(codePoint) & TYPE_MASK;
4044:  }
4045: 
4046:  /**
4047:  * Converts a digit into a character which represents that digit
4048:  * in a specified radix. If the radix is outside the range MIN_RADIX to
4049:  * MAX_RADIX, or the digit is negative or not less than the radix, then the
4050:  * null character <code>'\0'</code> is returned. Otherwise the return value is in '0'-'9' or 'a'-'z'.
4051:  * <br>
4052:  * return value boundary = U+0030-U+0039|U+0061-U+007A
4053:  *
4054:  * @param digit digit to be converted into a character
4055:  * @param radix radix of digit
4056:  * @return character representing digit in radix, or '\0'
4057:  * @see #MIN_RADIX
4058:  * @see #MAX_RADIX
4059:  * @see #digit(char, int)
4060:  */
4061:  public static char forDigit(int digit, int radix)
4062:  {
4063:  if (radix < MIN_RADIX || radix > MAX_RADIX
4064:  || digit < 0 || digit >= radix)
4065:  return '0円';
4066:  return Number.digits[digit];
4067:  }
4068: 
4069:  /**
4070:  * Returns the Unicode directionality property of the character. This
4071:  * is used in the visual ordering of text.
4072:  *
4073:  * @param ch the character to look up
4074:  * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4075:  * @see #DIRECTIONALITY_UNDEFINED
4076:  * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4077:  * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4078:  * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4079:  * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4080:  * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4081:  * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4082:  * @see #DIRECTIONALITY_ARABIC_NUMBER
4083:  * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4084:  * @see #DIRECTIONALITY_NONSPACING_MARK
4085:  * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4086:  * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4087:  * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4088:  * @see #DIRECTIONALITY_WHITESPACE
4089:  * @see #DIRECTIONALITY_OTHER_NEUTRALS
4090:  * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4091:  * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4092:  * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4093:  * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4094:  * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4095:  * @since 1.4
4096:  */
4097:  public static byte getDirectionality(char ch)
4098:  {
4099:  // The result will correctly be signed.
4100:  return getDirectionality((int)ch);
4101:  }
4102: 
4103:  
4104:  /**
4105:  * Returns the Unicode directionality property of the character. This
4106:  * is used in the visual ordering of text.
4107:  *
4108:  * @param codePoint the character to look up
4109:  * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
4110:  * @see #DIRECTIONALITY_UNDEFINED
4111:  * @see #DIRECTIONALITY_LEFT_TO_RIGHT
4112:  * @see #DIRECTIONALITY_RIGHT_TO_LEFT
4113:  * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
4114:  * @see #DIRECTIONALITY_EUROPEAN_NUMBER
4115:  * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
4116:  * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
4117:  * @see #DIRECTIONALITY_ARABIC_NUMBER
4118:  * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
4119:  * @see #DIRECTIONALITY_NONSPACING_MARK
4120:  * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
4121:  * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
4122:  * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
4123:  * @see #DIRECTIONALITY_WHITESPACE
4124:  * @see #DIRECTIONALITY_OTHER_NEUTRALS
4125:  * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
4126:  * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
4127:  * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
4128:  * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
4129:  * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
4130:  * @since 1.5
4131:  */
4132:  public static byte getDirectionality(int codePoint)
4133:  {
4134:  // If the code point is unassigned or in one of the private use areas
4135:  // then we delegate the call to the appropriate private static inner class.
4136:  int plane = codePoint >>> 16;
4137:  if (plane > 2 && plane < 14)
4138:  return UnassignedCharacters.getDirectionality(codePoint);
4139:  if (plane > 14)
4140:  return PrivateUseCharacters.getDirectionality(codePoint);
4141:  
4142:  // The result will correctly be signed.
4143:  return (byte) (direction[plane][readCodePoint(codePoint) >> 7] >> 2);
4144:  }
4145:  
4146:  /**
4147:  * Determines whether the character is mirrored according to Unicode. For
4148:  * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4149:  * left-to-right text, but ')' in right-to-left text.
4150:  *
4151:  * @param ch the character to look up
4152:  * @return true if the character is mirrored
4153:  * @since 1.4
4154:  */
4155:  public static boolean isMirrored(char ch)
4156:  {
4157:  return (readCodePoint((int)ch) & MIRROR_MASK) != 0;
4158:  }
4159:  
4160:  /**
4161:  * Determines whether the character is mirrored according to Unicode. For
4162:  * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
4163:  * left-to-right text, but ')' in right-to-left text.
4164:  *
4165:  * @param codePoint the character to look up
4166:  * @return true if the character is mirrored
4167:  * @since 1.5
4168:  */
4169:  public static boolean isMirrored(int codePoint)
4170:  {
4171:  // If the code point is unassigned or part of one of the private use areas
4172:  // then we delegate the call to the appropriate private static inner class.
4173:  int plane = codePoint >>> 16;
4174:  if (plane > 2 && plane < 14)
4175:  return UnassignedCharacters.isMirrored(codePoint);
4176:  if (plane > 14)
4177:  return PrivateUseCharacters.isMirrored(codePoint);
4178:  
4179:  return (readCodePoint(codePoint) & MIRROR_MASK) != 0;
4180:  }
4181: 
4182:  /**
4183:  * Compares another Character to this Character, numerically.
4184:  *
4185:  * @param anotherCharacter Character to compare with this Character
4186:  * @return a negative integer if this Character is less than
4187:  * anotherCharacter, zero if this Character is equal, and
4188:  * a positive integer if this Character is greater
4189:  * @throws NullPointerException if anotherCharacter is null
4190:  * @since 1.2
4191:  */
4192:  public int compareTo(Character anotherCharacter)
4193:  {
4194:  return value - anotherCharacter.value;
4195:  }
4196: 
4197:  /**
4198:  * Returns an <code>Character</code> object wrapping the value.
4199:  * In contrast to the <code>Character</code> constructor, this method
4200:  * will cache some values. It is used by boxing conversion.
4201:  *
4202:  * @param val the value to wrap
4203:  * @return the <code>Character</code>
4204:  *
4205:  * @since 1.5
4206:  */
4207:  public static Character valueOf(char val)
4208:  {
4209:  if (val > MAX_CACHE)
4210:  return new Character(val);
4211:  synchronized (charCache)
4212:  {
4213:  if (charCache[val - MIN_VALUE] == null)
4214:  charCache[val - MIN_VALUE] = new Character(val);
4215:  return charCache[val - MIN_VALUE];
4216:  }
4217:  }
4218: 
4219:  /**
4220:  * Reverse the bytes in val.
4221:  * @since 1.5
4222:  */
4223:  public static char reverseBytes(char val)
4224:  {
4225:  return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
4226:  }
4227: 
4228:  /**
4229:  * Converts a unicode code point to a UTF-16 representation of that
4230:  * code point.
4231:  * 
4232:  * @param codePoint the unicode code point
4233:  *
4234:  * @return the UTF-16 representation of that code point
4235:  *
4236:  * @throws IllegalArgumentException if the code point is not a valid
4237:  * unicode code point
4238:  *
4239:  * @since 1.5
4240:  */
4241:  public static char[] toChars(int codePoint)
4242:  {
4243:  if (!isValidCodePoint(codePoint))
4244:  throw new IllegalArgumentException("Illegal Unicode code point : "
4245:  + codePoint);
4246:  char[] result = new char[charCount(codePoint)];
4247:  int ignore = toChars(codePoint, result, 0);
4248:  return result;
4249:  }
4250: 
4251:  /**
4252:  * Converts a unicode code point to its UTF-16 representation.
4253:  *
4254:  * @param codePoint the unicode code point
4255:  * @param dst the target char array
4256:  * @param dstIndex the start index for the target
4257:  *
4258:  * @return number of characters written to <code>dst</code>
4259:  *
4260:  * @throws IllegalArgumentException if <code>codePoint</code> is not a
4261:  * valid unicode code point
4262:  * @throws NullPointerException if <code>dst</code> is <code>null</code>
4263:  * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
4264:  * in <code>dst</code> or if the UTF-16 representation does not
4265:  * fit into <code>dst</code>
4266:  *
4267:  * @since 1.5
4268:  */
4269:  public static int toChars(int codePoint, char[] dst, int dstIndex)
4270:  {
4271:  if (!isValidCodePoint(codePoint))
4272:  {
4273:  throw new IllegalArgumentException("not a valid code point: "
4274:  + codePoint);
4275:  }
4276: 
4277:  int result;
4278:  if (isSupplementaryCodePoint(codePoint))
4279:  {
4280:  // Write second char first to cause IndexOutOfBoundsException
4281:  // immediately.
4282:  final int cp2 = codePoint - 0x10000;
4283:  dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE);
4284:  dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE);
4285:  result = 2;
4286:  }
4287:  else
4288:  {
4289:  dst[dstIndex] = (char) codePoint;
4290:  result = 1; 
4291:  }
4292:  return result;
4293:  }
4294: 
4295:  /**
4296:  * Return number of 16-bit characters required to represent the given
4297:  * code point.
4298:  *
4299:  * @param codePoint a unicode code point
4300:  *
4301:  * @return 2 if codePoint >= 0x10000, 1 otherwise.
4302:  *
4303:  * @since 1.5
4304:  */
4305:  public static int charCount(int codePoint)
4306:  {
4307:  return 
4308:  (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) 
4309:  ? 2 
4310:  : 1;
4311:  }
4312: 
4313:  /**
4314:  * Determines whether the specified code point is
4315:  * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
4316:  * supplementary character range.
4317:  *
4318:  * @param codePoint a Unicode code point
4319:  *
4320:  * @return <code>true</code> if code point is in supplementary range
4321:  *
4322:  * @since 1.5
4323:  */
4324:  public static boolean isSupplementaryCodePoint(int codePoint)
4325:  {
4326:  return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4327:  && codePoint <= MAX_CODE_POINT;
4328:  }
4329: 
4330:  /**
4331:  * Determines whether the specified code point is
4332:  * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
4333:  *
4334:  * @param codePoint a Unicode code point
4335:  *
4336:  * @return <code>true</code> if code point is valid
4337:  *
4338:  * @since 1.5
4339:  */
4340:  public static boolean isValidCodePoint(int codePoint)
4341:  {
4342:  return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
4343:  }
4344: 
4345:  /**
4346:  * Return true if the given character is a high surrogate.
4347:  * @param ch the character
4348:  * @return true if the character is a high surrogate character
4349:  *
4350:  * @since 1.5
4351:  */
4352:  public static boolean isHighSurrogate(char ch)
4353:  {
4354:  return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
4355:  }
4356: 
4357:  /**
4358:  * Return true if the given character is a low surrogate.
4359:  * @param ch the character
4360:  * @return true if the character is a low surrogate character
4361:  *
4362:  * @since 1.5
4363:  */
4364:  public static boolean isLowSurrogate(char ch)
4365:  {
4366:  return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
4367:  }
4368: 
4369:  /**
4370:  * Return true if the given characters compose a surrogate pair.
4371:  * This is true if the first character is a high surrogate and the
4372:  * second character is a low surrogate.
4373:  * @param ch1 the first character
4374:  * @param ch2 the first character
4375:  * @return true if the characters compose a surrogate pair
4376:  *
4377:  * @since 1.5
4378:  */
4379:  public static boolean isSurrogatePair(char ch1, char ch2)
4380:  {
4381:  return isHighSurrogate(ch1) && isLowSurrogate(ch2);
4382:  }
4383: 
4384:  /**
4385:  * Given a valid surrogate pair, this returns the corresponding
4386:  * code point.
4387:  * @param high the high character of the pair
4388:  * @param low the low character of the pair
4389:  * @return the corresponding code point
4390:  *
4391:  * @since 1.5
4392:  */
4393:  public static int toCodePoint(char high, char low)
4394:  {
4395:  return ((high - MIN_HIGH_SURROGATE) * 0x400) +
4396:  (low - MIN_LOW_SURROGATE) + 0x10000;
4397:  }
4398: 
4399:  /**
4400:  * Get the code point at the specified index in the CharSequence.
4401:  * This is like CharSequence#charAt(int), but if the character is
4402:  * the start of a surrogate pair, and there is a following
4403:  * character, and this character completes the pair, then the
4404:  * corresponding supplementary code point is returned. Otherwise,
4405:  * the character at the index is returned.
4406:  *
4407:  * @param sequence the CharSequence
4408:  * @param index the index of the codepoint to get, starting at 0
4409:  * @return the codepoint at the specified index
4410:  * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4411:  * @since 1.5
4412:  */
4413:  public static int codePointAt(CharSequence sequence, int index)
4414:  {
4415:  int len = sequence.length();
4416:  if (index < 0 || index >= len)
4417:  throw new IndexOutOfBoundsException();
4418:  char high = sequence.charAt(index);
4419:  if (! isHighSurrogate(high) || ++index >= len)
4420:  return high;
4421:  char low = sequence.charAt(index);
4422:  if (! isLowSurrogate(low))
4423:  return high;
4424:  return toCodePoint(high, low);
4425:  }
4426: 
4427:  /**
4428:  * Get the code point at the specified index in the CharSequence.
4429:  * If the character is the start of a surrogate pair, and there is a
4430:  * following character, and this character completes the pair, then
4431:  * the corresponding supplementary code point is returned.
4432:  * Otherwise, the character at the index is returned.
4433:  *
4434:  * @param chars the character array in which to look
4435:  * @param index the index of the codepoint to get, starting at 0
4436:  * @return the codepoint at the specified index
4437:  * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4438:  * @since 1.5
4439:  */
4440:  public static int codePointAt(char[] chars, int index)
4441:  {
4442:  return codePointAt(chars, index, chars.length);
4443:  }
4444: 
4445:  /**
4446:  * Get the code point at the specified index in the CharSequence.
4447:  * If the character is the start of a surrogate pair, and there is a
4448:  * following character within the specified range, and this
4449:  * character completes the pair, then the corresponding
4450:  * supplementary code point is returned. Otherwise, the character
4451:  * at the index is returned.
4452:  *
4453:  * @param chars the character array in which to look
4454:  * @param index the index of the codepoint to get, starting at 0
4455:  * @param limit the limit past which characters should not be examined
4456:  * @return the codepoint at the specified index
4457:  * @throws IndexOutOfBoundsException if index is negative or &gt;=
4458:  * limit, or if limit is negative or &gt;= the length of the array
4459:  * @since 1.5
4460:  */
4461:  public static int codePointAt(char[] chars, int index, int limit)
4462:  {
4463:  if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
4464:  throw new IndexOutOfBoundsException();
4465:  char high = chars[index];
4466:  if (! isHighSurrogate(high) || ++index >= limit)
4467:  return high;
4468:  char low = chars[index];
4469:  if (! isLowSurrogate(low))
4470:  return high;
4471:  return toCodePoint(high, low);
4472:  }
4473: 
4474:  /**
4475:  * Get the code point before the specified index. This is like
4476:  * #codePointAt(char[], int), but checks the characters at
4477:  * <code>index-1</code> and <code>index-2</code> to see if they form
4478:  * a supplementary code point. If they do not, the character at
4479:  * <code>index-1</code> is returned.
4480:  *
4481:  * @param chars the character array
4482:  * @param index the index just past the codepoint to get, starting at 0
4483:  * @return the codepoint at the specified index
4484:  * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4485:  * @since 1.5
4486:  */
4487:  public static int codePointBefore(char[] chars, int index)
4488:  {
4489:  return codePointBefore(chars, index, 1);
4490:  }
4491: 
4492:  /**
4493:  * Get the code point before the specified index. This is like
4494:  * #codePointAt(char[], int), but checks the characters at
4495:  * <code>index-1</code> and <code>index-2</code> to see if they form
4496:  * a supplementary code point. If they do not, the character at
4497:  * <code>index-1</code> is returned. The start parameter is used to
4498:  * limit the range of the array which may be examined.
4499:  *
4500:  * @param chars the character array
4501:  * @param index the index just past the codepoint to get, starting at 0
4502:  * @param start the index before which characters should not be examined
4503:  * @return the codepoint at the specified index
4504:  * @throws IndexOutOfBoundsException if index is &gt; start or &gt;
4505:  * the length of the array, or if limit is negative or &gt;= the
4506:  * length of the array
4507:  * @since 1.5
4508:  */
4509:  public static int codePointBefore(char[] chars, int index, int start)
4510:  {
4511:  if (index < start || index > chars.length
4512:  || start < 0 || start >= chars.length)
4513:  throw new IndexOutOfBoundsException();
4514:  --index;
4515:  char low = chars[index];
4516:  if (! isLowSurrogate(low) || --index < start)
4517:  return low;
4518:  char high = chars[index];
4519:  if (! isHighSurrogate(high))
4520:  return low;
4521:  return toCodePoint(high, low);
4522:  }
4523: 
4524:  /**
4525:  * Get the code point before the specified index. This is like
4526:  * #codePointAt(CharSequence, int), but checks the characters at
4527:  * <code>index-1</code> and <code>index-2</code> to see if they form
4528:  * a supplementary code point. If they do not, the character at
4529:  * <code>index-1</code> is returned.
4530:  *
4531:  * @param sequence the CharSequence
4532:  * @param index the index just past the codepoint to get, starting at 0
4533:  * @return the codepoint at the specified index
4534:  * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
4535:  * @since 1.5
4536:  */
4537:  public static int codePointBefore(CharSequence sequence, int index)
4538:  {
4539:  int len = sequence.length();
4540:  if (index < 1 || index > len)
4541:  throw new IndexOutOfBoundsException();
4542:  --index;
4543:  char low = sequence.charAt(index);
4544:  if (! isLowSurrogate(low) || --index < 0)
4545:  return low;
4546:  char high = sequence.charAt(index);
4547:  if (! isHighSurrogate(high))
4548:  return low;
4549:  return toCodePoint(high, low);
4550:  }
4551: } // class Character
Overview Package Class Use Source Tree Index Deprecated About
GNU Classpath (0.95)