[フレーム]

unicode_norm.c

Go to the documentation of this file.

/*-------------------------------------------------------------------------
 * unicode_norm.c
 *		Normalize a Unicode string
 *
 * This implements Unicode normalization, per the documentation at
 * https://www.unicode.org/reports/tr15/.
 *
 *
 * IDENTIFICATION
 *	  src/common/unicode_norm.c
 *
 *-------------------------------------------------------------------------
 */

15#ifndef FRONTEND

16#include "postgres.h"

17#else

18#include "postgres_fe.h"

19#endif

20

21#include "common/unicode_norm.h"

22#ifndef FRONTEND

23#include "common/unicode_norm_hashfunc.h"

24#include "common/unicode_normprops_table.h"

25#include "port/pg_bswap.h"

26#else

27#include "common/unicode_norm_table.h"

28#endif

29

/*
 * Allocation wrappers.  When FRONTEND is not defined the palloc()/pfree()
 * pair is used; otherwise plain malloc()/free() is used.
 *
 * ALLOC(size) takes a byte count; FREE(ptr) takes the pointer to release.
 */
#ifndef FRONTEND
#define ALLOC(size) palloc(size)
#define FREE(ptr) pfree(ptr)
#else
#define ALLOC(size) malloc(size)
#define FREE(ptr) free(ptr)
#endif

37

/*
 * Constants for calculations with Hangul characters.
 *
 * The *BASE values are the starting codepoints used by the algorithmic
 * Hangul composition/decomposition in this file, and the *COUNT values are
 * the sizes of the corresponding index ranges.  The derived counts are
 * parenthesized so that they expand safely inside any expression (for
 * example as the right-hand operand of '/' or '%').
 */
#define SBASE		0xAC00		/* U+AC00 */
#define LBASE		0x1100		/* U+1100 */
#define VBASE		0x1161		/* U+1161 */
#define TBASE		0x11A7		/* U+11A7 */
#define LCOUNT		19
#define VCOUNT		21
#define TCOUNT		28
#define NCOUNT		(VCOUNT * TCOUNT)
#define SCOUNT		(LCOUNT * NCOUNT)

48

#ifdef FRONTEND
/*
 * Comparison routine for bsearch() of the decomposition lookup table.
 *
 * p1 points to the search key, a uint32 codepoint; p2 points to a
 * pg_unicode_decomposition element of the table.  Returns 1, 0 or -1
 * depending on whether the key is greater than, equal to, or less than the
 * element's codepoint.
 */
static int
conv_compare(const void *p1, const void *p2)
{
	uint32		v1,
				v2;

	v1 = *(const uint32 *) p1;
	v2 = ((const pg_unicode_decomposition *) p2)->codepoint;
	return (v1 > v2) ? 1 : ((v1 == v2) ? 0 : -1);
}

#endif

63

64/*

65 * get_code_entry

66 *

67 * Get the entry corresponding to code in the decomposition lookup table.

68 * The backend version of this code uses a perfect hash function for the

69 * lookup, while the frontend version uses a binary search.

70 */

71static const pg_unicode_decomposition *

72 get_code_entry(pg_wchar code)

73{

74#ifndef FRONTEND

75 int h;

76 uint32 hashkey;

77 pg_unicode_decompinfo decompinfo = UnicodeDecompInfo;

78

79 /*

80 * Compute the hash function. The hash key is the codepoint with the bytes

81 * in network order.

82 */

83 hashkey = pg_hton32(code);

84 h = decompinfo.hash(&hashkey);

85

86 /* An out-of-range result implies no match */

87 if (h < 0 || h >= decompinfo.num_decomps)

88 return NULL;

89

90 /*

91 * Since it's a perfect hash, we need only match to the specific codepoint

92 * it identifies.

93 */

94 if (code != decompinfo.decomps[h].codepoint)

95 return NULL;

96

97 /* Success! */

98 return &decompinfo.decomps[h];

99#else

100 return bsearch(&(code),

101 UnicodeDecompMain,

102 lengthof(UnicodeDecompMain),

103 sizeof(pg_unicode_decomposition),

104 conv_compare);

105#endif

106}

107

108/*

109 * Get the combining class of the given codepoint.

110 */

111static uint8

112 get_canonical_class(pg_wchar code)

113{

114 const pg_unicode_decomposition *entry = get_code_entry(code);

115

116 /*

117 * If no entries are found, the character used is either an Hangul

118 * character or a character with a class of 0 and no decompositions.

119 */

120 if (!entry)

121 return 0;

122 else

123 return entry->comb_class;

124}

125

126/*

127 * Given a decomposition entry looked up earlier, get the decomposed

128 * characters.

129 *

130 * Note: the returned pointer can point to statically allocated buffer, and

131 * is only valid until next call to this function!

132 */

133static const pg_wchar *

134 get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)

135{

136 static pg_wchar x;

137

138 if (DECOMPOSITION_IS_INLINE(entry))

139 {

140 Assert(DECOMPOSITION_SIZE(entry) == 1);

141 x = (pg_wchar) entry->dec_index;

142 *dec_size = 1;

143 return &x;

144 }

145 else

146 {

147 *dec_size = DECOMPOSITION_SIZE(entry);

148 return &UnicodeDecomp_codepoints[entry->dec_index];

149 }

150}

151

152/*

153 * Calculate how many characters a given character will decompose to.

154 *

155 * This needs to recurse, if the character decomposes into characters that

156 * are, in turn, decomposable.

157 */

158static int

159 get_decomposed_size(pg_wchar code, bool compat)

160{

161 const pg_unicode_decomposition *entry;

162 int size = 0;

163 int i;

164 const uint32 *decomp;

165 int dec_size;

166

167 /*

168 * Fast path for Hangul characters not stored in tables to save memory as

169 * decomposition is algorithmic. See

170 * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details

171 * on the matter.

172 */

173 if (code >= SBASE && code < SBASE + SCOUNT)

174 {

175 uint32 tindex,

176 sindex;

177

178 sindex = code - SBASE;

179 tindex = sindex % TCOUNT;

180

181 if (tindex != 0)

182 return 3;

183 return 2;

184 }

185

186 entry = get_code_entry(code);

187

188 /*

189 * Just count current code if no other decompositions. A NULL entry is

190 * equivalent to a character with class 0 and no decompositions.

191 */

192 if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||

193 (!compat && DECOMPOSITION_IS_COMPAT(entry)))

194 return 1;

195

196 /*

197 * If this entry has other decomposition codes look at them as well. First

198 * get its decomposition in the list of tables available.

199 */

200 decomp = get_code_decomposition(entry, &dec_size);

201 for (i = 0; i < dec_size; i++)

202 {

203 uint32 lcode = decomp[i];

204

205 size += get_decomposed_size(lcode, compat);

206 }

207

208 return size;

209}

210

211/*

212 * Recompose a set of characters. For hangul characters, the calculation

213 * is algorithmic. For others, an inverse lookup at the decomposition

214 * table is necessary. Returns true if a recomposition can be done, and

215 * false otherwise.

216 */

217static bool

218 recompose_code(uint32 start, uint32 code, uint32 *result)

219{

220 /*

221 * Handle Hangul characters algorithmically, per the Unicode spec.

222 *

223 * Check if two current characters are L and V.

224 */

225 if (start >= LBASE && start < LBASE + LCOUNT &&

226 code >= VBASE && code < VBASE + VCOUNT)

227 {

228 /* make syllable of form LV */

229 uint32 lindex = start - LBASE;

230 uint32 vindex = code - VBASE;

231

232 *result = SBASE + (lindex * VCOUNT + vindex) * TCOUNT;

233 return true;

234 }

235 /* Check if two current characters are LV and T */

236 else if (start >= SBASE && start < (SBASE + SCOUNT) &&

237 ((start - SBASE) % TCOUNT) == 0 &&

238 code >= TBASE && code < (TBASE + TCOUNT))

239 {

240 /* make syllable of form LVT */

241 uint32 tindex = code - TBASE;

242

243 *result = start + tindex;

244 return true;

245 }

246 else

247 {

248 const pg_unicode_decomposition *entry;

249

250 /*

251 * Do an inverse lookup of the decomposition tables to see if anything

252 * matches. The comparison just needs to be a perfect match on the

253 * sub-table of size two, because the start character has already been

254 * recomposed partially. This lookup uses a perfect hash function for

255 * the backend code.

256 */

257#ifndef FRONTEND

258

259 int h,

260 inv_lookup_index;

261 uint64 hashkey;

262 pg_unicode_recompinfo recompinfo = UnicodeRecompInfo;

263

264 /*

265 * Compute the hash function. The hash key is formed by concatenating

266 * bytes of the two codepoints in network order. See also

267 * src/common/unicode/generate-unicode_norm_table.pl.

268 */

269 hashkey = pg_hton64(((uint64) start << 32) | (uint64) code);

270 h = recompinfo.hash(&hashkey);

271

272 /* An out-of-range result implies no match */

273 if (h < 0 || h >= recompinfo.num_recomps)

274 return false;

275

276 inv_lookup_index = recompinfo.inverse_lookup[h];

277 entry = &UnicodeDecompMain[inv_lookup_index];

278

279 if (start == UnicodeDecomp_codepoints[entry->dec_index] &&

280 code == UnicodeDecomp_codepoints[entry->dec_index + 1])

281 {

282 *result = entry->codepoint;

283 return true;

284 }

285

286#else

287

288 int i;

289

290 for (i = 0; i < lengthof(UnicodeDecompMain); i++)

291 {

292 entry = &UnicodeDecompMain[i];

293

294 if (DECOMPOSITION_SIZE(entry) != 2)

295 continue;

296

297 if (DECOMPOSITION_NO_COMPOSE(entry))

298 continue;

299

300 if (start == UnicodeDecomp_codepoints[entry->dec_index] &&

301 code == UnicodeDecomp_codepoints[entry->dec_index + 1])

302 {

303 *result = entry->codepoint;

304 return true;

305 }

306 }

307#endif /* !FRONTEND */

308 }

309

310 return false;

311}

312

313/*

314 * Decompose the given code into the array given by caller. The

315 * decomposition begins at the position given by caller, saving one

316 * lookup on the decomposition table. The current position needs to be

317 * updated here to let the caller know from where to continue filling

318 * in the array result.

319 */

320static void

321 decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)

322{

323 const pg_unicode_decomposition *entry;

324 int i;

325 const uint32 *decomp;

326 int dec_size;

327

328 /*

329 * Fast path for Hangul characters not stored in tables to save memory as

330 * decomposition is algorithmic. See

331 * https://www.unicode.org/reports/tr15/tr15-18.html, annex 10 for details

332 * on the matter.

333 */

334 if (code >= SBASE && code < SBASE + SCOUNT)

335 {

336 uint32 l,

337 v,

338 tindex,

339 sindex;

340 pg_wchar *res = *result;

341

342 sindex = code - SBASE;

343 l = LBASE + sindex / (VCOUNT * TCOUNT);

344 v = VBASE + (sindex % (VCOUNT * TCOUNT)) / TCOUNT;

345 tindex = sindex % TCOUNT;

346

347 res[*current] = l;

348 (*current)++;

349 res[*current] = v;

350 (*current)++;

351

352 if (tindex != 0)

353 {

354 res[*current] = TBASE + tindex;

355 (*current)++;

356 }

357

358 return;

359 }

360

361 entry = get_code_entry(code);

362

363 /*

364 * Just fill in with the current decomposition if there are no

365 * decomposition codes to recurse to. A NULL entry is equivalent to a

366 * character with class 0 and no decompositions, so just leave also in

367 * this case.

368 */

369 if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||

370 (!compat && DECOMPOSITION_IS_COMPAT(entry)))

371 {

372 pg_wchar *res = *result;

373

374 res[*current] = code;

375 (*current)++;

376 return;

377 }

378

379 /*

380 * If this entry has other decomposition codes look at them as well.

381 */

382 decomp = get_code_decomposition(entry, &dec_size);

383 for (i = 0; i < dec_size; i++)

384 {

385 pg_wchar lcode = (pg_wchar) decomp[i];

386

387 /* Leave if no more decompositions */

388 decompose_code(lcode, compat, result, current);

389 }

390}

391

392/*

393 * unicode_normalize - Normalize a Unicode string to the specified form.

394 *

395 * The input is a 0-terminated array of codepoints.

396 *

397 * In frontend, returns a 0-terminated array of codepoints, allocated with

398 * malloc. Or NULL if we run out of memory. In backend, the returned

399 * string is palloc'd instead, and OOM is reported with ereport().

400 */

401pg_wchar *

402 unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)

403{

404 bool compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);

405 bool recompose = (form == UNICODE_NFC || form == UNICODE_NFKC);

406 pg_wchar *decomp_chars;

407 pg_wchar *recomp_chars;

408 int decomp_size,

409 current_size;

410 int count;

411 const pg_wchar *p;

412

413 /* variables for recomposition */

414 int last_class;

415 int starter_pos;

416 int target_pos;

417 uint32 starter_ch;

418

419 /* First, do character decomposition */

420

421 /*

422 * Calculate how many characters long the decomposed version will be.

423 */

424 decomp_size = 0;

425 for (p = input; *p; p++)

426 decomp_size += get_decomposed_size(*p, compat);

427

428 decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));

429 if (decomp_chars == NULL)

430 return NULL;

431

432 /*

433 * Now fill in each entry recursively. This needs a second pass on the

434 * decomposition table.

435 */

436 current_size = 0;

437 for (p = input; *p; p++)

438 decompose_code(*p, compat, &decomp_chars, &current_size);

439 decomp_chars[decomp_size] = '0円';

440 Assert(decomp_size == current_size);

441

442 /* Leave if there is nothing to decompose */

443 if (decomp_size == 0)

444 return decomp_chars;

445

446 /*

447 * Now apply canonical ordering.

448 */

449 for (count = 1; count < decomp_size; count++)

450 {

451 pg_wchar prev = decomp_chars[count - 1];

452 pg_wchar next = decomp_chars[count];

453 pg_wchar tmp;

454 const uint8 prevClass = get_canonical_class(prev);

455 const uint8 nextClass = get_canonical_class(next);

456

457 /*

458 * Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html)

459 * annex 4, a sequence of two adjacent characters in a string is an

460 * exchangeable pair if the combining class (from the Unicode

461 * Character Database) for the first character is greater than the

462 * combining class for the second, and the second is not a starter. A

463 * character is a starter if its combining class is 0.

464 */

465 if (prevClass == 0 || nextClass == 0)

466 continue;

467

468 if (prevClass <= nextClass)

469 continue;

470

471 /* exchange can happen */

472 tmp = decomp_chars[count - 1];

473 decomp_chars[count - 1] = decomp_chars[count];

474 decomp_chars[count] = tmp;

475

476 /* backtrack to check again */

477 if (count > 1)

478 count -= 2;

479 }

480

481 if (!recompose)

482 return decomp_chars;

483

484 /*

485 * The last phase of NFC and NFKC is the recomposition of the reordered

486 * Unicode string using combining classes. The recomposed string cannot be

487 * longer than the decomposed one, so make the allocation of the output

488 * string based on that assumption.

489 */

490 recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));

491 if (!recomp_chars)

492 {

493 FREE(decomp_chars);

494 return NULL;

495 }

496

497 last_class = -1; /* this eliminates a special check */

498 starter_pos = 0;

499 target_pos = 1;

500 starter_ch = recomp_chars[0] = decomp_chars[0];

501

502 for (count = 1; count < decomp_size; count++)

503 {

504 pg_wchar ch = decomp_chars[count];

505 int ch_class = get_canonical_class(ch);

506 pg_wchar composite;

507

508 if (last_class < ch_class &&

509 recompose_code(starter_ch, ch, &composite))

510 {

511 recomp_chars[starter_pos] = composite;

512 starter_ch = composite;

513 }

514 else if (ch_class == 0)

515 {

516 starter_pos = target_pos;

517 starter_ch = ch;

518 last_class = -1;

519 recomp_chars[target_pos++] = ch;

520 }

521 else

522 {

523 last_class = ch_class;

524 recomp_chars[target_pos++] = ch;

525 }

526 }

527 recomp_chars[target_pos] = (pg_wchar) '0円';

528

529 FREE(decomp_chars);

530

531 return recomp_chars;

532}

533

/*
 * Normalization "quick check" algorithm; see
 * <http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms>
 */

/* We only need this in the backend. */

540#ifndef FRONTEND

541

542static const pg_unicode_normprops *

543 qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)

544{

545 int h;

546 uint32 hashkey;

547

548 /*

549 * Compute the hash function. The hash key is the codepoint with the bytes

550 * in network order.

551 */

552 hashkey = pg_hton32(ch);

553 h = norminfo->hash(&hashkey);

554

555 /* An out-of-range result implies no match */

556 if (h < 0 || h >= norminfo->num_normprops)

557 return NULL;

558

559 /*

560 * Since it's a perfect hash, we need only match to the specific codepoint

561 * it identifies.

562 */

563 if (ch != norminfo->normprops[h].codepoint)

564 return NULL;

565

566 /* Success! */

567 return &norminfo->normprops[h];

568}

569

570/*

571 * Look up the normalization quick check character property

572 */

573static UnicodeNormalizationQC

574 qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)

575{

576 const pg_unicode_normprops *found = NULL;

577

578 switch (form)

579 {

580 case UNICODE_NFC:

581 found = qc_hash_lookup(ch, &UnicodeNormInfo_NFC_QC);

582 break;

583 case UNICODE_NFKC:

584 found = qc_hash_lookup(ch, &UnicodeNormInfo_NFKC_QC);

585 break;

586 default:

587 Assert(false);

588 break;

589 }

590

591 if (found)

592 return found->quickcheck;

593 else

594 return UNICODE_NORM_QC_YES;

595}

596

597UnicodeNormalizationQC

598 unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)

599{

600 uint8 lastCanonicalClass = 0;

601 UnicodeNormalizationQC result = UNICODE_NORM_QC_YES;

602

603 /*

604 * For the "D" forms, we don't run the quickcheck. We don't include the

605 * lookup tables for those because they are huge, checking for these

606 * particular forms is less common, and running the slow path is faster

607 * for the "D" forms than the "C" forms because you don't need to

608 * recompose, which is slow.

609 */

610 if (form == UNICODE_NFD || form == UNICODE_NFKD)

611 return UNICODE_NORM_QC_MAYBE;

612

613 for (const pg_wchar *p = input; *p; p++)

614 {

615 pg_wchar ch = *p;

616 uint8 canonicalClass;

617 UnicodeNormalizationQC check;

618

619 canonicalClass = get_canonical_class(ch);

620 if (lastCanonicalClass > canonicalClass && canonicalClass != 0)

621 return UNICODE_NORM_QC_NO;

622

623 check = qc_is_allowed(form, ch);

624 if (check == UNICODE_NORM_QC_NO)

625 return UNICODE_NORM_QC_NO;

626 else if (check == UNICODE_NORM_QC_MAYBE)

627 result = UNICODE_NORM_QC_MAYBE;

628

629 lastCanonicalClass = canonicalClass;

630 }

631 return result;

632}

633

634#endif /* !FRONTEND */

Definition: blutils.c:224

uint8

uint8_t uint8

Definition: c.h:536

uint64

uint64_t uint64

Definition: c.h:539

uint32

uint32_t uint32

Definition: c.h:538

lengthof

#define lengthof(array)

Definition: c.h:787

compat

enum COMPAT_MODE compat

Definition: ecpg.c:26

Assert

Assert(PointerIsAligned(start, uint64))

start

return str start

Definition: hashfn_unstable.h:282

input

FILE * input

x

int x

Definition: isn.c:75

i

int i

Definition: isn.c:77

pg_wchar

unsigned int pg_wchar

Definition: mbprint.c:31

pg_bswap.h

pg_hton32

#define pg_hton32(x)

Definition: pg_bswap.h:121

pg_hton64

#define pg_hton64(x)

Definition: pg_bswap.h:122

current_size

static int64 current_size

Definition: pg_checksums.c:63

postgres.h

postgres_fe.h

pg_unicode_decompinfo

Definition: unicode_norm_hashfunc.h:27

pg_unicode_decompinfo::num_decomps

int num_decomps

Definition: unicode_norm_hashfunc.h:30

pg_unicode_decompinfo::hash

cp_hash_func hash

Definition: unicode_norm_hashfunc.h:29

pg_unicode_decompinfo::decomps

const pg_unicode_decomposition * decomps

Definition: unicode_norm_hashfunc.h:28

pg_unicode_decomposition

Definition: unicode_norm_table.h:20

pg_unicode_decomposition::codepoint

uint32 codepoint

Definition: unicode_norm_table.h:21

pg_unicode_decomposition::dec_index

uint16 dec_index

Definition: unicode_norm_table.h:24

pg_unicode_decomposition::comb_class

uint8 comb_class

Definition: unicode_norm_table.h:22

pg_unicode_norminfo

Definition: unicode_normprops_table.h:20

pg_unicode_norminfo::hash

qc_hash_func hash

Definition: unicode_normprops_table.h:22

pg_unicode_norminfo::num_normprops

int num_normprops

Definition: unicode_normprops_table.h:23

pg_unicode_norminfo::normprops

const pg_unicode_normprops * normprops

Definition: unicode_normprops_table.h:21

pg_unicode_normprops

Definition: unicode_normprops_table.h:10

pg_unicode_normprops::quickcheck

signed int quickcheck

Definition: unicode_normprops_table.h:12

pg_unicode_normprops::codepoint

unsigned int codepoint

Definition: unicode_normprops_table.h:11

pg_unicode_recompinfo

Definition: unicode_norm_hashfunc.h:34

pg_unicode_recompinfo::num_recomps

int num_recomps

Definition: unicode_norm_hashfunc.h:37

pg_unicode_recompinfo::hash

cp_hash_func hash

Definition: unicode_norm_hashfunc.h:36

pg_unicode_recompinfo::inverse_lookup

const uint16 * inverse_lookup

Definition: unicode_norm_hashfunc.h:35

TCOUNT

#define TCOUNT

Definition: unicode_norm.c:45

TBASE

#define TBASE

Definition: unicode_norm.c:42

get_code_entry

static const pg_unicode_decomposition * get_code_entry(pg_wchar code)

Definition: unicode_norm.c:72

decompose_code

static void decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)

Definition: unicode_norm.c:321

unicode_is_normalized_quickcheck

UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)

Definition: unicode_norm.c:598

VBASE

#define VBASE

Definition: unicode_norm.c:41

VCOUNT

#define VCOUNT

Definition: unicode_norm.c:44

get_code_decomposition

static const pg_wchar * get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)

Definition: unicode_norm.c:134

get_canonical_class

static uint8 get_canonical_class(pg_wchar code)

Definition: unicode_norm.c:112

LBASE

#define LBASE

Definition: unicode_norm.c:40

qc_is_allowed

static UnicodeNormalizationQC qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)

Definition: unicode_norm.c:574

unicode_normalize

pg_wchar * unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)

Definition: unicode_norm.c:402

LCOUNT

#define LCOUNT

Definition: unicode_norm.c:43

ALLOC

#define ALLOC(size)

Definition: unicode_norm.c:31

FREE

#define FREE(size)

Definition: unicode_norm.c:32

SBASE

#define SBASE

Definition: unicode_norm.c:39

SCOUNT

#define SCOUNT

Definition: unicode_norm.c:47

qc_hash_lookup

static const pg_unicode_normprops * qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)

Definition: unicode_norm.c:543

recompose_code

static bool recompose_code(uint32 start, uint32 code, uint32 *result)

Definition: unicode_norm.c:218

get_decomposed_size

static int get_decomposed_size(pg_wchar code, bool compat)

Definition: unicode_norm.c:159

unicode_norm.h

UnicodeNormalizationForm

Definition: unicode_norm.h:20

UNICODE_NFKD

@ UNICODE_NFKD

Definition: unicode_norm.h:24

UNICODE_NFD

@ UNICODE_NFD

Definition: unicode_norm.h:22

UNICODE_NFC

@ UNICODE_NFC

Definition: unicode_norm.h:21

UNICODE_NFKC

@ UNICODE_NFKC

Definition: unicode_norm.h:23

UnicodeNormalizationQC

Definition: unicode_norm.h:29

UNICODE_NORM_QC_YES

@ UNICODE_NORM_QC_YES

Definition: unicode_norm.h:31

UNICODE_NORM_QC_NO

@ UNICODE_NORM_QC_NO

Definition: unicode_norm.h:30

UNICODE_NORM_QC_MAYBE

@ UNICODE_NORM_QC_MAYBE

Definition: unicode_norm.h:32

unicode_norm_hashfunc.h

UnicodeDecompInfo

static const pg_unicode_decompinfo UnicodeDecompInfo

Definition: unicode_norm_hashfunc.h:1774

UnicodeRecompInfo

static const pg_unicode_recompinfo UnicodeRecompInfo

Definition: unicode_norm_hashfunc.h:3011

unicode_norm_table.h

DECOMPOSITION_NO_COMPOSE

#define DECOMPOSITION_NO_COMPOSE(x)

Definition: unicode_norm_table.h:33

UnicodeDecomp_codepoints

static const uint32 UnicodeDecomp_codepoints[5138]

Definition: unicode_norm_table.h:6887

DECOMPOSITION_IS_INLINE

#define DECOMPOSITION_IS_INLINE(x)

Definition: unicode_norm_table.h:34

UnicodeDecompMain

static const pg_unicode_decomposition UnicodeDecompMain[6843]

Definition: unicode_norm_table.h:38

DECOMPOSITION_IS_COMPAT

#define DECOMPOSITION_IS_COMPAT(x)

Definition: unicode_norm_table.h:35

DECOMPOSITION_SIZE

#define DECOMPOSITION_SIZE(x)

Definition: unicode_norm_table.h:32

unicode_normprops_table.h

UnicodeNormInfo_NFKC_QC

static const pg_unicode_norminfo UnicodeNormInfo_NFKC_QC

Definition: unicode_normprops_table.h:8021

UnicodeNormInfo_NFC_QC

static const pg_unicode_norminfo UnicodeNormInfo_NFC_QC

Definition: unicode_normprops_table.h:1618

PostgreSQL Source Code: src/common/unicode_norm.c Source File