Include dependency graph for wchar.c:
Go to the source code of this file.
#define P3A 6 /* Lead was E0, check for 3-byte overlong */
#define P3B 20 /* Lead was ED, check for surrogate */
#define P4A 25 /* Lead was F0, check for 4-byte overlong */
#define P4B 30 /* Lead was F4, check for too-large */
Macro Definition Documentation
◆ ASC
◆ BGN
◆ CR1
◆ CR2
◆ CR3
◆ CS1
◆ CS2
◆ CS3
◆ END
◆ ERR
◆ ILL
◆ IS_EUC_RANGE_VALID
#define IS_EUC_RANGE_VALID(c) ((c) >= 0xa1 && (c) <= 0xfe)
◆ L2A
◆ L3A
◆ L3B
◆ L3C
◆ L4A
◆ L4B
◆ L4C
◆ NONUTF8_INVALID_BYTE0
#define NONUTF8_INVALID_BYTE0 (0x8d)
◆ NONUTF8_INVALID_BYTE1
#define NONUTF8_INVALID_BYTE1 (' ')
◆ P3A
#define P3A 6 /* Lead was E0, check for 3-byte overlong */
◆ P3B
#define P3B 20 /* Lead was ED, check for surrogate */
◆ P4A
#define P4A 25 /* Lead was F0, check for 4-byte overlong */
◆ P4B
#define P4B 30 /* Lead was F4, check for too-large */
◆ pg_euccn_verifychar
◆ pg_euccn_verifystr
◆ STRIDE_LENGTH
#define STRIDE_LENGTH (2 * sizeof(Vector8))
Function Documentation
◆ mbbisearch()
Definition at line 581 of file wchar.c.
582{
583 int min = 0;
584 int mid;
585
586 if (ucs < table[0].first || ucs > table[max].last)
587 return 0;
588 while (max >= min)
589 {
590 mid = (min + max) / 2;
591 if (ucs > table[mid].last)
592 min = mid + 1;
593 else if (ucs < table[mid].first)
594 max = mid - 1;
595 else
596 return 1;
597 }
598
599 return 0;
600}
static const struct lconv_member_info table[]
References table.
Referenced by ucs_wcwidth().
◆ pg_ascii2wchar_with_len()
static int pg_ascii2wchar_with_len
(
const unsigned char *
from,
int
len
)
static
Definition at line 70 of file wchar.c.
71{
72 int cnt = 0;
73
74 while (
len > 0 && *from)
75 {
76 *to++ = *from++;
78 cnt++;
79 }
80 *to = 0;
81 return cnt;
82}
References len.
◆ pg_ascii_dsplen()
static int pg_ascii_dsplen
(
const unsigned char *
s )
static
◆ pg_ascii_mblen()
static int pg_ascii_mblen
(
const unsigned char *
s )
static
◆ pg_ascii_verifychar()
static int pg_ascii_verifychar
(
const unsigned char *
s,
int
len
)
static
Definition at line 1063 of file wchar.c.
1064{
1065 return 1;
1066}
◆ pg_ascii_verifystr()
static int pg_ascii_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1069 of file wchar.c.
1070{
1071 const unsigned char *nullpos = memchr(s, 0, len);
1072
1073 if (nullpos == NULL)
1075 else
1076 return nullpos - s;
1077}
References len.
◆ pg_big5_dsplen()
static int pg_big5_dsplen
(
const unsigned char *
s )
static
◆ pg_big5_mblen()
static int pg_big5_mblen
(
const unsigned char *
s )
static
◆ pg_big5_verifychar()
static int pg_big5_verifychar
(
const unsigned char *
s,
int
len
)
static
Definition at line 1479 of file wchar.c.
1480{
1481 int l,
1482 mbl;
1483
1485
1487 return -1;
1488
1489 if (l == 2 &&
1492 return -1;
1493
1494 while (--l > 0)
1495 {
1496 if (*++s == '\0')
1497 return -1;
1498 }
1499
1500 return mbl;
1501}
#define NONUTF8_INVALID_BYTE0
static int pg_big5_mblen(const unsigned char *s)
#define NONUTF8_INVALID_BYTE1
References len, NONUTF8_INVALID_BYTE0, NONUTF8_INVALID_BYTE1, and pg_big5_mblen().
Referenced by pg_big5_verifystr().
◆ pg_big5_verifystr()
static int pg_big5_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1504 of file wchar.c.
1505{
1506 const unsigned char *
start = s;
1507
1509 {
1510 int l;
1511
1512 /* fast path for ASCII-subset characters */
1514 {
1515 if (*s == '\0')
1516 break;
1517 l = 1;
1518 }
1519 else
1520 {
1522 if (l == -1)
1523 break;
1524 }
1525 s += l;
1527 }
1528
1530}
static int pg_big5_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_big5_verifychar(), and start.
◆ pg_encoding_dsplen()
int pg_encoding_dsplen
(
int
encoding,
const char *
mbstr
)
◆ pg_encoding_max_length()
int pg_encoding_max_length
(
int
encoding )
Definition at line 2213 of file wchar.c.
2214{
2216
2217 /*
2218 * Check for the encoding despite the assert, due to some mingw versions
2219 * otherwise issuing bogus warnings.
2220 */
2224}
Assert(PointerIsAligned(start, uint64))
References Assert(), encoding, pg_wchar_tbl::maxmblen, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by ascii(), chr(), CopyConvertBuf(), pg_encoding_mbcliplen(), pg_encoding_set_invalid(), pg_verify_mbstr_len(), reportErrorPosition(), test_enc_setup(), and type_maximum_size().
◆ pg_encoding_mblen()
int pg_encoding_mblen
(
int
encoding,
const char *
mbstr
)
Definition at line 2135 of file wchar.c.
References encoding, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by CopyAttributeOutCSV(), CopyAttributeOutText(), fmtIdEnc(), pg_encoding_mblen_bounded(), pg_encoding_mblen_or_incomplete(), PQescapeInternal(), PQmblen(), PQmblenBounded(), and test_enc_setup().
◆ pg_encoding_mblen_bounded()
int pg_encoding_mblen_bounded
(
int
encoding,
const char *
mbstr
)
◆ pg_encoding_mblen_or_incomplete()
int pg_encoding_mblen_or_incomplete
(
int
encoding,
const char *
mbstr,
size_t
remaining
)
◆ pg_encoding_set_invalid()
void pg_encoding_set_invalid
(
int
encoding,
char *
dst
)
◆ pg_encoding_verifymbchar()
int pg_encoding_verifymbchar
(
int
encoding,
const char *
mbstr,
int
len
)
Definition at line 2189 of file wchar.c.
References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by appendStringLiteral(), big52euc_tw(), big52mic(), euc_jis_20042shift_jis_2004(), euc_jp2mic(), euc_jp2sjis(), euc_kr2mic(), euc_tw2big5(), euc_tw2mic(), fmtIdEnc(), LocalToUtf(), mic2big5(), mic2euc_jp(), mic2euc_kr(), mic2euc_tw(), mic2sjis(), PQescapeStringInternal(), shift_jis_20042euc_jis_2004(), and sjis2euc_jp().
◆ pg_encoding_verifymbstr()
int pg_encoding_verifymbstr
(
int
encoding,
const char *
mbstr,
int
len
)
Definition at line 2202 of file wchar.c.
References encoding, len, PG_SQL_ASCII, PG_VALID_ENCODING, and pg_wchar_table.
Referenced by add_file_to_manifest(), CopyConvertBuf(), handle_oauth_sasl_error(), parse_oauth_json(), PQescapeInternal(), test_enc_conversion(), test_enc_setup(), and test_one_vector_escape().
◆ pg_euc2wchar_with_len()
static int pg_euc2wchar_with_len
(
const unsigned char *
from,
int
len
)
static
Definition at line 105 of file wchar.c.
106{
107 int cnt = 0;
108
109 while (
len > 0 && *from)
110 {
111 if (*from ==
SS2 &&
len >= 2)
/* JIS X 0201 (so called "1 byte
112 * KANA") */
113 {
114 from++;
115 *to = (
SS2 << 8) | *from++;
117 }
118 else if (*from ==
SS3 &&
len >= 3)
/* JIS X 0212 KANJI */
119 {
120 from++;
121 *to = (
SS3 << 16) | (*from++ << 8);
122 *to |= *from++;
124 }
126 {
127 *to = *from++ << 8;
128 *to |= *from++;
130 }
131 else /* must be ASCII */
132 {
133 *to = *from++;
135 }
136 to++;
137 cnt++;
138 }
139 *to = 0;
140 return cnt;
141}
References IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_eucjp2wchar_with_len(), and pg_euckr2wchar_with_len().
◆ pg_euc_dsplen()
static int pg_euc_dsplen
(
const unsigned char *
s )
inline static
◆ pg_euc_mblen()
static int pg_euc_mblen
(
const unsigned char *
s )
inline static
◆ pg_euccn2wchar_with_len()
static int pg_euccn2wchar_with_len
(
const unsigned char *
from,
int
len
)
static
Definition at line 232 of file wchar.c.
233{
234 int cnt = 0;
235
236 while (
len > 0 && *from)
237 {
238 if (*from ==
SS2 &&
len >= 3)
/* code set 2 (unused?) */
239 {
240 from++;
241 *to = (
SS2 << 16) | (*from++ << 8);
242 *to |= *from++;
244 }
245 else if (*from ==
SS3 &&
len >= 3)
/* code set 3 (unused ?) */
246 {
247 from++;
248 *to = (
SS3 << 16) | (*from++ << 8);
249 *to |= *from++;
251 }
253 {
254 *to = *from++ << 8;
255 *to |= *from++;
257 }
258 else
259 {
260 *to = *from++;
262 }
263 to++;
264 cnt++;
265 }
266 *to = 0;
267 return cnt;
268}
References IS_HIGHBIT_SET, len, SS2, and SS3.
◆ pg_euccn_dsplen()
static int pg_euccn_dsplen
(
const unsigned char *
s )
static
◆ pg_euccn_mblen()
static int pg_euccn_mblen
(
const unsigned char *
s )
static
◆ pg_eucjp2wchar_with_len()
static int pg_eucjp2wchar_with_len
(
const unsigned char *
from,
int
len
)
static
◆ pg_eucjp_dsplen()
static int pg_eucjp_dsplen
(
const unsigned char *
s )
static
◆ pg_eucjp_mblen()
static int pg_eucjp_mblen
(
const unsigned char *
s )
static
Definition at line 185 of file wchar.c.
186{
188}
static int pg_euc_mblen(const unsigned char *s)
References pg_euc_mblen().
◆ pg_eucjp_verifychar()
static int pg_eucjp_verifychar
(
const unsigned char *
s,
int
len
)
static
Definition at line 1082 of file wchar.c.
1083{
1084 int l;
1085 unsigned char c1,
1086 c2;
1087
1088 c1 = *s++;
1089
1090 switch (c1)
1091 {
1092 case SS2:
/* JIS X 0201 */
1093 l = 2;
1095 return -1;
1096 c2 = *s++;
1097 if (c2 < 0xa1 || c2 > 0xdf)
1098 return -1;
1099 break;
1100
1101 case SS3:
/* JIS X 0212 */
1102 l = 3;
1104 return -1;
1105 c2 = *s++;
1107 return -1;
1108 c2 = *s++;
1110 return -1;
1111 break;
1112
1113 default:
1115 {
1116 l = 2;
1118 return -1;
1120 return -1;
1121 c2 = *s++;
1123 return -1;
1124 }
1125 else
1126 /* must be ASCII */
1127 {
1128 l = 1;
1129 }
1130 break;
1131 }
1132
1133 return l;
1134}
#define IS_EUC_RANGE_VALID(c)
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_eucjp_verifystr().
◆ pg_eucjp_verifystr()
static int pg_eucjp_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1137 of file wchar.c.
1138{
1139 const unsigned char *
start = s;
1140
1142 {
1143 int l;
1144
1145 /* fast path for ASCII-subset characters */
1147 {
1148 if (*s == '\0')
1149 break;
1150 l = 1;
1151 }
1152 else
1153 {
1155 if (l == -1)
1156 break;
1157 }
1158 s += l;
1160 }
1161
1163}
static int pg_eucjp_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_eucjp_verifychar(), and start.
◆ pg_euckr2wchar_with_len()
static int pg_euckr2wchar_with_len
(
const unsigned char *
from,
int
len
)
static
◆ pg_euckr_dsplen()
static int pg_euckr_dsplen
(
const unsigned char *
s )
static
Definition at line 222 of file wchar.c.
223{
225}
static int pg_euc_dsplen(const unsigned char *s)
References pg_euc_dsplen().
◆ pg_euckr_mblen()
static int pg_euckr_mblen
(
const unsigned char *
s )
static
◆ pg_euckr_verifychar()
static int pg_euckr_verifychar
(
const unsigned char *
s,
int
len
)
static
Definition at line 1166 of file wchar.c.
1167{
1168 int l;
1169 unsigned char c1,
1170 c2;
1171
1172 c1 = *s++;
1173
1175 {
1176 l = 2;
1178 return -1;
1180 return -1;
1181 c2 = *s++;
1183 return -1;
1184 }
1185 else
1186 /* must be ASCII */
1187 {
1188 l = 1;
1189 }
1190
1191 return l;
1192}
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, and len.
Referenced by pg_euckr_verifystr().
◆ pg_euckr_verifystr()
static int pg_euckr_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1195 of file wchar.c.
1196{
1197 const unsigned char *
start = s;
1198
1200 {
1201 int l;
1202
1203 /* fast path for ASCII-subset characters */
1205 {
1206 if (*s == '\0')
1207 break;
1208 l = 1;
1209 }
1210 else
1211 {
1213 if (l == -1)
1214 break;
1215 }
1216 s += l;
1218 }
1219
1221}
static int pg_euckr_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_euckr_verifychar(), and start.
◆ pg_euctw2wchar_with_len()
static int pg_euctw2wchar_with_len
(
const unsigned char *
from,
int
len
)
static
Definition at line 299 of file wchar.c.
300{
301 int cnt = 0;
302
303 while (
len > 0 && *from)
304 {
305 if (*from ==
SS2 &&
len >= 4)
/* code set 2 */
306 {
307 from++;
308 *to = (((
uint32)
SS2) << 24) | (*from++ << 16);
309 *to |= *from++ << 8;
310 *to |= *from++;
312 }
313 else if (*from ==
SS3 &&
len >= 3)
/* code set 3 (unused?) */
314 {
315 from++;
316 *to = (
SS3 << 16) | (*from++ << 8);
317 *to |= *from++;
319 }
321 {
322 *to = *from++ << 8;
323 *to |= *from++;
325 }
326 else
327 {
328 *to = *from++;
330 }
331 to++;
332 cnt++;
333 }
334 *to = 0;
335 return cnt;
336}
References IS_HIGHBIT_SET, len, SS2, and SS3.
◆ pg_euctw_dsplen()
static int pg_euctw_dsplen
(
const unsigned char *
s )
static
◆ pg_euctw_mblen()
static int pg_euctw_mblen
(
const unsigned char *
s )
static
◆ pg_euctw_verifychar()
static int pg_euctw_verifychar
(
const unsigned char *
s,
int
len
)
static
Definition at line 1228 of file wchar.c.
1229{
1230 int l;
1231 unsigned char c1,
1232 c2;
1233
1234 c1 = *s++;
1235
1236 switch (c1)
1237 {
1238 case SS2:
/* CNS 11643 Plane 1-7 */
1239 l = 4;
1241 return -1;
1242 c2 = *s++;
1243 if (c2 < 0xa1 || c2 > 0xa7)
1244 return -1;
1245 c2 = *s++;
1247 return -1;
1248 c2 = *s++;
1250 return -1;
1251 break;
1252
1253 case SS3:
/* unused */
1254 return -1;
1255
1256 default:
1258 {
1259 l = 2;
1261 return -1;
1262 /* no further range check on c1? */
1263 c2 = *s++;
1265 return -1;
1266 }
1267 else
1268 /* must be ASCII */
1269 {
1270 l = 1;
1271 }
1272 break;
1273 }
1274 return l;
1275}
References IS_EUC_RANGE_VALID, IS_HIGHBIT_SET, len, SS2, and SS3.
Referenced by pg_euctw_verifystr().
◆ pg_euctw_verifystr()
static int pg_euctw_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1278 of file wchar.c.
1279{
1280 const unsigned char *
start = s;
1281
1283 {
1284 int l;
1285
1286 /* fast path for ASCII-subset characters */
1288 {
1289 if (*s == '\0')
1290 break;
1291 l = 1;
1292 }
1293 else
1294 {
1296 if (l == -1)
1297 break;
1298 }
1299 s += l;
1301 }
1302
1304}
static int pg_euctw_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_euctw_verifychar(), and start.
◆ pg_gb18030_dsplen()
static int pg_gb18030_dsplen
(
const unsigned char *
s )
static
◆ pg_gb18030_mblen()
static int pg_gb18030_mblen
(
const unsigned char *
s )
static
Definition at line 1015 of file wchar.c.
1016{
1018
1020 len = 1;
/* ASCII */
1021 else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1023 else
1026}
References IS_HIGHBIT_SET, and len.
◆ pg_gb18030_verifychar()
static int pg_gb18030_verifychar
(
const unsigned char *
s,
int
len
)
static
Definition at line 1641 of file wchar.c.
1642{
1643 int l;
1644
1646 l = 1; /* ASCII */
1647 else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
1648 {
1649 /* Should be 4-byte, validate remaining bytes */
1650 if (*s >= 0x81 && *s <= 0xfe &&
1651 *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
1652 *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
1653 l = 4;
1654 else
1655 l = -1;
1656 }
1657 else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
1658 {
1659 /* Should be 2-byte, validate */
1660 if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
1661 (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
1662 l = 2;
1663 else
1664 l = -1;
1665 }
1666 else
1667 l = -1;
1668 return l;
1669}
References IS_HIGHBIT_SET, and len.
Referenced by pg_gb18030_verifystr().
◆ pg_gb18030_verifystr()
static int pg_gb18030_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1672 of file wchar.c.
1673{
1674 const unsigned char *
start = s;
1675
1677 {
1678 int l;
1679
1680 /* fast path for ASCII-subset characters */
1682 {
1683 if (*s == '\0')
1684 break;
1685 l = 1;
1686 }
1687 else
1688 {
1690 if (l == -1)
1691 break;
1692 }
1693 s += l;
1695 }
1696
1698}
static int pg_gb18030_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_gb18030_verifychar(), and start.
◆ pg_gbk_dsplen()
static int pg_gbk_dsplen
(
const unsigned char *
s )
static
◆ pg_gbk_mblen()
static int pg_gbk_mblen
(
const unsigned char *
s )
static
◆ pg_gbk_verifychar()
static int pg_gbk_verifychar
(
const unsigned char *
s,
int
len
)
static
◆ pg_gbk_verifystr()
static int pg_gbk_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1558 of file wchar.c.
1559{
1560 const unsigned char *
start = s;
1561
1563 {
1564 int l;
1565
1566 /* fast path for ASCII-subset characters */
1568 {
1569 if (*s == '\0')
1570 break;
1571 l = 1;
1572 }
1573 else
1574 {
1576 if (l == -1)
1577 break;
1578 }
1579 s += l;
1581 }
1582
1584}
static int pg_gbk_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_gbk_verifychar(), and start.
◆ pg_johab_dsplen()
static int pg_johab_dsplen
(
const unsigned char *
s )
static
◆ pg_johab_mblen()
static int pg_johab_mblen
(
const unsigned char *
s )
static
◆ pg_johab_verifychar()
static int pg_johab_verifychar
(
const unsigned char *
s,
int
len
)
static
◆ pg_johab_verifystr()
static int pg_johab_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1331 of file wchar.c.
1332{
1333 const unsigned char *
start = s;
1334
1336 {
1337 int l;
1338
1339 /* fast path for ASCII-subset characters */
1341 {
1342 if (*s == '\0')
1343 break;
1344 l = 1;
1345 }
1346 else
1347 {
1349 if (l == -1)
1350 break;
1351 }
1352 s += l;
1354 }
1355
1357}
static int pg_johab_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_johab_verifychar(), and start.
◆ pg_latin12wchar_with_len()
static int pg_latin12wchar_with_len
(
const unsigned char *
from,
int
len
)
static
Definition at line 839 of file wchar.c.
840{
841 int cnt = 0;
842
843 while (
len > 0 && *from)
844 {
845 *to++ = *from++;
847 cnt++;
848 }
849 *to = 0;
850 return cnt;
851}
References len.
◆ pg_latin1_dsplen()
static int pg_latin1_dsplen
(
const unsigned char *
s )
static
◆ pg_latin1_mblen()
static int pg_latin1_mblen
(
const unsigned char *
s )
static
◆ pg_latin1_verifychar()
static int pg_latin1_verifychar
(
const unsigned char *
s,
int
len
)
static
Definition at line 1410 of file wchar.c.
1411{
1412 return 1;
1413}
◆ pg_latin1_verifystr()
static int pg_latin1_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1416 of file wchar.c.
1417{
1418 const unsigned char *nullpos = memchr(s, 0, len);
1419
1420 if (nullpos == NULL)
1422 else
1423 return nullpos - s;
1424}
References len.
◆ pg_mule2wchar_with_len()
static int pg_mule2wchar_with_len
(
const unsigned char *
from,
int
len
)
static
Definition at line 674 of file wchar.c.
675{
676 int cnt = 0;
677
678 while (
len > 0 && *from)
679 {
681 {
682 *to = *from++ << 16;
683 *to |= *from++;
685 }
687 {
688 from++;
689 *to = *from++ << 16;
690 *to |= *from++;
692 }
694 {
695 *to = *from++ << 16;
696 *to |= *from++ << 8;
697 *to |= *from++;
699 }
701 {
702 from++;
703 *to = *from++ << 16;
704 *to |= *from++ << 8;
705 *to |= *from++;
707 }
708 else
709 { /* assume ASCII */
710 *to = (unsigned char) *from++;
712 }
713 to++;
714 cnt++;
715 }
716 *to = 0;
717 return cnt;
718}
References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.
◆ pg_mule_dsplen()
static int pg_mule_dsplen
(
const unsigned char *
s )
static
Definition at line 811 of file wchar.c.
812{
814
815 /*
816 * Note: it's not really appropriate to assume that all multibyte charsets
817 * are double-wide on screen. But this seems an okay approximation for
818 * the MULE charsets we currently support.
819 */
820
829 else
830 len = 1;
/* assume ASCII */
831
833}
References IS_LC1, IS_LC2, IS_LCPRV1, IS_LCPRV2, and len.
◆ pg_mule_mblen()
int pg_mule_mblen
(
const unsigned char *
s )
◆ pg_mule_verifychar()
static int pg_mule_verifychar
(
const unsigned char *
s,
int
len
)
static
◆ pg_mule_verifystr()
static int pg_mule_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1381 of file wchar.c.
1382{
1383 const unsigned char *
start = s;
1384
1386 {
1387 int l;
1388
1389 /* fast path for ASCII-subset characters */
1391 {
1392 if (*s == '\0')
1393 break;
1394 l = 1;
1395 }
1396 else
1397 {
1399 if (l == -1)
1400 break;
1401 }
1402 s += l;
1404 }
1405
1407}
static int pg_mule_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_mule_verifychar(), and start.
◆ pg_sjis_dsplen()
static int pg_sjis_dsplen
(
const unsigned char *
s )
static
◆ pg_sjis_mblen()
static int pg_sjis_mblen
(
const unsigned char *
s )
static
◆ pg_sjis_verifychar()
static int pg_sjis_verifychar
(
const unsigned char *
s,
int
len
)
static
Definition at line 1427 of file wchar.c.
1428{
1429 int l,
1430 mbl;
1431 unsigned char c1,
1432 c2;
1433
1435
1437 return -1;
1438
1439 if (l == 1) /* pg_sjis_mblen already verified it */
1440 return mbl;
1441
1442 c1 = *s++;
1443 c2 = *s;
1445 return -1;
1446 return mbl;
1447}
static int pg_sjis_mblen(const unsigned char *s)
References ISSJISHEAD, ISSJISTAIL, len, and pg_sjis_mblen().
Referenced by pg_sjis_verifystr().
◆ pg_sjis_verifystr()
static int pg_sjis_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1450 of file wchar.c.
1451{
1452 const unsigned char *
start = s;
1453
1455 {
1456 int l;
1457
1458 /* fast path for ASCII-subset characters */
1460 {
1461 if (*s == '\0')
1462 break;
1463 l = 1;
1464 }
1465 else
1466 {
1468 if (l == -1)
1469 break;
1470 }
1471 s += l;
1473 }
1474
1476}
static int pg_sjis_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_sjis_verifychar(), and start.
◆ pg_uhc_dsplen()
static int pg_uhc_dsplen
(
const unsigned char *
s )
static
◆ pg_uhc_mblen()
static int pg_uhc_mblen
(
const unsigned char *
s )
static
◆ pg_uhc_verifychar()
static int pg_uhc_verifychar
(
const unsigned char *
s,
int
len
)
static
◆ pg_uhc_verifystr()
static int pg_uhc_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1612 of file wchar.c.
1613{
1614 const unsigned char *
start = s;
1615
1617 {
1618 int l;
1619
1620 /* fast path for ASCII-subset characters */
1622 {
1623 if (*s == '\0')
1624 break;
1625 l = 1;
1626 }
1627 else
1628 {
1630 if (l == -1)
1631 break;
1632 }
1633 s += l;
1635 }
1636
1638}
static int pg_uhc_verifychar(const unsigned char *s, int len)
References IS_HIGHBIT_SET, len, pg_uhc_verifychar(), and start.
◆ pg_utf2wchar_with_len()
static int pg_utf2wchar_with_len
(
const unsigned char *
from,
int
len
)
static
Definition at line 441 of file wchar.c.
442{
443 int cnt = 0;
445 c2,
446 c3,
447 c4;
448
449 while (
len > 0 && *from)
450 {
451 if ((*from & 0x80) == 0)
452 {
453 *to = *from++;
455 }
456 else if ((*from & 0xe0) == 0xc0)
457 {
459 break; /* drop trailing incomplete char */
460 c1 = *from++ & 0x1f;
461 c2 = *from++ & 0x3f;
462 *to = (c1 << 6) | c2;
464 }
465 else if ((*from & 0xf0) == 0xe0)
466 {
468 break; /* drop trailing incomplete char */
469 c1 = *from++ & 0x0f;
470 c2 = *from++ & 0x3f;
471 c3 = *from++ & 0x3f;
472 *to = (c1 << 12) | (c2 << 6) | c3;
474 }
475 else if ((*from & 0xf8) == 0xf0)
476 {
478 break; /* drop trailing incomplete char */
479 c1 = *from++ & 0x07;
480 c2 = *from++ & 0x3f;
481 c3 = *from++ & 0x3f;
482 c4 = *from++ & 0x3f;
483 *to = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
485 }
486 else
487 {
488 /* treat a bogus char as length 1; not ours to raise error */
489 *to = *from++;
491 }
492 to++;
493 cnt++;
494 }
495 *to = 0;
496 return cnt;
497}
References len.
◆ pg_utf8_islegal()
bool pg_utf8_islegal
(
const unsigned char *
source,
int
length
)
Definition at line 1989 of file wchar.c.
1990{
1992
1993 switch (length)
1994 {
1995 default:
1996 /* reject lengths 5 and 6 for now */
1997 return false;
1998 case 4:
2000 if (a < 0x80 || a > 0xBF)
2001 return false;
2002 /* FALL THRU */
2003 case 3:
2005 if (a < 0x80 || a > 0xBF)
2006 return false;
2007 /* FALL THRU */
2008 case 2:
2011 {
2012 case 0xE0:
2013 if (a < 0xA0 || a > 0xBF)
2014 return false;
2015 break;
2016 case 0xED:
2017 if (a < 0x80 || a > 0x9F)
2018 return false;
2019 break;
2020 case 0xF0:
2021 if (a < 0x90 || a > 0xBF)
2022 return false;
2023 break;
2024 case 0xF4:
2025 if (a < 0x80 || a > 0x8F)
2026 return false;
2027 break;
2028 default:
2029 if (a < 0x80 || a > 0xBF)
2030 return false;
2031 break;
2032 }
2033 /* FALL THRU */
2034 case 1:
2036 if (a >= 0x80 && a < 0xC2)
2037 return false;
2039 return false;
2040 break;
2041 }
2042 return true;
2043}
static rewind_source * source
References a, and source.
Referenced by chr(), pg_utf8_string_len(), pg_utf8_verifychar(), utf8_to_iso8859_1(), and UtfToLocal().
◆ pg_utf8_verifychar()
static int pg_utf8_verifychar
(
const unsigned char *
s,
int
len
)
static
Definition at line 1701 of file wchar.c.
1702{
1703 int l;
1704
1705 if ((*s & 0x80) == 0)
1706 {
1707 if (*s == '\0')
1708 return -1;
1709 return 1;
1710 }
1711 else if ((*s & 0xe0) == 0xc0)
1712 l = 2;
1713 else if ((*s & 0xf0) == 0xe0)
1714 l = 3;
1715 else if ((*s & 0xf8) == 0xf0)
1716 l = 4;
1717 else
1718 l = 1;
1719
1721 return -1;
1722
1724 return -1;
1725
1726 return l;
1727}
bool pg_utf8_islegal(const unsigned char *source, int length)
References len, and pg_utf8_islegal().
Referenced by pg_utf8_verifystr().
◆ pg_utf8_verifystr()
static int pg_utf8_verifystr
(
const unsigned char *
s,
int
len
)
static
Definition at line 1891 of file wchar.c.
1892{
1893 const unsigned char *
start = s;
1894 const int orig_len =
len;
1896
1897/*
1898 * With a stride of two vector widths, gcc will unroll the loop. Even if
1899 * the compiler can unroll a longer loop, it's not worth it because we
1900 * must fall back to the byte-wise algorithm if we find any non-ASCII.
1901 */
1902#define STRIDE_LENGTH (2 * sizeof(Vector8))
1903
1905 {
1907 {
1908 /*
1909 * If the chunk is all ASCII, we can skip the full UTF-8 check,
1910 * but we must first check for a non-END state, which means the
1911 * previous chunk ended in the middle of a multibyte sequence.
1912 */
1915
1918 }
1919
1920 /* The error state persists, so we only need to check for it here. */
1922 {
1923 /*
1924 * Start over from the beginning with the slow path so we can
1925 * count the valid bytes.
1926 */
1929 }
1931 {
1932 /*
1933 * The fast path exited in the middle of a multibyte sequence.
1934 * Walk backwards to find the leading byte so that the slow path
1935 * can resume checking from there. We must always backtrack at
1936 * least one byte, since the current byte could be e.g. an ASCII
1937 * byte after a 2-byte lead, which is invalid.
1938 */
1939 do
1940 {
1942 s--;
1946 }
1947 }
1948
1949 /* check remaining bytes */
1951 {
1952 int l;
1953
1954 /* fast path for ASCII-subset characters */
1956 {
1957 if (*s == '\0')
1958 break;
1959 l = 1;
1960 }
1961 else
1962 {
1964 if (l == -1)
1965 break;
1966 }
1967 s += l;
1969 }
1970
1972}
static bool is_valid_ascii(const unsigned char *s, int len)
int pg_utf_mblen(const unsigned char *s)
static int pg_utf8_verifychar(const unsigned char *s, int len)
static void utf8_advance(const unsigned char *s, uint32 *state, int len)
References Assert(), BGN, END, ERR, IS_HIGHBIT_SET, is_valid_ascii(), len, pg_utf8_verifychar(), pg_utf_mblen(), start, STRIDE_LENGTH, and utf8_advance().
◆ pg_utf_dsplen()
static int pg_utf_dsplen
(
const unsigned char *
s )
static
◆ pg_utf_mblen()
int pg_utf_mblen
(
const unsigned char *
s )
Definition at line 538 of file wchar.c.
539{
541
542 if ((*s & 0x80) == 0)
544 else if ((*s & 0xe0) == 0xc0)
546 else if ((*s & 0xf0) == 0xe0)
548 else if ((*s & 0xf8) == 0xf0)
550#ifdef NOT_USED
551 else if ((*s & 0xfc) == 0xf8)
553 else if ((*s & 0xfe) == 0xfc)
555#endif
556 else
559}
References len.
Referenced by pg_utf8_verifystr(), and pg_wchar2utf_with_len().
◆ pg_wchar2euc_with_len()
static int pg_wchar2euc_with_len
(
const
pg_wchar *
from,
unsigned char *
to,
int
len
)
static
Definition at line 377 of file wchar.c.
378{
379 int cnt = 0;
380
381 while (
len > 0 && *from)
382 {
384
385 if ((
c = (*from >> 24)))
386 {
388 *to++ = (*from >> 16) & 0xff;
389 *to++ = (*from >> 8) & 0xff;
390 *to++ = *from & 0xff;
391 cnt += 4;
392 }
393 else if ((
c = (*from >> 16)))
394 {
396 *to++ = (*from >> 8) & 0xff;
397 *to++ = *from & 0xff;
398 cnt += 3;
399 }
400 else if ((
c = (*from >> 8)))
401 {
403 *to++ = *from & 0xff;
404 cnt += 2;
405 }
406 else
407 {
408 *to++ = *from;
409 cnt++;
410 }
411 from++;
413 }
414 *to = 0;
415 return cnt;
416}
References len.
◆ pg_wchar2mule_with_len()
static int pg_wchar2mule_with_len
(
const
pg_wchar *
from,
unsigned char *
to,
int
len
)
static
Definition at line 727 of file wchar.c.
728{
729 int cnt = 0;
730
731 while (
len > 0 && *from)
732 {
733 unsigned char lb;
734
735 lb = (*from >> 16) & 0xff;
737 {
738 *to++ = lb;
739 *to++ = *from & 0xff;
740 cnt += 2;
741 }
743 {
744 *to++ = lb;
745 *to++ = (*from >> 8) & 0xff;
746 *to++ = *from & 0xff;
747 cnt += 3;
748 }
750 {
752 *to++ = lb;
753 *to++ = *from & 0xff;
754 cnt += 3;
755 }
757 {
759 *to++ = lb;
760 *to++ = *from & 0xff;
761 cnt += 3;
762 }
764 {
766 *to++ = lb;
767 *to++ = (*from >> 8) & 0xff;
768 *to++ = *from & 0xff;
769 cnt += 4;
770 }
772 {
774 *to++ = lb;
775 *to++ = (*from >> 8) & 0xff;
776 *to++ = *from & 0xff;
777 cnt += 4;
778 }
779 else
780 {
781 *to++ = *from & 0xff;
782 cnt += 1;
783 }
784 from++;
786 }
787 *to = 0;
788 return cnt;
789}
#define IS_LCPRV2_B_RANGE(c)
#define IS_LCPRV1_A_RANGE(c)
#define IS_LCPRV1_B_RANGE(c)
#define IS_LCPRV2_A_RANGE(c)
References IS_LC1, IS_LC2, IS_LCPRV1_A_RANGE, IS_LCPRV1_B_RANGE, IS_LCPRV2_A_RANGE, IS_LCPRV2_B_RANGE, LCPRV1_A, LCPRV1_B, LCPRV2_A, LCPRV2_B, and len.
◆ pg_wchar2single_with_len()
static int pg_wchar2single_with_len
(
const
pg_wchar *
from,
unsigned char *
to,
int
len
)
static
Definition at line 861 of file wchar.c.
862{
863 int cnt = 0;
864
865 while (
len > 0 && *from)
866 {
867 *to++ = *from++;
869 cnt++;
870 }
871 *to = 0;
872 return cnt;
873}
References len.
◆ pg_wchar2utf_with_len()
static int pg_wchar2utf_with_len
(
const
pg_wchar *
from,
unsigned char *
to,
int
len
)
static
Definition at line 507 of file wchar.c.
508{
509 int cnt = 0;
510
511 while (
len > 0 && *from)
512 {
513 int char_len;
514
517 cnt += char_len;
518 to += char_len;
519 from++;
521 }
522 *to = 0;
523 return cnt;
524}
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
References len, pg_utf_mblen(), and unicode_to_utf8().
◆ ucs_wcwidth()
Definition at line 628 of file wchar.c.
629{
632
633 /* test for 8-bit control characters */
634 if (ucs == 0)
635 return 0;
636
637 if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
638 return -1;
639
640 /*
641 * binary search in table of non-spacing characters
642 *
643 * XXX: In the official Unicode sources, it is possible for a character to
644 * be described as both non-spacing and wide at the same time. As of
645 * Unicode 13.0, treating the non-spacing property as the determining
646 * factor for display width leads to the correct behavior, so do that
647 * search first.
648 */
651 return 0;
652
653 /* binary search in table of wide characters */
656 return 2;
657
658 return 1;
659}
static const struct mbinterval east_asian_fw[]
static const struct mbinterval nonspacing[]
static int mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
References east_asian_fw, mbbisearch(), and nonspacing.
Referenced by pg_utf_dsplen().
◆ utf8_advance()
static void utf8_advance
(
const unsigned char *
s,
int
len
)
static
Definition at line 1873 of file wchar.c.
1874{
1875 /* Note: We deliberately don't check the state's value here. */
1877 {
1878 /*
1879 * It's important that the mask value is 31: In most instruction sets,
1880 * a shift by a 32-bit operand is understood to be a shift by its mod
1881 * 32, so the compiler should elide the mask operation.
1882 */
1885 }
1886
1888}
static const uint32 Utf8Transition[256]
References len, and Utf8Transition.
Referenced by pg_utf8_verifystr().
Variable Documentation
◆ pg_wchar_table
Definition at line 2064 of file wchar.c.
Referenced by pg_database_encoding_max_length(), pg_dsplen(), pg_encoding_dsplen(), pg_encoding_max_length(), pg_encoding_max_length_sql(), pg_encoding_mb2wchar_with_len(), pg_encoding_mbcliplen(), pg_encoding_mblen(), pg_encoding_verifymbchar(), pg_encoding_verifymbstr(), pg_encoding_wchar2mb_with_len(), pg_generic_charinc(), pg_mb2wchar(), pg_mb2wchar_with_len(), pg_mblen(), pg_verify_mbstr(), pg_verify_mbstr_len(), pg_wchar2mb(), and pg_wchar2mb_with_len().
◆ Utf8Transition
const
uint32 Utf8Transition[256]
static