1048{
1051 int input_size;
1052 char *result;
1053 int result_size;
1054 int count;
1056 bool contains_RandALCat;
1057 unsigned char *p;
1059
1060 /* Ensure we return *output as NULL on failure */
1062
1063 /*
1064 * Quick check if the input is pure ASCII. An ASCII string requires no
1065 * further processing.
1066 */
1068 {
1071 goto oom;
1073 }
1074
1075 /*
1076 * Convert the input from UTF-8 to an array of Unicode codepoints.
1077 *
1078 * This also checks that the input is a legal UTF-8 string.
1079 */
1081 if (input_size < 0)
1084 goto oom;
1085
1087 if (!input_chars)
1088 goto oom;
1089
1090 p = (
unsigned char *)
input;
1091 for (
i = 0;
i < input_size;
i++)
1092 {
1095 }
1097
1098 /*
1099 * The steps below correspond to the steps listed in [RFC3454], Section
1100 * "2. Preparation Overview"
1101 */
1102
1103 /*
1104 * 1) Map -- For each character in the input, check if it has a mapping
1105 * and, if so, replace it with its mapping.
1106 */
1107 count = 0;
1108 for (
i = 0;
i < input_size;
i++)
1109 {
1111
1113 input_chars[count++] = 0x0020;
1115 {
1116 /* map to nothing */
1117 }
1118 else
1119 input_chars[count++] = code;
1120 }
1121 input_chars[count] = (
pg_wchar)
'0円';
1122 input_size = count;
1123
1124 if (input_size == 0)
1125 goto prohibited; /* don't allow empty password */
1126
1127 /*
1128 * 2) Normalize -- Normalize the result of step 1 using Unicode
1129 * normalization.
1130 */
1132 if (!output_chars)
1133 goto oom;
1134
1135 /*
1136 * 3) Prohibit -- Check for any characters that are not allowed in the
1137 * output. If any are found, return an error.
1138 */
1139 for (
i = 0;
i < input_size;
i++)
1140 {
1142
1144 goto prohibited;
1146 goto prohibited;
1147 }
1148
1149 /*
1150 * 4) Check bidi -- Possibly check for right-to-left characters, and if
1151 * any are found, make sure that the whole string satisfies the
1152 * requirements for bidirectional strings. If the string does not satisfy
1153 * the requirements for bidirectional strings, return an error.
1154 *
1155 * [RFC3454], Section "6. Bidirectional Characters" explains in more
1156 * detail what that means:
1157 *
1158 * "In any profile that specifies bidirectional character handling, all
1159 * three of the following requirements MUST be met:
1160 *
1161 * 1) The characters in section 5.8 MUST be prohibited.
1162 *
1163 * 2) If a string contains any RandALCat character, the string MUST NOT
1164 * contain any LCat character.
1165 *
1166 * 3) If a string contains any RandALCat character, a RandALCat character
1167 * MUST be the first character of the string, and a RandALCat character
1168 * MUST be the last character of the string."
1169 */
1170 contains_RandALCat = false;
1171 for (
i = 0;
i < input_size;
i++)
1172 {
1174
1176 {
1177 contains_RandALCat = true;
1178 break;
1179 }
1180 }
1181
1182 if (contains_RandALCat)
1183 {
1185 pg_wchar last = input_chars[input_size - 1];
1186
1187 for (
i = 0;
i < input_size;
i++)
1188 {
1190
1192 goto prohibited;
1193 }
1194
1197 goto prohibited;
1198 }
1199
1200 /*
1201 * Finally, convert the result back to UTF-8.
1202 */
1203 result_size = 0;
1204 for (wp = output_chars; *wp; wp++)
1205 {
1206 unsigned char buf[4];
1207
1210 }
1211
1212 result =
ALLOC(result_size + 1);
1213 if (!result)
1214 goto oom;
1215
1216 /*
1217 * There are no error exits below here, so the error exit paths don't need
1218 * to worry about possibly freeing "result".
1219 */
1220 p = (unsigned char *) result;
1221 for (wp = output_chars; *wp; wp++)
1222 {
1225 }
1226 Assert((
char *) p == result + result_size);
1227 *p = '0円';
1228
1231
1234
1235prohibited:
1236 if (input_chars)
1238 if (output_chars)
1240
1242
1243oom:
1244 if (input_chars)
1246 if (output_chars)
1248
1250}
Assert(PointerIsAligned(start, uint64))
static pg_wchar utf8_to_unicode(const unsigned char *c)
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
static const pg_wchar unassigned_codepoint_ranges[]
static const pg_wchar non_ascii_space_ranges[]
static const pg_wchar RandALCat_codepoint_ranges[]
#define IS_CODE_IN_TABLE(code, map)
static const pg_wchar LCat_codepoint_ranges[]
static const pg_wchar commonly_mapped_to_nothing_ranges[]
static const pg_wchar prohibited_output_ranges[]
static int pg_utf8_string_len(const char *source)
bool pg_is_ascii(const char *str)
pg_wchar * unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)