1/*-------------------------------------------------------------------------
4 * Various data encoding/decoding things.
6 * Copyright (c) 2001-2025, PostgreSQL Global Development Group
10 * src/backend/utils/adt/encode.c
12 *-------------------------------------------------------------------------
25 * Encoding conversion API.
26 * encode_len() and decode_len() compute the amount of space needed, while
27 * encode() and decode() perform the actual conversions. It is okay for
28 * the _len functions to return an overestimate, but not an underestimate.
29 * (Having said that, large overestimates could cause unnecessary errors,
30 * so it's better to get it right.) The conversion routines write to the
31 * buffer at *res and return the true length of their output.
65 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
66 errmsg(
"unrecognized encoding: \"%s\"", namebuf)));
74 * resultlen possibly overflows uint32, therefore on 32-bit machines it's
75 * unsafe to rely on palloc's internal check.
79 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
80 errmsg(
"result of encoding conversion is too large")));
86 /* Make this FATAL 'cause we've trodden on memory ... */
88 elog(
FATAL,
"overflow - encode estimate too small");
113 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
114 errmsg(
"unrecognized encoding: \"%s\"", namebuf)));
122 * resultlen possibly overflows uint32, therefore on 32-bit machines it's
123 * unsafe to rely on palloc's internal check.
127 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
128 errmsg(
"result of decoding conversion is too large")));
134 /* Make this FATAL 'cause we've trodden on memory ... */
136 elog(
FATAL,
"overflow - decode estimate too small");
149 * The hex expansion of each possible byte value (two chars per value).
152"000102030405060708090a0b0c0d0e0f"
153"101112131415161718191a1b1c1d1e1f"
154"202122232425262728292a2b2c2d2e2f"
155"303132333435363738393a3b3c3d3e3f"
156"404142434445464748494a4b4c4d4e4f"
157"505152535455565758595a5b5c5d5e5f"
158"606162636465666768696a6b6c6d6e6f"
159"707172737475767778797a7b7c7d7e7f"
160"808182838485868788898a8b8c8d8e8f"
161"909192939495969798999a9b9c9d9e9f"
162"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
163"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
164"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
165"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
166"e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
167"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
170 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
171 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
172 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
173 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
174 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
183 const char *end = src +
len;
187 unsigned char usrc = *((
const unsigned char *) src);
189 memcpy(dst, &
hextbl[2 * usrc], 2);
199 unsigned char c = (
unsigned char) *cp;
230 if (*s ==
' ' || *s ==
'\n' || *s ==
'\t' || *s ==
'\r')
237 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
238 errmsg(
"invalid hexadecimal digit: \"%.*s\"",
243 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
244 errmsg(
"invalid hexadecimal data: odd number of digits")));
247 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
248 errmsg(
"invalid hexadecimal digit: \"%.*s\"",
251 *p++ = (v1 << 4) | v2;
260 return (
uint64) srclen << 1;
266 return (
uint64) srclen >> 1;
270 * BASE64 and BASE64URL
274"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
277"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
280 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
281 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
282 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
283 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
284 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
285 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
286 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
287 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
291 * pg_base64_encode_internal
293 * Helper for encoding base64 or base64url. When url is passed as true the
294 * input will be encoded using base64url. len bytes in src are encoded into
313 buf |= (
unsigned char) *s << (pos << 3);
320 *p++ = alphabet[(
buf >> 18) & 0x3f];
321 *p++ = alphabet[(
buf >> 12) & 0x3f];
322 *p++ = alphabet[(
buf >> 6) & 0x3f];
323 *p++ = alphabet[
buf & 0x3f];
328 if (!url && p >= lend)
336 /* Handle remaining bytes in buf */
339 *p++ = alphabet[(
buf >> 18) & 0x3f];
340 *p++ = alphabet[(
buf >> 12) & 0x3f];
344 *p++ = alphabet[(
buf >> 6) & 0x3f];
371 * pg_base64_decode_internal
373 * Helper for decoding base64 or base64url. When url is passed as true the
374 * input will be assumed to be encoded using base64url.
379 const char *srcend = src +
len,
392 if (
c ==
' ' ||
c ==
'\t' ||
c ==
'\n' ||
c ==
'\r')
395 /* convert base64url to base64 */
415 /* translator: %s is the name of an encoding scheme */
417 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
418 errmsg(
"unexpected \"=\" while decoding %s sequence", url ?
"base64url" :
"base64")));
426 if (
c > 0 &&
c < 127)
430 /* translator: %s is the name of an encoding scheme */
432 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
433 errmsg(
"invalid symbol \"%.*s\" found while decoding %s sequence",
435 url ?
"base64url" :
"base64")));
438 /* add it to buffer */
443 *p++ = (
buf >> 16) & 255;
444 if (end == 0 || end > 1)
445 *p++ = (
buf >> 8) & 255;
446 if (end == 0 || end > 2)
456 *p++ = (
buf >> 16) & 0xFF;
461 *p++ = (
buf >> 16) & 0xFF;
462 *p++ = (
buf >> 8) & 0xFF;
466 /* translator: %s is the name of an encoding scheme */
468 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
469 errmsg(
"invalid %s end sequence", url ?
"base64url" :
"base64"),
470 errhint(
"Input data is missing padding, is truncated, or is otherwise corrupted.")));
491 /* 3 bytes will be converted to 4, linefeed after 76 chars */
492 return ((
uint64) srclen + 2) / 3 * 4 + (
uint64) srclen / (76 * 3 / 4);
498 return ((
uint64) srclen * 3) >> 2;
505 * Unlike standard base64, base64url doesn't use padding characters when
506 * the input length is not divisible by 3
508 return (srclen + 2) / 3 * 4;
515 * For base64, each 4 characters of input produce at most 3 bytes of
516 * output. For base64url without padding, we need to round up to the
519 size_t adjusted_len = srclen;
522 adjusted_len += 4 - (srclen % 4);
524 return (adjusted_len * 3) / 4;
529 * Minimally escape bytea to text.
530 * De-escape text to bytea.
532 * We must escape zero bytes and high-bit-set bytes to avoid generating
533 * text that might be invalid in the current encoding, or that might
534 * change to something else if passed through an encoding conversion
535 * (leading to failing to de-escape to the original bytea value).
536 * Also of course backslash itself has to be escaped.
538 * De-escaping processes \\ and any \### octal
/* VAL: map a digit character to its numeric value by subtracting '0'. */
541 #define VAL(CH) ((CH) - '0')
/* DIG: map a small numeric value to its digit character by adding '0'. */
542 #define DIG(VAL) ((VAL) + '0')
547 const char *end = src + srclen;
553 unsigned char c = (
unsigned char) *src;
559 rp[2] =
DIG((
c >> 3) & 7);
586 const char *end = src + srclen;
594 else if (src + 3 < end &&
595 (src[1] >=
'0' && src[1] <=
'3') &&
596 (src[2] >=
'0' && src[2] <=
'7') &&
597 (src[3] >=
'0' && src[3] <=
'7'))
605 *rp++ =
val +
VAL(src[3]);
608 else if (src + 1 < end &&
617 * One backslash, not followed by ### valid octal. Should never
618 * get here, since esc_dec_len does the same check.
621 (
errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
622 errmsg(
"invalid input syntax for type %s",
"bytea")));
634 const char *end = src + srclen;
641 else if (*src ==
'\\')
655 const char *end = src + srclen;
662 else if (src + 3 < end &&
663 (src[1] >=
'0' && src[1] <=
'3') &&
664 (src[2] >=
'0' && src[2] <=
'7') &&
665 (src[3] >=
'0' && src[3] <=
'7'))
668 * backslash + valid octal
672 else if (src + 1 < end &&
676 * two backslashes = backslash
683 * one backslash, not followed by ### valid octal
686 (
errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
687 errmsg(
"invalid input syntax for type %s",
"bytea")));
733 NULL, NULL, NULL, NULL
#define TextDatumGetCString(d)
#define IS_HIGHBIT_SET(ch)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereturn(context, dummy_value,...)
#define ereport(elevel,...)
static uint64 pg_base64_decode(const char *src, size_t len, char *dst)
static uint64 pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url)
static bool get_hex(const char *cp, char *out)
static uint64 hex_dec_len(const char *src, size_t srclen)
static const struct pg_encoding * pg_find_encoding(const char *name)
static uint64 pg_base64_encode(const char *src, size_t len, char *dst)
static uint64 esc_encode(const char *src, size_t srclen, char *dst)
static uint64 hex_enc_len(const char *src, size_t srclen)
Datum binary_decode(PG_FUNCTION_ARGS)
static const struct @24 enclist[]
static const char hextbl[512]
static uint64 pg_base64url_enc_len(const char *src, size_t srclen)
static uint64 pg_base64url_decode(const char *src, size_t len, char *dst)
static const char _base64url[]
uint64 hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext)
uint64 hex_encode(const char *src, size_t len, char *dst)
static uint64 esc_enc_len(const char *src, size_t srclen)
static uint64 pg_base64url_encode(const char *src, size_t len, char *dst)
static uint64 pg_base64url_dec_len(const char *src, size_t srclen)
static uint64 pg_base64_enc_len(const char *src, size_t srclen)
static uint64 pg_base64_encode_internal(const char *src, size_t len, char *dst, bool url)
static const char _base64[]
static uint64 esc_decode(const char *src, size_t srclen, char *dst)
static uint64 esc_dec_len(const char *src, size_t srclen)
Datum binary_encode(PG_FUNCTION_ARGS)
uint64 hex_decode(const char *src, size_t len, char *dst)
static const int8 b64lookup[128]
static const int8 hexlookup[128]
static uint64 pg_base64_dec_len(const char *src, size_t srclen)
#define PG_GETARG_BYTEA_PP(n)
#define PG_GETARG_TEXT_PP(n)
#define PG_RETURN_BYTEA_P(x)
#define PG_GETARG_DATUM(n)
#define PG_RETURN_TEXT_P(x)
int pg_mblen(const char *mbstr)
int pg_strcasecmp(const char *s1, const char *s2)
uint64(* encode_len)(const char *data, size_t dlen)
uint64(* decode_len)(const char *data, size_t dlen)
uint64(* decode)(const char *data, size_t dlen, char *res)
uint64(* encode)(const char *data, size_t dlen, char *res)
static Size VARSIZE_ANY_EXHDR(const void *PTR)
static char * VARDATA(const void *PTR)
static char * VARDATA_ANY(const void *PTR)
static void SET_VARSIZE(void *PTR, Size len)