1/*-------------------------------------------------------------------------
4 * Query normalization and fingerprinting.
6 * Normalization is a process whereby similar queries, typically differing only
7 * in their constants (though the exact rules are somewhat more subtle than
8 * that) are recognized as equivalent, and are tracked as a single entry. This
9 * is particularly useful for non-prepared queries.
11 * Normalization is implemented by fingerprinting queries, selectively
12 * serializing those fields of each query tree's nodes that are judged to be
13 * essential to the query. This is referred to as a query jumble. This is
14 * distinct from a regular serialization in that various extraneous
15 * information is ignored as irrelevant or not essential to the query, such
16 * as the collations of Vars and, most notably, the values of constants.
18 * This jumble is acquired at the end of parse analysis of each query, and
19 * a 64-bit hash of it is stored into the query's Query.queryId field.
20 * The server then copies this value around, making it available in plan
21 * tree(s) generated from the query. The executor can then use this value
22 * to blame query costs on the proper queryId.
24 * Arrays of two or more constants and PARAM_EXTERN parameters are "squashed"
25 * and contribute only once to the jumble. This has the effect that queries
26 * that differ only on the length of such lists have the same queryId.
29 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
30 * Portions Copyright (c) 1994, Regents of the University of California
34 * src/backend/nodes/queryjumblefuncs.c
36 *-------------------------------------------------------------------------
49 #define JUMBLE_SIZE 1024 /* query serialization buffer size */
55 * True when compute_query_id is ON or AUTO, and a module requests them.
57 * Note that IsQueryIdEnabled() should be used instead of checking
58 * query_id_enabled or compute_query_id directly when we want to know
59 * whether query identifiers are computed in the core or not.
70 int location,
int len);
82 * Given a possibly multi-statement source string, confine our attention to the
83 * relevant part of the string.
88 int query_location = *location;
91 /* First apply starting offset, unless it's -1 (unknown). */
92 if (query_location >= 0)
94 Assert(query_location <= strlen(query));
95 query += query_location;
96 /* Length of 0 (or -1) means "rest of string" */
98 query_len = strlen(query);
100 Assert(query_len <= strlen(query));
104 /* If query location is unknown, distrust query_len as well */
106 query_len = strlen(query);
110 * Discard leading and trailing whitespace, too. Use scanner_isspace()
111 * not libc's isspace(), because we want to match the lexer's behavior.
113 * Note: the parser now strips leading comments and whitespace from the
114 * reported stmt_location, so this first loop will only iterate in the
115 * unusual case that the location didn't propagate to here. But the
116 * statement length will extend to the end-of-string or terminating
117 * semicolon, so the second loop often does something useful.
120 query++, query_location++, query_len--;
124 *location = query_location;
132 * Recursively process the given Query producing a 64-bit hash value by
133 * hashing the relevant fields and record that value in the Query's queryId
134 * field. Return the JumbleState object used for jumbling the query.
148 * If we are unlucky enough to get a hash of zero, use 1 instead for
149 * normal statements and 2 for utility queries.
163 * Enables query identifier computation.
165 * Third-party plugins can use this function to inform core that they require
166 * a query identifier to be computed.
177 * Allocate a JumbleState object and make it ready to jumble.
186 /* Set up workspace for query jumbling */
196#ifdef USE_ASSERT_CHECKING
197 jstate->total_jumble_len = 0;
205 * Jumble the given Node using the given JumbleState and return the resulting
211 /* Jumble the given node */
214 /* Flush any pending NULLs before doing the final hash */
218 /* Squashed list found, reset highest_extern_param_id */
222 /* Process the jumble buffer and produce the hash value */
229 * AppendJumbleInternal: Internal function for appending to the jumble buffer
231 * Note: Callers must ensure that size > 0.
237 unsigned char *jumble = jstate->
jumble;
240 /* Ensure the caller didn't mess up */
244 * Fast path for when there's enough space left in the buffer. This is
245 * worthwhile as means the memcpy can be inlined into very efficient code
246 * when 'size' is a compile-time constant.
250 memcpy(jumble + jumble_len, item, size);
253#ifdef USE_ASSERT_CHECKING
254 jstate->total_jumble_len += size;
261 * Whenever the jumble buffer is full, we hash the current contents and
262 * reset the buffer to contain just that hash value, thus relying on the
263 * hash to summarize everything so far.
275 memcpy(jumble, &start_hash,
sizeof(start_hash));
276 jumble_len =
sizeof(start_hash);
279 memcpy(jumble + jumble_len, item, part_size);
280 jumble_len += part_size;
284#ifdef USE_ASSERT_CHECKING
285 jstate->total_jumble_len += part_size;
294 * Add 'size' bytes of the given jumble 'value' to the jumble state
307 * For jumbling NULL pointers
317 * Add the first byte from the given 'value' pointer to the jumble state
330 * Add the first 2 bytes from the given 'value' pointer to the jumble
344 * Add the first 4 bytes from the given 'value' pointer to the jumble
358 * Add the first 8 bytes from the given 'value' pointer to the jumble
372 * Incorporate the pending_nulls value into the jumble buffer.
374 * Note: Callers must ensure that there's at least 1 pending NULL.
388 * Record the location of some kind of constant within a query string.
389 * These are not only bare constants but also expressions that ultimately
390 * constitute a constant, such as those inside casts and simple function
391 * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param.
393 * If length is -1, it indicates a single such constant element. If
394 * it's a positive integer, it indicates the length of a squashable
400 /* -1 indicates unknown or undefined location */
403 /* enlarge array if needed */
415 * Lengths are either positive integers (indicating a squashable
427 * Subroutine for _jumbleElements: Verify a few simple cases where we can
428 * deduce that the expression is a constant:
430 * - See through any wrapping RelabelType and CoerceViaIO layers.
431 * - If it's a FuncExpr, check that the function is a builtin
432 * cast and its arguments are Const.
433 * - Otherwise test if the expression is a simple Const or a
434 * PARAM_EXTERN param.
443 /* Unwrap RelabelType */
448 /* Unwrap CoerceViaIO */
471 * We can check function arguments recursively, being careful
472 * about recursing too deep. At each recursion level it's
473 * enough to test the stack on the first element. (Note that
474 * I wasn't able to hit this without bloating the stack
475 * artificially in this function: the parser errors out before
476 * stack size becomes a problem here.)
478 foreach(temp, func->
args)
501 * Subroutine for _jumbleElements: Verify whether the provided list
502 * can be squashed, meaning it contains only constant expressions.
504 * Return value indicates if squashing is possible.
506 * Note that this function searches only for explicit Const nodes with
507 * possibly very simple decorations on top and PARAM_EXTERN parameters,
508 * and does not try to simplify expressions.
515 /* If the list is too short, we don't try to squash it. */
519 foreach(temp, elements)
528 #define JUMBLE_NODE(item) \
529 _jumbleNode(jstate, (Node *) expr->item)
530 #define JUMBLE_ELEMENTS(list, node) \
531 _jumbleElements(jstate, (List *) expr->list, node)
532 #define JUMBLE_LOCATION(location) \
533 RecordConstLocation(jstate, false, expr->location, -1)
534 #define JUMBLE_FIELD(item) \
536 if (sizeof(expr->item) == 8) \
537 AppendJumble64(jstate, (const unsigned char *) &(expr->item)); \
538 else if (sizeof(expr->item) == 4) \
539 AppendJumble32(jstate, (const unsigned char *) &(expr->item)); \
540 else if (sizeof(expr->item) == 2) \
541 AppendJumble16(jstate, (const unsigned char *) &(expr->item)); \
542 else if (sizeof(expr->item) == 1) \
543 AppendJumble8(jstate, (const unsigned char *) &(expr->item)); \
545 AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)); \
547 #define JUMBLE_STRING(str) \
550 AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \
552 AppendJumbleNull(jstate); \
554/* Function name used for the node field attribute custom_query_jumble. */
555 #define JUMBLE_CUSTOM(nodetype, item) \
556 _jumble##nodetype##_##item(jstate, expr, expr->item)
558#include "queryjumblefuncs.funcs.c"
564#ifdef USE_ASSERT_CHECKING
565 Size prev_jumble_len = jstate->total_jumble_len;
574 /* Guard against stack overflow due to overly complex expressions */
578 * We always emit the node's NodeTag, then any additional fields that are
579 * considered significant, and then we recurse to any child nodes.
585#include "queryjumblefuncs.switch.c"
595 /* Only a warning, since we can stumble along anyway */
601 /* Ensure we added something to the jumble buffer */
602 Assert(jstate->total_jumble_len > prev_jumble_len);
630 elog(
ERROR,
"unrecognized list node type: %d",
637 * We try to jumble lists of expressions as one individual item regardless
638 * of how many elements are in the list. This is know as squashing, which
639 * results in different queries jumbling to the same query_id, if the only
640 * difference is the number of elements in the list.
642 * We allow constants and PARAM_EXTERN parameters to be squashed. To normalize
643 * such queries, we use the start and end locations of the list of elements in
649 bool normalize_list =
false;
663 normalize_list =
true;
676 * We store the highest param ID of extern params. This can later be used
677 * to start the numbering of the placeholder for squashed lists.
687 /* paramtypmode and paramcollid are ignored */
692 * At this point, only external parameter locations outside of
693 * squashable lists will be recorded.
698 * Update the highest Param id seen, in order to start normalization
701 * Note: This value is reset at the end of jumbling if there exists a
702 * squashable list. See the comment in the definition of JumbleState.
736 elog(
ERROR,
"unrecognized node type: %d",
752 * Account for the list of arguments in query jumbling only if told by the
762 * Custom query jumble function for RangeTblEntry.eref.
772 * This includes only the table name, the list of column names is ignored.
#define pg_attribute_always_inline
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Assert(PointerIsAligned(start, uint64))
void * repalloc(void *pointer, Size size)
#define IsA(nodeptr, _type_)
#define castNode(_type_, nodeptr)
static int list_length(const List *l)
#define foreach_current_index(var_or_cell)
static int64 DatumGetInt64(Datum X)
static bool IsQueryIdEnabled(void)
#define JUMBLE_NODE(item)
JumbleState * JumbleQuery(Query *query)
static void _jumbleNode(JumbleState *jstate, Node *node)
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
static bool IsSquashableConstantList(List *elements)
static bool IsSquashableConstant(Node *element)
static pg_attribute_always_inline void AppendJumbleInternal(JumbleState *jstate, const unsigned char *item, Size size)
static pg_attribute_always_inline void AppendJumbleNull(JumbleState *jstate)
static pg_noinline void AppendJumble8(JumbleState *jstate, const unsigned char *value)
#define JUMBLE_LOCATION(location)
static void _jumbleParam(JumbleState *jstate, Node *node)
const char * CleanQuerytext(const char *query, int *location, int *len)
static void _jumbleList(JumbleState *jstate, Node *node)
static void FlushPendingNulls(JumbleState *jstate)
static void RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len)
#define JUMBLE_STRING(str)
static pg_noinline void AppendJumble32(JumbleState *jstate, const unsigned char *value)
static pg_noinline void AppendJumble64(JumbleState *jstate, const unsigned char *value)
static void _jumbleA_Const(JumbleState *jstate, Node *node)
static void AppendJumble(JumbleState *jstate, const unsigned char *value, Size size)
static void _jumbleElements(JumbleState *jstate, List *elements, Node *node)
static pg_noinline void AppendJumble16(JumbleState *jstate, const unsigned char *value)
static int64 DoJumble(JumbleState *jstate, Node *node)
static void _jumbleRangeTblEntry_eref(JumbleState *jstate, RangeTblEntry *rte, Alias *expr)
static JumbleState * InitJumble(void)
#define JUMBLE_FIELD(item)
static chr element(struct vars *v, const chr *startp, const chr *endp)
bool scanner_isspace(char ch)
bool stack_is_too_deep(void)
void check_stack_depth(void)
unsigned int pending_nulls
int highest_extern_param_id
#define FirstGenbkiObjectId