1/*-------------------------------------------------------------------------
4 * Commands for creating and altering extended statistics objects
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/commands/statscmds.c
13 *-------------------------------------------------------------------------
48/* qsort comparator for the attnums in CreateStatistics */
53 int bv = *(
const int16 *)
b;
55 /* this can't overflow if int is wider than int16 */
75 bool nulls[Natts_pg_statistic_ext];
84 Datum types[4];
/* one for each possible type of statistic */
88 bool build_dependencies;
90 bool build_expressions;
91 bool requested_type =
false;
99 * Examine the FROM clause. Currently, we only allow it to be a single
100 * simple table, but later we'll probably allow multiple tables and JOIN
101 * syntax. The grammar is already prepared for that, so we have to check
102 * here that what we got is what we can support.
106 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
107 errmsg(
"only a single relation is allowed in CREATE STATISTICS")));
109 foreach(cell,
stmt->relations)
115 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
116 errmsg(
"only a single relation is allowed in CREATE STATISTICS")));
119 * CREATE STATISTICS will influence future execution plans but does
120 * not interfere with currently executing plans. So it should be
121 * enough to take only ShareUpdateExclusiveLock on relation,
122 * conflicting with ANALYZE and other DDL that sets statistical
123 * information, but not with normal queries.
127 /* Restrict to allowed relation types */
128 if (rel->
rd_rel->relkind != RELKIND_RELATION &&
129 rel->
rd_rel->relkind != RELKIND_MATVIEW &&
130 rel->
rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
131 rel->
rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
133 (
errcode(ERRCODE_WRONG_OBJECT_TYPE),
134 errmsg(
"cannot define statistics for relation \"%s\"",
138 /* You must own the relation to create stats on it */
143 /* Creating statistics on system catalogs is not allowed */
146 (
errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
147 errmsg(
"permission denied: \"%s\" is a system catalog",
155 * If the node has a name, split it up and determine creation namespace.
156 * If not, put the object in the same namespace as the relation, and cons
157 * up a name for it. (This can happen either via "CREATE STATISTICS ..."
158 * or via "CREATE TABLE ... (LIKE)".)
174 * Deal with the possibility that the statistics object already exists.
180 if (
stmt->if_not_exists)
183 * Since stats objects aren't members of extensions (see comments
184 * below), no need for checkMembershipInCurrentExtension here.
188 errmsg(
"statistics object \"%s\" already exists, skipping",
196 errmsg(
"statistics object \"%s\" already exists", namestr)));
200 * Make sure no more than STATS_MAX_DIMENSIONS columns are used. There
201 * might be duplicates and so on, but we'll deal with those later.
206 (
errcode(ERRCODE_TOO_MANY_COLUMNS),
207 errmsg(
"cannot have more than %d columns in statistics",
211 * Convert the expression list to a simple array of attnums, but also keep
212 * a list of more complex expressions. While at it, enforce some
213 * constraints - we don't allow extended statistics on system attributes,
214 * and we require the data type to have a less-than operator.
216 * There are many ways to "mask" a simple attribute reference as an
217 * expression, for example "(a+0)" etc. We can't possibly detect all of
218 * them, but we handle at least the simple case with the attribute in
219 * parens. There'll always be a way around this, if the user is determined
220 * (like the "(a+0)" example), but this makes it somewhat consistent with
221 * how indexes treat attributes/expressions.
223 foreach(cell,
stmt->exprs)
227 if (selem->
name)
/* column reference */
239 (
errcode(ERRCODE_UNDEFINED_COLUMN),
240 errmsg(
"column \"%s\" does not exist",
244 /* Disallow use of system attributes in extended stats */
245 if (attForm->attnum <= 0)
247 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
248 errmsg(
"statistics creation on system columns is not supported")));
250 /* Disallow use of virtual generated columns in extended stats */
251 if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
253 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
254 errmsg(
"statistics creation on virtual generated columns is not supported")));
256 /* Disallow data types without a less-than operator */
260 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
261 errmsg(
"column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
264 attnums[nattnums] = attForm->attnum;
268 else if (
IsA(selem->
expr,
Var))
/* column reference in parens */
273 /* Disallow use of system attributes in extended stats */
276 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
277 errmsg(
"statistics creation on system columns is not supported")));
279 /* Disallow use of virtual generated columns in extended stats */
282 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
283 errmsg(
"statistics creation on virtual generated columns is not supported")));
285 /* Disallow data types without a less-than operator */
289 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
290 errmsg(
"column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
296 else /* expression */
313 /* Disallow expressions referencing system attributes. */
316 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
317 errmsg(
"statistics creation on system columns is not supported")));
319 /* Disallow use of virtual generated columns in extended stats */
322 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
323 errmsg(
"statistics creation on virtual generated columns is not supported")));
327 * Disallow data types without a less-than operator.
329 * We ignore this for statistics on a single expression, in which
330 * case we'll build the regular statistics only (and that code can
331 * deal with such data types).
339 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
340 errmsg(
"expression cannot be used in multivariate statistics because its type %s has no default btree operator class",
344 stxexprs =
lappend(stxexprs, expr);
349 * Parse the statistics kinds.
351 * First check that, in the single-expression case, no statistics kinds
352 * are specified (we don't allow that for the simple CREATE STATISTICS
353 * form).
357 /* statistics kinds not specified */
360 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
361 errmsg(
"when building statistics on a single expression, statistics kinds may not be specified")));
364 /* OK, let's check that we recognize the statistics kinds. */
365 build_ndistinct =
false;
366 build_dependencies =
false;
368 foreach(cell,
stmt->stat_types)
372 if (strcmp(
type,
"ndistinct") == 0)
374 build_ndistinct =
true;
375 requested_type =
true;
377 else if (strcmp(
type,
"dependencies") == 0)
379 build_dependencies =
true;
380 requested_type =
true;
382 else if (strcmp(
type,
"mcv") == 0)
385 requested_type =
true;
389 (
errcode(ERRCODE_SYNTAX_ERROR),
390 errmsg(
"unrecognized statistics kind \"%s\"",
395 * If no statistic type was specified, build them all (but only when the
396 * statistics is defined on more than one column/expression).
398 if ((!requested_type) && (numcols >= 2))
400 build_ndistinct =
true;
401 build_dependencies =
true;
406 * When there are non-trivial expressions, build the expression stats
407 * automatically. This allows calculating good estimates for stats that
408 * consider per-clause estimates (e.g. functional dependencies).
410 build_expressions = (stxexprs !=
NIL);
413 * Check that at least two columns were specified in the statement, or
414 * that we're building statistics on a single expression.
418 (
errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
419 errmsg(
"extended statistics require at least 2 columns")));
422 * Sort the attnums, which makes detecting duplicates somewhat easier, and
423 * it does not hurt (it does not matter for the contents, unlike for
424 * indexes, for example).
429 * Check for duplicates in the list of columns. The attnums are sorted so
430 * just check consecutive elements.
432 for (
i = 1;
i < nattnums;
i++)
434 if (attnums[
i] == attnums[
i - 1])
436 (
errcode(ERRCODE_DUPLICATE_COLUMN),
437 errmsg(
"duplicate column name in statistics definition")));
441 * Check for duplicate expressions. We do two loops, counting the
442 * occurrences of each expression. This is O(N^2) but we only allow a small
443 * number of expressions and it's not executed often.
445 * XXX We don't cross-check attributes and expressions, because it does
446 * not seem worth it. In principle we could check that expressions don't
447 * contain trivial attribute references like "(a)", but the reasoning is
448 * similar to why we don't bother with extracting columns from
449 * expressions. It's either expensive or very easy to defeat for a
450 * determined user, and there's no risk if we allow such statistics (the
451 * statistics object is useless, but harmless).
453 foreach(cell, stxexprs)
458 foreach(cell2, stxexprs)
462 if (
equal(expr1, expr2))
466 /* every expression should find at least itself */
471 (
errcode(ERRCODE_DUPLICATE_COLUMN),
472 errmsg(
"duplicate expression in statistics definition")));
475 /* Form an int2vector representation of the sorted column list */
478 /* construct the char array of enabled statistic types */
482 if (build_dependencies)
486 if (build_expressions)
491 /* convert the expressions (if any) to a text datum */
501 exprsDatum = (
Datum) 0;
506 * Everything seems fine, so let's build the pg_statistic_ext tuple.
509 memset(nulls,
false,
sizeof(nulls));
512 Anum_pg_statistic_ext_oid);
519 nulls[Anum_pg_statistic_ext_stxstattarget - 1] =
true;
522 values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum;
523 if (exprsDatum == (
Datum) 0)
524 nulls[Anum_pg_statistic_ext_stxexprs - 1] =
true;
526 /* insert it into pg_statistic_ext */
534 * We used to create the pg_statistic_ext_data tuple too, but it's not
535 * clear what value the stxdinherit flag should have (it depends on
536 * whether the rel is partitioned, contains data, etc.)
542 * Invalidate relcache so that others see the new statistics object.
549 * Add an AUTO dependency on each column used in the stats, so that the
550 * stats object goes away if any or all of them get dropped.
554 /* add dependencies for plain column references */
555 for (
i = 0;
i < nattnums;
i++)
562 * If there are no dependencies on a column, give the statistics object an
563 * auto dependency on the whole table. In most cases, this will be
564 * redundant, but it might not be if the statistics expressions contain no
565 * Vars (which might seem strange but possible). This is consistent with
566 * what we do for indexes in index_create.
568 * XXX We intentionally don't consider the expressions before adding this
569 * dependency, because recordDependencyOnSingleRelExpr may not create any
570 * dependencies for whole-row Vars.
579 * Store dependencies on anything mentioned in statistics expressions,
580 * just like we do for index expressions.
590 * Also add dependencies on namespace and owner. These are required
591 * because the stats object might have a different namespace and/or owner
592 * than the underlying table(s).
600 * XXX probably there should be a recordDependencyOnCurrentExtension call
601 * here too, but we'd have to add support for ALTER EXTENSION ADD/DROP
602 * STATISTICS, which is more work than it seems worth.
605 /* Add any requested comment */
606 if (
stmt->stxcomment != NULL)
610 /* Return stats object's address */
624 Datum repl_val[Natts_pg_statistic_ext];
625 bool repl_null[Natts_pg_statistic_ext];
626 bool repl_repl[Natts_pg_statistic_ext];
629 bool newtarget_default;
631 /* -1 was used in previous versions for the default setting */
635 newtarget_default =
false;
638 newtarget_default =
true;
640 if (!newtarget_default)
642 /* Limit statistics target to a sane range */
646 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
647 errmsg(
"statistics target %d is too low",
654 (
errcode(ERRCODE_INVALID_PARAMETER_VALUE),
655 errmsg(
"lowering statistics target to %d",
660 /* lookup OID of the statistics object */
664 * If we got here and the OID is not valid, it means the statistics object
665 * does not exist, but the command specified IF EXISTS. So report this as
666 * a simple NOTICE and we're done.
679 (
errmsg(
"statistics object \"%s.%s\" does not exist, skipping",
680 schemaname, statname)));
683 (
errmsg(
"statistics object \"%s\" does not exist, skipping",
689 /* Search pg_statistic_ext */
694 elog(
ERROR,
"cache lookup failed for extended statistics object %u", stxoid);
696 /* Must be owner of the existing statistics object */
701 /* Build new tuple. */
702 memset(repl_val, 0,
sizeof(repl_val));
703 memset(repl_null,
false,
sizeof(repl_null));
704 memset(repl_repl,
false,
sizeof(repl_repl));
706 /* replace the stxstattarget column */
707 repl_repl[Anum_pg_statistic_ext_stxstattarget - 1] =
true;
708 if (!newtarget_default)
709 repl_val[Anum_pg_statistic_ext_stxstattarget - 1] =
Int16GetDatum(newtarget);
711 repl_null[Anum_pg_statistic_ext_stxstattarget - 1] =
true;
714 repl_val, repl_null, repl_repl);
716 /* Update system catalog. */
724 * NOTE: because we only support altering the statistics target, not the
725 * other fields, there is no need to update dependencies.
737 * Delete entry in pg_statistic_ext_data catalog. We don't know if the row
738 * exists, so don't error out.
751 /* We don't know if the data row for inh value exists. */
763 * Guts of statistics object deletion.
775 * Delete the pg_statistic_ext tuple. Also send out a cache inval on the
776 * associated table, so that dependent plans will be rebuilt.
783 elog(
ERROR,
"cache lookup failed for statistics object %u", statsOid);
786 relid = statext->stxrelid;
789 * Delete the pg_statistic_ext_data tuples holding the actual statistical
790 * data. There might be data with/without inheritance, so attempt deleting
791 * both. We lock the user table first, to prevent other processes (e.g.
792 * DROP STATISTICS) from removing the row concurrently.
805 /* Keep lock until the end of the transaction. */
812 * Select a nonconflicting name for a new statistics object.
814 * name1, name2, and label are used the same way as for makeObjectName(),
815 * except that the label can't be NULL; digits will be appended to the label
816 * if needed to create a name that is unique within the specified namespace.
818 * Returns a palloc'd string.
820 * Note: it is theoretically possible to get a collision anyway, if someone
821 * else chooses the same name concurrently. This is fairly unlikely to be
822 * a problem in practice, especially if one is holding a share update
823 * exclusive lock on the relation identified by name1. However, if choosing
824 * multiple names within a single command, you'd better create the new object
825 * and do CommandCounterIncrement before choosing the next one!
832 char *stxname = NULL;
835 /* try the unmodified label first */
844 existingstats =
GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid,
850 /* found a conflict, so try a new name component */
859 * Generate "name2" for a new statistics object given the list of column
860 * names for it. This will be passed to ChooseExtendedStatisticName along
861 * with the parent table name and a suitable label.
863 * We know that fewer than NAMEDATALEN characters will actually be used,
864 * so we can truncate the result once we've generated that many.
866 * XXX see also ChooseForeignKeyConstraintNameAddition and
867 * ChooseIndexNameAddition.
882 /* It should be one of these, but just skip if it happens not to be */
889 buf[buflen++] =
'_';
/* insert _ between names */
892 * We use fixed 'expr' for expressions, which have empty column names.
893 * For indexes this is handled in ChooseIndexColumnNames, but we have
894 * no such function for stats and it does not seem worth adding. If a
895 * better name is needed, the user can specify it explicitly.
901 * At this point we have buflen <= NAMEDATALEN. name should be less
902 * than NAMEDATALEN already, but use strlcpy for paranoia.
905 buflen += strlen(
buf + buflen);
913 * StatisticsGetRelation: given a statistics object's OID, get the OID of
914 * the relation it is defined on. Uses the system cache.
928 elog(
ERROR,
"cache lookup failed for statistics object %u", statId);
931 Assert(stx->oid == statId);
933 result = stx->stxrelid;
void aclcheck_error(AclResult aclerr, ObjectType objtype, const char *objectname)
bool object_ownercheck(Oid classid, Oid objectid, Oid roleid)
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
int bms_next_member(const Bitmapset *a, int prevbit)
static Datum values[MAXATTR]
#define CStringGetTextDatum(s)
#define OidIsValid(objectId)
bool IsSystemRelation(Relation relation)
Oid GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
void recordDependencyOnSingleRelExpr(const ObjectAddress *depender, Node *expr, Oid relId, DependencyType behavior, DependencyType self_behavior, bool reverse_self)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
bool equal(const void *a, const void *b)
bool allowSystemTableMods
Assert(PointerIsAligned(start, uint64))
HeapTuple heap_modify_tuple(HeapTuple tuple, TupleDesc tupleDesc, const Datum *replValues, const bool *replIsnull, const bool *doReplace)
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
void heap_freetuple(HeapTuple htup)
#define HeapTupleIsValid(tuple)
static void * GETSTRUCT(const HeapTupleData *tuple)
char * makeObjectName(const char *name1, const char *name2, const char *label)
void CatalogTupleUpdate(Relation heapRel, ItemPointer otid, HeapTuple tup)
void CatalogTupleInsert(Relation heapRel, HeapTuple tup)
void CatalogTupleDelete(Relation heapRel, ItemPointer tid)
int2vector * buildint2vector(const int16 *int2s, int n)
void CacheInvalidateRelcache(Relation relation)
void CacheInvalidateRelcacheByRelid(Oid relid)
if(TABLE==NULL||TABLE_index==NULL)
List * lappend(List *list, void *datum)
#define ShareUpdateExclusiveLock
char get_attgenerated(Oid relid, AttrNumber attnum)
char * get_attname(Oid relid, AttrNumber attnum, bool missing_ok)
char * pstrdup(const char *in)
void pfree(void *pointer)
void namestrcpy(Name name, const char *str)
char * NameListToString(const List *names)
Oid QualifiedNameGetCreationNamespace(const List *names, char **objname_p)
Oid get_statistics_object_oid(List *names, bool missing_ok)
void DeconstructQualifiedName(const List *names, char **nspname_p, char **objname_p)
Oid exprType(const Node *expr)
#define IsA(nodeptr, _type_)
#define InvokeObjectPostCreateHook(classId, objectId, subId)
#define InvokeObjectPostAlterHook(classId, objectId, subId)
ObjectType get_relkind_objtype(char relkind)
const ObjectAddress InvalidObjectAddress
#define ObjectAddressSet(addr, class_id, object_id)
#define ObjectAddressSubSet(addr, class_id, object_id, object_sub_id)
char * nodeToString(const void *obj)
FormData_pg_attribute * Form_pg_attribute
int errdetail_relkind_not_supported(char relkind)
void recordDependencyOn(const ObjectAddress *depender, const ObjectAddress *referenced, DependencyType behavior)
#define lfirst_node(type, lc)
static int list_length(const List *l)
void recordDependencyOnOwner(Oid classId, Oid objectId, Oid owner)
FormData_pg_statistic_ext * Form_pg_statistic_ext
#define qsort(a, b, c, d)
size_t strlcpy(char *dst, const char *src, size_t siz)
static Datum PointerGetDatum(const void *X)
static Datum Int16GetDatum(int16 X)
static Datum BoolGetDatum(bool X)
static Datum ObjectIdGetDatum(Oid X)
static Datum NameGetDatum(const NameData *X)
static Datum CStringGetDatum(const char *X)
static Datum CharGetDatum(char X)
#define RelationGetRelid(relation)
#define RelationGetDescr(relation)
#define RelationGetRelationName(relation)
#define RelationGetNamespace(relation)
void relation_close(Relation relation, LOCKMODE lockmode)
Relation relation_openrv(const RangeVar *relation, LOCKMODE lockmode)
#define STATS_MAX_DIMENSIONS
ObjectAddress AlterStatistics(AlterStatsStmt *stmt)
ObjectAddress CreateStatistics(CreateStatsStmt *stmt)
static char * ChooseExtendedStatisticNameAddition(List *exprs)
void RemoveStatisticsDataById(Oid statsOid, bool inh)
static char * ChooseExtendedStatisticName(const char *name1, const char *name2, const char *label, Oid namespaceid)
void RemoveStatisticsById(Oid statsOid)
static int compare_int16(const void *a, const void *b)
Oid StatisticsGetRelation(Oid statId, bool missing_ok)
#define ERRCODE_DUPLICATE_OBJECT
#define FirstLowInvalidHeapAttributeNumber
void ReleaseSysCache(HeapTuple tuple)
HeapTuple SearchSysCache1(int cacheId, Datum key1)
HeapTuple SearchSysCache2(int cacheId, Datum key1, Datum key2)
HeapTuple SearchSysCacheAttName(Oid relid, const char *attname)
#define SearchSysCacheExists2(cacheId, key1, key2)
#define GetSysCacheOid2(cacheId, oidcol, key1, key2)
void table_close(Relation relation, LOCKMODE lockmode)
Relation table_open(Oid relationId, LOCKMODE lockmode)
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
#define MAX_STATISTICS_TARGET
void pull_varattnos(Node *node, Index varno, Bitmapset **varattnos)