64{
66 int nattnums = 0;
67 int numcols;
68 char *namestr;
75 bool nulls[Natts_pg_statistic_ext];
83 myself;
84 Datum types[4];
/* one for each possible type of statistic */
85 int ntypes;
87 bool build_ndistinct;
88 bool build_dependencies;
89 bool build_mcv;
90 bool build_expressions;
91 bool requested_type = false;
95
97
98 /*
99 * Examine the FROM clause. Currently, we only allow it to be a single
100 * simple table, but later we'll probably allow multiple tables and JOIN
101 * syntax. The grammar is already prepared for that, so we have to check
102 * here that what we got is what we can support.
103 */
106 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
107 errmsg(
"only a single relation is allowed in CREATE STATISTICS")));
108
109 foreach(cell,
stmt->relations)
110 {
112
115 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
116 errmsg(
"only a single relation is allowed in CREATE STATISTICS")));
117
118 /*
119 * CREATE STATISTICS will influence future execution plans but does
120 * not interfere with currently executing plans. So it should be
121 * enough to take only ShareUpdateExclusiveLock on relation,
122 * conflicting with ANALYZE and other DDL that sets statistical
123 * information, but not with normal queries.
124 */
126
127 /* Restrict to allowed relation types */
128 if (rel->
rd_rel->relkind != RELKIND_RELATION &&
129 rel->
rd_rel->relkind != RELKIND_MATVIEW &&
130 rel->
rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
131 rel->
rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
133 (
errcode(ERRCODE_WRONG_OBJECT_TYPE),
134 errmsg(
"cannot define statistics for relation \"%s\"",
137
138 /* You must own the relation to create stats on it */
142
143 /* Creating statistics on system catalogs is not allowed */
146 (
errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
147 errmsg(
"permission denied: \"%s\" is a system catalog",
149 }
150
153
154 /*
155 * If the node has a name, split it up and determine creation namespace.
156 * If not, put the object in the same namespace as the relation, and cons
157 * up a name for it. (This can happen either via "CREATE STATISTICS ..."
158 * or via "CREATE TABLE ... (LIKE)".)
159 */
162 &namestr);
163 else
164 {
168 "stat",
169 namespaceId);
170 }
172
173 /*
174 * Deal with the possibility that the statistics object already exists.
175 */
179 {
180 if (
stmt->if_not_exists)
181 {
182 /*
183 * Since stats objects aren't members of extensions (see comments
184 * below), no need for checkMembershipInCurrentExtension here.
185 */
188 errmsg(
"statistics object \"%s\" already exists, skipping",
189 namestr)));
192 }
193
196 errmsg(
"statistics object \"%s\" already exists", namestr)));
197 }
198
199 /*
200 * Make sure no more than STATS_MAX_DIMENSIONS columns are used. There
201 * might be duplicates and so on, but we'll deal with those later.
202 */
206 (
errcode(ERRCODE_TOO_MANY_COLUMNS),
207 errmsg(
"cannot have more than %d columns in statistics",
209
210 /*
211 * Convert the expression list to a simple array of attnums, but also keep
212 * a list of more complex expressions. While at it, enforce some
213 * constraints - we don't allow extended statistics on system attributes,
214 * and we require the data type to have a less-than operator.
215 *
216 * There are many ways to "mask" a simple attribute reference as an
217 * expression, for example "(a+0)" etc. We can't possibly detect all of
218 * them, but we handle at least the simple case with the attribute in
219 * parens. There'll always be a way around this, if the user is determined
220 * (like the "(a+0)" example), but this makes it somewhat consistent with
221 * how indexes treat attributes/expressions.
222 */
223 foreach(cell,
stmt->exprs)
224 {
226
227 if (selem->
name)
/* column reference */
228 {
233
235
239 (
errcode(ERRCODE_UNDEFINED_COLUMN),
240 errmsg(
"column \"%s\" does not exist",
243
244 /* Disallow use of system attributes in extended stats */
245 if (attForm->attnum <= 0)
247 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
248 errmsg(
"statistics creation on system columns is not supported")));
249
250 /* Disallow use of virtual generated columns in extended stats */
251 if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
253 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
254 errmsg(
"statistics creation on virtual generated columns is not supported")));
255
256 /* Disallow data types without a less-than operator */
260 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
261 errmsg(
"column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
263
264 attnums[nattnums] = attForm->attnum;
265 nattnums++;
267 }
268 else if (
IsA(selem->
expr,
Var))
/* column reference in parens */
269 {
272
273 /* Disallow use of system attributes in extended stats */
276 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
277 errmsg(
"statistics creation on system columns is not supported")));
278
279 /* Disallow use of virtual generated columns in extended stats */
282 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
283 errmsg(
"statistics creation on virtual generated columns is not supported")));
284
285 /* Disallow data types without a less-than operator */
289 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
290 errmsg(
"column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
292
294 nattnums++;
295 }
296 else /* expression */
297 {
302 int k;
303
305
307
308 k = -1;
310 {
312
313 /* Disallow expressions referencing system attributes. */
316 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
317 errmsg(
"statistics creation on system columns is not supported")));
318
319 /* Disallow use of virtual generated columns in extended stats */
322 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
323 errmsg(
"statistics creation on virtual generated columns is not supported")));
324 }
325
326 /*
327 * Disallow data types without a less-than operator.
328 *
329 * We ignore this for statistics on a single expression, in which
330 * case we'll build the regular statistics only (and that code can
331 * deal with such data types).
332 */
334 {
339 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
340 errmsg(
"expression cannot be used in multivariate statistics because its type %s has no default btree operator class",
342 }
343
344 stxexprs =
lappend(stxexprs, expr);
345 }
346 }
347
348 /*
349 * Parse the statistics kinds.
350 *
351 * First check that if this is the case with a single expression, there
352 * are no statistics kinds specified (we don't allow that for the simple
353 * CREATE STATISTICS form).
354 */
356 {
357 /* statistics kinds not specified */
360 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
361 errmsg(
"when building statistics on a single expression, statistics kinds may not be specified")));
362 }
363
364 /* OK, let's check that we recognize the statistics kinds. */
365 build_ndistinct = false;
366 build_dependencies = false;
367 build_mcv = false;
368 foreach(cell,
stmt->stat_types)
369 {
371
372 if (strcmp(
type,
"ndistinct") == 0)
373 {
374 build_ndistinct = true;
375 requested_type = true;
376 }
377 else if (strcmp(
type,
"dependencies") == 0)
378 {
379 build_dependencies = true;
380 requested_type = true;
381 }
382 else if (strcmp(
type,
"mcv") == 0)
383 {
384 build_mcv = true;
385 requested_type = true;
386 }
387 else
389 (
errcode(ERRCODE_SYNTAX_ERROR),
390 errmsg(
"unrecognized statistics kind \"%s\"",
392 }
393
394 /*
395 * If no statistic type was specified, build them all (but only when the
396 * statistics is defined on more than one column/expression).
397 */
398 if ((!requested_type) && (numcols >= 2))
399 {
400 build_ndistinct = true;
401 build_dependencies = true;
402 build_mcv = true;
403 }
404
405 /*
406 * When there are non-trivial expressions, build the expression stats
407 * automatically. This allows calculating good estimates for stats that
408 * consider per-clause estimates (e.g. functional dependencies).
409 */
410 build_expressions = (stxexprs !=
NIL);
411
412 /*
413 * Check that at least two columns were specified in the statement, or
414 * that we're building statistics on a single expression.
415 */
418 (
errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
419 errmsg(
"extended statistics require at least 2 columns")));
420
421 /*
422 * Sort the attnums, which makes detecting duplicates somewhat easier, and
423 * it does not hurt (it does not matter for the contents, unlike for
424 * indexes, for example).
425 */
427
428 /*
429 * Check for duplicates in the list of columns. The attnums are sorted so
430 * just check consecutive elements.
431 */
432 for (
i = 1;
i < nattnums;
i++)
433 {
434 if (attnums[
i] == attnums[
i - 1])
436 (
errcode(ERRCODE_DUPLICATE_COLUMN),
437 errmsg(
"duplicate column name in statistics definition")));
438 }
439
440 /*
441 * Check for duplicate expressions. We do two loops, counting the
442 * occurrences of each expression. This is O(N^2) but we only allow a small
443 * number of expressions and it's not executed often.
444 *
445 * XXX We don't cross-check attributes and expressions, because it does
446 * not seem worth it. In principle we could check that expressions don't
447 * contain trivial attribute references like "(a)", but the reasoning is
448 * similar to why we don't bother with extracting columns from
449 * expressions. It's either expensive or very easy to defeat for a
450 * determined user, and there's no risk if we allow such statistics (the
451 * statistics is useless, but harmless).
452 */
453 foreach(cell, stxexprs)
454 {
456 int cnt = 0;
457
458 foreach(cell2, stxexprs)
459 {
461
462 if (
equal(expr1, expr2))
463 cnt += 1;
464 }
465
466 /* every expression should find at least itself */
468
469 if (cnt > 1)
471 (
errcode(ERRCODE_DUPLICATE_COLUMN),
472 errmsg(
"duplicate expression in statistics definition")));
473 }
474
475 /* Form an int2vector representation of the sorted column list */
477
478 /* construct the char array of enabled statistic types */
479 ntypes = 0;
480 if (build_ndistinct)
482 if (build_dependencies)
484 if (build_mcv)
486 if (build_expressions)
490
491 /* convert the expressions (if any) to a text datum */
493 {
494 char *exprsString;
495
499 }
500 else
501 exprsDatum = (
Datum) 0;
502
504
505 /*
506 * Everything seems fine, so let's build the pg_statistic_ext tuple.
507 */
509 memset(nulls, false, sizeof(nulls));
510
512 Anum_pg_statistic_ext_oid);
519 nulls[Anum_pg_statistic_ext_stxstattarget - 1] = true;
521
522 values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum;
523 if (exprsDatum == (
Datum) 0)
524 nulls[Anum_pg_statistic_ext_stxexprs - 1] = true;
525
526 /* insert it into pg_statistic_ext */
530
532
533 /*
534 * We used to create the pg_statistic_ext_data tuple too, but it's not
535 * clear what value the stxdinherit flag should have (it depends on
536 * whether the rel is partitioned, contains data, etc.)
537 */
538
540
541 /*
542 * Invalidate relcache so that others see the new statistics object.
543 */
545
547
548 /*
549 * Add an AUTO dependency on each column used in the stats, so that the
550 * stats object goes away if any or all of them get dropped.
551 */
553
554 /* add dependencies for plain column references */
555 for (
i = 0;
i < nattnums;
i++)
556 {
559 }
560
561 /*
562 * If there are no dependencies on a column, give the statistics object an
563 * auto dependency on the whole table. In most cases, this will be
564 * redundant, but it might not be if the statistics expressions contain no
565 * Vars (which might seem strange but possible). This is consistent with
566 * what we do for indexes in index_create.
567 *
568 * XXX We intentionally don't consider the expressions before adding this
569 * dependency, because recordDependencyOnSingleRelExpr may not create any
570 * dependencies for whole-row Vars.
571 */
572 if (!nattnums)
573 {
576 }
577
578 /*
579 * Store dependencies on anything mentioned in statistics expressions,
580 * just like we do for index expressions.
581 */
582 if (stxexprs)
585 relid,
588
589 /*
590 * Also add dependencies on namespace and owner. These are required
591 * because the stats object might have a different namespace and/or owner
592 * than the underlying table(s).
593 */
596
598
599 /*
600 * XXX probably there should be a recordDependencyOnCurrentExtension call
601 * here too, but we'd have to add support for ALTER EXTENSION ADD/DROP
602 * STATISTICS, which is more work than it seems worth.
603 */
604
605 /* Add any requested comment */
606 if (
stmt->stxcomment != NULL)
609
610 /* Return stats object's address */
611 return myself;
612}
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
int bms_next_member(const Bitmapset *a, int prevbit)
static Datum values[MAXATTR]
#define CStringGetTextDatum(s)
bool IsSystemRelation(Relation relation)
Oid GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
void recordDependencyOnSingleRelExpr(const ObjectAddress *depender, Node *expr, Oid relId, DependencyType behavior, DependencyType self_behavior, bool reverse_self)
bool equal(const void *a, const void *b)
bool allowSystemTableMods
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
static void * GETSTRUCT(const HeapTupleData *tuple)
void CatalogTupleInsert(Relation heapRel, HeapTuple tup)
int2vector * buildint2vector(const int16 *int2s, int n)
void CacheInvalidateRelcache(Relation relation)
if(TABLE==NULL||TABLE_index==NULL)
List * lappend(List *list, void *datum)
#define ShareUpdateExclusiveLock
char get_attgenerated(Oid relid, AttrNumber attnum)
char * get_attname(Oid relid, AttrNumber attnum, bool missing_ok)
void namestrcpy(Name name, const char *str)
Oid QualifiedNameGetCreationNamespace(const List *names, char **objname_p)
Oid exprType(const Node *expr)
#define InvokeObjectPostCreateHook(classId, objectId, subId)
ObjectType get_relkind_objtype(char relkind)
#define ObjectAddressSubSet(addr, class_id, object_id, object_sub_id)
char * nodeToString(const void *obj)
FormData_pg_attribute * Form_pg_attribute
int errdetail_relkind_not_supported(char relkind)
void recordDependencyOn(const ObjectAddress *depender, const ObjectAddress *referenced, DependencyType behavior)
#define lfirst_node(type, lc)
static int list_length(const List *l)
void recordDependencyOnOwner(Oid classId, Oid objectId, Oid owner)
#define qsort(a, b, c, d)
static Datum NameGetDatum(const NameData *X)
static Datum CStringGetDatum(const char *X)
static Datum CharGetDatum(char X)
#define RelationGetRelid(relation)
#define RelationGetRelationName(relation)
#define RelationGetNamespace(relation)
void relation_close(Relation relation, LOCKMODE lockmode)
Relation relation_openrv(const RangeVar *relation, LOCKMODE lockmode)
#define STATS_MAX_DIMENSIONS
static char * ChooseExtendedStatisticNameAddition(List *exprs)
static char * ChooseExtendedStatisticName(const char *name1, const char *name2, const char *label, Oid namespaceid)
static int compare_int16(const void *a, const void *b)
#define ERRCODE_DUPLICATE_OBJECT
#define FirstLowInvalidHeapAttributeNumber
HeapTuple SearchSysCacheAttName(Oid relid, const char *attname)
#define SearchSysCacheExists2(cacheId, key1, key2)
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
void pull_varattnos(Node *node, Index varno, Bitmapset **varattnos)