1/*-------------------------------------------------------------------------
4 * CLUSTER a table on an index. This is now also used for VACUUM FULL.
6 * There is hardly anything left of Paul Brown's original implementation...
9 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994-5, Regents of the University of California
14 * src/backend/commands/cluster.c
16 *-------------------------------------------------------------------------
49#include "utils/fmgroids.h"
60 * This struct is used to pass around the information on tables to be
61 * clustered. We need this so we can make a list of them when invoked without
62 * a specific table/index pair.
74 bool verbose,
bool *pSwapToastByContent,
82/*---------------------------------------------------------------------------
83 * This cluster code allows for clustering multiple tables at once. Because
84 * of this, we cannot just run everything in a single transaction, or we
85 * would be forced to acquire exclusive locks on all the tables being
86 * clustered, simultaneously --- very likely leading to deadlock.
88 * To solve this we follow a similar strategy to VACUUM code,
89 * clustering each relation in a separate transaction. For this to work,
91 * - provide a separate memory context so that we can pass information in
92 * a way that survives across transactions
93 * - start a new transaction every time a new relation is clustered
94 * - check for validity of the information on to-be-clustered relations,
95 * as someone might have deleted a relation behind our back, or
96 * clustered one on a different index
97 * - end the transaction
99 * The single-relation case does not have any such overhead.
101 * We also allow a relation to be specified without index. In that case,
102 * the indisclustered bit will be looked up, and an ERROR will be thrown
103 * if there is no index with the bit set.
104 *---------------------------------------------------------------------------
117 /* Parse option list */
118 foreach(lc,
stmt->params)
122 if (strcmp(opt->
defname,
"verbose") == 0)
126 (
errcode(ERRCODE_SYNTAX_ERROR),
127 errmsg(
"unrecognized CLUSTER option \"%s\"",
134 if (
stmt->relation != NULL)
136 /* This is the single-relation case. */
140 * Find, lock, and check permissions on the table. We obtain
141 * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
142 * single-transaction case.
152 * Reject clustering a remote temp table ... their local buffer
153 * manager is not going to cope.
157 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
158 errmsg(
"cannot cluster temporary tables of other sessions")));
160 if (
stmt->indexname == NULL)
164 /* We need to find the index that has indisclustered set. */
175 (
errcode(ERRCODE_UNDEFINED_OBJECT),
176 errmsg(
"there is no previously clustered index for table \"%s\"",
177 stmt->relation->relname)));
182 * The index is expected to be in the same namespace as the
186 rel->
rd_rel->relnamespace);
189 (
errcode(ERRCODE_UNDEFINED_OBJECT),
190 errmsg(
"index \"%s\" for table \"%s\" does not exist",
191 stmt->indexname,
stmt->relation->relname)));
194 /* For non-partitioned tables, do what we came here to do. */
195 if (rel->
rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
198 /* cluster_rel closes the relation, but keeps lock */
205 * By here, we know we are in a multi-table situation. In order to avoid
206 * holding locks for too long, we want to process each table in its own
207 * transaction. This forces us to disallow running inside a user
212 /* Also, we need a memory context to hold our list of relations */
218 * Either we're processing a partitioned table, or we were not given any
219 * table name at all. In either case, obtain a list of relations to
222 * In the former case, an index name must have been given, so we don't
223 * need to recheck its "indisclustered" bit, but we have to check that it
224 * is an index that we can cluster on. In the latter case, we set the
225 * option bit to have indisclustered verified.
227 * Rechecking the relation itself is necessary here in all cases.
232 Assert(rel->
rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
236 /* close relation, releasing lock on parent table */
248 /* Start a new transaction for the cleanup work. */
251 /* Clean up working storage */
256 * Given a list of relations to cluster, process each of them in a separate
259 * We expect to be in a transaction at start, but there isn't one when we
267 /* Commit to get out of starting transaction */
271 /* Cluster the tables, each in a separate transaction */
277 /* Start a new transaction for each relation. */
280 /* functions in indexes may want a snapshot set */
285 /* Process this table */
287 /* cluster_rel closes the relation, but keeps lock */
297 * This clusters the table by creating a new, clustered table and
298 * swapping the relfilenumbers of the new table and the old table, so
299 * the OID of the original table is preserved. Thus we do not lose
300 * GRANT, inheritance nor references to this table.
302 * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
303 * the new table, it's better to create the indexes afterwards than to fill
304 * them incrementally while we load the table.
306 * If indexOid is InvalidOid, the table will be rewritten in physical order
307 * instead of index order. This is the new implementation of VACUUM FULL,
308 * and error messages should refer to the operation as VACUUM not CLUSTER.
315 int save_sec_context;
323 /* Check for user-requested abort. */
335 * Switch to the table owner's userid, so that any index functions are run
336 * as that user. Also lock down security-restricted operations and
337 * arrange to make GUC variable changes local to this command.
346 * Since we may open a new transaction for each relation, we have to check
347 * that the relation still is what we think it is.
349 * If this is a single-transaction CLUSTER, we can skip these tests. We
350 * *must* skip the one on indisclustered since it would reject an attempt
351 * to cluster a not-previously-clustered index.
355 /* Check that the user still has privileges for the relation */
363 * Silently skip a temp table for a remote session. Only doing this
364 * check in the "recheck" case is appropriate (which currently means
365 * somebody is executing a database-wide CLUSTER or on a partitioned
366 * table), because there is another check in cluster() which will stop
367 * any attempt to cluster remote temp tables by name. There is
368 * another check in cluster_rel which is redundant, but we leave it
380 * Check that the index still exists
389 * Check that the index is still the one with indisclustered set,
402 * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
403 * would work in most respects, but the index would only get marked as
404 * indisclustered in the current database, leading to unexpected behavior
405 * if CLUSTER were later invoked in another database.
409 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
410 errmsg(
"cannot cluster a shared catalog")));
413 * Don't process temp tables of other backends ... their local buffer
414 * manager is not going to cope.
420 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
421 errmsg(
"cannot cluster temporary tables of other sessions")));
424 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
425 errmsg(
"cannot vacuum temporary tables of other sessions")));
429 * Also check for active uses of the relation in the current transaction,
430 * including open scans and pending AFTER trigger events.
434 /* Check heap and index are valid to cluster on */
437 /* verify the index is good and lock it */
446 * Quietly ignore the request if this is a materialized view which has not
447 * been populated from its query. No harm is done because there is no data
448 * to deal with, and we don't want to throw an error if this is part of a
449 * multi-relation request -- for example, CLUSTER was run on the entire
452 if (OldHeap->
rd_rel->relkind == RELKIND_MATVIEW &&
460 OldHeap->
rd_rel->relkind == RELKIND_MATVIEW ||
461 OldHeap->
rd_rel->relkind == RELKIND_TOASTVALUE);
464 * All predicate locks on the tuples or pages are about to be made
465 * invalid, because we move tuples around. Promote them to relation
466 * locks. Predicate locks on indexes will be promoted when they are
471 /* rebuild_relation does all the dirty work */
473 /* rebuild_relation closes OldHeap, and index if valid */
476 /* Roll back any GUC changes executed by index functions */
479 /* Restore userid and security context */
486 * Verify that the specified heap and index are valid to cluster on
488 * Side effect: obtains lock on the index. The caller may
489 * in some cases already have AccessExclusiveLock on the table, but
490 * not in all cases so we can't rely on the table-level lock for
501 * Check that index is in fact an index on the given relation
506 (
errcode(ERRCODE_WRONG_OBJECT_TYPE),
507 errmsg(
"\"%s\" is not an index for table \"%s\"",
511 /* Index AM must allow clustering */
514 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
515 errmsg(
"cannot cluster on index \"%s\" because access method does not support clustering",
519 * Disallow clustering on incomplete indexes (those that might not index
520 * every row of the relation). We could relax this by making a separate
521 * seqscan pass over the table to copy the missing rows, but that seems
522 * expensive and tedious.
526 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
527 errmsg(
"cannot cluster on partial index \"%s\"",
531 * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
532 * it might well not contain entries for every heap row, or might not even
533 * be internally consistent. (But note that we don't check indcheckxmin;
534 * the worst consequence of following broken HOT chains would be that we
535 * might put recently-dead tuples out-of-order in the new table, and there
536 * is little harm in that.)
538 if (!OldIndex->
rd_index->indisvalid)
540 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
541 errmsg(
"cannot cluster on invalid index \"%s\"",
544 /* Drop relcache refcnt on OldIndex, but keep lock */
549 * mark_index_clustered: mark the specified index as the one clustered on
551 * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
561 /* Disallow applying to a partitioned table */
562 if (rel->
rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
564 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
565 errmsg(
"cannot mark index clustered in partitioned table")));
568 * If the index is already marked clustered, no need to do anything.
577 * Check each index of the relation and set/clear the bit as needed.
588 elog(
ERROR,
"cache lookup failed for index %u", thisIndexOid);
592 * Unset the bit if set. We know it's wrong because we checked this
595 if (indexForm->indisclustered)
597 indexForm->indisclustered =
false;
600 else if (thisIndexOid == indexOid)
602 /* this was checked earlier, but let's be real sure */
603 if (!indexForm->indisvalid)
604 elog(
ERROR,
"cannot cluster on invalid index %u", indexOid);
605 indexForm->indisclustered =
true;
619 * rebuild_relation: rebuild an existing relation in index or physical order
621 * OldHeap: table to rebuild.
622 * index: index to cluster by, or NULL to rewrite in physical order.
624 * On entry, heap and index (if one is given) must be open, and
625 * AccessExclusiveLock held on them.
626 * On exit, they are closed, but locks on them are not released.
632 Oid accessMethod = OldHeap->
rd_rel->relam;
633 Oid tableSpace = OldHeap->
rd_rel->reltablespace;
637 bool is_system_catalog;
638 bool swap_toast_by_content;
646 /* Mark the correct index as clustered */
649 /* Remember info about rel before closing OldHeap */
650 relpersistence = OldHeap->
rd_rel->relpersistence;
654 * Create the transient table that will receive the re-ordered data.
656 * OldHeap is already locked, so no need to lock it again. make_new_heap
657 * obtains AccessExclusiveLock on the new heap and its toast table.
666 /* Copy the heap data into the new table in the desired order */
668 &swap_toast_by_content, &frozenXid, &cutoffMulti);
671 /* Close relcache entries, but keep lock until transaction commit */
677 * Close the new relation so it can be dropped as soon as the storage is
678 * swapped. The relation is not visible to others, so no need to unlock it
684 * Swap the physical files of the target and transient tables, then
685 * rebuild the target's indexes and throw away the transient table.
688 swap_toast_by_content,
false,
true,
689 frozenXid, cutoffMulti,
695 * Create the transient table that will be filled with new data during
696 * CLUSTER, ALTER TABLE, and similar operations. The transient table
697 * duplicates the logical structure of the OldHeap; but will have the
698 * specified physical storage properties NewTableSpace, NewAccessMethod, and
701 * After this, the caller should load the new heap with transferred/modified
702 * data, then call finish_heap_swap to complete the operation.
706 char relpersistence,
LOCKMODE lockmode)
722 * Note that the NewHeap will not receive any of the defaults or
723 * constraints associated with the OldHeap; we don't need 'em, and there's
724 * no reason to spend cycles inserting them into the catalogs only to
729 * But we do want to use reloptions of the old heap for new heap.
733 elog(
ERROR,
"cache lookup failed for relation %u", OIDOldHeap);
737 reloptions = (
Datum) 0;
739 if (relpersistence == RELPERSISTENCE_TEMP)
745 * Create the new heap, using a temporary name in the same namespace as
746 * the existing table. NOTE: there is some risk of collision with user
747 * relnames. Working around this seems more trouble than it's worth; in
748 * particular, we can't create the new heap in a different namespace from
749 * the old, or we will have problems with the TEMP status of temp tables.
751 * Note: the new heap is not a shared relation, even if we are rebuilding
752 * a shared rel. However, we do make the new heap mapped if the source is
753 * mapped. This simplifies swap_relation_files, and is absolutely
754 * necessary for rebuilding pg_class, for reasons explained there.
756 snprintf(NewHeapName,
sizeof(NewHeapName),
"pg_temp_%u", OIDOldHeap);
764 OldHeap->
rd_rel->relowner,
784 * Advance command counter so that the newly-created relation's catalog
785 * tuples will be visible to table_open.
790 * If necessary, create a TOAST table for the new relation.
792 * If the relation doesn't have a TOAST table already, we can't need one
793 * for the new relation. The other way around is possible though: if some
794 * wide columns have been dropped, NewHeapCreateToastTable can decide that
795 * no TOAST table is needed for the new table.
797 * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
798 * that the TOAST table will be visible for insertion.
800 toastid = OldHeap->
rd_rel->reltoastrelid;
803 /* keep the existing toast table's reloptions, if any */
806 elog(
ERROR,
"cache lookup failed for relation %u", toastid);
810 reloptions = (
Datum) 0;
823 * Do the physical copying of table data.
825 * There are three output parameters:
826 * *pSwapToastByContent is set true if toast tables must be swapped by content.
827 * *pFreezeXid receives the TransactionId used as freeze cutoff point.
828 * *pCutoffMulti receives the MultiXactId used as a cutoff point.
843 double num_tuples = 0,
845 tups_recently_dead = 0;
853 /* Store a copy of the namespace name for logging purposes */
857 * Their tuple descriptors should be exactly alike, but here we only need
858 * assume that they have the same number of columns.
862 Assert(newTupDesc->natts == oldTupDesc->natts);
865 * If the OldHeap has a toast table, get lock on the toast table to keep
866 * it from being vacuumed. This is needed because autovacuum processes
867 * toast tables independently of their main tables, with no lock on the
868 * latter. If an autovacuum were to start on the toast table after we
869 * compute our OldestXmin below, it would use a later OldestXmin, and then
870 * possibly remove as DEAD toast tuples belonging to main tuples we think
871 * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
874 * We don't need to open the toast relation here, just lock it. The lock
875 * will be held till end of transaction.
877 if (OldHeap->
rd_rel->reltoastrelid)
881 * If both tables have TOAST tables, perform toast swap by content. It is
882 * possible that the old table has a toast table but the new one doesn't,
883 * if toastable columns have been dropped. In that case we have to do
884 * swap by links. This is okay because swap by content is only essential
885 * for system catalogs, and we don't support schema changes for them.
887 if (OldHeap->
rd_rel->reltoastrelid && NewHeap->
rd_rel->reltoastrelid)
889 *pSwapToastByContent =
true;
892 * When doing swap by content, any toast pointers written into NewHeap
893 * must use the old toast table's OID, because that's where the toast
894 * data will eventually be found. Set this up by setting rd_toastoid.
895 * This also tells toast_save_datum() to preserve the toast value
896 * OIDs, which we want so as not to invalidate toast pointers in
897 * system catalog caches, and to avoid making multiple copies of a
898 * single toast value.
900 * Note that we must hold NewHeap open until we are done writing data,
901 * since the relcache will not guarantee to remember this setting once
902 * the relation is closed. Also, this technique depends on the fact
903 * that no one will try to read from the NewHeap until after we've
904 * finished writing it and swapping the rels --- otherwise they could
905 * follow the toast pointers to the wrong place. (It would actually
906 * work for values copied over from the old toast table, but not for
907 * any values that we toast which were previously not toasted.)
912 *pSwapToastByContent =
false;
915 * Compute xids used to freeze and weed out dead tuples and multixacts.
916 * Since we're going to rewrite the whole table anyway, there's no reason
917 * not to be aggressive about this.
923 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
924 * backwards, so take the max.
935 * MultiXactCutoff, similarly, shouldn't go backwards either.
946 * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
947 * the OldHeap. We know how to use a sort to duplicate the ordering of a
948 * btree index, and will use seqscan-and-sort for that case if the planner
949 * tells us it's cheaper. Otherwise, always indexscan if an index is
950 * provided, else plain seqscan.
952 if (OldIndex != NULL && OldIndex->
rd_rel->relam == BTREE_AM_OID)
958 /* Log what we're doing */
959 if (OldIndex != NULL && !use_sort)
961 (
errmsg(
"clustering \"%s.%s\" using index scan on \"%s\"",
967 (
errmsg(
"clustering \"%s.%s\" using sequential scan and sort",
972 (
errmsg(
"vacuuming \"%s.%s\"",
977 * Hand off the actual copying to AM specific function, the generic code
978 * cannot know how to deal with visibility across AMs. Note that this
979 * routine is allowed to set FreezeXid / MultiXactCutoff to different
980 * values (e.g. because the AM doesn't use freezing).
985 &num_tuples, &tups_vacuumed,
986 &tups_recently_dead);
988 /* return selected values to caller, get set as relfrozenxid/minmxid */
992 /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
997 /* Log what we did */
999 (
errmsg(
"\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
1002 tups_vacuumed, num_tuples,
1004 errdetail(
"%.0f dead row versions cannot be removed yet.\n"
1009 /* Update pg_class to reflect the correct values of pages and tuples. */
1015 elog(
ERROR,
"cache lookup failed for relation %u",
1019 relform->relpages = num_pages;
1020 relform->reltuples = num_tuples;
1022 /* Don't update the stats for pg_class. See swap_relation_files. */
1032 /* Make the update visible */
1037 * Swap the physical files of two given relations.
1039 * We swap the physical identity (reltablespace, relfilenumber) while keeping
1040 * the same logical identities of the two relations. relpersistence is also
1041 * swapped, which is critical since it determines where buffers live for each
1044 * We can swap associated TOAST data in either of two ways: recursively swap
1045 * the physical content of the toast tables (and their indexes), or swap the
1046 * TOAST links in the given relations' pg_class entries. The former is needed
1047 * to manage rewrites of shared catalogs (where we cannot change the pg_class
1048 * links) while the latter is the only way to handle cases in which a toast
1049 * table is added or removed altogether.
1051 * Additionally, the first relation is marked with relfrozenxid set to
1052 * frozenXid. It seems a bit ugly to have this here, but the caller would
1053 * have to do it anyway, so having it here saves a heap_update. Note: in
1054 * the swap-toast-links case, we assume we don't need to change the toast
1055 * table's relfrozenxid: the new version of the toast table should already
1056 * have relfrozenxid set to RecentXmin, which is good enough.
1058 * Lastly, if r2 and its toast table and toast index (if any) are mapped,
1059 * their OIDs are emitted into mapped_tables[]. This is hacky but beats
1060 * having to look the information up again later in finish_heap_swap.
1064 bool swap_toast_by_content,
1082 /* We need writable copies of both pg_class tuples. */
1087 elog(
ERROR,
"cache lookup failed for relation %u", r1);
1092 elog(
ERROR,
"cache lookup failed for relation %u", r2);
1095 relfilenumber1 = relform1->relfilenode;
1096 relfilenumber2 = relform2->relfilenode;
1097 relam1 = relform1->relam;
1098 relam2 = relform2->relam;
1104 * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
1107 Assert(!target_is_pg_class);
1109 swaptemp = relform1->relfilenode;
1110 relform1->relfilenode = relform2->relfilenode;
1111 relform2->relfilenode = swaptemp;
1113 swaptemp = relform1->reltablespace;
1114 relform1->reltablespace = relform2->reltablespace;
1115 relform2->reltablespace = swaptemp;
1117 swaptemp = relform1->relam;
1118 relform1->relam = relform2->relam;
1119 relform2->relam = swaptemp;
1121 swptmpchr = relform1->relpersistence;
1122 relform1->relpersistence = relform2->relpersistence;
1123 relform2->relpersistence = swptmpchr;
1125 /* Also swap toast links, if we're swapping by links */
1126 if (!swap_toast_by_content)
1128 swaptemp = relform1->reltoastrelid;
1129 relform1->reltoastrelid = relform2->reltoastrelid;
1130 relform2->reltoastrelid = swaptemp;
1136 * Mapped-relation case. Here we have to swap the relation mappings
1137 * instead of modifying the pg_class columns. Both must be mapped.
1141 elog(
ERROR,
"cannot swap mapped relation \"%s\" with non-mapped relation",
1145 * We can't change the tablespace nor persistence of a mapped rel, and
1146 * we can't handle toast link swapping for one either, because we must
1147 * not apply any critical changes to its pg_class row. These cases
1148 * should be prevented by upstream permissions tests, so these checks
1149 * are a non-user-facing emergency backstop.
1151 if (relform1->reltablespace != relform2->reltablespace)
1152 elog(
ERROR,
"cannot change tablespace of mapped relation \"%s\"",
1154 if (relform1->relpersistence != relform2->relpersistence)
1155 elog(
ERROR,
"cannot change persistence of mapped relation \"%s\"",
1157 if (relform1->relam != relform2->relam)
1158 elog(
ERROR,
"cannot change access method of mapped relation \"%s\"",
1160 if (!swap_toast_by_content &&
1161 (relform1->reltoastrelid || relform2->reltoastrelid))
1162 elog(
ERROR,
"cannot swap toast by links for mapped relation \"%s\"",
1166 * Fetch the mappings --- shouldn't fail, but be paranoid
1170 elog(
ERROR,
"could not find relation mapping for relation \"%s\", OID %u",
1171 NameStr(relform1->relname), r1);
1174 elog(
ERROR,
"could not find relation mapping for relation \"%s\", OID %u",
1175 NameStr(relform2->relname), r2);
1178 * Send replacement mappings to relmapper. Note these won't actually
1179 * take effect until CommandCounterIncrement.
1184 /* Pass OIDs of mapped r2 tables back to caller */
1185 *mapped_tables++ = r2;
1189 * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
1190 * subtransaction. The rel2 storage (swapped from rel1) may or may not be
1208 * In the case of a shared catalog, these next few steps will only affect
1209 * our own database's pg_class row; but that's okay, because they are all
1210 * noncritical updates. That's also an important fact for the case of a
1211 * mapped catalog, because it's possible that we'll commit the map change
1212 * and then fail to commit the pg_class update.
1215 /* set rel1's frozen Xid and minimum MultiXid */
1216 if (relform1->relkind != RELKIND_INDEX)
1220 relform1->relfrozenxid = frozenXid;
1221 relform1->relminmxid = cutoffMulti;
1224 /* swap size statistics too, since new rel has freshly-updated stats */
1228 int32 swap_allvisible;
1229 int32 swap_allfrozen;
1231 swap_pages = relform1->relpages;
1232 relform1->relpages = relform2->relpages;
1233 relform2->relpages = swap_pages;
1235 swap_tuples = relform1->reltuples;
1236 relform1->reltuples = relform2->reltuples;
1237 relform2->reltuples = swap_tuples;
1239 swap_allvisible = relform1->relallvisible;
1240 relform1->relallvisible = relform2->relallvisible;
1241 relform2->relallvisible = swap_allvisible;
1243 swap_allfrozen = relform1->relallfrozen;
1244 relform1->relallfrozen = relform2->relallfrozen;
1245 relform2->relallfrozen = swap_allfrozen;
1249 * Update the tuples in pg_class --- unless the target relation of the
1250 * swap is pg_class itself. In that case, there is zero point in making
1251 * changes because we'd be updating the old data that we're about to throw
1252 * away. Because the real work being done here for a mapped relation is
1253 * just to change the relation map settings, it's all right to not update
1254 * the pg_class rows in this case. The most important changes will instead
1255 * be performed later, in finish_heap_swap() itself.
1257 if (!target_is_pg_class)
1270 /* no update ... but we do still need relcache inval */
1276 * Now that pg_class has been updated with its relevant information for
1277 * the swap, update the dependency of the relations to point to their new
1278 * table AM, if it has changed.
1280 if (relam1 != relam2)
1284 AccessMethodRelationId,
1287 elog(
ERROR,
"could not change access method dependency for relation \"%s.%s\"",
1292 AccessMethodRelationId,
1295 elog(
ERROR,
"could not change access method dependency for relation \"%s.%s\"",
1301 * Post alter hook for modified relations. The change to r2 is always
1302 * internal, but r1 depends on the invocation context.
1310 * If we have toast tables associated with the relations being swapped,
1311 * deal with them too.
1313 if (relform1->reltoastrelid || relform2->reltoastrelid)
1315 if (swap_toast_by_content)
1317 if (relform1->reltoastrelid && relform2->reltoastrelid)
1319 /* Recursively swap the contents of the toast tables */
1321 relform2->reltoastrelid,
1323 swap_toast_by_content,
1331 /* caller messed up */
1332 elog(
ERROR,
"cannot swap toast files by content when there's only one");
1338 * We swapped the ownership links, so we need to change dependency
1341 * NOTE: it is possible that only one table has a toast table.
1343 * NOTE: at present, a TOAST table's only dependency is the one on
1344 * its owning table. If more are ever created, we'd need to use
1345 * something more selective than deleteDependencyRecordsFor() to
1346 * get rid of just the link we want.
1353 * We disallow this case for system catalogs, to avoid the
1354 * possibility that the catalog we're rebuilding is one of the
1355 * ones the dependency changes would change. It's too late to be
1356 * making any data changes to the target catalog.
1359 elog(
ERROR,
"cannot swap toast files by links for system catalogs");
1361 /* Delete old dependencies */
1362 if (relform1->reltoastrelid)
1365 relform1->reltoastrelid,
1368 elog(
ERROR,
"expected one dependency record for TOAST table, found %ld",
1371 if (relform2->reltoastrelid)
1374 relform2->reltoastrelid,
1377 elog(
ERROR,
"expected one dependency record for TOAST table, found %ld",
1381 /* Register new dependencies */
1382 baseobject.
classId = RelationRelationId;
1384 toastobject.
classId = RelationRelationId;
1387 if (relform1->reltoastrelid)
1390 toastobject.
objectId = relform1->reltoastrelid;
1395 if (relform2->reltoastrelid)
1398 toastobject.
objectId = relform2->reltoastrelid;
1406 * If we're swapping two toast tables by content, do the same for their
1407 * valid index. The swap can actually be safely done only if the relations
1410 if (swap_toast_by_content &&
1411 relform1->relkind == RELKIND_TOASTVALUE &&
1412 relform2->relkind == RELKIND_TOASTVALUE)
1417 /* Get valid index for each relation */
1426 swap_toast_by_content,
1441 * Remove the transient table that was built by make_new_heap, and finish
1442 * cleaning up (including rebuilding all indexes on the old heap).
1446 bool is_system_catalog,
1447 bool swap_toast_by_content,
1448 bool check_constraints,
1452 char newrelpersistence)
1455 Oid mapped_tables[4];
1460 /* Report that we are now swapping relation files */
1464 /* Zero out possible results from swap_relation_files */
1465 memset(mapped_tables, 0,
sizeof(mapped_tables));
1468 * Swap the contents of the heap relations (including any toast tables).
1469 * Also set old heap's relfrozenxid to frozenXid.
1472 (OIDOldHeap == RelationRelationId),
1473 swap_toast_by_content, is_internal,
1474 frozenXid, cutoffMulti, mapped_tables);
1477 * If it's a system catalog, queue a sinval message to flush all catcaches
1478 * on the catalog when we reach CommandCounterIncrement.
1480 if (is_system_catalog)
1484 * Rebuild each index on the relation (but not the toast table, which is
1485 * all-new at this point). It is important to do this before the DROP
1486 * step because if we are processing a system catalog that will be used
1487 * during DROP, we want to have its indexes available. There is no
1488 * advantage to the other order anyway because this is all transactional,
1489 * so no chance to reclaim disk space before commit. We do not need a
1490 * final CommandCounterIncrement() because reindex_relation does it.
1492 * Note: because index_build is called via reindex_relation, it will never
1493 * set indcheckxmin true for the indexes. This is OK even though in some
1494 * sense we are building new indexes rather than rebuilding existing ones,
1495 * because the new heap won't contain any HOT chains at all, let alone
1496 * broken ones, so it can't be necessary to set indcheckxmin.
1499 if (check_constraints)
1503 * Ensure that the indexes have the same persistence as the parent
1506 if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1508 else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1511 /* Report that we are now reindexing relations */
1517 /* Report that we are now doing clean up */
1522 * If the relation being rebuilt is pg_class, swap_relation_files()
1523 * couldn't update pg_class's own pg_class entry (check comments in
1524 * swap_relation_files()), thus relfrozenxid was not updated. That's
1525 * annoying because a potential reason for doing a VACUUM FULL is an
1526 * imminent or actual anti-wraparound shutdown. So, now that we can
1527 * access the new relation using its indices, update relfrozenxid.
1528 * pg_class doesn't have a toast relation, so we don't need to update the
1529 * corresponding toast relation. Note that there's little point moving all
1530 * relfrozenxid updates here since swap_relation_files() needs to write to
1531 * pg_class for non-mapped relations anyway.
1533 if (OIDOldHeap == RelationRelationId)
1543 elog(
ERROR,
"cache lookup failed for relation %u", OIDOldHeap);
1546 relform->relfrozenxid = frozenXid;
1547 relform->relminmxid = cutoffMulti;
1554 /* Destroy new heap with old filenumber */
1555 object.classId = RelationRelationId;
1556 object.objectId = OIDNewHeap;
1557 object.objectSubId = 0;
1560 * The new relation is local to our transaction and we know nothing
1561 * depends on it, so DROP_RESTRICT should be OK.
1565 /* performDeletion does CommandCounterIncrement at end */
1568 * Now we must remove any relation mapping entries that we set up for the
1569 * transient table, as well as its toast table and toast index if any. If
1570 * we fail to do this before commit, the relmapper will complain about new
1571 * permanent map entries being added post-bootstrap.
1577 * At this point, everything is kosher except that, if we did toast swap
1578 * by links, the toast table's name corresponds to the transient table.
1579 * The name is irrelevant to the backend because it's referenced by OID,
1580 * but users looking at the catalogs could be confused. Rename it to
1581 * prevent this problem.
1583 * Note no lock required on the relation, because we already hold an
1584 * exclusive lock on it.
1586 if (!swap_toast_by_content)
1596 /* Get the associated valid index to be renamed */
1600 /* rename the toast table ... */
1604 NewToastName,
true,
false);
1606 /* ... and its valid index too. */
1611 NewToastName,
true,
true);
1614 * Reset the relrewrite for the toast. The command-counter
1615 * increment is required here as we are about to update the tuple
1616 * that is updated as part of RenameRelationInternal.
1624 /* if it's not a catalog table, clear any missing attribute settings */
1625 if (!is_system_catalog)
1637 * Get a list of tables that the current user has privileges on and
1638 * have indisclustered set. Return the list in a List * of RelToCluster
1639 * (stored in the specified memory context), each one giving the tableOid
1640 * and the indexOid on which the table is already clustered.
1654 * Get all indexes that have indisclustered set and that the current user
1655 * has the appropriate privileges for.
1659 Anum_pg_index_indisclustered,
1672 /* Use a permanent memory context for the result list */
1690 * Given an index on a partitioned table, return a list of RelToCluster for
1691 * all of its leaf child tables/indexes.
1693 * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
1694 * on the table containing the index.
1704 /* Do not lock the children until they're processed */
1707 foreach(lc, inhoids)
1713 /* consider only leaf indexes */
1718 * It's possible that the user does not have privileges to CLUSTER the
1719 * leaf partition despite having such privileges on the partitioned
1720 * table. We skip any partitions which the user is not permitted to
1726 /* Use a permanent memory context for the result list */
1741 * Return whether userid has privileges to CLUSTER relid. If not, this
1742 * function emits a WARNING.
1751 (
errmsg(
"permission denied to cluster \"%s\", skipping it",
AclResult pg_class_aclcheck(Oid table_oid, Oid roleid, AclMode mode)
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_CLUSTER
#define RelationGetNumberOfBlocks(reln)
#define PG_USED_FOR_ASSERTS_ONLY
TransactionId MultiXactId
#define OidIsValid(objectId)
bool IsSystemRelation(Relation relation)
bool IsSystemClass(Oid relid, Form_pg_class reltuple)
static void copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verbose, bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti)
void check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
void finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool is_system_catalog, bool swap_toast_by_content, bool check_constraints, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, char newrelpersistence)
static List * get_tables_to_cluster(MemoryContext cluster_context)
void cluster_rel(Relation OldHeap, Oid indexOid, ClusterParams *params)
static List * get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
static bool cluster_is_permitted_for_relation(Oid relid, Oid userid)
static void cluster_multiple_rels(List *rtcs, ClusterParams *params)
Oid make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod, char relpersistence, LOCKMODE lockmode)
static void rebuild_relation(Relation OldHeap, Relation index, bool verbose)
void cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
static void swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, bool swap_toast_by_content, bool is_internal, TransactionId frozenXid, MultiXactId cutoffMulti, Oid *mapped_tables)
#define CLUOPT_RECHECK_ISCLUSTERED
bool defGetBoolean(DefElem *def)
void performDeletion(const ObjectAddress *object, DropBehavior behavior, int flags)
#define PERFORM_DELETION_INTERNAL
int errdetail(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
int NewGUCNestLevel(void)
void RestrictSearchPath(void)
void AtEOXact_GUC(bool isCommit, int nestLevel)
Assert(PointerIsAligned(start, uint64))
void RelationClearMissing(Relation rel)
Oid heap_create_with_catalog(const char *relname, Oid relnamespace, Oid reltablespace, Oid relid, Oid reltypeid, Oid reloftypeid, Oid ownerid, Oid accessmtd, TupleDesc tupdesc, List *cooked_constraints, char relkind, char relpersistence, bool shared_relation, bool mapped_relation, OnCommitAction oncommit, Datum reloptions, bool use_user_acl, bool allow_system_table_mods, bool is_internal, Oid relrewrite, ObjectAddress *typaddress)
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
bool heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc)
void heap_freetuple(HeapTuple htup)
#define HeapTupleIsValid(tuple)
static void * GETSTRUCT(const HeapTupleData *tuple)
Oid IndexGetRelation(Oid indexId, bool missing_ok)
bool reindex_relation(const ReindexStmt *stmt, Oid relid, int flags, const ReindexParams *params)
#define REINDEX_REL_FORCE_INDEXES_UNLOGGED
#define REINDEX_REL_SUPPRESS_INDEX_USE
#define REINDEX_REL_FORCE_INDEXES_PERMANENT
#define REINDEX_REL_CHECK_CONSTRAINTS
void index_close(Relation relation, LOCKMODE lockmode)
Relation index_open(Oid relationId, LOCKMODE lockmode)
void CatalogTupleUpdate(Relation heapRel, ItemPointer otid, HeapTuple tup)
void CatalogCloseIndexes(CatalogIndexState indstate)
CatalogIndexState CatalogOpenIndexes(Relation heapRel)
void CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup, CatalogIndexState indstate)
void CacheInvalidateCatalog(Oid catalogId)
void CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
List * lappend(List *list, void *datum)
void LockRelationOid(Oid relid, LOCKMODE lockmode)
bool CheckRelationLockedByMe(Relation relation, LOCKMODE lockmode, bool orstronger)
bool CheckRelationOidLockedByMe(Oid relid, LOCKMODE lockmode, bool orstronger)
#define AccessExclusiveLock
char * get_rel_name(Oid relid)
char get_rel_relkind(Oid relid)
Oid get_rel_namespace(Oid relid)
bool get_index_isclustered(Oid index_oid)
char * get_namespace_name(Oid nspid)
Oid get_relname_relid(const char *relname, Oid relnamespace)
void MemoryContextDelete(MemoryContext context)
MemoryContext PortalContext
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
#define SECURITY_RESTRICTED_OPERATION
#define CHECK_FOR_INTERRUPTS()
void GetUserIdAndSecContext(Oid *userid, int *sec_context)
void SetUserIdAndSecContext(Oid userid, int sec_context)
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
#define MultiXactIdIsValid(multi)
#define InvalidMultiXactId
Oid LookupCreationNamespace(const char *nspname)
Oid RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, uint32 flags, RangeVarGetRelidCallback callback, void *callback_arg)
#define InvokeObjectPostAlterHookArg(classId, objectId, subId, auxiliaryId, is_internal)
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
int parser_errposition(ParseState *pstate, int location)
FormData_pg_class * Form_pg_class
void recordDependencyOn(const ObjectAddress *depender, const ObjectAddress *referenced, DependencyType behavior)
long changeDependencyFor(Oid classId, Oid objectId, Oid refClassId, Oid oldRefObjectId, Oid newRefObjectId)
long deleteDependencyRecordsFor(Oid classId, Oid objectId, bool skipExtensionDeps)
FormData_pg_index * Form_pg_index
List * find_all_inheritors(Oid parentrelId, LOCKMODE lockmode, List **numparents)
const char * pg_rusage_show(const PGRUsage *ru0)
void pg_rusage_init(PGRUsage *ru0)
bool plan_cluster_use_sort(Oid tableOid, Oid indexOid)
static Datum BoolGetDatum(bool X)
static Datum ObjectIdGetDatum(Oid X)
void TransferPredicateLocksToHeapRelation(Relation relation)
#define PROGRESS_CLUSTER_PHASE
#define PROGRESS_CLUSTER_COMMAND_VACUUM_FULL
#define PROGRESS_CLUSTER_PHASE_REBUILD_INDEX
#define PROGRESS_CLUSTER_COMMAND_CLUSTER
#define PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP
#define PROGRESS_CLUSTER_COMMAND
#define PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES
#define RelationGetRelid(relation)
#define RelationGetDescr(relation)
#define RelationIsMapped(relation)
#define RelationGetRelationName(relation)
#define RelationIsPopulated(relation)
#define RELATION_IS_OTHER_TEMP(relation)
#define RelationGetNamespace(relation)
List * RelationGetIndexList(Relation relation)
void RelationAssumeNewRelfilelocator(Relation relation)
void RelationMapRemoveMapping(Oid relationId)
RelFileNumber RelationMapOidToFilenumber(Oid relationId, bool shared)
void RelationMapUpdateMap(Oid relationId, RelFileNumber fileNumber, bool shared, bool immediate)
#define RelFileNumberIsValid(relnumber)
void ScanKeyInit(ScanKey entry, AttrNumber attributeNumber, StrategyNumber strategy, RegProcedure procedure, Datum argument)
Snapshot GetTransactionSnapshot(void)
void PushActiveSnapshot(Snapshot snapshot)
void PopActiveSnapshot(void)
void relation_close(Relation relation, LOCKMODE lockmode)
Relation relation_open(Oid relationId, LOCKMODE lockmode)
#define BTEqualStrategyNumber
struct IndexAmRoutine * rd_indam
SubTransactionId rd_firstRelfilelocatorSubid
struct HeapTupleData * rd_indextuple
SubTransactionId rd_newRelfilelocatorSubid
SubTransactionId rd_createSubid
TransactionId FreezeLimit
TransactionId relfrozenxid
MultiXactId MultiXactCutoff
void ReleaseSysCache(HeapTuple tuple)
HeapTuple SearchSysCache1(int cacheId, Datum key1)
Datum SysCacheGetAttr(int cacheId, HeapTuple tup, AttrNumber attributeNumber, bool *isNull)
#define SearchSysCacheCopy1(cacheId, key1)
#define SearchSysCacheExists1(cacheId, key1)
void table_close(Relation relation, LOCKMODE lockmode)
Relation table_open(Oid relationId, LOCKMODE lockmode)
TableScanDesc table_beginscan_catalog(Relation relation, int nkeys, ScanKeyData *key)
static void table_endscan(TableScanDesc scan)
static void table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
void ResetRelRewrite(Oid myrelid)
void CheckTableNotInUse(Relation rel, const char *stmt)
void RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bool is_index)
void RangeVarCallbackMaintainsTable(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg)
Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock)
void NewHeapCreateToastTable(Oid relOid, Datum reloptions, LOCKMODE lockmode, Oid OIDOldToast)
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
#define InvalidTransactionId
#define TransactionIdIsValid(xid)
#define TransactionIdIsNormal(xid)
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
void CommandCounterIncrement(void)
void PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
void StartTransactionCommand(void)
void CommitTransactionCommand(void)