index b3e74a62171657c071d690b54d807622400db431..034934a56f2ab41c2859e7ab6d573504d504e30f 100644 (file)
@@ -349,6 +349,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
Size freespace;
bool all_visible_according_to_vm = false;
bool all_visible;
+ bool has_dead_tuples;
/*
* Skip pages that don't require vacuuming according to the visibility
@@ -500,6 +501,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* requiring freezing.
*/
all_visible = true;
+ has_dead_tuples = false;
nfrozen = 0;
hastup = false;
prev_dead_count = vacrelstats->num_dead_tuples;
@@ -640,6 +642,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
&vacrelstats->latestRemovedXid);
tups_vacuumed += 1;
+ has_dead_tuples = true;
}
else
{
@@ -703,9 +706,22 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
PageSetAllVisible(page);
SetBufferCommitInfoNeedsSave(buf);
}
- else if (PageIsAllVisible(page) && !all_visible)
+ /*
+ * It's possible for the value returned by GetOldestXmin() to move
+ * backwards, so it's not wrong for us to see tuples that appear to
+ * not be visible to everyone yet, while PD_ALL_VISIBLE is already
+ * set. The real safe xmin value never moves backwards, but
+ * GetOldestXmin() is conservative and sometimes returns a value
+ * that's unnecessarily small, so if we see that contradiction it
+ * just means that the tuples that we think are not visible to
+ * everyone yet actually are, and the PD_ALL_VISIBLE flag is correct.
+ *
+ * There should never be dead tuples on a page with PD_ALL_VISIBLE
+ * set, however.
+ */
+ else if (PageIsAllVisible(page) && has_dead_tuples)
{
- elog(WARNING, "PD_ALL_VISIBLE flag was incorrectly set in relation \"%s\" page %u",
+ elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
relname, blkno);
PageClearAllVisible(page);
SetBufferCommitInfoNeedsSave(buf);
index 8badd663be4642203f870b8534de730f97dddc33..89ec7f046196af9cbad247da08c3ea5c0474e676 100644 (file)
* This is also used to determine where to truncate pg_subtrans. allDbs
* must be TRUE for that case, and ignoreVacuum FALSE.
*
+ * Note: it's possible for the calculated value to move backwards on repeated
+ * calls. The calculated value is conservative, so that anything older is
+ * definitely not considered as running by anyone anymore, but the exact
+ * value calculated depends on a number of things. For example, if allDbs is
+ * TRUE and there are no transactions running in the current database,
+ * GetOldestXmin() returns latestCompletedXid. If a transaction begins after
+ * that, its xmin will include in-progress transactions in other databases
+ * that started earlier, so another call will return an lower value. The
+ * return value is also adjusted with vacuum_defer_cleanup_age, so increasing
+ * that setting on the fly is an easy way to have GetOldestXmin() move
+ * backwards.
+ *
* Note: we include all currently running xids in the set of considered xids.
* This ensures that if a just-started xact has not yet set its snapshot,
* when it does set the snapshot it cannot set xmin less than what we compute.