1/*-------------------------------------------------------------------------
4 * local buffer manager. Fast buffer manager for temporary tables,
5 * which never need to be WAL-logged or checkpointed, etc.
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994-5, Regents of the University of California
12 * src/backend/storage/buffer/localbuf.c
14 *-------------------------------------------------------------------------
33/* entry for buffer lookup hashtable */
37 int id;
/* Associated local buffer's index */
40/* Note: this macro only works on local buffers, not shared ones! */
/*
 * LocalBufHdrGetBlock - fetch the data block that belongs to a local
 * buffer header.
 *
 * The header's buf_id is turned into an index into LocalBufferBlockPointers
 * by computing -(buf_id + 2).  Local buffer headers are given
 * buf_id = -i - 2 for slot i when they are initialized, so this expression
 * recovers i.  The argument is evaluated once.
 */
41 #define LocalBufHdrGetBlock(bufHdr) \
42 LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
54/* number of local buffers pinned at least once */
64 * PrefetchLocalBuffer -
65 * initiate asynchronous read of a block of a relation
67 * Do PrefetchBuffer's work for temporary relations.
68 * No-op if prefetching isn't compiled in.
75 BufferTag newTag;
/* identity of requested block */
80 /* Initialize local buffers if first request in this session */
84 /* See if the desired buffer already exists */
90 /* Yes, so nothing to do */
96 /* Not in buffers, so initiate prefetch */
102#endif /* USE_PREFETCH */
111 * Find or create a local buffer for the given page of the given relation.
113 * API is similar to bufmgr.c's BufferAlloc, except that we do not need to do
114 * any locking since this is all local. We support only default access
115 * strategy (hence, usage_count is always advanced).
121 BufferTag newTag;
/* identity of requested block */
130 /* Initialize local buffers if first request in this session */
136 /* See if the desired buffer already exists */
153 bufid = -victim_buffer - 1;
158 if (found)
/* shouldn't happen */
159 elog(
ERROR,
"local buffer hash table corrupted");
165 bufHdr->
tag = newTag;
179 * Like FlushBuffer(), just for local buffers.
190 * Try to start an I/O operation. There are currently no reasons for
191 * StartLocalBufferIO to return false, so we raise an error in that case.
194 elog(
ERROR,
"failed to start write IO on local buffer");
196 /* Find smgr relation for buffer */
212 /* Temporary table I/O does not use Buffer Access Strategies */
232 * Need to get a new buffer. We use a clock-sweep algorithm (essentially
233 * the same as what freelist.c does now...)
258 * This can be reached if the backend initiated AIO for this
259 * buffer and then errored out.
264 /* Found a usable buffer */
269 else if (--trycounter == 0)
271 (
errcode(ERRCODE_INSUFFICIENT_RESOURCES),
272 errmsg(
"no empty local buffer available")));
276 * lazy memory allocation: allocate space on first use of a buffer.
280 /* Set pointer for use by BufferGetBlock() macro */
285 * This buffer is not referenced but it might still be dirty. If that's
286 * the case, write it out before reusing it!
292 * Remove the victim buffer from the hashtable and mark as invalid.
304/* see GetPinLimit() */
308 /* Every backend has its own temporary buffers, and can pin them all. */
312/* see GetAdditionalPinLimit() */
320/* see LimitAdditionalPins() */
326 if (*additional_pins <= 1)
330 * In contrast to LimitAdditionalPins(), other backends don't play a role
331 * here. We can allow up to NLocBuffer pins in total, but it might not be
332 * initialized yet so read num_temp_buffers.
336 if (*additional_pins >= max_pins)
337 *additional_pins = max_pins;
341 * Implementation of ExtendBufferedRelBy() and ExtendBufferedRelTo() for
356 /* Initialize local buffers if first request in this session */
371 /* new buffers are zero-filled */
372 MemSet(buf_block, 0, BLCKSZ);
380 * In contrast to shared relations, nothing could change the relation
381 * size concurrently. Thus we shouldn't end up finding that we don't
382 * need to do anything.
384 Assert(first_block <= extend_upto);
386 Assert((
uint64) first_block + extend_by <= extend_upto);
389 /* Fail if relation is already at maximum possible length */
392 (
errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
393 errmsg(
"cannot extend relation %s beyond %u blocks",
405 victim_buf_id = -buffers[
i] - 1;
408 /* in case we need to pin an existing buffer below */
427 * Clear the BM_VALID bit, do StartLocalBufferIO() and proceed.
432 buf_state &= ~BM_VALID;
435 /* no need to loop for local buffers */
444 victim_buf_hdr->
tag = tag;
450 hresult->
id = victim_buf_id;
458 /* actually extend relation */
462 io_start, 1, extend_by * BLCKSZ);
477 *extended_by = extend_by;
485 * MarkLocalBufferDirty -
486 * mark a local buffer dirty
498 fprintf(stderr,
"LB DIRTY %d\n", buffer);
518 * Like StartBufferIO, but for local buffers
526 * With AIO the buffer could have IO in progress, e.g. when there are two
527 * scans of the same relation. Either wait for the other IO or return
540 /* Once we get here, there is definitely no I/O active on this buffer */
542 /* Check if someone else already did the I/O */
549 /* BM_IO_IN_PROGRESS isn't currently used for local buffers */
551 /* local buffers don't track IO using resowners */
557 * Like TerminateBufferIO, but for local buffers
563 /* Only need to adjust flags */
566 /* BM_IO_IN_PROGRESS isn't currently used for local buffers */
568 /* Clear earlier errors, if this IO failed, it'll be marked again */
569 buf_state &= ~BM_IO_ERROR;
572 buf_state &= ~BM_DIRTY;
576 /* release pin held by IO subsystem, see also buffer_stage_common() */
582 buf_state |= set_flag_bits;
585 /* local buffers don't track IO using resowners */
587 /* local buffers don't use the IO CV, as no other process can see buffer */
589 /* local buffers don't use BM_PIN_COUNT_WAITER, so no need to wake */
593 * InvalidateLocalBuffer -- mark a local buffer invalid.
595 * If check_unreferenced is true, error out if the buffer is still
596 * pinned. Passing false is appropriate when calling InvalidateLocalBuffer()
597 * as part of changing the identity of a buffer, instead of just dropping the
600 * See also InvalidateBuffer().
606 int bufid = -buffer - 1;
611 * It's possible that we started IO on this buffer before e.g. aborting
612 * the transaction that created a table. We need to wait for that IO to
613 * complete before removing / reusing the buffer.
626 * We need to test not just LocalRefCount[bufid] but also the BufferDesc
627 * itself, as the latter is used to represent a pin by the AIO subsystem.
628 * This can happen if AIO is initiated and then the query errors out.
630 if (check_unreferenced &&
632 elog(
ERROR,
"block %u of %s is still referenced (local %d)",
639 /* Remove entry from hashtable */
642 if (!hresult)
/* shouldn't happen */
643 elog(
ERROR,
"local buffer hash table corrupted");
644 /* Mark buffer invalid */
646 buf_state &= ~BUF_FLAG_MASK;
647 buf_state &= ~BUF_USAGECOUNT_MASK;
652 * DropRelationLocalBuffers
653 * This function removes from the buffer pool all the pages of the
654 * specified relation that have block numbers >= firstDelBlock.
655 * (In particular, with firstDelBlock = 0, all pages are removed.)
656 * Dirty pages are simply dropped, without bothering to write them
657 * out first. Therefore, this is NOT rollback-able, and so should be
658 * used only with extreme caution!
660 * See DropRelationBuffers in bufmgr.c for more notes.
680 for (
j = 0;
j < nforks;
j++)
693 * DropRelationAllLocalBuffers
694 * This function removes from the buffer pool all pages of all forks
695 * of the specified relation.
697 * See DropRelationsAllBuffers in bufmgr.c for more notes.
721 * init the local buffer cache. Since most queries (esp. multi-user ones)
722 * don't involve local buffers, we delay allocating actual memory for the
723 * buffers until we need them; just make the buffer headers here.
733 * Parallel workers can't access data in temporary tables, because they
734 * have no visibility into the local buffers of their leader. This is a
735 * convenient, low-cost place to provide a backstop check for that. Note
736 * that we don't wish to prevent a parallel worker from accessing catalog
737 * metadata about a temp table, so checks at higher levels would be
742 (
errcode(ERRCODE_INVALID_TRANSACTION_STATE),
743 errmsg(
"cannot access temporary tables during a parallel operation")));
745 /* Allocate and zero buffer headers and auxiliary arrays */
751 (
errcode(ERRCODE_OUT_OF_MEMORY),
752 errmsg(
"out of memory")));
756 /* initialize fields that need to start off nonzero */
757 for (
i = 0;
i < nbufs;
i++)
762 * negative to indicate local buffer. This is tricky: shared buffers
763 * start with 0. We have to start with -2. (Note that the routine
764 * BufferDescriptorGetBuffer adds 1 to buf_id so our first buffer id
767 buf->buf_id = -
i - 2;
772 * Intentionally do not initialize the buffer's atomic variable
773 * (besides zeroing the underlying memory above). That way we get
774 * errors on platforms without atomics, if somebody (re-)introduces
775 * atomic operations for local buffers.
779 /* Create the lookup hash table */
789 elog(
ERROR,
"could not initialize local buffer hash table");
791 /* Initialization done, mark buffers allocated */
796 * XXX: We could have a slightly more efficient version of PinLocalBuffer()
797 * that does not support adjusting the usagecount - but so far it does not
798 * seem worth the trouble.
800 * Note that ResourceOwnerEnlarge() must have been done already.
807 int bufid = -buffer - 1;
815 if (adjust_usagecount &&
823 * See comment in PinBuffer().
825 * If the buffer isn't allocated yet, it'll be marked as defined in
826 * GetLocalBufferStorage().
848 int buffid = -buffer - 1;
866 /* see comment in UnpinBufferNoOwner */
872 * GUC check_hook for temp_buffers
878 * Once local buffers have been initialized, it's too late to change this.
879 * However, if this is only a test call, allow it.
883 GUC_check_errdetail(
"\"temp_buffers\" cannot be changed after any temporary tables have been accessed in the session.");
890 * GetLocalBufferStorage - allocate memory for a local buffer
892 * The idea of this function is to aggregate our requests for storage
893 * so that the memory manager doesn't see a whole lot of relatively small
894 * requests. Since we'll never give back a local buffer once it's created
895 * within a particular process, no point in burdening memmgr with separately
901 static char *cur_block = NULL;
902 static int next_buf_in_block = 0;
903 static int num_bufs_in_block = 0;
904 static int total_bufs_allocated = 0;
911 if (next_buf_in_block >= num_bufs_in_block)
913 /* Need to make a new request to memmgr */
917 * We allocate local buffers in a context of their own, so that the
918 * space eaten for them is easily recognizable in MemoryContextStats
919 * output. Create the context on first use.
921 if (LocalBufferContext == NULL)
924 "LocalBufferContext",
927 /* Start with a 16-buffer request; subsequent ones double each time */
928 num_bufs =
Max(num_bufs_in_block * 2, 16);
929 /* But not more than what we need for all remaining local bufs */
931 /* And don't overflow MaxAllocSize, either */
934 /* Buffers should be I/O aligned. */
940 next_buf_in_block = 0;
941 num_bufs_in_block = num_bufs;
944 /* Allocate next buffer in current memory block */
945 this_buf = cur_block + next_buf_in_block * BLCKSZ;
947 total_bufs_allocated++;
950 * Caller's PinLocalBuffer() was too early for Valgrind updates, so do it
951 * here. The block is actually undefined, but we want consistency with
952 * the regular case of not needing to allocate memory. This is
953 * specifically needed when method_io_uring.c fills the block, because
954 * Valgrind doesn't recognize io_uring reads causing undefined memory to
959 return (
Block) this_buf;
963 * CheckForLocalBufferLeaks - ensure this backend holds no local buffer pins
965 * This is just like CheckForBufferLeaks(), but for local buffers.
970#ifdef USE_ASSERT_CHECKING
973 int RefCountErrors = 0;
984 elog(
WARNING,
"local buffer refcount leak: %s", s);
990 Assert(RefCountErrors == 0);
996 * AtEOXact_LocalBuffers - clean up at end of transaction.
998 * This is just like AtEOXact_Buffers, but for local buffers.
1007 * AtProcExit_LocalBuffers - ensure we have dropped pins during backend exit.
1009 * This is just like AtProcExit_Buffers, but for local buffers.
1015 * We shouldn't be holding any remaining pins; if we are, and assertions
1016 * aren't enabled, we'll fail later in DropRelationBuffers while trying to
1017 * drop the temp rels.
bool pgaio_wref_valid(PgAioWaitRef *iow)
void pgaio_wref_clear(PgAioWaitRef *iow)
void pgaio_wref_wait(PgAioWaitRef *iow)
static void pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
#define InvalidBlockNumber
#define BufferIsLocal(buffer)
#define BM_MAX_USAGE_COUNT
static void InitBufferTag(BufferTag *tag, const RelFileLocator *rlocator, ForkNumber forkNum, BlockNumber blockNum)
#define BUF_USAGECOUNT_MASK
static ForkNumber BufTagGetForkNum(const BufferTag *tag)
static bool BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
static bool BufTagMatchesRelFileLocator(const BufferTag *tag, const RelFileLocator *rlocator)
#define BUF_STATE_GET_USAGECOUNT(state)
static void ClearBufferTag(BufferTag *tag)
static void ResourceOwnerRememberBuffer(ResourceOwner owner, Buffer buffer)
static void ResourceOwnerForgetBuffer(ResourceOwner owner, Buffer buffer)
#define BUF_USAGECOUNT_ONE
#define BUF_STATE_GET_REFCOUNT(state)
static RelFileLocator BufTagGetRelFileLocator(const BufferTag *tag)
static BufferDesc * GetLocalBufferDescriptor(uint32 id)
static Buffer BufferDescriptorGetBuffer(const BufferDesc *bdesc)
char * DebugPrintBufferRefcount(Buffer buffer)
void PageSetChecksumInplace(Page page, BlockNumber blkno)
#define MemSet(start, val, len)
#define fprintf(file, fmt, msg)
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
HTAB * hash_create(const char *tabname, int64 nelem, const HASHCTL *info, int flags)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
#define GUC_check_errdetail
Assert(PointerIsAligned(start, uint64))
#define IsParallelWorker()
BufferUsage pgBufferUsage
void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln)
void UnpinLocalBuffer(Buffer buffer)
bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput, bool nowait)
static HTAB * LocalBufHash
static int NLocalPinnedBuffers
void AtEOXact_LocalBuffers(bool isCommit)
#define LocalBufHdrGetBlock(bufHdr)
static void CheckForLocalBufferLeaks(void)
uint32 GetAdditionalLocalPinLimit(void)
static Block GetLocalBufferStorage(void)
static int nextFreeLocalBufId
bool check_temp_buffers(int *newval, void **extra, GucSource source)
void AtProcExit_LocalBuffers(void)
bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount)
static void InitLocalBuffers(void)
void LimitAdditionalLocalPins(uint32 *additional_pins)
uint32 GetLocalPinLimit(void)
static Buffer GetLocalVictimBuffer(void)
void MarkLocalBufferDirty(Buffer buffer)
void DropRelationAllLocalBuffers(RelFileLocator rlocator)
void TerminateLocalBufferIO(BufferDesc *bufHdr, bool clear_dirty, uint32 set_flag_bits, bool release_aio)
void InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum)
BlockNumber ExtendBufferedRelLocal(BufferManagerRelation bmr, ForkNumber fork, uint32 flags, uint32 extend_by, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by)
Block * LocalBufferBlockPointers
void UnpinLocalBufferNoOwner(Buffer buffer)
void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum, int nforks, BlockNumber *firstDelBlock)
BufferDesc * LocalBufferDescriptors
BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, bool *foundPtr)
void pfree(void *pointer)
void * MemoryContextAllocAligned(MemoryContext context, Size size, Size alignto, int flags)
MemoryContext TopMemoryContext
#define VALGRIND_MAKE_MEM_DEFINED(addr, size)
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)
#define AllocSetContextCreate
#define ALLOCSET_DEFAULT_SIZES
static rewind_source * source
instr_time pgstat_prepare_io_time(bool track_io_guc)
void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt, uint64 bytes)
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
#define relpath(rlocator, forknum)
#define relpathbackend(rlocator, backend, forknum)
ResourceOwner CurrentResourceOwner
void ResourceOwnerEnlarge(ResourceOwner owner)
BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum)
SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend)
void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks)
static void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
RelFileLocatorBackend smgr_rlocator