1/*-------------------------------------------------------------------------
6 * This is the header to include when actually issuing AIO. When just
7 * declaring functions involving an AIO related type, it might suffice to
8 * include aio_types.h. Initialization related functions are in the dedicated
11 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
12 * Portions Copyright (c) 1994, Regents of the University of California
14 * src/include/storage/aio.h
16 *-------------------------------------------------------------------------
25/* io_uring is incompatible with EXEC_BACKEND */
26#if defined(USE_LIBURING) && !defined(EXEC_BACKEND)
27#define IOMETHOD_IO_URING_ENABLED
31/* Enum for io_method GUC. */
36#ifdef IOMETHOD_IO_URING_ENABLED
41/* We'll default to worker based execution. */
42 #define DEFAULT_IO_METHOD IOMETHOD_WORKER
46 * Flags for an IO that can be set with pgaio_io_set_flag().
51 * The IO references backend local memory.
53 * This needs to be set on an IO whenever the IO references process-local
54 * memory. Some IO methods do not support executing IO that references
55 * process-local memory and thus need to fall back to executing IO
56 * synchronously for IOs with this flag set.
58 * Required for correctness.
63 * Hint that IO will be executed synchronously.
65 * This can make it a bit cheaper to execute synchronous IO via the AIO
66 * interface, to avoid needing an AIO and non-AIO version of code.
68 * Advantageous to set, if applicable, but not required for correctness.
73 * IO is using buffered IO, used to control heuristic in some IO methods.
75 * Advantageous to set, if applicable, but not required for correctness.
81 * The IO operations supported by the AIO subsystem.
83 * This could be in aio_internal.h, as it is not publicly referenced, but
84 * PgAioOpData currently *does* need to be public, therefore keeping this
85 * public seems to make sense.
89 /* intentionally the zero value, to help catch zeroed memory etc */
107 #define PGAIO_OP_COUNT (PGAIO_OP_WRITEV + 1)
111 * On what is IO being performed?
113 * PgAioTargetID specific behaviour should be implemented in
118 /* intentionally the zero value, to help catch zeroed memory etc */
123 #define PGAIO_TID_COUNT (PGAIO_TID_SMGR + 1)
127 * Data necessary to support IO operations (see PgAioOp).
129 * NB: Note that the FDs in here may *not* be relied upon for re-issuing
130 * requests (e.g. for partial reads/writes or in an IO worker) - the FD might
131 * be from another process, or closed since. That's not a problem for staged
132 * IOs, as all staged IOs are submitted when closing an FD.
153 * Information about the object that IO is executed on. Mostly callbacks that
154 * operate on PgAioTargetData.
156 * typedef is in aio_types.h
161 * To support executing using worker processes, the file descriptor for an
162 * IO may need to be reopened in a different process.
166 /* describe the target of the IO, used for log messages and views */
169 /* name of the target, used in log messages / views */
175 * IDs for callbacks that can be registered on an IO.
177 * Callbacks are identified by an ID rather than a function pointer. There are
180 * 1) Memory within PgAioHandle is precious, due to the number of PgAioHandle
181 * structs in pre-allocated shared memory.
183 * 2) Due to EXEC_BACKEND, function pointers are not necessarily stable between
184 * different backends, therefore function pointers cannot directly be in
187 * Without 2), we could fairly easily allow adding new callbacks, by filling an
188 * ID->pointer mapping table on demand. In the presence of 2) that's still
189 * doable, but harder, because every process has to re-register the pointers
190 * so that a local ID->"backend local pointer" mapping can be maintained.
203 #define PGAIO_HCB_MAX PGAIO_HCB_LOCAL_BUFFER_READV
205 "PGAIO_HCB_MAX is too big for PGAIO_RESULT_ID_BITS");
212/* typedef is in aio_types.h */
216 * Prepare resources affected by the IO for execution. This could e.g.
217 * include moving ownership of buffer pins to the AIO subsystem.
222 * Update the state of resources affected by the IO to reflect completion
223 * of the IO. This could e.g. include updating shared buffer state to
224 * signal the IO has finished.
226 * The _shared suffix indicates that this is executed by the backend that
227 * completed the IO, which may or may not be the backend that issued the
228 * IO. Obviously the callback thus can only modify resources in shared
231 * The latest registered callback is called first. This allows
232 * higher-level code to register callbacks that can rely on callbacks
233 * registered by lower-level code to already have been executed.
235 * NB: This is called in a critical section. Errors can be signalled by
236 * the callback's return value; it's the responsibility of the IO's issuer
237 * to react appropriately.
242 * Like complete_shared, except called in the issuing backend.
244 * This variant of the completion callback is useful when backend-local
245 * state has to be updated to reflect the IO's completion. E.g. a
246 * temporary buffer's BufferDesc isn't accessible in complete_shared.
248 * Local callbacks are only called after complete_shared for all
249 * registered callbacks has been called.
254 * Report the result of an IO operation. This is e.g. used to raise an
255 * error after an IO failed at the appropriate time (i.e. not when the IO
256 * failed, but under control of the code that issued the IO).
264 * How many callbacks can be registered for one IO handle. Currently we only
265 * need two, but it's not hard to imagine needing a few more.
267 #define PGAIO_HANDLE_MAX_CALLBACKS 4
271/* --------------------------------------------------------------------------------
273 * --------------------------------------------------------------------------------
276/* functions in aio.c */
292/* functions in aio_io.c */
304/* functions in aio_target.c */
310/* functions in aio_callback.c */
319/* --------------------------------------------------------------------------------
321 * --------------------------------------------------------------------------------
333/* --------------------------------------------------------------------------------
335 * --------------------------------------------------------------------------------
343/* --------------------------------------------------------------------------------
344 * Actions on multiple IOs.
345 * --------------------------------------------------------------------------------
355/* --------------------------------------------------------------------------------
357 * --------------------------------------------------------------------------------
void(* PgAioHandleCallbackReport)(PgAioResult result, const PgAioTargetData *target_data, int elevel)
PgAioTargetData * pgaio_io_get_target_data(PgAioHandle *ioh)
PGDLLIMPORT int io_max_concurrency
@ PGAIO_HCB_LOCAL_BUFFER_READV
@ PGAIO_HCB_SHARED_BUFFER_READV
bool pgaio_wref_valid(PgAioWaitRef *iow)
int pgaio_io_get_id(PgAioHandle *ioh)
PgAioHandle * pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
void pgaio_wref_clear(PgAioWaitRef *iow)
void pgaio_io_set_handle_data_32(PgAioHandle *ioh, uint32 *data, uint8 len)
void pgaio_io_start_readv(PgAioHandle *ioh, int fd, int iovcnt, uint64 offset)
void(* PgAioHandleCallbackStage)(PgAioHandle *ioh, uint8 cb_flags)
PgAioOpData * pgaio_io_get_op_data(PgAioHandle *ioh)
void pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
void pgaio_io_register_callbacks(PgAioHandle *ioh, PgAioHandleCallbackID cb_id, uint8 cb_data)
void pgaio_closing_fd(int fd)
void pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
bool pgaio_have_staged(void)
PgAioOp pgaio_io_get_op(PgAioHandle *ioh)
@ PGAIO_HF_REFERENCES_LOCAL
StaticAssertDecl(PGAIO_HCB_MAX<(1<< PGAIO_RESULT_ID_BITS), "PGAIO_HCB_MAX is too big for PGAIO_RESULT_ID_BITS")
PgAioResult(* PgAioHandleCallbackComplete)(PgAioHandle *ioh, PgAioResult prior_result, uint8 cb_flags)
bool pgaio_io_has_target(PgAioHandle *ioh)
uint64 * pgaio_io_get_handle_data(PgAioHandle *ioh, uint8 *len)
void pgaio_io_start_writev(PgAioHandle *ioh, int fd, int iovcnt, uint64 offset)
void pgaio_io_set_handle_data_64(PgAioHandle *ioh, uint64 *data, uint8 len)
bool pgaio_wref_check_done(PgAioWaitRef *iow)
PGDLLIMPORT int io_method
ProcNumber pgaio_io_get_owner(PgAioHandle *ioh)
void pgaio_enter_batchmode(void)
void pgaio_io_release_resowner(struct dlist_node *ioh_node, bool on_error)
void pgaio_submit_staged(void)
char * pgaio_io_get_target_description(PgAioHandle *ioh)
void pgaio_wref_wait(PgAioWaitRef *iow)
void pgaio_io_release(PgAioHandle *ioh)
int pgaio_wref_get_id(PgAioWaitRef *iow)
void pgaio_io_set_target(PgAioHandle *ioh, PgAioTargetID targetid)
int pgaio_io_get_iovec(PgAioHandle *ioh, struct iovec **iov)
void pgaio_result_report(PgAioResult result, const PgAioTargetData *target_data, int elevel)
void pgaio_exit_batchmode(void)
PgAioHandle * pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
#define PGAIO_RESULT_ID_BITS
struct PgAioResult PgAioResult
static int fd(const char *x, int i)
PgAioHandleCallbackComplete complete_shared
PgAioHandleCallbackStage stage
PgAioHandleCallbackReport report
PgAioHandleCallbackComplete complete_local
void(* reopen)(PgAioHandle *ioh)