PostgreSQL Source Code: src/include/storage/aio_internal.h Source File

/*-------------------------------------------------------------------------
 *
 * aio_internal.h
 *    AIO related declarations that should only be used by the AIO subsystem
 *    internally.
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/aio_internal.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef AIO_INTERNAL_H
#define AIO_INTERNAL_H


#include "lib/ilist.h"
#include "port/pg_iovec.h"
#include "storage/aio.h"
#include "storage/condition_variable.h"


/*
 * The maximum number of IOs that can be batch submitted at once.
 */
#define PGAIO_SUBMIT_BATCH_SIZE 32
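
/*
 * Illustrative sketch (not part of the original header): batch submission is
 * driven by the caller wrapping the IO definitions in
 * pgaio_enter_batchmode()/pgaio_exit_batchmode() from aio.h, roughly:
 *
 *		pgaio_enter_batchmode();
 *		for (...)
 *			... acquire a handle and start an IO, e.g. via FileStartReadV() ...
 *		pgaio_exit_batchmode();		-- staged IOs are submitted here
 *
 * At most PGAIO_SUBMIT_BATCH_SIZE IOs can be staged at a time; a larger batch
 * has to be submitted in multiple rounds.
 */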


/*
 * State machine for handles. With some exceptions, noted below, handles move
 * linearly through all states.
 *
 * State changes should all go through pgaio_io_update_state().
 *
 * Note that the externally visible functions to start IO
 * (e.g. FileStartReadV(), via pgaio_io_start_readv()) move an IO from
 * PGAIO_HS_HANDED_OUT to at least PGAIO_HS_STAGED and at most
 * PGAIO_HS_COMPLETED_LOCAL (at which point the handle will be reused). See
 * the sketch below the enum for the typical progression.
 */
typedef enum PgAioHandleState
{
    /* not in use */
    PGAIO_HS_IDLE = 0,

    /*
     * Returned by pgaio_io_acquire(). The next state is either DEFINED (if
     * pgaio_io_start_*() is called), or IDLE (if pgaio_io_release() is
     * called).
     */
    PGAIO_HS_HANDED_OUT,

    /*
     * pgaio_io_start_*() has been called, but the IO is not yet staged. At
     * this point the handle has all the information needed for the IO to be
     * executed.
     */
    PGAIO_HS_DEFINED,

    /*
     * stage() callbacks have been called and the handle is ready to be
     * submitted for execution. Unless in batchmode (see
     * pgaio_enter_batchmode()), the IO will be submitted immediately
     * afterwards.
     */
    PGAIO_HS_STAGED,

    /* IO has been submitted to the IO method for execution */
    PGAIO_HS_SUBMITTED,

    /* IO finished, but the result has not yet been processed */
    PGAIO_HS_COMPLETED_IO,

    /*
     * IO completed, shared completion callbacks have been called.
     *
     * If the IO completion occurs in the issuing backend, local callbacks
     * will immediately be called. Otherwise the handle stays in
     * COMPLETED_SHARED until the issuing backend waits for the completion of
     * the IO.
     */
    PGAIO_HS_COMPLETED_SHARED,

    /*
     * IO completed, local completion callbacks have been called.
     *
     * After this the handle will be made reusable and go into IDLE state.
     */
    PGAIO_HS_COMPLETED_LOCAL,
} PgAioHandleState;
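
/*
 * Illustrative sketch (not part of the original header), using only functions
 * already mentioned in the comments above. A successfully executed IO
 * typically progresses:
 *
 *		ioh = pgaio_io_acquire(...);		-> HANDED_OUT
 *		FileStartReadV(ioh, ...);			via pgaio_io_start_readv():
 *											-> DEFINED -> STAGED and, unless
 *											in batchmode, SUBMITTED
 *		(IO executes and is completed)		-> COMPLETED_IO -> COMPLETED_SHARED
 *		(issuer processes the completion)	-> COMPLETED_LOCAL -> IDLE
 *
 * whereas a handle that turns out not to be needed goes straight back:
 *
 *		ioh = pgaio_io_acquire(...);		-> HANDED_OUT
 *		pgaio_io_release(ioh);				-> IDLE
 */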


struct ResourceOwnerData;

/*
 * Typedef is in aio_types.h
 *
 * We don't use the underlying enums for state, target and op to avoid wasting
 * space. We tried using bitfields, but several compilers generate rather
 * horrid code for that.
 */
struct PgAioHandle
{
    /* all state updates should go through pgaio_io_update_state() */
    uint8       state;

    /* what are we operating on */
    uint8       target;

    /* which IO operation */
    uint8       op;

    /* bitfield of PgAioHandleFlags */
    uint8       flags;

    /* number of registered callbacks, see ->callbacks */
    uint8       num_callbacks;

    /* using the proper type here would use more space */
    uint8       callbacks[PGAIO_HANDLE_MAX_CALLBACKS];

    /* data forwarded to each callback */
    uint8       callbacks_data[PGAIO_HANDLE_MAX_CALLBACKS];

    /*
     * Length of data associated with the handle using
     * pgaio_io_set_handle_data_*().
     */
    uint8       handle_data_len;

    /* XXX: could be optimized out with some pointer math */
    int32       owner_procno;

    /* raw result of the IO operation */
    int32       result;

    /* list membership, e.g. in PgAioBackend->idle_ios or ->in_flight_ios */
    dlist_node  node;

    struct ResourceOwnerData *resowner;
    dlist_node  resowner_node;

    /* incremented every time the IO handle is reused */
    uint64      generation;

    /*
     * To wait for the IO to complete, other backends can wait on this CV.
     * Note that, if in SUBMITTED state, a waiter first needs to check if it
     * needs to do work via IoMethodOps->wait_one().
     */
    ConditionVariable cv;

    /* result of the shared callback, passed to the issuer callback */
    PgAioResult distilled_result;

    /*
     * Index into PgAioCtl->iovecs and PgAioCtl->handle_data (see the sketch
     * below this struct).
     *
     * At the moment there's no need to differentiate between the two, but
     * that won't necessarily stay that way.
     */
    uint32      iovec_off;

    /*
     * If not NULL, this memory location will be updated with information
     * about the IO's completion, iff the issuing backend learns about the
     * IO's completion.
     */
    PgAioReturn *report_return;

    /* data necessary for the IO to be performed */
    PgAioOpData op_data;

    /*
     * Data necessary to identify the object undergoing IO to higher-level
     * code. Needs to be sufficient to allow another backend to reopen the
     * file.
     */
    PgAioTargetData target_data;
};
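
/*
 * Illustrative sketch (an assumption about the intended indexing, cf.
 * pgaio_io_get_iovec_length() declared below): a handle's iovecs and handle
 * data live in the shared PgAioCtl arrays, addressed via iovec_off:
 *
 *		struct iovec *iov  = &pgaio_ctl->iovecs[ioh->iovec_off];
 *		uint64       *data = &pgaio_ctl->handle_data[ioh->iovec_off];
 *
 * with ioh->handle_data_len giving the number of valid handle_data entries.
 */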


typedef struct PgAioBackend
{
    /* index into PgAioCtl->io_handles */
    uint32      io_handle_off;

    /* IO handles that currently are not used */
    dclist_head idle_ios;

    /*
     * Only one IO may be returned by pgaio_io_acquire()/pgaio_io_acquire_nb()
     * without having been either defined (by actually associating it with an
     * IO) or released (with pgaio_io_release()). This restriction is
     * necessary to guarantee that we always can acquire an IO.
     * ->handed_out_io is used to enforce that rule; see the sketch below this
     * struct.
     */
    PgAioHandle *handed_out_io;

    /* Are we currently in batchmode? See pgaio_enter_batchmode(). */
    bool        in_batchmode;

    /*
     * IOs that are defined, but not yet submitted.
     */
    uint16      num_staged_ios;
    PgAioHandle *staged_ios[PGAIO_SUBMIT_BATCH_SIZE];

    /*
     * List of in-flight IOs. Also contains IOs that aren't, strictly
     * speaking, in-flight anymore, but have been waited for and completed by
     * another backend. Once this backend sees such an IO it'll be reclaimed.
     *
     * The list is ordered by submission time, with more recently submitted
     * IOs being appended at the end.
     */
    dclist_head in_flight_ios;
} PgAioBackend;
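
/*
 * Illustrative sketch of the ->handed_out_io rule (the actual enforcement
 * lives in aio.c; this is not verbatim code):
 *
 *		ioh  = pgaio_io_acquire(...);	-- pgaio_my_backend->handed_out_io = ioh
 *		ioh2 = pgaio_io_acquire(...);	-- not allowed: ioh has neither been
 *										   defined nor released yet
 *
 * i.e. a backend has to start the IO (moving the handle to DEFINED or beyond)
 * or call pgaio_io_release() before acquiring another handle.
 */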


typedef struct PgAioCtl
{
    int         backend_state_count;
    PgAioBackend *backend_state;

    /*
     * Array of iovec structs. Each iovec is owned by a specific backend. The
     * allocation is in PgAioCtl to allow the maximum number of iovecs for
     * individual IOs to be configurable with a PGC_POSTMASTER GUC.
     */
    uint32      iovec_count;
    struct iovec *iovecs;

    /*
     * For, e.g., an IO covering multiple buffers in shared / temp buffers, we
     * need to get the Buffer IDs during completion to be able to change the
     * BufferDesc state accordingly. This space can be used to store e.g.
     * Buffer IDs. Note that the actual iovec might be shorter than this,
     * because we combine neighboring pages into one larger iovec entry.
     */
    uint64     *handle_data;

    uint32      io_handle_count;
    PgAioHandle *io_handles;
} PgAioCtl;



/*
 * Callbacks used to implement an IO method. A minimal example is sketched
 * below this struct.
 */
typedef struct IoMethodOps
{
    /* properties */

    /*
     * If an FD is about to be closed, do we need to wait for all in-flight
     * IOs referencing that FD?
     */
    bool        wait_on_fd_before_close;


    /* global initialization */

    /*
     * Amount of additional shared memory to reserve for the io_method.
     * Called just like a normal ipci.c style *Size() function. Optional.
     */
    size_t      (*shmem_size) (void);

    /*
     * Initialize shared memory. first_time is true if AIO's shared memory
     * was just initialized, false otherwise. Optional.
     */
    void        (*shmem_init) (bool first_time);

    /*
     * Per-backend initialization. Optional.
     */
    void        (*init_backend) (void);


    /* handling of IOs */

    /* optional */
    bool        (*needs_synchronous_execution) (PgAioHandle *ioh);

    /*
     * Start executing the passed-in IOs.
     *
     * Shall advance state to at least PGAIO_HS_SUBMITTED. (By the time this
     * returns, other backends might have advanced the state further.)
     *
     * Will not be called if ->needs_synchronous_execution() returned true.
     *
     * num_staged_ios is <= PGAIO_SUBMIT_BATCH_SIZE.
     *
     * Always called in a critical section.
     */
    int         (*submit) (uint16 num_staged_ios, PgAioHandle **staged_ios);

    /* ---
     * Wait for the IO to complete. Optional.
     *
     * On return, the state shall be one of
     * - PGAIO_HS_COMPLETED_IO
     * - PGAIO_HS_COMPLETED_SHARED
     * - PGAIO_HS_COMPLETED_LOCAL
     *
     * The callback must not block if the handle is already in one of those
     * states, or has been reused (see pgaio_io_was_recycled()). If, on
     * return, the state is PGAIO_HS_COMPLETED_IO, the state will reach
     * PGAIO_HS_COMPLETED_SHARED without further intervention by the IO
     * method.
     *
     * If not provided, it needs to be guaranteed that the IO method calls
     * pgaio_io_process_completion() without further interaction by the
     * issuing backend.
     * ---
     */
    void        (*wait_one) (PgAioHandle *ioh,
                             uint64 ref_generation);
} IoMethodOps;
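
/*
 * Illustrative sketch of a minimal IO method (hedged; loosely modeled on the
 * synchronous style of execution, the function names are made up). Its
 * submit() executes each staged IO immediately in the submitting backend,
 * using helpers declared further down in this header:
 *
 *		static int
 *		example_submit(uint16 num_staged_ios, PgAioHandle **staged_ios)
 *		{
 *			for (int i = 0; i < num_staged_ios; i++)
 *			{
 *				pgaio_io_prepare_submit(staged_ios[i]);
 *				pgaio_io_perform_synchronously(staged_ios[i]);
 *			}
 *			return num_staged_ios;
 *		}
 *
 *		static const IoMethodOps example_ops = {
 *			.submit = example_submit,
 *		};
 *
 * Since submit() here completes each IO before returning, wait_one() can be
 * omitted, per the contract described above.
 */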


/* aio.c */
extern bool pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state);
extern void pgaio_io_stage(PgAioHandle *ioh, PgAioOp op);
extern void pgaio_io_process_completion(PgAioHandle *ioh, int result);
extern void pgaio_io_prepare_submit(PgAioHandle *ioh);
extern bool pgaio_io_needs_synchronous_execution(PgAioHandle *ioh);
extern const char *pgaio_io_get_state_name(PgAioHandle *ioh);
extern const char *pgaio_result_status_string(PgAioResultStatus rs);
extern void pgaio_shutdown(int code, Datum arg);
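
/*
 * Illustrative sketch (an assumption about typical usage, not verbatim from
 * any IO method): wait_one() implementations can use pgaio_io_was_recycled()
 * to honor the "must not block" requirement documented for IoMethodOps,
 * roughly:
 *
 *		PgAioHandleState state;
 *
 *		if (pgaio_io_was_recycled(ioh, ref_generation, &state))
 *			return;				-- handle already reused, nothing to wait for
 *		if (state == PGAIO_HS_COMPLETED_IO ||
 *			state == PGAIO_HS_COMPLETED_SHARED ||
 *			state == PGAIO_HS_COMPLETED_LOCAL)
 *			return;				-- IO already finished
 *		... otherwise actually wait for the IO to finish ...
 */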

/* aio_callback.c */
extern void pgaio_io_call_stage(PgAioHandle *ioh);
extern void pgaio_io_call_complete_shared(PgAioHandle *ioh);
extern PgAioResult pgaio_io_call_complete_local(PgAioHandle *ioh);

/* aio_io.c */
extern void pgaio_io_perform_synchronously(PgAioHandle *ioh);
extern const char *pgaio_io_get_op_name(PgAioHandle *ioh);
extern bool pgaio_io_uses_fd(PgAioHandle *ioh, int fd);
extern int pgaio_io_get_iovec_length(PgAioHandle *ioh, struct iovec **iov);

/* aio_target.c */
extern bool pgaio_io_can_reopen(PgAioHandle *ioh);
extern void pgaio_io_reopen(PgAioHandle *ioh);
extern const char *pgaio_io_get_target_name(PgAioHandle *ioh);


/*
 * The AIO subsystem has fairly verbose debug logging support. This can be
 * enabled/disabled at build time. The reason for this is that
 * a) the verbosity can make debugging things on higher levels hard
 * b) even if logging can be skipped due to elevel checks, it still causes a
 *    measurable slowdown
 *
 * XXX: This likely should eventually be disabled by default, at least in
 * non-assert builds.
 */
#define PGAIO_VERBOSE 1

/*
 * Simple ereport() wrapper that only logs if PGAIO_VERBOSE is defined.
 *
 * This intentionally still compiles the code, guarded by a constant if (0),
 * if verbose logging is disabled, to make it less likely that debug logging
 * is silently broken.
 *
 * The current definition requires passing at least one argument.
 */
#define pgaio_debug(elevel, msg, ...) \
    do { \
        if (PGAIO_VERBOSE) \
            ereport(elevel, \
                    errhidestmt(true), errhidecontext(true), \
                    errmsg_internal(msg, \
                                    __VA_ARGS__)); \
    } while(0)

/*
 * Simple ereport() wrapper. Note that the definition requires passing at
 * least one argument.
 */
#define pgaio_debug_io(elevel, ioh, msg, ...) \
    pgaio_debug(elevel, "io %-10d|op %-5s|target %-4s|state %-16s: " msg, \
                pgaio_io_get_id(ioh), \
                pgaio_io_get_op_name(ioh), \
                pgaio_io_get_target_name(ioh), \
                pgaio_io_get_state_name(ioh), \
                __VA_ARGS__)
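
/*
 * Example usage (illustrative only; the message texts and variables are made
 * up):
 *
 *		pgaio_debug(DEBUG3, "waiting for %d in-flight IOs", num_inflight);
 *		pgaio_debug_io(DEBUG3, ioh, "set result to %d", result);
 *
 * As documented above, both macros require at least one argument after the
 * format string.
 */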

/* Declarations for the tables of function pointers exposed by each IO method. */
extern PGDLLIMPORT const IoMethodOps pgaio_sync_ops;
extern PGDLLIMPORT const IoMethodOps pgaio_worker_ops;
#ifdef IOMETHOD_IO_URING_ENABLED
extern PGDLLIMPORT const IoMethodOps pgaio_uring_ops;
#endif

extern PGDLLIMPORT const IoMethodOps *pgaio_method_ops;
extern PGDLLIMPORT PgAioCtl *pgaio_ctl;
extern PGDLLIMPORT PgAioBackend *pgaio_my_backend;
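
/*
 * Illustrative sketch (an assumption about aio.c, not verbatim): during
 * startup pgaio_method_ops is pointed at one of the tables above, based on
 * the io_method GUC, roughly:
 *
 *		pgaio_method_ops = &pgaio_worker_ops;	-- or &pgaio_sync_ops /
 *												   &pgaio_uring_ops
 *
 * after which all IO submission and waiting goes through that table.
 */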


#endif							/* AIO_INTERNAL_H */