/*-------------------------------------------------------------------------
 *
 * aio.c
 *    AIO - Core Logic
 *
 * For documentation about how AIO works on a higher level, including a
 * schematic example, see README.md.
 *
 *
 * AIO is a complicated subsystem. To keep things navigable, it is split
 * across a number of files:
 *
 * - method_*.c - different ways of executing AIO (e.g. worker process)
 *
 * - aio_target.c - IO on different kinds of targets
 *
 * - aio_io.c - method-independent code for specific IO ops (e.g. readv)
 *
 * - aio_callback.c - callbacks at IO operation lifecycle events
 *
 * - aio_init.c - per-server and per-backend initialization
 *
 * - aio.c - all other topics
 *
 * - read_stream.c - helper for reading buffered relation data
 *
 * - README.md - higher-level overview of AIO
 *
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/storage/aio/aio.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "lib/ilist.h"
#include "miscadmin.h"
#include "port/atomics.h"
#include "storage/aio.h"
#include "storage/aio_internal.h"
#include "storage/aio_subsys.h"
#include "utils/guc.h"
#include "utils/guc_hooks.h"
#include "utils/injection_point.h"
#include "utils/resowner.h"
#include "utils/wait_event_types.h"


static inline void pgaio_io_update_state(PgAioHandle *ioh, PgAioHandleState new_state);
static void pgaio_io_reclaim(PgAioHandle *ioh);
static void pgaio_io_resowner_register(PgAioHandle *ioh);
static void pgaio_io_wait_for_free(void);
static PgAioHandle *pgaio_io_from_wref(PgAioWaitRef *iow, uint64 *ref_generation);
static const char *pgaio_io_state_get_name(PgAioHandleState s);
static void pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation);


/* Options for io_method. */
const struct config_enum_entry io_method_options[] = {
    {"sync", IOMETHOD_SYNC, false},
    {"worker", IOMETHOD_WORKER, false},
#ifdef IOMETHOD_IO_URING_ENABLED
    {"io_uring", IOMETHOD_IO_URING, false},
#endif
    {NULL, 0, false}
};
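
/*
 * For illustration only: the method is selected like any other GUC, e.g. in
 * postgresql.conf ("io_uring" is only available in builds with liburing
 * support):
 *
 *     io_method = worker
 */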

/* GUCs */
int         io_method = DEFAULT_IO_METHOD;
int         io_max_concurrency = -1;

/* global control for AIO */
PgAioCtl   *pgaio_ctl;

/* current backend's per-backend state */
PgAioBackend *pgaio_my_backend;


static const IoMethodOps *const pgaio_method_ops_table[] = {
    [IOMETHOD_SYNC] = &pgaio_sync_ops,
    [IOMETHOD_WORKER] = &pgaio_worker_ops,
#ifdef IOMETHOD_IO_URING_ENABLED
    [IOMETHOD_IO_URING] = &pgaio_uring_ops,
#endif
};

/* callbacks for the configured io_method, set by assign_io_method */
const IoMethodOps *pgaio_method_ops;


/* --------------------------------------------------------------------------------
 * Public Functions related to PgAioHandle
 * --------------------------------------------------------------------------------
 */

/*
 * Acquire an AioHandle, waiting for IO completion if necessary.
 *
 * Each backend can only have one AIO handle that has been "handed out" to
 * code, but not yet submitted or released. This restriction is necessary to
 * ensure that it is possible for code to wait for an unused handle by waiting
 * for in-flight IO to complete. There is a limited number of handles in each
 * backend; if multiple handles could be handed out without being submitted,
 * waiting for all in-flight IO to complete would not guarantee that handles
 * free up.
 *
 * It is cheap to acquire an IO handle, unless all handles are in use. In that
 * case this function waits for the oldest IO to complete. If that is not
 * desirable, use pgaio_io_acquire_nb().
 *
 * If a handle was acquired but then does not turn out to be needed,
 * e.g. because pgaio_io_acquire() is called before starting an IO in a
 * critical section, the handle needs to be released with pgaio_io_release().
 *
 *
 * To react to the completion of the IO as soon as it is known to have
 * completed, callbacks can be registered with pgaio_io_register_callbacks().
 *
 * To actually execute IO using the returned handle, the pgaio_io_start_*()
 * family of functions is used. In many cases the pgaio_io_start_*() call will
 * not be done directly by the code that acquired the handle, but by lower
 * level code that gets passed the handle. E.g. if code in bufmgr.c wants to
 * perform AIO, it typically will pass the handle to smgr.c, which will pass
 * it on to md.c, on to fd.c, which then finally calls pgaio_io_start_*().
 * This forwarding allows the various layers to react to the IO's completion
 * by registering callbacks. These callbacks in turn can translate a lower
 * layer's result into a result understandable by a higher layer.
 *
 * During pgaio_io_start_*() the IO is staged (i.e. prepared for execution but
 * not submitted to the kernel). Unless in batchmode
 * (cf. pgaio_enter_batchmode()), the IO will also get submitted for
 * execution. Note that, whether in batchmode or not, the IO might even
 * complete before the functions return.
 *
 * After pgaio_io_start_*() the AioHandle is "consumed" and may not be
 * referenced by the IO issuing code. To e.g. wait for IO, references to the
 * IO can be established with pgaio_io_get_wref() *before* pgaio_io_start_*()
 * is called. pgaio_wref_wait() can be used to wait for the IO to complete.
 *
 *
 * To know if the IO [partially] succeeded or failed, a PgAioReturn * can be
 * passed to pgaio_io_acquire(). Once the issuing backend has called
 * pgaio_wref_wait(), the PgAioReturn contains information about whether the
 * operation succeeded and details about the first failure, if any. The error
 * can be raised / logged with pgaio_result_report().
 *
 * The lifetime of the memory pointed to by *ret needs to be at least as long
 * as that of the passed-in resowner. If the resowner releases resources
 * before the IO completes (typically due to an error), the reference to *ret
 * will be cleared. In case of resowner cleanup, *ret will not be updated
 * with the results of the IO operation.
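 *
 * Putting the pieces together, a minimal usage sketch (illustrative only;
 * error handling omitted, and the pgaio_io_start_*() step stands in for
 * whatever lower-level code actually defines the IO):
 *
 *     PgAioReturn ioret;
 *     PgAioWaitRef iow;
 *     PgAioHandle *ioh;
 *
 *     ioh = pgaio_io_acquire(CurrentResourceOwner, &ioret);
 *     pgaio_io_get_wref(ioh, &iow);
 *     ... register callbacks, pass ioh to lower-level code, which
 *     eventually calls pgaio_io_start_*(), consuming the handle ...
 *     pgaio_wref_wait(&iow);
 *     if (ioret.result.status != PGAIO_RS_OK)
 *         pgaio_result_report(ioret.result, &ioret.target_data, ERROR);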
 */
PgAioHandle *
pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
{
    PgAioHandle *h;

    while (true)
    {
        h = pgaio_io_acquire_nb(resowner, ret);

        if (h != NULL)
            return h;

        /*
         * Evidently all handles by this backend are in use. Just wait for
         * some to complete.
         */
        pgaio_io_wait_for_free();
    }
}

/*
 * Acquire an AioHandle, returning NULL if no handles are free.
 *
 * See pgaio_io_acquire(). The only difference is that this function will
 * return NULL if there are no idle handles, instead of blocking.
 */
PgAioHandle *
pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
{
    PgAioHandle *ioh = NULL;

    if (pgaio_my_backend->num_staged_ios >= PGAIO_SUBMIT_BATCH_SIZE)
    {
        Assert(pgaio_my_backend->num_staged_ios == PGAIO_SUBMIT_BATCH_SIZE);
        pgaio_submit_staged();
    }

    if (pgaio_my_backend->handed_out_io)
        elog(ERROR, "API violation: Only one IO can be handed out");

    /*
     * Probably not needed today, as interrupts should not process this IO,
     * but...
     */
    HOLD_INTERRUPTS();

    if (!dclist_is_empty(&pgaio_my_backend->idle_ios))
    {
        dlist_node *ion = dclist_pop_head_node(&pgaio_my_backend->idle_ios);

        ioh = dclist_container(PgAioHandle, node, ion);

        Assert(ioh->state == PGAIO_HS_IDLE);
        Assert(ioh->owner_procno == MyProcNumber);

        pgaio_io_update_state(ioh, PGAIO_HS_HANDED_OUT);
        pgaio_my_backend->handed_out_io = ioh;

        if (resowner)
            pgaio_io_resowner_register(ioh);

        if (ret)
        {
            ioh->report_return = ret;
            ret->result.status = PGAIO_RS_UNKNOWN;
        }
    }

    RESUME_INTERRUPTS();

    return ioh;
}

/*
 * Release an IO handle that turned out to not be required.
 *
 * See pgaio_io_acquire() for more details.
 */
void
pgaio_io_release(PgAioHandle *ioh)
{
    if (ioh == pgaio_my_backend->handed_out_io)
    {
        Assert(ioh->state == PGAIO_HS_HANDED_OUT);
        Assert(ioh->resowner);

        pgaio_my_backend->handed_out_io = NULL;

        /*
         * Note that no interrupts are processed between the handed_out_io
         * check and the call to reclaim - that's important as otherwise an
         * interrupt could have already reclaimed the handle.
         */
        pgaio_io_reclaim(ioh);
    }
    else
    {
        elog(ERROR, "release in unexpected state");
    }
}

/*
 * Release IO handle during resource owner cleanup.
 */
void
pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
{
    PgAioHandle *ioh = dlist_container(PgAioHandle, resowner_node, ioh_node);

    Assert(ioh->resowner);

    /*
     * Otherwise an interrupt, in the middle of releasing the IO, could end up
     * trying to wait for the IO, leading to state confusion.
     */
    HOLD_INTERRUPTS();

    ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node);
    ioh->resowner = NULL;

    switch ((PgAioHandleState) ioh->state)
    {
        case PGAIO_HS_IDLE:
            elog(ERROR, "unexpected");
            break;
        case PGAIO_HS_HANDED_OUT:
            Assert(ioh == pgaio_my_backend->handed_out_io || pgaio_my_backend->handed_out_io == NULL);

            if (ioh == pgaio_my_backend->handed_out_io)
            {
                pgaio_my_backend->handed_out_io = NULL;
                if (!on_error)
                    elog(WARNING, "leaked AIO handle");
            }

            pgaio_io_reclaim(ioh);
            break;
        case PGAIO_HS_DEFINED:
        case PGAIO_HS_STAGED:
            if (!on_error)
                elog(WARNING, "AIO handle was not submitted");
            pgaio_submit_staged();
            break;
        case PGAIO_HS_SUBMITTED:
        case PGAIO_HS_COMPLETED_IO:
        case PGAIO_HS_COMPLETED_SHARED:
        case PGAIO_HS_COMPLETED_LOCAL:
            /* this is expected to happen */
            break;
    }

    /*
     * Need to unregister the reporting of the IO's result, as the memory
     * it's referencing likely has gone away.
     */
    if (ioh->report_return)
        ioh->report_return = NULL;

    RESUME_INTERRUPTS();
}

/*
 * Add a [set of] flags to the IO.
 *
 * Note that this combines the flags with the already-set flags, rather than
 * overwriting them with exactly the passed-in value. This is to allow
 * multiple callsites to set flags.
 */
void
pgaio_io_set_flag(PgAioHandle *ioh, PgAioHandleFlags flag)
{
    Assert(ioh->state == PGAIO_HS_HANDED_OUT);

    ioh->flags |= flag;
}

/*
 * Returns an ID uniquely identifying the IO handle. This is only really
 * useful for logging, as handles are reused across multiple IOs.
 */
int
pgaio_io_get_id(PgAioHandle *ioh)
{
    Assert(ioh >= pgaio_ctl->io_handles &&
           ioh < (pgaio_ctl->io_handles + pgaio_ctl->io_handle_count));
    return ioh - pgaio_ctl->io_handles;
}

/*
 * Return the ProcNumber for the process that can use an IO handle. The
 * mapping from IO handles to PGPROCs is static, therefore this even works
 * when the corresponding PGPROC is not in use.
 */
ProcNumber
pgaio_io_get_owner(PgAioHandle *ioh)
{
    return ioh->owner_procno;
}

/*
 * Return a wait reference for the IO. Only wait references can be used to
 * wait for an IO's completion, as handles themselves can be reused after
 * completion. See also the comment above pgaio_io_acquire().
 */
void
pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
{
    Assert(ioh->state == PGAIO_HS_HANDED_OUT ||
           ioh->state == PGAIO_HS_DEFINED ||
           ioh->state == PGAIO_HS_STAGED);
    Assert(ioh->generation != 0);

    iow->aio_index = ioh - pgaio_ctl->io_handles;
    iow->generation_upper = (uint32) (ioh->generation >> 32);
    iow->generation_lower = (uint32) ioh->generation;
}


/* --------------------------------------------------------------------------------
 * Internal Functions related to PgAioHandle
 * --------------------------------------------------------------------------------
 */

static inline void
pgaio_io_update_state(PgAioHandle *ioh, PgAioHandleState new_state)
{
    /*
     * All callers need to have held interrupts in some form; otherwise
     * interrupt processing could wait for the IO to complete while it is in
     * an intermediary state.
     */
    Assert(!INTERRUPTS_CAN_BE_PROCESSED());

    pgaio_debug_io(DEBUG5, ioh,
                   "updating state to %s",
                   pgaio_io_state_get_name(new_state));

    /*
     * Ensure the changes signified by the new state are visible before the
     * new state becomes visible.
     */
    pg_write_barrier();

    ioh->state = new_state;
}

static void
pgaio_io_resowner_register(PgAioHandle *ioh)
{
    Assert(!ioh->resowner);
    Assert(CurrentResourceOwner);

    ResourceOwnerRememberAioHandle(CurrentResourceOwner, &ioh->resowner_node);
    ioh->resowner = CurrentResourceOwner;
}

/*
 * Stage IO for execution and, if appropriate, submit it immediately.
 *
 * Should only be called from pgaio_io_start_*().
 */
void
pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
{
    bool        needs_synchronous;

    Assert(ioh->state == PGAIO_HS_HANDED_OUT);
    Assert(pgaio_my_backend->handed_out_io == ioh);
    Assert(pgaio_io_has_target(ioh));

    /*
     * Otherwise an interrupt, in the middle of staging and possibly executing
     * the IO, could end up trying to wait for the IO, leading to state
     * confusion.
     */
    HOLD_INTERRUPTS();

    ioh->op = op;
    ioh->result = 0;

    pgaio_io_update_state(ioh, PGAIO_HS_DEFINED);

    /* allow a new IO to be staged */
    pgaio_my_backend->handed_out_io = NULL;

    pgaio_io_call_stage(ioh);

    pgaio_io_update_state(ioh, PGAIO_HS_STAGED);

    /*
     * Synchronous execution has to be executed, well, synchronously, so check
     * that first.
     */
    needs_synchronous = pgaio_io_needs_synchronous_execution(ioh);

    pgaio_debug_io(DEBUG3, ioh,
                   "staged (synchronous: %d, in_batch: %d)",
                   needs_synchronous, pgaio_my_backend->in_batchmode);

    if (!needs_synchronous)
    {
        pgaio_my_backend->staged_ios[pgaio_my_backend->num_staged_ios++] = ioh;
        Assert(pgaio_my_backend->num_staged_ios <= PGAIO_SUBMIT_BATCH_SIZE);

        /*
         * Unless code explicitly opted into batching IOs, submit the IO
         * immediately.
         */
        if (!pgaio_my_backend->in_batchmode)
            pgaio_submit_staged();
    }
    else
    {
        pgaio_io_prepare_submit(ioh);
        pgaio_io_perform_synchronously(ioh);
    }

    RESUME_INTERRUPTS();
}

bool
pgaio_io_needs_synchronous_execution(PgAioHandle *ioh)
{
    /*
     * If the caller said to execute the IO synchronously, do so.
     *
     * XXX: We could optimize the logic for when to execute synchronously by
     * first checking if there are other IOs in flight and only synchronously
     * executing if not. Unclear whether that'll be sufficiently common to be
     * worth worrying about.
     */
    if (ioh->flags & PGAIO_HF_SYNCHRONOUS)
        return true;

    /* Check if the IO method requires synchronous execution of IO */
    if (pgaio_method_ops->needs_synchronous_execution)
        return pgaio_method_ops->needs_synchronous_execution(ioh);

    return false;
}

/*
 * Handle IO being processed by an IO method.
 *
 * Should be called by IO methods / synchronous IO execution, just before the
 * IO is performed.
 */
void
pgaio_io_prepare_submit(PgAioHandle *ioh)
{
    pgaio_io_update_state(ioh, PGAIO_HS_SUBMITTED);

    dclist_push_tail(&pgaio_my_backend->in_flight_ios, &ioh->node);
}

/*
 * Handle IO getting completed by a method.
 *
 * Should be called by IO methods / synchronous IO execution, just after the
 * IO has been performed.
 *
 * Expects to be called in a critical section. We expect IOs to be usable for
 * WAL etc., which requires being able to execute completion callbacks in a
 * critical section.
 */
void
pgaio_io_process_completion(PgAioHandle *ioh, int result)
{
    Assert(ioh->state == PGAIO_HS_SUBMITTED);

    Assert(CritSectionCount > 0);

    ioh->result = result;

    pgaio_io_update_state(ioh, PGAIO_HS_COMPLETED_IO);

    INJECTION_POINT("aio-process-completion-before-shared", ioh);

    pgaio_io_call_complete_shared(ioh);

    pgaio_io_update_state(ioh, PGAIO_HS_COMPLETED_SHARED);

    /* condition variable broadcast ensures state is visible before wakeup */
    ConditionVariableBroadcast(&ioh->cv);

    /* contains call to pgaio_io_call_complete_local() */
    if (ioh->owner_procno == MyProcNumber)
        pgaio_io_reclaim(ioh);
}

/*
 * Has the IO completed and thus the IO handle been reused?
 *
 * This is useful when waiting for IO completion at a low level (e.g. in an IO
 * method's ->wait_one() callback).
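 *
 * A minimal usage sketch (illustrative only):
 *
 *     PgAioHandleState state;
 *
 *     if (pgaio_io_was_recycled(ioh, ref_generation, &state))
 *         return;
 *
 * If it returns false, "state" holds a snapshot of the handle's state that
 * was read before re-validating the generation, so it belongs to this IO.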
 */
bool
pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state)
{
    *state = ioh->state;

    /*
     * Ensure that we don't see an earlier state of the handle than ioh->state
     * due to compiler or CPU reordering. This protects both ->generation as
     * directly used here, and other fields in the handle accessed in the
     * caller if the handle was not reused.
     */
    pg_read_barrier();

    return ioh->generation != ref_generation;
}

/*
 * Wait for IO to complete. External code should never use this; outside of
 * the AIO subsystem, waits are only allowed via pgaio_wref_wait().
 */
static void
pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
{
    PgAioHandleState state;
    bool        am_owner;

    am_owner = ioh->owner_procno == MyProcNumber;

    if (pgaio_io_was_recycled(ioh, ref_generation, &state))
        return;

    if (am_owner)
    {
        if (state != PGAIO_HS_SUBMITTED
            && state != PGAIO_HS_COMPLETED_IO
            && state != PGAIO_HS_COMPLETED_SHARED
            && state != PGAIO_HS_COMPLETED_LOCAL)
        {
            elog(PANIC, "waiting for own IO %d in wrong state: %s",
                 pgaio_io_get_id(ioh), pgaio_io_get_state_name(ioh));
        }
    }

    while (true)
    {
        if (pgaio_io_was_recycled(ioh, ref_generation, &state))
            return;

        switch ((PgAioHandleState) state)
        {
            case PGAIO_HS_IDLE:
            case PGAIO_HS_HANDED_OUT:
                elog(ERROR, "IO in wrong state: %d", state);
                break;

            case PGAIO_HS_SUBMITTED:

                /*
                 * If we need to wait via the IO method, do so now. Don't
                 * check via the IO method if the issuing backend is executing
                 * the IO synchronously.
                 */
                if (pgaio_method_ops->wait_one && !(ioh->flags & PGAIO_HF_SYNCHRONOUS))
                {
                    pgaio_method_ops->wait_one(ioh, ref_generation);
                    continue;
                }
                /* fallthrough */

                /* waiting for owner to submit */
            case PGAIO_HS_DEFINED:
            case PGAIO_HS_STAGED:
                /* waiting for reaper to complete */
                /* fallthrough */
            case PGAIO_HS_COMPLETED_IO:
                /* shouldn't be able to hit this otherwise */
                Assert(IsUnderPostmaster);
                /* ensure we're going to get woken up */
                ConditionVariablePrepareToSleep(&ioh->cv);

                while (!pgaio_io_was_recycled(ioh, ref_generation, &state))
                {
                    if (state == PGAIO_HS_COMPLETED_SHARED ||
                        state == PGAIO_HS_COMPLETED_LOCAL)
                        break;
                    ConditionVariableSleep(&ioh->cv, WAIT_EVENT_AIO_IO_COMPLETION);
                }

                ConditionVariableCancelSleep();
                break;

            case PGAIO_HS_COMPLETED_SHARED:
            case PGAIO_HS_COMPLETED_LOCAL:

                /*
                 * Note that no interrupts are processed between
                 * pgaio_io_was_recycled() and this check - that's important
                 * as otherwise an interrupt could have already reclaimed the
                 * handle.
                 */
                if (am_owner)
                    pgaio_io_reclaim(ioh);
                return;
        }
    }
}

/*
 * Make an IO handle ready to be reused after IO has completed or after the
 * handle has been released without being used.
 *
 * Note that callers need to be careful about only calling this in the right
 * state and that no interrupts can be processed between the state check and
 * the call to pgaio_io_reclaim(). Otherwise interrupt processing could
 * already have reclaimed the handle.
 */
static void
pgaio_io_reclaim(PgAioHandle *ioh)
{
    /* This is only ok if it's our IO */
    Assert(ioh->owner_procno == MyProcNumber);
    Assert(ioh->state != PGAIO_HS_IDLE);

    /* see comment in function header */
    HOLD_INTERRUPTS();

    /*
     * It's a bit ugly, but right now the easiest place to put the execution
     * of local completion callbacks is this function, as we need to execute
     * local callbacks just before reclaiming at multiple callsites.
     */
    if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
    {
        PgAioResult local_result;

        local_result = pgaio_io_call_complete_local(ioh);
        pgaio_io_update_state(ioh, PGAIO_HS_COMPLETED_LOCAL);

        if (ioh->report_return)
        {
            ioh->report_return->result = local_result;
            ioh->report_return->target_data = ioh->target_data;
        }
    }

    pgaio_debug_io(DEBUG4, ioh,
                   "reclaiming: distilled_result: (status %s, id %u, error_data %d), raw_result: %d",
                   pgaio_result_status_string(ioh->distilled_result.status),
                   ioh->distilled_result.id,
                   ioh->distilled_result.error_data,
                   ioh->result);

    /* if the IO has been defined, it's on the in-flight list, so remove it */
    if (ioh->state != PGAIO_HS_HANDED_OUT)
        dclist_delete_from(&pgaio_my_backend->in_flight_ios, &ioh->node);

    if (ioh->resowner)
    {
        ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node);
        ioh->resowner = NULL;
    }

    Assert(!ioh->resowner);

    /*
     * Update generation & state first, before resetting the IO's fields,
     * otherwise a concurrent "viewer" could think the fields are valid, even
     * though they are being reset. Increment the generation first, so that
     * we can assert elsewhere that we never wait for an IDLE IO. While it's
     * a bit weird for the state to go backwards for a generation, it's OK
     * here, as there cannot be references to the "reborn" IO yet. Can't
     * update both at once, so something has to give.
     */
    ioh->generation++;
    pgaio_io_update_state(ioh, PGAIO_HS_IDLE);

    /* ensure the state update is visible before we reset fields */
    pg_write_barrier();

    ioh->op = PGAIO_OP_INVALID;
    ioh->target = PGAIO_TID_INVALID;
    ioh->flags = 0;
    ioh->num_callbacks = 0;
    ioh->handle_data_len = 0;
    ioh->report_return = NULL;
    ioh->result = 0;
    ioh->distilled_result.status = PGAIO_RS_UNKNOWN;

    /*
     * We push the IO to the head of the idle IO list; that seems more
     * cache-efficient in cases where only a few IOs are used.
     */
    dclist_push_head(&pgaio_my_backend->idle_ios, &ioh->node);

    RESUME_INTERRUPTS();
}

/*
 * Wait for an IO handle to become usable.
 *
 * This only really is useful for pgaio_io_acquire().
 */
static void
pgaio_io_wait_for_free(void)
{
    int         reclaimed = 0;

    pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u in-flight, %u idle IOs",
                pgaio_my_backend->num_staged_ios,
                dclist_count(&pgaio_my_backend->in_flight_ios),
                dclist_count(&pgaio_my_backend->idle_ios));

    /*
     * First check if any of our IOs actually have completed - when using
     * worker, that'll often be the case. We could do so as part of the loop
     * below, but that'd potentially lead us to wait for an IO submitted
     * earlier.
     */
    for (int i = 0; i < io_max_concurrency; i++)
    {
        PgAioHandle *ioh = &pgaio_ctl->io_handles[pgaio_my_backend->io_handle_off + i];

        if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
        {
            /*
             * Note that no interrupts are processed between the state check
             * and the call to reclaim - that's important as otherwise an
             * interrupt could have already reclaimed the handle.
             *
             * Need to ensure that there's no reordering; in the more common
             * paths, where we wait for IO, that's done by
             * pgaio_io_was_recycled().
             */
            pg_read_barrier();
            pgaio_io_reclaim(ioh);
            reclaimed++;
        }
    }

    if (reclaimed > 0)
        return;

    /*
     * If we have any unsubmitted IOs, submit them now. We'll start waiting in
     * a second, so it's better they're in flight. This also addresses the
     * edge-case that all IOs are unsubmitted.
     */
    if (pgaio_my_backend->num_staged_ios > 0)
        pgaio_submit_staged();

    /* possibly some IOs finished during submission */
    if (!dclist_is_empty(&pgaio_my_backend->idle_ios))
        return;

    if (dclist_count(&pgaio_my_backend->in_flight_ios) == 0)
        ereport(ERROR,
                errmsg_internal("no free IOs despite no in-flight IOs"),
                errdetail_internal("%d pending, %u in-flight, %u idle IOs",
                                   pgaio_my_backend->num_staged_ios,
                                   dclist_count(&pgaio_my_backend->in_flight_ios),
                                   dclist_count(&pgaio_my_backend->idle_ios)));

    /*
     * Wait for the oldest in-flight IO to complete.
     *
     * XXX: Reusing the general IO wait is suboptimal; we don't need to wait
     * for that specific IO to complete, we just need *any* IO to complete.
     */
    {
        PgAioHandle *ioh = dclist_head_element(PgAioHandle, node,
                                               &pgaio_my_backend->in_flight_ios);
        uint64      generation = ioh->generation;

        switch ((PgAioHandleState) ioh->state)
        {
                /* should not be in in-flight list */
            case PGAIO_HS_IDLE:
            case PGAIO_HS_DEFINED:
            case PGAIO_HS_HANDED_OUT:
            case PGAIO_HS_STAGED:
            case PGAIO_HS_COMPLETED_LOCAL:
                elog(ERROR, "shouldn't get here with io:%d in state %d",
                     pgaio_io_get_id(ioh), ioh->state);
                break;

            case PGAIO_HS_COMPLETED_IO:
            case PGAIO_HS_SUBMITTED:
                pgaio_debug_io(DEBUG2, ioh,
                               "waiting for free io with %u in flight",
                               dclist_count(&pgaio_my_backend->in_flight_ios));

                /*
                 * In a more general case this would be racy, because the
                 * generation could increase after we read ioh->state above.
                 * But we are only looking at IOs by the current backend and
                 * the IO can only be recycled by this backend. Even this is
                 * only OK because we get the handle's generation before
                 * potentially processing interrupts, e.g. as part of
                 * pgaio_debug_io().
                 */
                pgaio_io_wait(ioh, generation);
                break;

            case PGAIO_HS_COMPLETED_SHARED:

                /*
                 * It's possible that another backend just finished this IO.
                 *
                 * Note that no interrupts are processed between the state
                 * check and the call to reclaim - that's important as
                 * otherwise an interrupt could have already reclaimed the
                 * handle.
                 *
                 * Need to ensure that there's no reordering; in the more
                 * common paths, where we wait for IO, that's done by
                 * pgaio_io_was_recycled().
                 */
                pg_read_barrier();
                pgaio_io_reclaim(ioh);
                break;
        }

        if (dclist_count(&pgaio_my_backend->idle_ios) == 0)
            elog(PANIC, "no idle IO after waiting for IO to terminate");
        return;
    }
}

/*
 * Internal - code outside of AIO should never need this and it'd be hard for
 * such code to be safe.
 */
static PgAioHandle *
pgaio_io_from_wref(PgAioWaitRef *iow, uint64 *ref_generation)
{
    PgAioHandle *ioh;

    Assert(iow->aio_index < pgaio_ctl->io_handle_count);

    ioh = &pgaio_ctl->io_handles[iow->aio_index];

    *ref_generation = ((uint64) iow->generation_upper) << 32 |
        iow->generation_lower;

    Assert(*ref_generation != 0);

    return ioh;
}

static const char *
pgaio_io_state_get_name(PgAioHandleState s)
{
#define PGAIO_HS_TOSTR_CASE(sym) case PGAIO_HS_##sym: return #sym
    switch ((PgAioHandleState) s)
    {
            PGAIO_HS_TOSTR_CASE(IDLE);
            PGAIO_HS_TOSTR_CASE(HANDED_OUT);
            PGAIO_HS_TOSTR_CASE(DEFINED);
            PGAIO_HS_TOSTR_CASE(STAGED);
            PGAIO_HS_TOSTR_CASE(SUBMITTED);
            PGAIO_HS_TOSTR_CASE(COMPLETED_IO);
            PGAIO_HS_TOSTR_CASE(COMPLETED_SHARED);
            PGAIO_HS_TOSTR_CASE(COMPLETED_LOCAL);
    }
#undef PGAIO_HS_TOSTR_CASE

    return NULL;                /* silence compiler */
}

const char *
pgaio_io_get_state_name(PgAioHandle *ioh)
{
    return pgaio_io_state_get_name(ioh->state);
}

const char *
pgaio_result_status_string(PgAioResultStatus rs)
{
    switch ((PgAioResultStatus) rs)
    {
        case PGAIO_RS_UNKNOWN:
            return "UNKNOWN";
        case PGAIO_RS_OK:
            return "OK";
        case PGAIO_RS_WARNING:
            return "WARNING";
        case PGAIO_RS_PARTIAL:
            return "PARTIAL";
        case PGAIO_RS_ERROR:
            return "ERROR";
    }

    return NULL;                /* silence compiler */
}


/* --------------------------------------------------------------------------------
 * Functions primarily related to IO Wait References
 * --------------------------------------------------------------------------------
 */

/*
 * Mark a wait reference as invalid.
 */
void
pgaio_wref_clear(PgAioWaitRef *iow)
{
    iow->aio_index = PG_UINT32_MAX;
}

/* Is the wait reference valid? */
bool
pgaio_wref_valid(PgAioWaitRef *iow)
{
    return iow->aio_index != PG_UINT32_MAX;
}

/*
 * Similar to pgaio_io_get_id(), just for wait references.
 */
int
pgaio_wref_get_id(PgAioWaitRef *iow)
{
    Assert(pgaio_wref_valid(iow));
    return iow->aio_index;
}

/*
 * Wait for the IO to have completed. Can be called in any process, not just
 * in the issuing backend.
 */
void
pgaio_wref_wait(PgAioWaitRef *iow)
{
    uint64      ref_generation;
    PgAioHandle *ioh;

    ioh = pgaio_io_from_wref(iow, &ref_generation);

    pgaio_io_wait(ioh, ref_generation);
}

/*
 * Check if the referenced IO completed, without blocking.
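 *
 * A hypothetical polling pattern, for illustration only:
 *
 *     if (pgaio_wref_valid(&iow) && !pgaio_wref_check_done(&iow))
 *     {
 *         ... do other useful work, then check again, or wait via
 *         pgaio_wref_wait(&iow) ...
 *     }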
 */
bool
pgaio_wref_check_done(PgAioWaitRef *iow)
{
    uint64      ref_generation;
    PgAioHandleState state;
    bool        am_owner;
    PgAioHandle *ioh;

    ioh = pgaio_io_from_wref(iow, &ref_generation);

    if (pgaio_io_was_recycled(ioh, ref_generation, &state))
        return true;

    if (state == PGAIO_HS_IDLE)
        return true;

    am_owner = ioh->owner_procno == MyProcNumber;

    if (state == PGAIO_HS_COMPLETED_SHARED ||
        state == PGAIO_HS_COMPLETED_LOCAL)
    {
        /*
         * Note that no interrupts are processed between
         * pgaio_io_was_recycled() and this check - that's important as
         * otherwise an interrupt could have already reclaimed the handle.
         */
        if (am_owner)
            pgaio_io_reclaim(ioh);
        return true;
    }

    /*
     * XXX: It likely would be worth checking in with the io method, to give
     * the IO method a chance to check if there are completion events queued.
     */

    return false;
}


/* --------------------------------------------------------------------------------
 * Actions on multiple IOs.
 * --------------------------------------------------------------------------------
 */

/*
 * Submit IOs in batches going forward.
 *
 * Submitting multiple IOs at once can be substantially faster than doing so
 * one-by-one. At the same time, submitting multiple IOs at once requires more
 * care to avoid deadlocks.
 *
 * Consider backend A staging an IO for buffer 1 and then trying to start IO
 * on buffer 2, while backend B does the inverse. If A submitted the IO before
 * moving on to buffer 2, this works just fine: B will wait for the IO to
 * complete. But if batching were used, each backend would wait for IO that
 * has not yet been submitted to complete, i.e. forever.
 *
 * End batch submission mode with pgaio_exit_batchmode(). (Throwing errors is
 * allowed; error recovery will end the batch.)
 *
 * To avoid deadlocks, code needs to ensure that it will not wait for another
 * backend while there is unsubmitted IO, e.g. by using conditional lock
 * acquisition when acquiring buffer locks. To check if there currently are
 * staged IOs, call pgaio_have_staged(); to submit all staged IOs, call
 * pgaio_submit_staged().
 *
 * It is not allowed to enter batchmode while already in batchmode; that's
 * unlikely to ever be needed, as code needs to be explicitly aware of being
 * called in batchmode to avoid the deadlock risks explained above.
 *
 * Note that IOs may get submitted before pgaio_exit_batchmode() is called,
 * e.g. because too many IOs have been staged or because pgaio_submit_staged()
 * was called.
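 *
 * A typical shape, for illustration only ("need_to_wait_for_lock" is a
 * stand-in for whatever condition forces waiting on another backend):
 *
 *     pgaio_enter_batchmode();
 *     ... stage several IOs via pgaio_io_start_*() ...
 *     if (need_to_wait_for_lock && pgaio_have_staged())
 *         pgaio_submit_staged();
 *     pgaio_exit_batchmode();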
 */
void
pgaio_enter_batchmode(void)
{
    if (pgaio_my_backend->in_batchmode)
        elog(ERROR, "starting batch while batch already in progress");
    pgaio_my_backend->in_batchmode = true;
}

/*
 * Stop submitting IOs in batches.
 */
void
pgaio_exit_batchmode(void)
{
    Assert(pgaio_my_backend->in_batchmode);

    pgaio_submit_staged();
    pgaio_my_backend->in_batchmode = false;
}

/*
 * Are there staged but unsubmitted IOs?
 *
 * See comment above pgaio_enter_batchmode() for why code may need to check if
 * there is IO in that state.
 */
bool
pgaio_have_staged(void)
{
    Assert(pgaio_my_backend->in_batchmode ||
           pgaio_my_backend->num_staged_ios == 0);
    return pgaio_my_backend->num_staged_ios > 0;
}

/*
 * Submit all staged but not yet submitted IOs.
 *
 * Unless in batch mode, this never needs to be called, as IOs get submitted
 * as soon as possible. While in batchmode, pgaio_submit_staged() can be
 * called before waiting on another backend, to avoid the risk of deadlocks.
 * See pgaio_enter_batchmode().
 */
void
pgaio_submit_staged(void)
{
    int         total_submitted = 0;
    int         did_submit;

    if (pgaio_my_backend->num_staged_ios == 0)
        return;


    START_CRIT_SECTION();

    did_submit = pgaio_method_ops->submit(pgaio_my_backend->num_staged_ios,
                                          pgaio_my_backend->staged_ios);

    END_CRIT_SECTION();

    total_submitted += did_submit;

    Assert(total_submitted == did_submit);

    pgaio_my_backend->num_staged_ios = 0;

    pgaio_debug(DEBUG4,
                "aio: submitted %d IOs",
                total_submitted);
}


/* --------------------------------------------------------------------------------
 * Other
 * --------------------------------------------------------------------------------
 */


/*
 * Perform AIO related cleanup after an error.
 *
 * This should be called early in the error recovery paths, as later steps may
 * need to issue AIO (e.g. to record a transaction abort WAL record).
 */
void
pgaio_error_cleanup(void)
{
    /*
     * It is possible that code errored out after pgaio_enter_batchmode() but
     * before pgaio_exit_batchmode() was called. In that case we need to
     * submit the IO now.
     */
    if (pgaio_my_backend->in_batchmode)
    {
        pgaio_my_backend->in_batchmode = false;

        pgaio_submit_staged();
    }

    /*
     * As we aren't in batchmode, there shouldn't be any unsubmitted IOs.
     */
    Assert(pgaio_my_backend->num_staged_ios == 0);
}

/*
 * Perform AIO related checks at (sub-)transactional boundaries.
 *
 * This should be called late during (sub-)transactional commit/abort, after
 * all steps that might need to perform AIO, so that we can verify that the
 * AIO subsystem is in a valid state at the end of a transaction.
 */
void
AtEOXact_Aio(bool is_commit)
{
    /*
     * We should never be in batch mode at transactional boundaries. In case
     * an error was thrown while in batch mode, pgaio_error_cleanup() should
     * have exited batchmode.
     *
     * In case we are in batchmode somehow, make sure to submit all staged
     * IOs; other backends may need them to complete in order to continue.
     */
    if (pgaio_my_backend->in_batchmode)
    {
        pgaio_error_cleanup();
        elog(WARNING, "open AIO batch at end of (sub-)transaction");
    }

    /*
     * As we aren't in batchmode, there shouldn't be any unsubmitted IOs.
     */
    Assert(pgaio_my_backend->num_staged_ios == 0);
}

/*
 * Need to submit staged but not yet submitted IOs using the fd; otherwise
 * the IO would end up targeting something bogus.
 */
void
pgaio_closing_fd(int fd)
{
    /*
     * Might be called before AIO is initialized or in a subprocess that
     * doesn't use AIO.
     */
    if (!pgaio_my_backend)
        return;

    /*
     * For now just submit all staged IOs - we could be more selective, but
     * it's probably not worth it.
     */
    if (pgaio_my_backend->num_staged_ios > 0)
    {
        pgaio_debug(DEBUG2,
                    "submitting %d IOs before FD %d gets closed",
                    pgaio_my_backend->num_staged_ios, fd);
        pgaio_submit_staged();
    }

    /*
     * If requested by the IO method, wait for all IOs that use the
     * to-be-closed FD.
     */
    if (pgaio_method_ops->wait_on_fd_before_close)
    {
        /*
         * As waiting for one IO to complete may complete multiple IOs, we
         * can't just use a mutable list iterator. The maximum number of
         * in-flight IOs is fairly small, so just restart the loop after
         * waiting for an IO.
         */
        while (!dclist_is_empty(&pgaio_my_backend->in_flight_ios))
        {
            dlist_iter  iter;
            PgAioHandle *ioh = NULL;
            uint64      generation;

            dclist_foreach(iter, &pgaio_my_backend->in_flight_ios)
            {
                ioh = dclist_container(PgAioHandle, node, iter.cur);

                generation = ioh->generation;

                if (pgaio_io_uses_fd(ioh, fd))
                    break;
                else
                    ioh = NULL;
            }

            if (!ioh)
                break;

            pgaio_debug_io(DEBUG2, ioh,
                           "waiting for IO before FD %d gets closed, %u in-flight IOs",
                           fd, dclist_count(&pgaio_my_backend->in_flight_ios));

            /* see comment in pgaio_io_wait_for_free() about raciness */
            pgaio_io_wait(ioh, generation);
        }
    }
}

/*
 * Registered as before_shmem_exit() callback in pgaio_init_backend().
 */
void
pgaio_shutdown(int code, Datum arg)
{
    Assert(pgaio_my_backend);
    Assert(!pgaio_my_backend->handed_out_io);

    /* first clean up resources as we would at a transaction boundary */
    AtEOXact_Aio(code == 0);

    /*
     * Before exiting, make sure that all IOs are finished. That has two main
     * purposes:
     *
     * - Some kernel-level AIO mechanisms don't deal well with the issuer of
     * an AIO exiting before IO completed
     *
     * - It'd be confusing to see partially finished IOs in stats views etc
     */
    while (!dclist_is_empty(&pgaio_my_backend->in_flight_ios))
    {
        PgAioHandle *ioh = dclist_head_element(PgAioHandle, node, &pgaio_my_backend->in_flight_ios);
        uint64      generation = ioh->generation;

        pgaio_debug_io(DEBUG2, ioh,
                       "waiting for IO to complete during shutdown, %u in-flight IOs",
                       dclist_count(&pgaio_my_backend->in_flight_ios));

        /* see comment in pgaio_io_wait_for_free() about raciness */
        pgaio_io_wait(ioh, generation);
    }

    pgaio_my_backend = NULL;
}

void
assign_io_method(int newval, void *extra)
{
    Assert(pgaio_method_ops_table[newval] != NULL);
    Assert(newval < lengthof(io_method_options));

    pgaio_method_ops = pgaio_method_ops_table[newval];
}

bool
check_io_max_concurrency(int *newval, void **extra, GucSource source)
{
    if (*newval == -1)
    {
        /*
         * Auto-tuning will be applied later during startup, as auto-tuning
         * depends on the value of various GUCs.
         */
        return true;
    }
    else if (*newval == 0)
    {
        GUC_check_errdetail("Only -1 or values bigger than 0 are valid.");
        return false;
    }

    return true;
}