PostgreSQL Source Code: src/port/pg_numa.c Source File

/*-------------------------------------------------------------------------
 *
 * pg_numa.c
 *    Basic NUMA portability routines
 *
 *
 * Copyright (c) 2025, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *    src/port/pg_numa.c
 *
 *-------------------------------------------------------------------------
 */

#include "c.h"
#include <unistd.h>

#include "miscadmin.h"
#include "port/pg_numa.h"

/*
 * At this point we provide support only for Linux, thanks to libnuma, but in
 * the future support for other platforms (e.g. Win32 or FreeBSD) might be
 * possible too.  For the Win32 NUMA APIs, see
 * https://learn.microsoft.com/en-us/windows/win32/procthread/numa-support
 */
#ifdef USE_LIBNUMA

#include <numa.h>
#include <numaif.h>

/*
 * numa_move_pages() chunk size, which has to be <= 16 to work around a
 * kernel bug in do_pages_stat() (chunked by DO_PAGES_STAT_CHUNK_NR).  By
 * using the same chunk size, we make the query work even on unfixed kernels.
 *
 * 64-bit systems are not affected by the bug, and so use much larger chunks.
 */
#if SIZEOF_SIZE_T == 4
#define NUMA_QUERY_CHUNK_SIZE 16
#else
#define NUMA_QUERY_CHUNK_SIZE 1024
#endif
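/*
 * Editor's illustration (not part of the upstream file): with these chunk
 * sizes, a query covering 1000 pages is split into 62 chunks of 16 pages
 * plus a final chunk of 8 pages on a 32-bit build, whereas a 64-bit build
 * handles all 1000 pages in a single numa_move_pages() call.
 */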

/* libnuma requires initialization as per numa(3) on Linux */
int
pg_numa_init(void)
{
    int         r = numa_available();

    return r;
}

/*
 * We use the move_pages(2) syscall here - instead of get_mempolicy(2) -
 * because it lets us batch the query for many memory pages into one big
 * system call, which is far faster.
 *
 * We call numa_move_pages() on smaller chunks of the whole array.  The first
 * reason is to work around the kernel bug mentioned above, but it also lets
 * us accept interrupts between the calls (with many pointers, processing the
 * whole array can take a long time).
 */
int
pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
{
    unsigned long next = 0;
    int         ret = 0;

    /*
     * Chunk the pointers passed to numa_move_pages() into groups of
     * NUMA_QUERY_CHUNK_SIZE items, to work around the kernel bug in
     * do_pages_stat().
     */
    while (next < count)
    {
        unsigned long count_chunk = Min(count - next,
                                        NUMA_QUERY_CHUNK_SIZE);

        CHECK_FOR_INTERRUPTS();

        /*
         * Bail out if any of the chunks errors out (ret < 0).  We ignore
         * (ret > 0), which is used to return the number of nonmigrated
         * pages, because we're not migrating any pages here.
         */
        ret = numa_move_pages(pid, count_chunk, &pages[next], NULL, &status[next], 0);
        if (ret < 0)
        {
            /* plain error, return as is */
            return ret;
        }

        next += count_chunk;
    }

    /* should have consumed the input array exactly */
    Assert(next == count);

    return 0;
}

int
pg_numa_get_max_node(void)
{
    return numa_max_node();
}

#else

/* Empty wrappers */
int
pg_numa_init(void)
{
    /* We state that NUMA is not available */
    return -1;
}

int
pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status)
{
    return 0;
}

int
pg_numa_get_max_node(void)
{
    return 0;
}

#endif
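
For context, here is a minimal caller sketch. It is illustrative only and not part of pg_numa.c; the helper name report_numa_nodes is hypothetical. It assumes the given pointers are page-aligned and the pages have already been touched so the kernel has them mapped, and it relies only on the three wrappers defined above plus standard backend facilities (palloc, elog).

#include "postgres.h"

#include "port/pg_numa.h"

/* Hypothetical helper: report the NUMA node of each given page. */
static void
report_numa_nodes(void **page_ptrs, unsigned long npages)
{
    int        *status = palloc(npages * sizeof(int));

    if (pg_numa_init() == -1)
    {
        elog(WARNING, "NUMA support is not available");
        pfree(status);
        return;
    }

    /* pid 0 asks move_pages(2) about the calling process */
    if (pg_numa_query_pages(0, npages, page_ptrs, status) < 0)
        elog(ERROR, "could not query NUMA status of pages");

    for (unsigned long i = 0; i < npages; i++)
    {
        /* negative entries are per-page -errno values from the kernel */
        if (status[i] >= 0)
            elog(DEBUG1, "page %lu is on NUMA node %d", i, status[i]);
        else
            elog(DEBUG1, "page %lu could not be queried: error %d", i, -status[i]);
    }

    pfree(status);
}

Note that pg_numa_query_pages() returns 0 on success, so per-page failures show up only in the status array, not in the return value.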