1/*-------------------------------------------------------------------------
4 * Implement shared memory using win32 facilities
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9 * src/backend/port/win32_shmem.c
11 *-------------------------------------------------------------------------
23 * Early in a process's life, Windows asynchronously creates threads for the
24 * process's "default thread pool"
25 * (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
26 * Occasionally, thread creation allocates a stack after
27 * PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
28 * mapped shared memory at UsedShmemSegAddr. This would cause mapping to fail
29 * if the allocator preferred the just-released region for allocating the new
30 * thread stack. We observed such failures in some Windows Server 2016
31 * configurations. To give the system another region to prefer, reserve and
32 * release an additional, protective region immediately before reserving or
33 * releasing shared memory. The idea is that, if the allocator handed out
34 * REGION1 pages before REGION2 pages on one occasion, it will do so whenever
35 * both regions are free. Windows Server 2016 exhibits that behavior, and a
36 * system behaving differently would have less need to protect
37 * UsedShmemSegAddr. The protective region must be at least large enough for
38 * one thread stack. However, ten times as much is less than 2% of the 32-bit
39 * address space and is negligible relative to the 64-bit address space.
41 #define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
52 * Generate shared memory segment name. Expand the data directory, to generate
53 * an identifier unique for this data directory. Then replace all backslashes
54 * with forward slashes, since backslashes aren't permitted in global object names.
56 * Store the shared memory segment in the Global\ namespace (requires NT2 TSE or
57 * 2000, but that's all we support for other reasons as well), to make sure you can't
58 * open two postmasters in different sessions against the same data directory.
60 * XXX: What happens with junctions? It's only someone breaking things on purpose,
61 * and this is still better than before, but we might want to do something about
62 * that sometime in the future.
74 elog(
FATAL,
"could not get size for full pathname of datadir %s: error code %lu",
77 retptr =
malloc(
bufsize + 18);
/* 18 for Global\PostgreSQL: */
79 elog(
FATAL,
"could not allocate memory for shared memory name");
81 strcpy(retptr,
"Global\\PostgreSQL:");
84 elog(
FATAL,
"could not generate full pathname for datadir %s: error code %lu",
88 * XXX: Intentionally overwriting the Global\ part here. This was not the
89 * original approach, but putting it in the actual Global\ namespace
90 * causes permission errors in a lot of cases, so we leave it in the
91 * default namespace for now.
93 for (cp = retptr; *cp; cp++)
102 * PGSharedMemoryIsInUse
104 * Is a previously-existing shmem segment still existing and in use?
106 * The point of this exercise is to detect the case where a prior postmaster
107 * crashed, but it left child backends that are still running. Therefore
108 * we only care about shmem segments that are associated with the intended
109 * DataDir. This is an important consideration since accidental matches of
110 * shmem segment IDs are reasonably common.
120 hmap = OpenFileMapping(FILE_MAP_READ, FALSE, szShareMem);
132 * EnableLockPagesPrivilege
134 * Try to acquire SeLockMemoryPrivilege so we can use large pages.
143 if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
146 (
errmsg(
"could not enable user right \"%s\": error code %lu",
149 * translator: This is a term from Windows and should be translated to
150 * match the Windows localization.
152 _(
"Lock pages in memory"),
154 errdetail(
"Failed system call was %s.",
"OpenProcessToken")));
158 if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
161 (
errmsg(
"could not enable user right \"%s\": error code %lu",
_(
"Lock pages in memory"), GetLastError()),
162 errdetail(
"Failed system call was %s.",
"LookupPrivilegeValue")));
166 tp.PrivilegeCount = 1;
167 tp.Privileges[0].Luid = luid;
168 tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
170 if (!AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL))
173 (
errmsg(
"could not enable user right \"%s\": error code %lu",
_(
"Lock pages in memory"), GetLastError()),
174 errdetail(
"Failed system call was %s.",
"AdjustTokenPrivileges")));
179 if (GetLastError() != ERROR_SUCCESS)
181 if (GetLastError() == ERROR_NOT_ALL_ASSIGNED)
183 (
errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
184 errmsg(
"could not enable user right \"%s\"",
_(
"Lock pages in memory")),
185 errhint(
"Assign user right \"%s\" to the Windows user account which runs PostgreSQL.",
186 _(
"Lock pages in memory"))));
189 (
errmsg(
"could not enable user right \"%s\": error code %lu",
_(
"Lock pages in memory"), GetLastError()),
190 errdetail(
"Failed system call was %s.",
"AdjustTokenPrivileges")));
201 * PGSharedMemoryCreate
203 * Create a shared memory segment of the given size and initialize its
218 SIZE_T largePageSize = 0;
219 Size orig_size = size;
220 DWORD flProtect = PAGE_READWRITE;
224 MEM_RESERVE, PAGE_NOACCESS);
226 elog(
FATAL,
"could not reserve memory region: error code %lu",
229 /* Room for a header? */
238 /* Does the processor support large pages? */
239 largePageSize = GetLargePageMinimum();
240 if (largePageSize == 0)
243 (
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
244 errmsg(
"the processor does not support large pages")));
255 /* Huge pages available and privilege enabled, so turn on */
256 flProtect = PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES;
258 /* Round size up as appropriate. */
259 if (size % largePageSize != 0)
260 size += largePageSize - (size % largePageSize);
266 size_high = size >> 32;
270 size_low = (DWORD) size;
273 * When recycling a shared memory segment, it may take a short while
274 * before it gets dropped from the global namespace. So re-try after
275 * sleeping for a second, and continue retrying 10 times. (both the 1
276 * second time and the 10 retries are completely arbitrary)
278 for (
i = 0;
i < 10;
i++)
281 * In case CreateFileMapping() doesn't set the error code to 0 on
286 hmap = CreateFileMapping(INVALID_HANDLE_VALUE,
/* Use the pagefile */
287 NULL,
/* Default security attrs */
289 size_high,
/* Size Upper 32 Bits */
290 size_low,
/* Size Lower 32 bits */
295 if (GetLastError() == ERROR_NO_SYSTEM_RESOURCES &&
297 (flProtect & SEC_LARGE_PAGES) != 0)
299 elog(
DEBUG1,
"CreateFileMapping(%zu) with SEC_LARGE_PAGES failed, "
300 "huge pages disabled",
304 * Use the original size, not the rounded-up value, when
305 * falling back to non-huge pages.
308 flProtect = PAGE_READWRITE;
313 (
errmsg(
"could not create shared memory segment: error code %lu", GetLastError()),
314 errdetail(
"Failed system call was CreateFileMapping(size=%zu, name=%s).",
319 * If the segment already existed, CreateFileMapping() will return a
320 * handle to the existing one and set ERROR_ALREADY_EXISTS.
322 if (GetLastError() == ERROR_ALREADY_EXISTS)
324 CloseHandle(hmap);
/* Close the handle, since we got a valid one
325 * to the previous segment. */
334 * If the last call in the loop still returned ERROR_ALREADY_EXISTS, this
335 * shared memory segment exists and we assume it belongs to somebody else.
339 (
errmsg(
"pre-existing shared memory block is still in use"),
340 errhint(
"Check if there are any old server processes still running, and terminate them.")));
345 * Make the handle inheritable
347 if (!DuplicateHandle(GetCurrentProcess(), hmap, GetCurrentProcess(), &hmap2, 0, TRUE, DUPLICATE_SAME_ACCESS))
349 (
errmsg(
"could not create shared memory segment: error code %lu", GetLastError()),
350 errdetail(
"Failed system call was DuplicateHandle.")));
353 * Close the old, non-inheritable handle. If this fails we don't really
356 if (!CloseHandle(hmap))
357 elog(
LOG,
"could not close handle to shared memory: error code %lu", GetLastError());
359 desiredAccess = FILE_MAP_WRITE | FILE_MAP_READ;
361#ifdef FILE_MAP_LARGE_PAGES
362 /* Set large pages if wanted. */
363 if ((flProtect & SEC_LARGE_PAGES) != 0)
364 desiredAccess |= FILE_MAP_LARGE_PAGES;
368 * Get a pointer to the new shared memory segment. Map the whole segment
369 * at once, and let the system decide on the initial address.
371 memAddress = MapViewOfFileEx(hmap2, desiredAccess, 0, 0, 0, NULL);
374 (
errmsg(
"could not create shared memory segment: error code %lu", GetLastError()),
375 errdetail(
"Failed system call was MapViewOfFileEx.")));
380 * OK, we created a new segment. Mark it as created by this process. The
381 * order of assignments here is critical so that another Postgres process
382 * can't see the header as valid but belonging to an invalid PID!
389 * Initialize space allocation status for segment.
395 /* Save info for possible future use */
400 /* Register on-exit routine to delete the new segment */
405 /* Report whether huge pages are in use */
413 * PGSharedMemoryReAttach
415 * This is called during startup of a postmaster child process to re-attach to
416 * an already existing shared memory segment, using the handle inherited from
419 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
420 * parameters to this routine. The caller must have already restored them to
421 * the postmaster's values.
434 * Release memory region reservations made by the postmaster
437 elog(
FATAL,
"failed to release reserved memory region (addr=%p): error code %lu",
440 elog(
FATAL,
"failed to release reserved memory region (addr=%p): error code %lu",
445 elog(
FATAL,
"could not reattach to shared memory (key=%p, addr=%p): error code %lu",
447 if (hdr != origUsedShmemSegAddr)
448 elog(
FATAL,
"reattaching to shared memory returned unexpected address (got %p, expected %p)",
449 hdr, origUsedShmemSegAddr);
451 elog(
FATAL,
"reattaching to shared memory returned non-PostgreSQL memory");
458 * PGSharedMemoryNoReAttach
460 * This is called during startup of a postmaster child process when we choose
461 * *not* to re-attach to the existing shared memory segment. We must clean up
462 * to leave things in the appropriate state.
464 * The child process startup logic might or might not call PGSharedMemoryDetach
465 * after this; make sure that it will be a no-op if called.
467 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
468 * parameters to this routine. The caller must have already restored them to
469 * the postmaster's values.
479 * Under Windows we will not have mapped the segment, so we don't need to
480 * un-map it. Just reset UsedShmemSegAddr to show we're not attached.
485 * We *must* close the inherited shmem segment handle, else Windows will
486 * consider the existence of this process to mean it can't release the
487 * shmem segment yet. We can now use PGSharedMemoryDetach to do that.
493 * PGSharedMemoryDetach
495 * Detach from the shared memory segment, if still attached. This is not
496 * intended to be called explicitly by the process that originally created the
497 * segment (it will have an on_shmem_exit callback registered to do that).
498 * Rather, this is for subprocesses that have inherited an attachment and want
501 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
502 * parameters to this routine.
508 * Releasing the protective region liberates an unimportant quantity of
509 * address space, but be tidy.
514 elog(
LOG,
"failed to release reserved memory region (addr=%p): error code %lu",
520 /* Unmap the view, if it's mapped */
524 elog(
LOG,
"could not unmap view of shared memory: error code %lu",
530 /* And close the shmem handle, if we have one */
534 elog(
LOG,
"could not close handle to shared memory: error code %lu",
543 * pgwin32_SharedMemoryDelete
545 * Detach from and delete the shared memory segment
546 * (called as an on_shmem_exit callback, hence funny argument list)
556 * pgwin32_ReserveSharedMemoryRegion(hChild)
558 * Reserve the memory region that will be used for shared memory in a child
559 * process. It is called before the child process starts, to make sure the
560 * memory is available.
562 * Once the child starts, DLLs loading in different order or threads getting
563 * scheduled differently may allocate memory which can conflict with the
564 * address space we need for our shared memory. Reserving the shared
565 * memory region before the child starts, and freeing it only just before we
566 * attempt to get access to the shared memory, forces these allocations to
567 * be given different address ranges that don't conflict.
569 * NOTE! This function executes in the postmaster, and should for this
570 * reason not use elog(FATAL) since that would take down the postmaster.
581 /* ShmemProtectiveRegion */
584 MEM_RESERVE, PAGE_NOACCESS);
587 /* Don't use FATAL since we're running in the postmaster */
588 elog(
LOG,
"could not reserve shared memory region (addr=%p) for child %p: error code %lu",
595 * Should never happen - in theory if allocation granularity causes
596 * strange effects it could, so check just in case.
598 * Don't use FATAL since we're running in the postmaster.
600 elog(
LOG,
"reserved shared memory region got incorrect address %p, expected %p",
605 /* UsedShmemSegAddr */
607 MEM_RESERVE, PAGE_READWRITE);
610 elog(
LOG,
"could not reserve shared memory region (addr=%p) for child %p: error code %lu",
616 elog(
LOG,
"reserved shared memory region got incorrect address %p, expected %p",
625 * This function is provided for consistency with sysv_shmem.c and does not
626 * provide any useful information for Windows. To obtain the large page size,
627 * use GetLargePageMinimum() instead.
639 * GUC check_hook for huge_page_size
int errmsg_internal(const char *fmt,...)
int errdetail(const char *fmt,...)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
#define GUC_check_errdetail
Assert(PointerIsAligned(start, uint64))
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
static rewind_source * source
static Datum PointerGetDatum(const void *X)
static Pointer DatumGetPointer(Datum X)
#define PROTECTIVE_REGION_SIZE
void PGSharedMemoryDetach(void)
void PGSharedMemoryReAttach(void)
PGShmemHeader * PGSharedMemoryCreate(Size size, PGShmemHeader **shim)
bool check_huge_page_size(int *newval, void **extra, GucSource source)
void * ShmemProtectiveRegion
static char * GetSharedMemName(void)
void GetHugePageSize(Size *hugepagesize, int *mmap_flags)
int pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
static bool EnableLockPagesPrivilege(int elevel)
bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
static void pgwin32_SharedMemoryDelete(int status, Datum shmId)
static Size UsedShmemSegSize
void PGSharedMemoryNoReAttach(void)