1/*-------------------------------------------------------------------------
4 * Microsoft Windows Win32 Socket Functions
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9 * src/backend/port/win32/socket.c
11 *-------------------------------------------------------------------------
17 * Indicate if pgwin32_recv() and pgwin32_send() should operate
18 * in non-blocking mode.
20 * Since the socket emulation layer always sets the actual socket to
21 * non-blocking mode in order to be able to deliver signals, we must
22 * specify this in a separate flag if we actually need non-blocking
25 * This flag changes the behaviour *globally* for all socket operations,
26 * so it should only be set for very short periods of time.
30/* Undef the macros defined in win32.h, so we can access system functions */
41 * Blocking socket functions implemented so they listen on both
42 * the socket and the signal event, required for signal handling.
46 * Convert the last socket error code into errno
48 * Note: where there is a direct correspondence between a WSAxxx error code
49 * and a Berkeley error symbol, this mapping is actually a no-op, because
50 * in win32_port.h we redefine the network-related Berkeley error symbols to
51 * have the values of their WSAxxx counterparts. The point of the switch is
52 * mostly to translate near-miss error codes into something that's sensible
53 * in the Berkeley universe.
58 switch (WSAGetLastError())
61 case WSANOTINITIALISED:
62 case WSAEINVALIDPROVIDER:
63 case WSAEINVALIDPROCTABLE:
88 case WSAEPROTONOSUPPORT:
90 case WSAESOCKTNOSUPPORT:
120 case WSAEADDRNOTAVAIL:
126 case WSAEHOSTUNREACH:
127 case WSAHOST_NOT_FOUND:
150 WSAGetLastError())));
172 int typelen =
sizeof(
type);
174 if (getsockopt(s, SOL_SOCKET, SO_TYPE, (
char *) &
type, &typelen))
177 return (
type == SOCK_DGRAM) ? 1 : 0;
183 static HANDLE waitevent = INVALID_HANDLE_VALUE;
184 static SOCKET current_socket = INVALID_SOCKET;
185 static int isUDP = 0;
189 /* Create an event object just once and use it on all future calls */
190 if (waitevent == INVALID_HANDLE_VALUE)
192 waitevent = CreateEvent(NULL, TRUE, FALSE, NULL);
194 if (waitevent == INVALID_HANDLE_VALUE)
196 (
errmsg_internal(
"could not create socket waiting event: error code %lu", GetLastError())));
198 else if (!ResetEvent(waitevent))
200 (
errmsg_internal(
"could not reset socket waiting event: error code %lu", GetLastError())));
203 * Track whether socket is UDP or not. (NB: most likely, this is both
204 * useless and wrong; there is no reason to think that the behavior of
205 * WSAEventSelect is different for TCP and UDP.)
207 if (current_socket != s)
212 * Attach event to socket. NOTE: we must detach it again before
213 * returning, since other bits of code may try to attach other events to
216 if (WSAEventSelect(s, waitevent, what) != 0)
223 events[1] = waitevent;
226 * This is a workaround for an unknown locking problem when writing to a
227 * UDP socket under high load: the client's pgsql backend sleeps forever in
228 * WaitForMultipleObjectsEx while the pgstat process sleeps in pgwin32_select().
229 * So we wait with a small timeout (0.1 sec) and, if the socket is still
230 * blocked, try WSASend (see comments in pgwin32_select) and wait again.
232 if ((what & FD_WRITE) && isUDP)
236 r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE);
238 if (r == WAIT_TIMEOUT)
247 r = WSASend(s, &
buf, 1, &sent, 0, NULL, NULL);
248 if (r == 0)
/* Completed - means things are fine! */
250 WSAEventSelect(s, NULL, 0);
253 else if (WSAGetLastError() != WSAEWOULDBLOCK)
256 WSAEventSelect(s, NULL, 0);
265 r = WaitForMultipleObjectsEx(2, events, FALSE, timeout, TRUE);
267 WSAEventSelect(s, NULL, 0);
269 if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION)
275 if (r == WAIT_OBJECT_0 + 1)
277 if (r == WAIT_TIMEOUT)
283 (
errmsg_internal(
"unrecognized return value from WaitForMultipleObjects: %d (error code %lu)", r, GetLastError())));
288 * Create a socket, setting it to overlapped and non-blocking
294 unsigned long on = 1;
296 s = WSASocket(af,
type, protocol, NULL, 0, WSA_FLAG_OVERLAPPED);
297 if (s == INVALID_SOCKET)
300 return INVALID_SOCKET;
303 if (ioctlsocket(s, FIONBIO, &on))
307 return INVALID_SOCKET;
319 res =
bind(s, addr, addrlen);
342 * Poll for signals, but don't return with EINTR, since we don't handle
347 rs = WSAAccept(s, addr, addrlen, NULL, 0);
348 if (rs == INVALID_SOCKET)
351 return INVALID_SOCKET;
357/* No signal delivery during connect. */
363 r = WSAConnect(s, addr, addrlen, NULL, NULL, NULL, NULL);
367 if (WSAGetLastError() != WSAEWOULDBLOCK)
375 /* Loop endlessly as long as we are just delivering signals */
396 r = WSARecv(s, &wbuf, 1, &
b, &flags, NULL, NULL);
397 if (r != SOCKET_ERROR)
398 return b;
/* success */
400 if (WSAGetLastError() != WSAEWOULDBLOCK)
409 * No data received, and we are in "emulated non-blocking mode", so
410 * return indicating that we'd block if we were to continue.
416 /* We're in blocking mode, so wait for data */
418 for (n = 0; n < 5; n++)
422 return -1;
/* errno already set */
424 r = WSARecv(s, &wbuf, 1, &
b, &flags, NULL, NULL);
425 if (r != SOCKET_ERROR)
426 return b;
/* success */
427 if (WSAGetLastError() != WSAEWOULDBLOCK)
434 * There seem to be cases on win2k (at least) where WSARecv can return
435 * WSAEWOULDBLOCK even when pgwin32_waitforsinglesocket claims the
436 * socket is readable. In this case, just sleep for a moment and try
437 * again. We try up to 5 times - if it fails more than that it's not
438 * likely to ever come back.
443 (
errmsg_internal(
"could not read from ready socket (after retries)")));
449 * The second argument to send() is defined by SUS to be a "const void *"
450 * and so we use the same signature here to keep compilers happy when
453 * But the buf member of a WSABUF struct is defined as "char *", so we cast
454 * the second argument to that here when assigning it, also to keep compilers
469 wbuf.buf = (
char *)
buf;
472 * A UDP socket reported as ready for sending may not actually be ready: the
473 * socket can become busy again! So loop until the send succeeds or an error occurs.
477 r = WSASend(s, &wbuf, 1, &
b, flags, NULL, NULL);
478 if (r != SOCKET_ERROR &&
b > 0)
479 /* Write succeeded right away */
482 if (r == SOCKET_ERROR &&
483 WSAGetLastError() != WSAEWOULDBLOCK)
492 * No data sent, and we are in "emulated non-blocking mode", so
493 * return indicating that we'd block if we were to continue.
499 /* No error, zero bytes */
510 * Wait for activity on one or more sockets.
511 * While waiting, allow signals to be delivered.
513 * NOTE! The exceptfds check is currently not implemented,
514 * since it is not used in PostgreSQL!
517 pgwin32_select(
int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
const struct timeval *timeout)
519 WSAEVENT events[FD_SETSIZE * 2];
/* worst case is readfds totally
520 * different from writefds, so
521 * 2*FD_SETSIZE sockets */
522 SOCKET sockets[FD_SETSIZE * 2];
526 DWORD timeoutval = WSA_INFINITE;
531 Assert(exceptfds == NULL);
536 FD_ZERO(&outreadfds);
537 FD_ZERO(&outwritefds);
540 * Windows does not guarantee to log an FD_WRITE network event indicating
541 * that more data can be sent unless the previous send() failed with
542 * WSAEWOULDBLOCK. While our caller might well have made such a call, we
543 * cannot assume that here. Therefore, if waiting for write-ready, force
544 * the issue by doing a dummy send(). If the dummy send() succeeds,
545 * assume that the socket is in fact write-ready, and return immediately.
546 * Also, if it fails with something other than WSAEWOULDBLOCK, return a
547 * write-ready indication to let our caller deal with the error condition.
549 if (writefds != NULL)
551 for (
i = 0;
i < writefds->fd_count;
i++)
560 r = WSASend(writefds->fd_array[
i], &
buf, 1, &sent, 0, NULL, NULL);
561 if (r == 0 || WSAGetLastError() != WSAEWOULDBLOCK)
562 FD_SET(writefds->fd_array[
i], &outwritefds);
565 /* If we found any write-ready sockets, just return them immediately */
566 if (outwritefds.fd_count > 0)
568 memcpy(writefds, &outwritefds,
sizeof(fd_set));
571 return outwritefds.fd_count;
576 /* Now set up for an actual select */
580 /* timeoutval is in milliseconds */
581 timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
586 for (
i = 0;
i < readfds->fd_count;
i++)
588 events[numevents] = WSACreateEvent();
589 sockets[numevents] = readfds->fd_array[
i];
593 if (writefds != NULL)
595 for (
i = 0;
i < writefds->fd_count;
i++)
598 !FD_ISSET(writefds->fd_array[
i], readfds))
600 /* If the socket is not in the read list */
601 events[numevents] = WSACreateEvent();
602 sockets[numevents] = writefds->fd_array[
i];
608 for (
i = 0;
i < numevents;
i++)
612 if (readfds && FD_ISSET(sockets[
i], readfds))
613 flags |= FD_READ | FD_ACCEPT | FD_CLOSE;
615 if (writefds && FD_ISSET(sockets[
i], writefds))
616 flags |= FD_WRITE | FD_CLOSE;
618 if (WSAEventSelect(sockets[
i], events[
i], flags) != 0)
621 /* release already-assigned event objects */
623 WSAEventSelect(sockets[
i], NULL, 0);
624 for (
i = 0;
i < numevents;
i++)
625 WSACloseEvent(events[
i]);
631 r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE);
632 if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents))
635 * We scan all events, even those not signaled, in case more than one
636 * event has been signaled, since WaitForMultipleObjectsEx can only return one.
638 WSANETWORKEVENTS resEvents;
640 for (
i = 0;
i < numevents;
i++)
642 ZeroMemory(&resEvents,
sizeof(resEvents));
643 if (WSAEnumNetworkEvents(sockets[
i], events[
i], &resEvents) != 0)
644 elog(
ERROR,
"failed to enumerate network events: error code %d",
647 if (readfds && FD_ISSET(sockets[
i], readfds))
649 if ((resEvents.lNetworkEvents & FD_READ) ||
650 (resEvents.lNetworkEvents & FD_ACCEPT) ||
651 (resEvents.lNetworkEvents & FD_CLOSE))
653 FD_SET(sockets[
i], &outreadfds);
658 /* Write activity? */
659 if (writefds && FD_ISSET(sockets[
i], writefds))
661 if ((resEvents.lNetworkEvents & FD_WRITE) ||
662 (resEvents.lNetworkEvents & FD_CLOSE))
664 FD_SET(sockets[
i], &outwritefds);
672 /* Clean up all the event objects */
673 for (
i = 0;
i < numevents;
i++)
675 WSAEventSelect(sockets[
i], NULL, 0);
676 WSACloseEvent(events[
i]);
679 if (r == WSA_WAIT_TIMEOUT)
688 /* Signal-like events. */
689 if (r == WAIT_OBJECT_0 + numevents || r == WAIT_IO_COMPLETION)
700 /* Overwrite socket sets with our resulting values */
702 memcpy(readfds, &outreadfds,
sizeof(fd_set));
704 memcpy(writefds, &outwritefds,
sizeof(fd_set));
int errmsg_internal(const char *fmt,...)
#define ereport(elevel,...)
Assert(PointerIsAligned(start, uint64))
void pg_usleep(long microsec)
void pgwin32_dispatch_queued_signals(void)
HANDLE pgwin32_signal_event
int pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval *timeout)
int pgwin32_recv(SOCKET s, char *buf, int len, int f)
int pgwin32_send(SOCKET s, const void *buf, int len, int flags)
int pgwin32_connect(SOCKET s, const struct sockaddr *addr, int addrlen)
int pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout)
static int pgwin32_poll_signals(void)
SOCKET pgwin32_socket(int af, int type, int protocol)
static int isDataGram(SOCKET s)
static void TranslateSocketError(void)
SOCKET pgwin32_accept(SOCKET s, struct sockaddr *addr, int *addrlen)
int pgwin32_bind(SOCKET s, struct sockaddr *addr, int addrlen)
int pgwin32_listen(SOCKET s, int backlog)
#define UNBLOCKED_SIGNAL_QUEUE()
#define bind(s, addr, addrlen)
#define listen(s, backlog)