1/*-------------------------------------------------------------------------
4 * Communication functions between the Frontend and the Backend
6 * These routines handle the low-level details of communication between
7 * frontend and backend. They just shove data across the communication
8 * channel, and are ignorant of the semantics of the data.
10 * To emit an outgoing message, use the routines in pqformat.c to construct
11 * the message in a buffer and then emit it in one call to pq_putmessage.
12 * There are no functions to send raw bytes or partial messages; this
13 * ensures that the channel will not be clogged by an incomplete message if
14 * execution is aborted by ereport(ERROR) partway through the message.
16 * At one time, libpq was shared between frontend and backend, but now
17 * the backend's "backend/libpq" is quite separate from "interfaces/libpq".
18 * All that remains is similarities of names to trap the unwary...
20 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
21 * Portions Copyright (c) 1994, Regents of the University of California
23 * src/backend/libpq/pqcomm.c
25 *-------------------------------------------------------------------------
28/*------------------------
32 * ListenServerPort - Open postmaster's server port
33 * AcceptConnection - Accept new connection with client
34 * TouchSocketFiles - Protect socket files against /tmp cleaners
35 * pq_init - initialize libpq at backend startup
36 * socket_comm_reset - reset libpq during error recovery
37 * socket_close - shutdown libpq at backend exit
40 * pq_getbytes - get a known number of bytes from connection
41 * pq_getmessage - get a message with length word from connection
42 * pq_getbyte - get next byte from connection
43 * pq_peekbyte - peek at next byte from connection
44 * pq_flush - flush pending output
45 * pq_flush_if_writable - flush pending output if writable without blocking
46 * pq_getbyte_if_available - get a byte if available without blocking
49 * pq_putmessage - send a normal message (suppressed in COPY OUT mode)
50 * pq_putmessage_noblock - buffer a normal message (suppressed in COPY OUT)
52 *------------------------
85 * Cope with the various platform-specific ways to spell TCP keepalive socket
86 * options. This doesn't cover Windows, which as usual does its own thing.
88#if defined(TCP_KEEPIDLE)
89/* TCP_KEEPIDLE is the name of this option on Linux and *BSD */
90#define PG_TCP_KEEPALIVE_IDLE TCP_KEEPIDLE
91#define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPIDLE"
92#elif defined(TCP_KEEPALIVE_THRESHOLD)
93/* TCP_KEEPALIVE_THRESHOLD is the name of this option on Solaris >= 11 */
94#define PG_TCP_KEEPALIVE_IDLE TCP_KEEPALIVE_THRESHOLD
95#define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPALIVE_THRESHOLD"
96#elif defined(TCP_KEEPALIVE) && defined(__darwin__)
97/* TCP_KEEPALIVE is the name of this option on macOS */
98/* Caution: Solaris has this symbol but it means something different */
99#define PG_TCP_KEEPALIVE_IDLE TCP_KEEPALIVE
100#define PG_TCP_KEEPALIVE_IDLE_STR "TCP_KEEPALIVE"
104 * Configuration options
109/* Where the Unix socket files are (list of palloc'd strings) */
113 * Buffers for low-level I/O.
115 * The receive buffer is fixed size. Send buffer is usually 8k, but can be
116 * enlarged by pq_putmessage_noblock() if the message doesn't fit otherwise.
119 #define PQ_SEND_BUFFER_SIZE 8192	/* usual send buffer size in bytes; the send buffer can be enlarged by pq_putmessage_noblock() */
120 #define PQ_RECV_BUFFER_SIZE 8192	/* receive buffer size in bytes; the receive buffer is fixed size */
124 static size_t PqSendPointer;
/* Next index to store a byte in PqSendBuffer */
125 static size_t PqSendStart;
/* Next index to send a byte in PqSendBuffer */
134 static bool PqCommBusy;
/*
 * busy sending data to the client.
 * NOTE(review): presumably this is the "busy flag" that socket_comm_reset
 * resets and that the "No-op if reentrant call" checks consult -- confirm
 * against the function bodies.
 */
138/* Internal functions */
/*
 * Forward declaration of Lock_AF_UNIX, which configures the Unix socket file
 * path (see the header comment at its definition).
 *
 * unixSocketDir: NOTE(review): presumably the directory holding the socket
 * file; its use is not visible here -- confirm at the definition.
 * unixSocketPath: path of the socket file; a leading '@' marks an abstract
 * socket, for which no lock file is taken.
 *
 * The meaning of the int result is not visible here -- TODO confirm (likely
 * STATUS_OK / STATUS_ERROR, as documented for ListenServerPort).
 */
152static int Lock_AF_UNIX(
const char *unixSocketDir,
const char *unixSocketPath);
169/* --------------------------------
170 * pq_init - initialize libpq at backend startup
171 * --------------------------------
180 /* allocate the Port struct and copy the ClientSocket contents to it */
186 /* fill in the server (local) address */
187 port->laddr.salen =
sizeof(
port->laddr.addr);
188 if (getsockname(
port->sock,
189 (
struct sockaddr *) &
port->laddr.addr,
190 &
port->laddr.salen) < 0)
193 (
errmsg(
"%s() failed: %m",
"getsockname")));
196 /* select NODELAY and KEEPALIVE options if it's a TCP connection */
197 if (
port->laddr.addr.ss_family != AF_UNIX)
208 if (setsockopt(
port->sock, IPPROTO_TCP, TCP_NODELAY,
209 (
char *) &on,
sizeof(on)) < 0)
212 (
errmsg(
"%s(%s) failed: %m",
"setsockopt",
"TCP_NODELAY")));
216 if (setsockopt(
port->sock, SOL_SOCKET, SO_KEEPALIVE,
217 (
char *) &on,
sizeof(on)) < 0)
220 (
errmsg(
"%s(%s) failed: %m",
"setsockopt",
"SO_KEEPALIVE")));
226 * This is a Win32 socket optimization. The OS send buffer should be
227 * large enough to send the whole Postgres send buffer in one go, or
228 * performance suffers. The Postgres send buffer can be enlarged if a
229 * very large message needs to be sent, but we won't attempt to
230 * enlarge the OS buffer if that happens, so somewhat arbitrarily
231 * ensure that the OS buffer is at least PQ_SEND_BUFFER_SIZE * 4.
232 * (That's 32kB with the current default).
234 * The default OS buffer size used to be 8kB in earlier Windows
235 * versions, but was raised to 64kB in Windows 2012. So it shouldn't
236 * be necessary to change it in later versions anymore. Changing it
237 * unnecessarily can even reduce performance, because setting
238 * SO_SNDBUF in the application disables the "dynamic send buffering"
239 * feature that was introduced in Windows 7. So before fiddling with
240 * SO_SNDBUF, check if the current buffer size is already large enough
241 * and only increase it if necessary.
243 * See https://support.microsoft.com/kb/823764/EN-US/ and
244 * https://msdn.microsoft.com/en-us/library/bb736549%28v=vs.85%29.aspx
246 optlen =
sizeof(oldopt);
247 if (getsockopt(
port->sock, SOL_SOCKET, SO_SNDBUF, (
char *) &oldopt,
251 (
errmsg(
"%s(%s) failed: %m",
"getsockopt",
"SO_SNDBUF")));
256 if (setsockopt(
port->sock, SOL_SOCKET, SO_SNDBUF, (
char *) &newopt,
260 (
errmsg(
"%s(%s) failed: %m",
"setsockopt",
"SO_SNDBUF")));
266 * Also apply the current keepalive parameters. If we fail to set a
267 * parameter, don't error out, because these aren't universally
268 * supported. (Note: you might think we need to reset the GUC
269 * variables to 0 in such a case, but it's not necessary because the
270 * show hooks for these variables report the truth anyway.)
278 /* initialize state variables */
285 /* set up process-exit hook to close the socket */
289 * In backends (as soon as forked) we operate the underlying socket in
290 * nonblocking mode and use latches to implement blocking semantics if
291 * needed. That allows us to provide safely interruptible reads and
297 (
errmsg(
"could not set socket to nonblocking mode: %m")));
302 /* Don't give the socket to any subprograms we execute. */
303 if (fcntl(
port->sock, F_SETFD, FD_CLOEXEC) < 0)
304 elog(
FATAL,
"fcntl(F_SETFD) failed on socket: %m");
309 port->sock, NULL, NULL);
316 * The event positions match the order we added them, but let's sanity
317 * check them to be sure.
325/* --------------------------------
326 * socket_comm_reset - reset libpq during error recovery
328 * This is called from error recovery at the outer idle loop. It's
329 * just to get us out of trouble if we somehow manage to elog() from
330 * inside a pqcomm.c routine (which ideally will never happen, but...)
331 * --------------------------------
336 /* Do not throw away pending data, but do reset the busy flag */
340/* --------------------------------
341 * socket_close - shutdown libpq at backend exit
343 * This is the one pg_on_exit_callback in place during BackendInitialize().
344 * That function's unusual signal handling requires that this callback be
345 * safe to run at any instant.
346 * --------------------------------
351 /* Nothing to do in a standalone backend, where MyProcPort is NULL. */
356 * Shutdown GSSAPI layer. This section does nothing when interrupting
357 * BackendInitialize(), because pg_GSS_recvauth() makes first use of
360 * Note that we don't bother to free MyProcPort->gss, since we're
361 * about to exit anyway.
373#endif /* ENABLE_GSS */
376 * Cleanly shut down SSL layer. Nowhere else does a postmaster child
377 * call this, so this is safe when interrupting BackendInitialize().
382 * Formerly we did an explicit close() here, but it seems better to
383 * leave the socket open until the process dies. This allows clients
384 * to perform a "synchronous close" if they care --- wait till the
385 * transport layer reports connection closure, and you can be sure the
386 * backend has exited.
388 * We do set sock to PGINVALID_SOCKET to prevent any further I/O,
397/* --------------------------------
398 * Postmaster functions to handle sockets.
399 * --------------------------------
403 * ListenServerPort -- open a "listening" port to accept connections.
405 * family should be AF_UNIX or AF_UNSPEC; portNumber is the port number.
406 * For AF_UNIX ports, hostName should be NULL and unixSocketDir must be
407 * specified. For TCP ports, hostName is either NULL for all interfaces or
408 * the interface to listen on, and unixSocketDir is ignored (can be NULL).
410 * Successfully opened sockets are appended to the ListenSockets[] array. On
411 * entry, *NumListenSockets holds the number of elements currently in the
412 * array, and it is updated to reflect the opened sockets. MaxListen is the
413 * allocated size of the array.
415 * RETURNS: STATUS_OK or STATUS_ERROR
419 const char *unixSocketDir,
426 char portNumberStr[32];
427 const char *familyDesc;
428 char familyDescBuf[64];
429 const char *addrDesc;
430 char addrBuf[NI_MAXHOST];
432 struct addrinfo *addrs = NULL,
434 struct addrinfo hint;
437#if !defined(WIN32) || defined(IPV6_V6ONLY)
441 /* Initialize hint structure */
442 MemSet(&hint, 0,
sizeof(hint));
443 hint.ai_family = family;
444 hint.ai_flags = AI_PASSIVE;
445 hint.ai_socktype = SOCK_STREAM;
447 if (family == AF_UNIX)
450 * Create unixSocketPath from portNumber and unixSocketDir and lock
457 (
errmsg(
"Unix-domain socket path \"%s\" is too long (maximum %d bytes)",
464 service = unixSocketPath;
468 snprintf(portNumberStr,
sizeof(portNumberStr),
"%d", portNumber);
469 service = portNumberStr;
477 (
errmsg(
"could not translate host name \"%s\", service \"%s\" to address: %s",
481 (
errmsg(
"could not translate service \"%s\" to address: %s",
488 for (addr = addrs; addr; addr = addr->ai_next)
490 if (family != AF_UNIX && addr->ai_family == AF_UNIX)
493 * Only set up a unix domain socket when they really asked for it.
494 * The service/port is different in that case.
499 /* See if there is still room to add 1 more socket. */
503 (
errmsg(
"could not bind to all requested addresses: MAXLISTEN (%d) exceeded",
508 /* set up address family name for log messages */
509 switch (addr->ai_family)
512 familyDesc =
_(
"IPv4");
515 familyDesc =
_(
"IPv6");
518 familyDesc =
_(
"Unix");
521 snprintf(familyDescBuf,
sizeof(familyDescBuf),
522 _(
"unrecognized address family %d"),
524 familyDesc = familyDescBuf;
528 /* set up text form of address for log messages */
529 if (addr->ai_family == AF_UNIX)
530 addrDesc = unixSocketPath;
535 addrBuf,
sizeof(addrBuf),
545 /* translator: first %s is IPv4, IPv6, or Unix */
546 errmsg(
"could not create %s socket for address \"%s\": %m",
547 familyDesc, addrDesc)));
552 /* Don't give the listen socket to any subprograms we execute. */
553 if (fcntl(
fd, F_SETFD, FD_CLOEXEC) < 0)
554 elog(
FATAL,
"fcntl(F_SETFD) failed on socket: %m");
557 * Without the SO_REUSEADDR flag, a new postmaster can't be started
558 * right away after a stop or crash, giving "address already in use"
559 * error on TCP ports.
561 * On win32, however, this behavior only happens if the
562 * SO_EXCLUSIVEADDRUSE is set. With SO_REUSEADDR, win32 allows
563 * multiple servers to listen on the same address, resulting in
564 * unpredictable behavior. With no flags at all, win32 behaves as Unix
567 if (addr->ai_family != AF_UNIX)
569 if ((setsockopt(
fd, SOL_SOCKET, SO_REUSEADDR,
570 (
char *) &one,
sizeof(one))) == -1)
574 /* translator: third %s is IPv4 or IPv6 */
575 errmsg(
"%s(%s) failed for %s address \"%s\": %m",
576 "setsockopt",
"SO_REUSEADDR",
577 familyDesc, addrDesc)));
585 if (addr->ai_family == AF_INET6)
587 if (setsockopt(
fd, IPPROTO_IPV6, IPV6_V6ONLY,
588 (
char *) &one,
sizeof(one)) == -1)
592 /* translator: third %s is IPv6 */
593 errmsg(
"%s(%s) failed for %s address \"%s\": %m",
594 "setsockopt",
"IPV6_V6ONLY",
595 familyDesc, addrDesc)));
603 * Note: This might fail on some OS's, like Linux older than
604 * 2.4.21-pre3, that don't have the IPV6_V6ONLY socket option, and map
605 * ipv4 addresses to ipv6. It will show ::ffff:ipv4 for all ipv4
608 err =
bind(
fd, addr->ai_addr, addr->ai_addrlen);
611 int saved_errno = errno;
615 /* translator: first %s is IPv4, IPv6, or Unix */
616 errmsg(
"could not bind %s address \"%s\": %m",
617 familyDesc, addrDesc),
619 (addr->ai_family == AF_UNIX ?
620 errhint(
"Is another postmaster already running on port %d?",
622 errhint(
"Is another postmaster already running on port %d?"
623 " If not, wait a few seconds and retry.",
624 (
int) portNumber)) : 0));
629 if (addr->ai_family == AF_UNIX)
639 * Select appropriate accept-queue length limit. It seems reasonable
640 * to use a value similar to the maximum number of child processes
641 * that the postmaster will permit.
650 /* translator: first %s is IPv4, IPv6, or Unix */
651 errmsg(
"could not listen on %s address \"%s\": %m",
652 familyDesc, addrDesc)));
657 if (addr->ai_family == AF_UNIX)
659 (
errmsg(
"listening on Unix socket \"%s\"",
663 /* translator: first %s is IPv4 or IPv6 */
664 (
errmsg(
"listening on %s address \"%s\", port %d",
665 familyDesc, addrDesc, (
int) portNumber)));
668 (*NumListenSockets)++;
682 * Lock_AF_UNIX -- configure unix socket file path
687 /* no lock file for abstract sockets */
688 if (unixSocketPath[0] ==
'@')
692 * Grab an interlock file associated with the socket file.
694 * Note: there are two reasons for using a socket lock file, rather than
695 * trying to interlock directly on the socket itself. First, it's a lot
696 * more portable, and second, it lets us remove any pre-existing socket
697 * file without race conditions.
702 * Once we have the interlock, we can safely delete any pre-existing
703 * socket file to avoid failure at bind() time.
705 (void) unlink(unixSocketPath);
708 * Remember socket file pathnames for later maintenance.
717 * Setup_AF_UNIX -- configure unix socket permissions
722 /* no file system permissions for abstract sockets */
723 if (sock_path[0] ==
'@')
727 * Fix socket ownership/permission if requested. Note we must do this
728 * before we listen() to avoid a window where unwanted connections could
735 elog(
WARNING,
"configuration item \"unix_socket_group\" is not supported on this platform");
743 {
/* numeric group id */
747 {
/* convert group name to id */
754 (
errmsg(
"group \"%s\" does not exist",
760 if (chown(sock_path, -1, gid) == -1)
764 errmsg(
"could not set group of file \"%s\": %m",
775 errmsg(
"could not set permissions of file \"%s\": %m",
784 * AcceptConnection -- accept a new connection with client using
785 * server port. Fills *client_sock with the FD and endpoint info
786 * of the new connection.
788 * ASSUME: that this doesn't need to be non-blocking because
789 * the Postmaster waits for the socket to be ready to accept().
791 * RETURNS: STATUS_OK or STATUS_ERROR
796 /* accept connection and fill in the client (remote) address */
799 (
struct sockaddr *) &client_sock->
raddr.
addr,
804 errmsg(
"could not accept new connection: %m")));
807 * If accept() fails then postmaster.c will still see the server
808 * socket as read-ready, and will immediately try again. To avoid
809 * uselessly sucking lots of CPU, delay a bit before trying again.
810 * (The most likely reason for failure is being out of kernel file
811 * table slots; we can do little except hope some will get freed up.)
821 * TouchSocketFiles -- mark socket files as recently accessed
823 * This routine should be called every so often to ensure that the socket
824 * files have a recent mod date (ordinary operations on sockets usually won't
825 * change the mod date). That saves them from being removed by
826 * overenthusiastic /tmp-directory-cleaner daemons. (Another reason we should
827 * never have put the socket file in /tmp...)
834 /* Loop through all created sockets... */
837 char *sock_path = (
char *)
lfirst(l);
839 /* Ignore errors; there's no point in complaining */
840 (void) utime(sock_path, NULL);
845 * RemoveSocketFiles -- unlink socket files at postmaster shutdown
852 /* Loop through all created sockets... */
855 char *sock_path = (
char *)
lfirst(l);
857 /* Ignore any error. */
858 (void) unlink(sock_path);
860 /* Since we're about to exit, no need to reclaim storage */
864/* --------------------------------
865 * Low-level I/O routines begin here.
867 * These routines communicate with a frontend client across a connection
868 * already established by the preceding routines.
869 * --------------------------------
872/* --------------------------------
873 * socket_set_nonblocking - set socket blocking/non-blocking
875 * Sets the socket non-blocking if nonblocking is true, or sets it
876 * blocking otherwise.
877 * --------------------------------
884 (
errcode(ERRCODE_CONNECTION_DOES_NOT_EXIST),
885 errmsg(
"there is no client connection")));
890/* --------------------------------
891 * pq_recvbuf - load some bytes into the input buffer
893 * returns 0 if OK, EOF if trouble
894 * --------------------------------
903 /* still some unread data, left-justify it in the buffer */
913 /* Ensure that we're in blocking mode */
916 /* Can fill buffer from PqRecvLength and upwards */
929 continue;
/* Ok if interrupted */
932 * Careful: an ereport() that tries to write to the client would
933 * cause recursion to here, leading to stack overflow and core
934 * dump! This message must go *only* to the postmaster log.
936 * If errno is zero, assume it's EOF and let the caller complain.
941 errmsg(
"could not receive data from client: %m")));
947 * EOF detected. We used to write a log message here, but it's
948 * better to expect the ultimate caller to do that.
952 /* r contains number of bytes read, so just incr length */
958/* --------------------------------
959 * pq_getbyte - get a single byte from connection, or return EOF
960 * --------------------------------
969 if (
pq_recvbuf())
/* If nothing in buffer, then recv some */
970 return EOF;
/* Failed to recv data */
975/* --------------------------------
976 * pq_peekbyte - peek at next byte from connection
978 * Same as pq_getbyte() except we don't advance the pointer.
979 * --------------------------------
988 if (
pq_recvbuf())
/* If nothing in buffer, then recv some */
989 return EOF;
/* Failed to recv data */
994/* --------------------------------
995 * pq_getbyte_if_available - get a single byte from connection,
998 * The received byte is stored in *c. Returns 1 if a byte was read,
999 * 0 if no data was available, or EOF if trouble.
1000 * --------------------------------
1015 /* Put the socket into non-blocking mode */
1024 * Ok if no data available without blocking or interrupted (though
1025 * EINTR really shouldn't happen with a non-blocking socket). Report
1033 * Careful: an ereport() that tries to write to the client would
1034 * cause recursion to here, leading to stack overflow and core
1035 * dump! This message must go *only* to the postmaster log.
1037 * If errno is zero, assume it's EOF and let the caller complain.
1042 errmsg(
"could not receive data from client: %m")));
1055/* --------------------------------
1056 * pq_getbytes - get a known number of bytes from connection
1058 * returns 0 if OK, EOF if trouble
1059 * --------------------------------
1073 if (
pq_recvbuf())
/* If nothing in buffer, then recv some */
1074 return EOF;
/* Failed to recv data */
1087/* --------------------------------
1088 * pq_discardbytes - throw away a known number of bytes
1090 * same as pq_getbytes except we do not copy the data anywhere.
1091 * this is used for resynchronizing after read errors.
1093 * returns 0 if OK, EOF if trouble
1094 * --------------------------------
1107 if (
pq_recvbuf())
/* If nothing in buffer, then recv some */
1108 return EOF;
/* Failed to recv data */
1119/* --------------------------------
1120 * pq_buffer_remaining_data - return number of bytes in receive buffer
1122 * This will *not* attempt to read more data. And reading up to that number of
1123 * bytes should not cause reading any more data either.
1124 * --------------------------------
1134/* --------------------------------
1135 * pq_startmsgread - begin reading a message from the client.
1137 * This must be called before any of the pq_get* functions.
1138 * --------------------------------
1144 * There shouldn't be a read active already, but let's check just to be
1149 (
errcode(ERRCODE_PROTOCOL_VIOLATION),
1150 errmsg(
"terminating connection because protocol synchronization was lost")));
1156/* --------------------------------
1157 * pq_endmsgread - finish reading message.
1159 * This must be called after reading a message with pq_getbytes()
1160 * and friends, to indicate that we have read the whole message.
1161 * pq_getmessage() does this implicitly.
1162 * --------------------------------
1172/* --------------------------------
1173 * pq_is_reading_msg - are we currently reading a message?
1175 * This is used in error recovery at the outer idle loop to detect if we have
1176 * lost protocol sync, and need to terminate the connection. pq_startmsgread()
1177 * will check for that too, but it's nicer to detect it earlier.
1178 * --------------------------------
1186/* --------------------------------
1187 * pq_getmessage - get a message with length word from connection
1189 * The return value is placed in an expansible StringInfo, which has
1190 * already been initialized by the caller.
1191 * Only the message body is placed in the StringInfo; the length word
1192 * is removed. Also, s->cursor is initialized to zero for convenience
1193 * in scanning the message contents.
1195 * maxlen is the upper limit on the length of the
1196 * message we are willing to accept. We abort the connection (by
1197 * returning EOF) if the client tries to send more than that.
1199 * returns 0 if OK, EOF if trouble
1200 * --------------------------------
1211 /* Read message length word */
1215 (
errcode(ERRCODE_PROTOCOL_VIOLATION),
1216 errmsg(
"unexpected EOF within message length word")));
1222 if (len < 4 || len > maxlen)
1225 (
errcode(ERRCODE_PROTOCOL_VIOLATION),
1226 errmsg(
"invalid message length")));
1230 len -= 4;
/* discount length itself */
1235 * Allocate space for message. If we run out of room (ridiculously
1236 * large message), we will elog(ERROR), but we want to discard the
1237 * message body so as not to lose communication sync.
1247 (
errcode(ERRCODE_PROTOCOL_VIOLATION),
1248 errmsg(
"incomplete message from client")));
1250 /* we discarded the rest of the message so we're back in sync. */
1256 /* And grab the message */
1260 (
errcode(ERRCODE_PROTOCOL_VIOLATION),
1261 errmsg(
"incomplete message from client")));
1265 /* Place a trailing null per StringInfo convention */
1269 /* finished reading the message. */
1283 /* If buffer is full, then flush it out */
1292 * If the buffer is empty and data length is larger than the buffer
1293 * size, send it without buffering. Otherwise, copy as much data as
1294 * possible into the buffer.
1320/* --------------------------------
1321 * socket_flush - flush pending output
1323 * returns 0 if OK, EOF if trouble
1324 * --------------------------------
1331 /* No-op if reentrant call */
1341/* --------------------------------
1342 * internal_flush - flush pending output
1344 * Returns 0 if OK (meaning everything was sent, or operation would block
1345 * and the socket is in non-blocking mode), or EOF if trouble.
1346 * --------------------------------
1354/* --------------------------------
1355 * internal_flush_buffer - flush the given buffer content
1357 * Returns 0 if OK (meaning everything was sent, or operation would block
1358 * and the socket is in non-blocking mode), or EOF if trouble.
1359 * --------------------------------
1364 static int last_reported_send_errno = 0;
1367 const char *bufend =
buf + *end;
1369 while (bufptr < bufend)
1378 continue;
/* Ok if we were interrupted */
1381 * Ok if no data writable without blocking, and the socket is in
1382 * non-blocking mode.
1391 * Careful: an ereport() that tries to write to the client would
1392 * cause recursion to here, leading to stack overflow and core
1393 * dump! This message must go *only* to the postmaster log.
1395 * If a client disconnects while we're in the midst of output, we
1396 * might write quite a bit of data before we get to a safe query
1397 * abort point. So, suppress duplicate log messages.
1399 if (errno != last_reported_send_errno)
1401 last_reported_send_errno = errno;
1404 errmsg(
"could not send data to client: %m")));
1408 * We drop the buffered data anyway so that processing can
1409 * continue, even though we'll probably quit soon. We also set a
1410 * flag that'll cause the next CHECK_FOR_INTERRUPTS to terminate
1419 last_reported_send_errno = 0;
/* reset after any successful send */
1428/* --------------------------------
1429 * pq_flush_if_writable - flush pending output if writable without blocking
1431 * Returns 0 if OK, or EOF if trouble.
1432 * --------------------------------
1439 /* Quick exit if nothing to do */
1443 /* No-op if reentrant call */
1447 /* Temporarily put the socket into non-blocking mode */
1456/* --------------------------------
1457 * socket_is_send_pending - is there any pending data in the output buffer?
1458 * --------------------------------
1466/* --------------------------------
1467 * Message-level I/O routines begin here.
1468 * --------------------------------
1472/* --------------------------------
1473 * socket_putmessage - send a normal message (suppressed in COPY OUT mode)
1475 * msgtype is a message type code to place before the message body.
1477 * len is the length of the message body data at *s. A message length
1478 * word (equal to len+4 because it counts itself too) is inserted by this
1481 * We suppress messages generated while pqcomm.c is busy. This
1482 * avoids any possibility of messages being inserted within other
1483 * messages. The only known trouble case arises if SIGQUIT occurs
1484 * during a pqcomm.c routine --- quickdie() will try to send a warning
1485 * message, and the most reasonable approach seems to be to drop it.
1487 * returns 0 if OK, EOF if trouble
1488 * --------------------------------
1517/* --------------------------------
1518 * pq_putmessage_noblock - like pq_putmessage, but never blocks
1520 * If the output buffer is too small to hold the message, the buffer
1530 * Ensure we have enough space in the output buffer for the message header
1531 * as well as the message itself.
1540 Assert(res == 0);
/* should not fail when the message fits in
1544/* --------------------------------
1545 * pq_putmessage_v2 - send a message in protocol version 2
1547 * msgtype is a message type code to place before the message body.
1549 * We no longer support protocol version 2, but we have kept this
1550 * function so that if a client tries to connect with protocol version 2,
1551 * as a courtesy we can still send the "unsupported protocol version"
1552 * error to the client in the old format.
1554 * Like in pq_putmessage(), we suppress messages generated while
1557 * returns 0 if OK, EOF if trouble
1558 * --------------------------------
1582 * Support for TCP Keepalive parameters
1586 * On Windows, we need to set both idle and interval at the same time.
1587 * We also cannot reset them to the default (setting to zero will
1588 * actually set them to zero, not default), therefore we fall back to
1589 * the out-of-the-box default instead.
1591#if defined(WIN32) && defined(SIO_KEEPALIVE_VALS)
1595 struct tcp_keepalive ka;
1599 idle = 2 * 60 * 60;
/* default = 2 hours */
1601 interval = 1;
/* default = 1 second */
1604 ka.keepalivetime = idle * 1000;
1605 ka.keepaliveinterval =
interval * 1000;
1607 if (WSAIoctl(
port->sock,
1619 (
errmsg(
"%s(%s) failed: error code %d",
1620 "WSAIoctl",
"SIO_KEEPALIVE_VALS", WSAGetLastError())));
1623 if (
port->keepalives_idle != idle)
1624 port->keepalives_idle = idle;
1634#if defined(PG_TCP_KEEPALIVE_IDLE) || defined(SIO_KEEPALIVE_VALS)
1635 if (
port == NULL ||
port->laddr.addr.ss_family == AF_UNIX)
1638 if (
port->keepalives_idle != 0)
1639 return port->keepalives_idle;
1641 if (
port->default_keepalives_idle == 0)
1646 if (getsockopt(
port->sock, IPPROTO_TCP, PG_TCP_KEEPALIVE_IDLE,
1647 (
char *) &
port->default_keepalives_idle,
1651 (
errmsg(
"%s(%s) failed: %m",
"getsockopt", PG_TCP_KEEPALIVE_IDLE_STR)));
1652 port->default_keepalives_idle = -1;
/* don't know */
1655 /* We can't get the defaults on Windows, so return "don't know" */
1656 port->default_keepalives_idle = -1;
1660 return port->default_keepalives_idle;
1669 if (
port == NULL ||
port->laddr.addr.ss_family == AF_UNIX)
1672/* check SIO_KEEPALIVE_VALS here, not just WIN32, as some toolchains lack it */
1673#if defined(PG_TCP_KEEPALIVE_IDLE) || defined(SIO_KEEPALIVE_VALS)
1674 if (idle ==
port->keepalives_idle)
1678 if (
port->default_keepalives_idle <= 0)
1683 return STATUS_OK;
/* default is set but unknown */
1690 idle =
port->default_keepalives_idle;
1692 if (setsockopt(
port->sock, IPPROTO_TCP, PG_TCP_KEEPALIVE_IDLE,
1693 (
char *) &idle,
sizeof(idle)) < 0)
1696 (
errmsg(
"%s(%s) failed: %m",
"setsockopt", PG_TCP_KEEPALIVE_IDLE_STR)));
1700 port->keepalives_idle = idle;
1702 return pq_setkeepaliveswin32(
port, idle,
port->keepalives_interval);
1708 (
errmsg(
"setting the keepalive idle time is not supported")));
1719#if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
1720 if (
port == NULL ||
port->laddr.addr.ss_family == AF_UNIX)
1723 if (
port->keepalives_interval != 0)
1724 return port->keepalives_interval;
1726 if (
port->default_keepalives_interval == 0)
1731 if (getsockopt(
port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1732 (
char *) &
port->default_keepalives_interval,
1736 (
errmsg(
"%s(%s) failed: %m",
"getsockopt",
"TCP_KEEPINTVL")));
1737 port->default_keepalives_interval = -1;
/* don't know */
1740 /* We can't get the defaults on Windows, so return "don't know" */
1741 port->default_keepalives_interval = -1;
1745 return port->default_keepalives_interval;
1754 if (
port == NULL ||
port->laddr.addr.ss_family == AF_UNIX)
1757#if defined(TCP_KEEPINTVL) || defined(SIO_KEEPALIVE_VALS)
1762 if (
port->default_keepalives_interval <= 0)
1767 return STATUS_OK;
/* default is set but unknown */
1776 if (setsockopt(
port->sock, IPPROTO_TCP, TCP_KEEPINTVL,
1780 (
errmsg(
"%s(%s) failed: %m",
"setsockopt",
"TCP_KEEPINTVL")));
1792 (
errmsg(
"%s(%s) not supported",
"setsockopt",
"TCP_KEEPINTVL")));
1804 if (
port == NULL ||
port->laddr.addr.ss_family == AF_UNIX)
1807 if (
port->keepalives_count != 0)
1808 return port->keepalives_count;
1810 if (
port->default_keepalives_count == 0)
1814 if (getsockopt(
port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1815 (
char *) &
port->default_keepalives_count,
1819 (
errmsg(
"%s(%s) failed: %m",
"getsockopt",
"TCP_KEEPCNT")));
1820 port->default_keepalives_count = -1;
/* don't know */
1824 return port->default_keepalives_count;
1833 if (
port == NULL ||
port->laddr.addr.ss_family == AF_UNIX)
1837 if (count ==
port->keepalives_count)
1840 if (
port->default_keepalives_count <= 0)
1845 return STATUS_OK;
/* default is set but unknown */
1852 count =
port->default_keepalives_count;
1854 if (setsockopt(
port->sock, IPPROTO_TCP, TCP_KEEPCNT,
1855 (
char *) &count,
sizeof(count)) < 0)
1858 (
errmsg(
"%s(%s) failed: %m",
"setsockopt",
"TCP_KEEPCNT")));
1862 port->keepalives_count = count;
1867 (
errmsg(
"%s(%s) not supported",
"setsockopt",
"TCP_KEEPCNT")));
1878#ifdef TCP_USER_TIMEOUT
1879 if (
port == NULL ||
port->laddr.addr.ss_family == AF_UNIX)
1882 if (
port->tcp_user_timeout != 0)
1883 return port->tcp_user_timeout;
1885 if (
port->default_tcp_user_timeout == 0)
1889 if (getsockopt(
port->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
1890 (
char *) &
port->default_tcp_user_timeout,
1894 (
errmsg(
"%s(%s) failed: %m",
"getsockopt",
"TCP_USER_TIMEOUT")));
1895 port->default_tcp_user_timeout = -1;
/* don't know */
1899 return port->default_tcp_user_timeout;
1908 if (
port == NULL ||
port->laddr.addr.ss_family == AF_UNIX)
1911#ifdef TCP_USER_TIMEOUT
1912 if (timeout ==
port->tcp_user_timeout)
1915 if (
port->default_tcp_user_timeout <= 0)
1920 return STATUS_OK;
/* default is set but unknown */
1927 timeout =
port->default_tcp_user_timeout;
1929 if (setsockopt(
port->sock, IPPROTO_TCP, TCP_USER_TIMEOUT,
1930 (
char *) &timeout,
sizeof(timeout)) < 0)
1933 (
errmsg(
"%s(%s) failed: %m",
"setsockopt",
"TCP_USER_TIMEOUT")));
1937 port->tcp_user_timeout = timeout;
1942 (
errmsg(
"%s(%s) not supported",
"setsockopt",
"TCP_USER_TIMEOUT")));
1951 * GUC assign_hook for tcp_keepalives_idle
1957 * The kernel API provides no way to test a value without setting it; and
1958 * once we set it we might fail to unset it. So there seems little point
1959 * in fully implementing the check-then-assign GUC API for these
1960 * variables. Instead we just do the assignment on demand.
1961 * pq_setkeepalivesidle reports any problems via ereport(LOG).
1963 * This approach means that the GUC value might have little to do with the
1964 * actual kernel value, so we use a show_hook that retrieves the kernel
1965 * value rather than trusting GUC's copy.
1971 * GUC show_hook for tcp_keepalives_idle
1976 /* See comments in assign_tcp_keepalives_idle */
1977 static char nbuf[16];
1984 * GUC assign_hook for tcp_keepalives_interval
1989 /* See comments in assign_tcp_keepalives_idle */
1994 * GUC show_hook for tcp_keepalives_interval
1999 /* See comments in assign_tcp_keepalives_idle */
2000 static char nbuf[16];
2007 * GUC assign_hook for tcp_keepalives_count
2012 /* See comments in assign_tcp_keepalives_idle */
2017 * GUC show_hook for tcp_keepalives_count
2022 /* See comments in assign_tcp_keepalives_idle */
2023 static char nbuf[16];
2030 * GUC assign_hook for tcp_user_timeout
2035 /* See comments in assign_tcp_keepalives_idle */
2040 * GUC show_hook for tcp_user_timeout
2045 /* See comments in assign_tcp_keepalives_idle */
2046 static char nbuf[16];
2053 * Check if the client is still connected.
2062 * It's OK to modify the socket event filter without restoring, because
2063 * all FeBeWaitSet socket wait sites do the same.
2069 for (
int i = 0;
i < rc; ++
i)
2076 * A latch event might be preventing other events from being
2077 * reported. Reset it and poll again. No need to restore it
2078 * because no code should expect latches to survive across
2079 * CHECK_FOR_INTERRUPTS().
ssize_t secure_write(Port *port, const void *ptr, size_t len)
void secure_close(Port *port)
ssize_t secure_read(Port *port, void *ptr, size_t len)
#define PG_USED_FOR_ASSERTS_ONLY
#define MemSet(start, val, len)
int errcode_for_socket_access(void)
int errcode_for_file_access(void)
int errhint(const char *fmt,...)
int errcode(int sqlerrcode)
int errmsg(const char *fmt,...)
#define ereport(elevel,...)
void err(int eval, const char *fmt,...)
volatile sig_atomic_t InterruptPending
volatile sig_atomic_t ClientConnectionLost
int tcp_keepalives_interval
Assert(PointerIsAligned(start, uint64))
void pg_freeaddrinfo_all(int hint_ai_family, struct addrinfo *ai)
int pg_getnameinfo_all(const struct sockaddr_storage *addr, int salen, char *node, int nodelen, char *service, int servicelen, int flags)
int pg_getaddrinfo_all(const char *hostname, const char *servname, const struct addrinfo *hintp, struct addrinfo **result)
void on_proc_exit(pg_on_exit_callback function, Datum arg)
void ResetLatch(Latch *latch)
#define pq_putmessage(msgtype, s, len)
#define FeBeWaitSetLatchPos
#define FeBeWaitSetNEvents
#define FeBeWaitSetSocketPos
List * lappend(List *list, void *datum)
void * MemoryContextAlloc(MemoryContext context, Size size)
char * pstrdup(const char *in)
void * repalloc(void *pointer, Size size)
void * palloc0(Size size)
MemoryContext TopMemoryContext
void CreateSocketLockFile(const char *socketfile, bool amPostmaster, const char *socketDir)
bool pg_set_noblock(pgsocket sock)
static pgsocket * ListenSockets
static int NumListenSockets
int pq_setkeepalivesinterval(int interval, Port *port)
Port * pq_init(ClientSocket *client_sock)
void assign_tcp_keepalives_count(int newval, void *extra)
const PQcommMethods * PqCommMethods
static int pq_recvbuf(void)
const char * show_tcp_keepalives_interval(void)
int Unix_socket_permissions
static int internal_flush(void)
static void socket_set_nonblocking(bool nonblocking)
static size_t PqSendPointer
const char * show_tcp_keepalives_count(void)
int pq_getbyte_if_available(unsigned char *c)
static int socket_flush_if_writable(void)
int pq_getkeepalivescount(Port *port)
#define PQ_RECV_BUFFER_SIZE
int pq_getkeepalivesinterval(Port *port)
static int pq_discardbytes(size_t len)
int pq_settcpusertimeout(int timeout, Port *port)
int pq_getmessage(StringInfo s, int maxlen)
static const PQcommMethods PqCommSocketMethods
static bool PqCommReadingMsg
static int socket_flush(void)
ssize_t pq_buffer_remaining_data(void)
#define PQ_SEND_BUFFER_SIZE
const char * show_tcp_keepalives_idle(void)
int pq_setkeepalivesidle(int idle, Port *port)
static int internal_putbytes(const void *b, size_t len)
int pq_getbytes(void *b, size_t len)
int ListenServerPort(int family, const char *hostName, unsigned short portNumber, const char *unixSocketDir, pgsocket ListenSockets[], int *NumListenSockets, int MaxListen)
static void socket_comm_reset(void)
WaitEventSet * FeBeWaitSet
static char * PqSendBuffer
static int Lock_AF_UNIX(const char *unixSocketDir, const char *unixSocketPath)
int pq_getkeepalivesidle(Port *port)
int AcceptConnection(pgsocket server_fd, ClientSocket *client_sock)
void TouchSocketFiles(void)
static bool socket_is_send_pending(void)
void assign_tcp_keepalives_idle(int newval, void *extra)
static int socket_putmessage(char msgtype, const char *s, size_t len)
static void socket_putmessage_noblock(char msgtype, const char *s, size_t len)
static char PqRecvBuffer[PQ_RECV_BUFFER_SIZE]
const char * show_tcp_user_timeout(void)
static void socket_close(int code, Datum arg)
void assign_tcp_user_timeout(int newval, void *extra)
int pq_putmessage_v2(char msgtype, const char *s, size_t len)
static int Setup_AF_UNIX(const char *sock_path)
bool pq_is_reading_msg(void)
void RemoveSocketFiles(void)
int pq_gettcpusertimeout(Port *port)
bool pq_check_connection(void)
static int PqSendBufferSize
void assign_tcp_keepalives_interval(int newval, void *extra)
void pq_startmsgread(void)
int pq_setkeepalivescount(int count, Port *port)
static pg_noinline int internal_flush_buffer(const char *buf, size_t *start, size_t *end)
static size_t PqSendStart
#define UNIXSOCK_PATH(path, port, sockdir)
#define UNIXSOCK_PATH_BUFLEN
static int fd(const char *x, int i)
void pg_usleep(long microsec)
const char * gai_strerror(int ecode)
void resetStringInfo(StringInfo str)
void enlargeStringInfo(StringInfo str, int needed)
struct sockaddr_storage addr
void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch, void *user_data)
int WaitEventSetWait(WaitEventSet *set, long timeout, WaitEvent *occurred_events, int nevents, uint32 wait_event_info)
WaitEventSet * CreateWaitEventSet(ResourceOwner resowner, int nevents)
#define WL_POSTMASTER_DEATH
#define WL_SOCKET_WRITEABLE
#define bind(s, addr, addrlen)
#define socket(af, type, protocol)
#define accept(s, addr, addrlen)
#define listen(s, backlog)