git.postgresql.org Git - postgresql.git/commitdiff

git projects / postgresql.git / commitdiff
? search:
summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 174edbe)
Detect POLLHUP/POLLRDHUP while running queries.
Fri, 2 Apr 2021 19:52:30 +0000 (08:52 +1300)
Fri, 2 Apr 2021 20:02:41 +0000 (09:02 +1300)
Provide a new GUC check_client_connection_interval that can be used to
check whether the client connection has gone away, while running very
long queries. It is disabled by default.

For now this uses a non-standard Linux extension (also adopted by at
least one other OS). POLLRDHUP is not defined by POSIX, and other OSes
don't have a reliable way to know if a connection was closed without
actually trying to read or write.

In future we might consider trying to send a no-op/heartbeat message
instead, but that could require protocol changes.

Author: Sergey Cherkashin <s.cherkashin@postgrespro.ru>
Author: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Tatsuo Ishii <ishii@sraoss.co.jp>
Reviewed-by: Konstantin Knizhnik <k.knizhnik@postgrespro.ru>
Reviewed-by: Zhihong Yu <zyu@yugabyte.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Maksim Milyutin <milyutinma@gmail.com>
Reviewed-by: Tsunakawa, Takayuki/綱川 貴之 <tsunakawa.takay@fujitsu.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> (much earlier version)
Discussion: https://postgr.es/m/77def86b27e41f0efcba411460e929ae%40postgrespro.ru


diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 9d87b5097afd5334d281e6140546762d24c24f1e..0c9128a55d0de284e7170db648bba022b157a825 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -998,6 +998,43 @@ include_dir 'conf.d'
</listitem>
</varlistentry>
+ <varlistentry id="guc-client-connection-check-interval" xreflabel="client_connection_check_interval">
+ <term><varname>client_connection_check_interval</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>client_connection_check_interval</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Sets the time interval between optional checks that the client is still
+ connected, while running queries. The check is performed by polling
+ the socket, and allows long running queries to be aborted sooner if
+ the kernel reports that the connection is closed.
+ </para>
+ <para>
+ This option is currently available only on systems that support the
+ non-standard <symbol>POLLRDHUP</symbol> extension to the
+ <symbol>poll</symbol> system call, including Linux.
+ </para>
+ <para>
+ If the value is specified without units, it is taken as milliseconds.
+ The default value is <literal>0</literal>, which disables connection
+ checks. Without connection checks, the server will detect the loss of
+ the connection only at the next interaction with the socket, when it
+ waits for, receives or sends data.
+ </para>
+ <para>
+ For the kernel itself to detect lost TCP connections reliably and within
+ a known timeframe in all scenarios including network failure, it may
+ also be necessary to adjust the TCP keepalive settings of the operating
+ system, or the <xref linkend="guc-tcp-keepalives-idle"/>,
+ <xref linkend="guc-tcp-keepalives-interval"/> and
+ <xref linkend="guc-tcp-keepalives-count"/> settings of
+ <productname>PostgreSQL</productname>.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c
index 4c7b1e7bfdf893e6fadafabab68f09aa78e4969e..4cd6d6dfbb98530442e31bd790b34a9372db84b9 100644 (file)
--- a/src/backend/libpq/pqcomm.c
+++ b/src/backend/libpq/pqcomm.c
@@ -54,6 +54,9 @@
*/
#include "postgres.h"
+#ifdef HAVE_POLL_H
+#include <poll.h>
+#endif
#include <signal.h>
#include <fcntl.h>
#include <grp.h>
@@ -1921,3 +1924,40 @@ pq_settcpusertimeout(int timeout, Port *port)
return STATUS_OK;
}
+
+/*
+ * Check if the client is still connected.
+ */
+bool
+pq_check_connection(void)
+{
+#if defined(POLLRDHUP)
+ /*
+ * POLLRDHUP is a Linux extension to poll(2) to detect sockets closed by
+ * the other end. We don't have a portable way to do that without
+ * actually trying to read or write data on other systems. We don't want
+ * to read because that would be confused by pipelined queries and COPY
+ * data. Perhaps in future we'll try to write a heartbeat message instead.
+ */
+ struct pollfd pollfd;
+ int rc;
+
+ pollfd.fd = MyProcPort->sock;
+ pollfd.events = POLLOUT | POLLIN | POLLRDHUP;
+ pollfd.revents = 0;
+
+ rc = poll(&pollfd, 1, 0);
+
+ if (rc < 0)
+ {
+ ereport(COMMERROR,
+ (errcode_for_socket_access(),
+ errmsg("could not poll socket: %m")));
+ return false;
+ }
+ else if (rc == 1 && (pollfd.revents & (POLLHUP | POLLRDHUP)))
+ return false;
+#endif
+
+ return true;
+}
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 2b1b68109fd67840c49107d69eee37e324074e72..ad351e2fd1e613095fc8441c39232463dff40eee 100644 (file)
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -102,6 +102,9 @@ int max_stack_depth = 100;
/* wait N seconds to allow attach from a debugger */
int PostAuthDelay = 0;
+/* Time between checks that the client is still connected. */
+int client_connection_check_interval = 0;
+
/* ----------------
* private typedefs etc
* ----------------
@@ -2671,6 +2674,14 @@ start_xact_command(void)
* not desired, the timeout has to be disabled explicitly.
*/
enable_statement_timeout();
+
+ /* Start timeout for checking if the client has gone away if necessary. */
+ if (client_connection_check_interval > 0 &&
+ IsUnderPostmaster &&
+ MyProcPort &&
+ !get_timeout_active(CLIENT_CONNECTION_CHECK_TIMEOUT))
+ enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
+ client_connection_check_interval);
}
static void
@@ -3149,6 +3160,27 @@ ProcessInterrupts(void)
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating connection due to administrator command")));
}
+
+ if (CheckClientConnectionPending)
+ {
+ CheckClientConnectionPending = false;
+
+ /*
+ * Check for lost connection and re-arm, if still configured, but not
+ * if we've arrived back at DoingCommandRead state. We don't want to
+ * wake up idle sessions, and they already know how to detect lost
+ * connections.
+ */
+ if (!DoingCommandRead && client_connection_check_interval > 0)
+ {
+ if (!pq_check_connection())
+ ClientConnectionLost = true;
+ else
+ enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT,
+ client_connection_check_interval);
+ }
+ }
+
if (ClientConnectionLost)
{
QueryCancelPending = false; /* lost connection trumps QueryCancel */
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 73e0a672ae33d53e70cae0460ae4c936717c7fc5..a9f0fc3017c9433a23778140b203d052230f4a02 100644 (file)
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -30,6 +30,7 @@ ProtocolVersion FrontendProtocol;
volatile sig_atomic_t InterruptPending = false;
volatile sig_atomic_t QueryCancelPending = false;
volatile sig_atomic_t ProcDiePending = false;
+volatile sig_atomic_t CheckClientConnectionPending = false;
volatile sig_atomic_t ClientConnectionLost = false;
volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false;
volatile sig_atomic_t IdleSessionTimeoutPending = false;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 7abeccb536285ed3ce64605e3c308342d9a0bcbc..a3ec358538a6542251b6359e0a7426611c84eece 100644 (file)
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -73,6 +73,7 @@ static void StatementTimeoutHandler(void);
static void LockTimeoutHandler(void);
static void IdleInTransactionSessionTimeoutHandler(void);
static void IdleSessionTimeoutHandler(void);
+static void ClientCheckTimeoutHandler(void);
static bool ThereIsAtLeastOneRole(void);
static void process_startup_options(Port *port, bool am_superuser);
static void process_settings(Oid databaseid, Oid roleid);
@@ -620,6 +621,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
IdleInTransactionSessionTimeoutHandler);
RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler);
+ RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler);
}
/*
@@ -1242,6 +1244,14 @@ IdleSessionTimeoutHandler(void)
SetLatch(MyLatch);
}
+static void
+ClientCheckTimeoutHandler(void)
+{
+ CheckClientConnectionPending = true;
+ InterruptPending = true;
+ SetLatch(MyLatch);
+}
+
/*
* Returns true if at least one role is defined in this database cluster.
*/
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 584daffc8a96fba4b0c7579685623eae20f8599b..60a9c7a2a0b5187c24b9731db462e6cd05ef3e7d 100644 (file)
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -20,6 +20,9 @@
#include <float.h>
#include <math.h>
#include <limits.h>
+#ifdef HAVE_POLL_H
+#include <poll.h>
+#endif
#ifndef WIN32
#include <sys/mman.h>
#endif
@@ -204,6 +207,7 @@ static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource sourc
static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source);
static bool check_huge_page_size(int *newval, void **extra, GucSource source);
+static bool check_client_connection_check_interval(int *newval, void **extra, GucSource source);
static void assign_pgstat_temp_directory(const char *newval, void *extra);
static bool check_application_name(char **newval, void **extra, GucSource source);
static void assign_application_name(const char *newval, void *extra);
@@ -3501,6 +3505,17 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"client_connection_check_interval", PGC_USERSET, CLIENT_CONN_OTHER,
+ gettext_noop("Sets the time interval between checks for disconnection while running queries."),
+ NULL,
+ GUC_UNIT_MS
+ },
+ &client_connection_check_interval,
+ 0, 0, INT_MAX,
+ check_client_connection_check_interval, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
@@ -11980,6 +11995,20 @@ check_huge_page_size(int *newval, void **extra, GucSource source)
return true;
}
+static bool
+check_client_connection_check_interval(int *newval, void **extra, GucSource source)
+{
+#ifndef POLLRDHUP
+ /* Linux only, for now. See pq_check_connection(). */
+ if (*newval != 0)
+ {
+ GUC_check_errdetail("client_connection_check_interval must be set to 0 on platforms that lack POLLRDHUP.");
+ return false;
+ }
+#endif
+ return true;
+}
+
static void
assign_pgstat_temp_directory(const char *newval, void *extra)
{
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 30cfddac1f70d1534ab20ecd24163dd69499ce53..39da7cc9427f26656701f166eead36a148272f9a 100644 (file)
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -719,6 +719,9 @@
#dynamic_library_path = '$libdir'
+#client_connection_check_interval = 0 # time between checks for client
+ # disconnection while running queries;
+ # 0 for never
#------------------------------------------------------------------------------
# LOCK MANAGEMENT
diff --git a/src/include/libpq/libpq.h b/src/include/libpq/libpq.h
index b20deeb5550d1a7d2021de2ee5a3c67f9a39c581..3ebbc8d6656bd46e896e8a65f7a612f8b1882526 100644 (file)
--- a/src/include/libpq/libpq.h
+++ b/src/include/libpq/libpq.h
@@ -71,6 +71,7 @@ extern int pq_getbyte(void);
extern int pq_peekbyte(void);
extern int pq_getbyte_if_available(unsigned char *c);
extern int pq_putmessage_v2(char msgtype, const char *s, size_t len);
+extern bool pq_check_connection(void);
/*
* prototypes for functions in be-secure.c
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 013850ac288fbee5b78d26cbcfbda402154e7a74..6f8251e0b07d0ed2f39f237465a9ae201fb1d687 100644 (file)
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -85,6 +85,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending;
+extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending;
extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost;
/* these are marked volatile because they are examined by signal handlers: */
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index e5472100a436dd1edcaac328611d55e442ec5019..241e7c99614a1b968cbda0d46a2bf05c9b9bd894 100644 (file)
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -29,6 +29,7 @@ extern CommandDest whereToSendOutput;
extern PGDLLIMPORT const char *debug_query_string;
extern int max_stack_depth;
extern int PostAuthDelay;
+extern int client_connection_check_interval;
/* GUC-configurable parameters */
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index ecb2a366a5f44783293eab2d79601a3f836c272f..93e6a691b3f36c30e54326527c25651922567861 100644 (file)
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -32,6 +32,7 @@ typedef enum TimeoutId
STANDBY_LOCK_TIMEOUT,
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
IDLE_SESSION_TIMEOUT,
+ CLIENT_CONNECTION_CHECK_TIMEOUT,
/* First user-definable timeout reason */
USER_TIMEOUT,
/* Maximum number of timeout reasons */
This is the main PostgreSQL git repository.
RSS Atom

AltStyle によって変換されたページ (->オリジナル) /