git.postgresql.org Git - postgresql.git/commitdiff

git projects / postgresql.git / commitdiff
? search:
summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 2a3f6e3)
Add GUC to control the time to wait before retrieving WAL after failed attempt.
2015年2月23日 11:55:17 +0000 (20:55 +0900)
2015年2月23日 11:55:17 +0000 (20:55 +0900)
Previously, when the standby server failed to retrieve WAL files from any source
(i.e., streaming replication, the local pg_xlog directory, or the WAL archive), it
always waited for five seconds (hard-coded) before the next attempt. This is
problematic in, for example, a warm standby: restore_command can fail every five
seconds even while a new WAL file is expected to be unavailable for a long time,
flooding the log files with its error messages.

This commit adds a new parameter, wal_retrieve_retry_interval, to control that
wait time.

Alexey Vasiliev and Michael Paquier, reviewed by Andres Freund and me.


diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6bcb106518e4eefe33c9b69129d737f93a1646be..a3917aac7855bc3c71f7e6bb6b3c78078d981314 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2985,6 +2985,24 @@ include_dir 'conf.d'
</listitem>
</varlistentry>
+ <varlistentry id="guc-wal-retrieve-retry-interval" xreflabel="wal_retrieve_retry_interval">
+ <term><varname>wal_retrieve_retry_interval</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>wal_retrieve_retry_interval</> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Specifies how long the standby server should wait when WAL data is not
+ available from any source (streaming replication,
+ local <filename>pg_xlog</> or WAL archive) before retrying to
+ retrieve WAL data. This parameter can only be set in the
+ <filename>postgresql.conf</> file or on the server command line.
+ The default value is 5 seconds. Units are milliseconds if not specified.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
</sect1>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 629a457965ff2e6987bd6ccd5ee0cabb26fe212c..f68f82b255c0608d7a7f958865a2abe3d179911c 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -93,6 +93,7 @@ int sync_method = DEFAULT_SYNC_METHOD;
int wal_level = WAL_LEVEL_MINIMAL;
int CommitDelay = 0; /* precommit delay in microseconds */
int CommitSiblings = 5; /* # concurrent xacts needed to sleep */
+int wal_retrieve_retry_interval = 5000;
#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
@@ -10340,8 +10341,8 @@ static bool
WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
bool fetching_ckpt, XLogRecPtr tliRecPtr)
{
- static pg_time_t last_fail_time = 0;
- pg_time_t now;
+ static TimestampTz last_fail_time = 0;
+ TimestampTz now;
/*-------
* Standby mode is implemented by a state machine:
@@ -10351,7 +10352,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* 2. Check trigger file
* 3. Read from primary server via walreceiver (XLOG_FROM_STREAM)
* 4. Rescan timelines
- * 5. Sleep 5 seconds, and loop back to 1.
+ * 5. Sleep wal_retrieve_retry_interval milliseconds, and loop back to 1.
*
* Failure to read from the current source advances the state machine to
* the next state.
@@ -10490,14 +10491,25 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* machine, so we've exhausted all the options for
* obtaining the requested WAL. We're going to loop back
* and retry from the archive, but if it hasn't been long
- * since last attempt, sleep 5 seconds to avoid
- * busy-waiting.
+ * since last attempt, sleep wal_retrieve_retry_interval
+ * milliseconds to avoid busy-waiting.
*/
- now = (pg_time_t) time(NULL);
- if ((now - last_fail_time) < 5)
+ now = GetCurrentTimestamp();
+ if (!TimestampDifferenceExceeds(last_fail_time, now,
+ wal_retrieve_retry_interval))
{
- pg_usleep(1000000L * (5 - (now - last_fail_time)));
- now = (pg_time_t) time(NULL);
+ long secs, wait_time;
+ int usecs;
+
+ TimestampDifference(last_fail_time, now, &secs, &usecs);
+ wait_time = wal_retrieve_retry_interval -
+ (secs * 1000 + usecs / 1000);
+
+ WaitLatch(&XLogCtl->recoveryWakeupLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ wait_time);
+ ResetLatch(&XLogCtl->recoveryWakeupLatch);
+ now = GetCurrentTimestamp();
}
last_fail_time = now;
currentSource = XLOG_FROM_ARCHIVE;
@@ -10653,12 +10665,11 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
}
/*
- * Wait for more WAL to arrive. Time out after 5 seconds,
- * like when polling the archive, to react to a trigger
- * file promptly.
+ * Wait for more WAL to arrive. Time out after 5 seconds
+ * to react to a trigger file promptly.
*/
WaitLatch(&XLogCtl->recoveryWakeupLatch,
- WL_LATCH_SET | WL_TIMEOUT,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
5000L);
ResetLatch(&XLogCtl->recoveryWakeupLatch);
break;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 95727776d3851a2d1a55ca3d3fa824a7cf492bbc..cf401d3cf03ecbb30987151ae0e8784e14033a12 100644 (file)
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2363,6 +2363,18 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"wal_retrieve_retry_interval", PGC_SIGHUP, REPLICATION_STANDBY,
+ gettext_noop("Sets the time to wait before retrying to retrieve WAL "
+ "after a failed attempt."),
+ NULL,
+ GUC_UNIT_MS
+ },
+ &wal_retrieve_retry_interval,
+ 5000, 1, INT_MAX,
+ NULL, NULL, NULL
+ },
+
{
{"wal_segment_size", PGC_INTERNAL, PRESET_OPTIONS,
gettext_noop("Shows the number of pages per write ahead log segment."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b053659f88e85a2aff7b2ddcbb12b0ca4b2897d0..29d8485964d696cccc0d45e63c1644485973b1f1 100644 (file)
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -260,6 +260,8 @@
#wal_receiver_timeout = 60s # time that receiver waits for
# communication from master
# in milliseconds; 0 disables
+#wal_retrieve_retry_interval = 5s # time to wait before retrying to
+ # retrieve WAL after a failed attempt
#------------------------------------------------------------------------------
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 138deaf7c8fa393507c59eec49e3b2cbee67d5a3..be27a85648665ba7aaa9f4d61c516feaf7dcb675 100644 (file)
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -93,6 +93,7 @@ extern int CheckPointSegments;
extern int wal_keep_segments;
extern int XLOGbuffers;
extern int XLogArchiveTimeout;
+extern int wal_retrieve_retry_interval;
extern bool XLogArchiveMode;
extern char *XLogArchiveCommand;
extern bool EnableHotStandby;
This is the main PostgreSQL git repository.
RSS Atom

AltStyle によって変換されたページ (->オリジナル) /