WASP: An R package for Wavelet System Prediction

Ze Jiang, Md. Mamunur Rashid, Ashish Sharma, and Fiona Johnson

15:07:13 20 July, 2024

1 Setup
2 Required packages
3 DWT, MODWT and AT basic propertites
4 Variance transformation
- 4.1 Optimal preditive accuracy (RMSE)
- 4.2 Transformed predictor variables
5 Stepwise variance transformation

1 Setup

2 Required packages

 library(WASP)
 library(ggplot2)
 
 if(!require(SPEI)) devtools::install_github('sbegueria/SPEI@v1.7.1') # use 1.7.1
 #> Loading required package: SPEI
 #> Loading required package: lmomco
 #> Loading required package: parallel
 #> # Package SPEI (1.7.1) loaded [try SPEINews()].
 require(SPEI)
 library(readr)
 library(dplyr)
 #> 
 #> Attaching package: 'dplyr'
 #> The following object is masked from 'package:kableExtra':
 #> 
 #> group_rows
 #> The following objects are masked from 'package:stats':
 #> 
 #> filter, lag
 #> The following objects are masked from 'package:base':
 #> 
 #> intersect, setdiff, setequal, union
 library(FNN)
 #> 
 #> Attaching package: 'FNN'
 #> The following object is masked from 'package:WASP':
 #> 
 #> knn
 library(synthesis)
 #> 
 #> Attaching package: 'synthesis'
 #> The following objects are masked from 'package:WASP':
 #> 
 #> data.gen.HL, data.gen.Rossler, data.gen.SW, data.gen.ar1,
 #> data.gen.ar4, data.gen.ar9, data.gen.tar1, data.gen.tar2
 library(waveslim)
 #> 
 #> waveslim: Wavelet Method for 1/2/3D Signals (version = 1.8.4)
 library(cowplot)
 library(gridGraphics)
 #> Loading required package: grid

3 DWT, MODWT and AT basic propertites

 # data generation
x <- arima.sim(list(order = c(1, 0, 0), ar = 0.6), n = 512)
 # x <- as.numeric(scale(data.gen.Rossler(time = seq(0, 50, length.out = 512))$x, scale=F))
 
 # Daubechies wavelets
 for (wf in c("haar", "d4", "d8", "d16")) {
 print(paste0("Wavelet filter: ", wf))
 #----------------------------------------------------------------------------
 # wavelet family, extension mode and package
 # wf <- "haar" # wavelet family D8 or db4
 boundary <- "periodic"
 if (wf != "haar") v <- as.integer(parse_number(wf) / 2) else v <- 1
 
 # Maximum decomposition level J
 n <- length(x)
 J <- ceiling(log(n / (2 * v - 1)) / log(2)) - 1 # (Kaiser, 1994)
 
 cov <- rnorm(J + 1, sd = 2)
 Vr <- as.numeric(cov / norm(cov, type = "2") * sd(x))
 #----------------------------------------------------------------------------
 # DWT-MRA
 print("-----------DWT-MRA-----------")
 x.mra <- waveslim::mra(x, wf = wf, J = J, method = "dwt", boundary = boundary)
 x.mra.m <- matrix(unlist(x.mra), ncol = J + 1)
 
 x.n <- scale(x.mra.m) %*% Vr
 var(x.n) - var(x)
 
 message(paste0("Additive decompostion: ", isTRUE(all.equal(as.numeric(x), rowSums(x.mra.m)))))
 message(paste0("Variance decompostion: ", isTRUE(all.equal(var(x), sum(apply(x.mra.m, 2, var))))))
 
 #----------------------------------------------------------------------------
 # MODWT
 print("-----------MODWT-----------")
 x.modwt <- waveslim::modwt(x, wf = wf, n.levels = J, boundary = boundary)
 x.modwt.m <- matrix(unlist(x.modwt), ncol = J + 1)
 
 x.n <- scale(x.modwt.m) %*% Vr
 var(x.n) - var(x)
 
 message(paste0("Additive decompostion: ", isTRUE(all.equal(as.numeric(x), rowSums(x.modwt.m)))))
 message(paste0("Variance decompostion: ", isTRUE(all.equal(var(x), sum(apply(x.modwt.m, 2, var))))))
 
 #----------------------------------------------------------------------------
 # a trous
 print("-----------AT-----------")
 x.at <- at.wd(x, wf = wf, J = J, boundary = boundary)
 x.at.m <- matrix(unlist(x.at), ncol = J + 1)
 
 # x.mra.modwt <- waveslim::mra(x,wf=wf, J=J, method="modwt", boundary=boundary)
 # x.mra.modwt <- matrix(unlist(x.mra.modwt), ncol=J+1)
 #
 # print(sum(abs(x.at.m-x.mra.modwt)))
 
 message(paste0("Additive decompostion: ", isTRUE(all.equal(as.numeric(x), rowSums(x.at.m)))))
 message(paste0("Variance decompostion: ", isTRUE(all.equal(var(x), sum(apply(x.at.m, 2, var))))))
 
 if (isTRUE(all.equal(x.at.m, x.modwt.m))) {
 message(paste0("AT and MODWT is equivalent using the", wf, "!"))
 }
}
 #> [1] "Wavelet filter: haar"
 #> [1] "-----------DWT-MRA-----------"
 #> Additive decompostion: TRUE
 #> Variance decompostion: TRUE
 #> [1] "-----------MODWT-----------"
 #> Additive decompostion: TRUE
 #> Variance decompostion: TRUE
 #> [1] "-----------AT-----------"
 #> Additive decompostion: TRUE
 #> Variance decompostion: TRUE
 #> AT and MODWT is equivalent using thehaar!
 #> [1] "Wavelet filter: d4"
 #> [1] "-----------DWT-MRA-----------"
 #> Additive decompostion: TRUE
 #> Variance decompostion: TRUE
 #> [1] "-----------MODWT-----------"
 #> Additive decompostion: FALSE
 #> Variance decompostion: TRUE
 #> [1] "-----------AT-----------"
 #> Additive decompostion: TRUE
 #> Variance decompostion: FALSE
 #> [1] "Wavelet filter: d8"
 #> [1] "-----------DWT-MRA-----------"
 #> Additive decompostion: TRUE
 #> Variance decompostion: TRUE
 #> [1] "-----------MODWT-----------"
 #> Additive decompostion: FALSE
 #> Variance decompostion: TRUE
 #> [1] "-----------AT-----------"
 #> Additive decompostion: TRUE
 #> Variance decompostion: FALSE
 #> [1] "Wavelet filter: d16"
 #> [1] "-----------DWT-MRA-----------"
 #> Additive decompostion: TRUE
 #> Variance decompostion: TRUE
 #> [1] "-----------MODWT-----------"
 #> Additive decompostion: FALSE
 #> Variance decompostion: TRUE
 #> [1] "-----------AT-----------"
 #> Additive decompostion: TRUE
 #> Variance decompostion: FALSE

3.1 Summary of various properties for the three DWT methods

tab1 <- data.frame(
 col1 = c("DWT-MRA", "MODWT", "AT"),
 col2 = c("$\\checkmark$", "", "$\\checkmark$"),
 col3 = c("$\\checkmark$", "$\\checkmark$", ""),
 col4 = c("", "$\\checkmark$", "$\\checkmark$"),
 col5 = c("$\\checkmark$", "", "")
)
 
 colnames(tab1) <- c("Wavelet Method", "Additive decomposition", "Variance decomposition", "No dependence on future data", "Dyadic sample size")
 
 kable(tab1, caption = "Summary of various properties for the three DWT methods", booktabs = T, escape = F) %>%
 kable_styling(latex_options = c("HOLD_position"), position = "center") %>%
 column_spec(1, width = "6em") %>%
 column_spec(2:5, width = "7em") %>%
 footnote(general = "When Haar wavelet filter is used, MODWT and AT are equivalent and both of them preserves additive and variance decomposition.", footnote_as_chunk = T)

Table 3.1: Table 3.2: Summary of various properties for the three DWT methods
Wavelet Method	Additive decomposition	Variance decomposition	No dependence on future data
DWT-MRA	\(\checkmark\)	\(\checkmark\)	\(\checkmark\)
MODWT	\(\checkmark\)	\(\checkmark\)
AT	\(\checkmark\)	\(\checkmark\)
Note: When Haar wavelet filter is used, MODWT and AT are equivalent and both of them preserves additive and variance decomposition.

3.2 Illustration of three types of DWT methods

p.list <- NULL
wf.opts <- c("d16", "haar")
 for (k in seq_along(wf.opts)) {
 # data generation
 x <- arima.sim(list(order = c(1, 0, 0), ar = 0.6), n = 128)
 
 #----------------------------------------------------------------------------
 # wavelet family, extension mode and package
 wf <- wf.opts[k] # wavelet family D8 or db4
 boundary <- "periodic"
 if (wf != "haar") v <- as.integer(parse_number(wf) / 2) else v <- 1
 
 # Maximum decomposition level J
 n <- length(x)
 J <- ceiling(log(n / (2 * v - 1)) / log(2)) - 1 # (Kaiser, 1994)
 
 limits.x <- c(0, n)
 limits.y <- c(-3, 3)
 #----------------------------------------------------------------------------
 # DWT-MRA
 x.mra <- waveslim::mra(x, wf = wf, J = J, method = "dwt", boundary = boundary)
 x.mra.m <- matrix(unlist(x.mra), ncol = J + 1)
 
 p1 <- mra.plot(x, x.mra.m, limits.x, limits.y,
 ylab = "X", col = "red", type = "details",
 main = paste0("DWT-MRA", "(", wf, ")"), ps = 12
 )
 # p1 <- recordPlot()
 
 #----------------------------------------------------------------------------
 # MODWT
 x.modwt <- waveslim::modwt(x, wf = wf, n.levels = J, boundary = boundary)
 x.modwt.m <- matrix(unlist(x.modwt), ncol = J + 1)
 
 p2 <- mra.plot(x, x.modwt.m, limits.x, limits.y,
 ylab = "X", col = "red", type = "coefs",
 main = paste0("MODWT", "(", wf, ")"), ps = 12
 )
 
 #----------------------------------------------------------------------------
 # a trous
 x.at <- at.wd(x, wf = wf, J = J, boundary = boundary)
 x.at.m <- matrix(unlist(x.at), ncol = J + 1)
 
 p3 <- mra.plot(x, x.at.m, limits.x, limits.y,
 ylab = "X", col = "red", type = "coefs",
 main = paste0("AT", "(", wf, ")"), ps = 12
 )
 
 p.list[[k]] <- list(p1, p2, p3)
}

3.2.1 Daubechies 16 wavelet

 #----------------------------------------------------------------------------
 # plot and save
cowplot::plot_grid(
 plotlist = p.list[[1]], ncol = 3, labels = c("(a)", "(b)", "(c)"),
 label_size = 12
)

Illustration of three types of DWT methods

Figure 3.1: Illustration of three types of DWT methods

3.2.2 Haar wavelet filter

 #----------------------------------------------------------------------------
 # plot and save
cowplot::plot_grid(
 plotlist = p.list[[2]], ncol = 3, labels = c("(a)", "(b)", "(c)"),
 label_size = 12
)

Illustration of three types of DWT methods

Figure 3.2: Illustration of three types of DWT methods

3.3 Wavelet transform: decompostion level

sample <- seq(100, by = 200, length.out = 5)
v <- 2 # vanishing moment
tmp <- NULL
 for (n in sample) {
 J1 <- floor(log(n / (2 * v - 1)) / log(2))
 J # (Kaiser, 1994)
 J2 <- floor(log2(n / (2 * v - 1) - 1))
 J # Cornish, C. R., Bretherton, C. S., & Percival, D. B. (2006)
 J3 <- floor(log10(n))
 J # (Nourani et al., 2008)
 
 tmp <- cbind(tmp, c(J1, J2, J3))
}
 
tab <- tmp
 colnames(tab) <- sample
 rownames(tab) <- paste0("Method", 1:3)
 
 kable(tab,
 caption = "Decompostion level with varying sample size",
 booktabs = T, align = "c", digits = 3
) %>%
 kable_styling("striped", position = "center", full_width = FALSE) # %>%

Table 3.3: Table 3.4: Decompostion level with varying sample size
100	300	500	700	900
Method1	5	6	7	7	8
Method2	5	6	7	7	8
Method3	2	2	2	2	2

 # collapse_rows(columns = 1:2, valign = "middle")

4 Variance transformation

4.1 Optimal preditive accuracy (RMSE)

 if (TRUE) {
 ### Synthetic example
 # data generation
 set.seed(2020)
 sample <- 512
 # frequency, sampled from a given range
 fd <- c(3, 5, 10, 15, 25, 30, 55, 70, 95)
 # data <- WASP::data.gen.SW(nobs=sample,fp=25,fd=fd)
 data <- WASP::data.gen.SW(nobs = sample, fp = c(15, 25, 30), fd = fd)
 
 # ts = data.gen.Rossler(time = seq(0, 50, length.out = sample))
 # data <- list(x=ts$z, dp=cbind(ts$x, ts$y))
} else {
 ### Real-world example
 data("obs.mon")
 data("rain.mon")
 
 if (TRUE) { # SPI12 as response
 #SPI.12 <- SPEI::spi(rain.mon[, 5], scale = 12)$fitted
 SPI.12 <- SPI.calc(window(rain.mon[, 5], start=c(1949,1), end=c(2009,12)),sc=12)
 x <- window(SPI.12, start = c(1950, 1), end = c(2009, 12))
 dp <- window(obs.mon, start = c(1950, 1), end = c(2009, 12))
 } else { # rainfall as response
 x <- window(rain.mon[, 5], start = c(1950, 1), end = c(2009, 12))
 dp <- window(obs.mon, start = c(1950, 1), end = c(2009, 12))
 }
 data <- list(x = x, dp = dp)
 sample <- length(x)
}
 
 # plot.ts(cbind(data$x,data$dp))
 
tab.list <- list()
mode.opts <- c("MRA", "MODWT", "AT")
 for (mode in mode.opts) {
 print(mode)
 
 # cov.opt <- switch(2,"auto","pos","neg")
 if (mode == "MRA") {
 method <- switch(1,
 "dwt",
 "modwt"
 )
 }
 
 # wavelet family, extension mode and package
 # wf <- switch(mode, "MRA"="haar", "MODWT"="haar", "AT"="haar")
 wf <- "haar"
 pad <- "zero"
 boundary <- "periodic"
 if (wf != "haar") v <- as.integer(parse_number(wf) / 2) else v <- 1
 
 # Maximum decomposition level J
 n <- sample
 J <- ceiling(log(n / (2 * v - 1)) / log(2)) - 1 # (Kaiser, 1994)
 
 tab <- NULL
 for (cov.opt in c("auto", "pos", "neg")) {
 # variance transform - calibration
 if (mode == "MRA") {
 dwt <- dwt.vt(data, wf, J, method, pad, boundary, cov.opt)
 } else if (mode == "MODWT") {
 dwt <- modwt.vt(data, wf, J, boundary, cov.opt)
 } else {
 dwt <- at.vt(data, wf, J, boundary, cov.opt)
 }
 
 # optimal prediction accuracy
 opti.rmse <- NULL
 dp.RMSE <- NULL
 dp.n.RMSE <- NULL
 S <- dwt$S
 ndim <- ncol(S)
 for (i in 1:ndim) {
 x <- dwt$x
 dp <- dwt$dp[, i]
 dp.n <- dwt$dp.n[, i]
 
 # ts.plot(cbind(dp,dp.n), col=1:2)
 
 dp.RMSE <- c(dp.RMSE, sqrt(mean(lm(x ~ dp)$residuals^2)))
 dp.n.RMSE <- c(dp.n.RMSE, sqrt(mean(lm(x ~ dp.n)$residuals^2)))
 
 # small difference due to the reconstruction
 opti.rmse <- c(opti.rmse, sqrt((n - 1) / n * (var(x) - sum(S[, i]^2) * var(dp) / var(dp.n))))
 # opti.rmse <- c(opti.rmse, sqrt((n-1)/n*(var(x)-sum(S[,i]^2))))
 }
 
 tab <- rbind(tab, data.frame(cov.opt, var=1:ndim, dp.RMSE, dp.n.RMSE, opti.rmse))
 }
 
 colnames(tab) <- c("Sign of covariance", "Variable", "Std", "VT", "Optimal")
 tab.list[[length(tab.list) + 1]] <- tab
}
 #> [1] "MRA"
 #> [1] "MODWT"
 #> [1] "AT"
 
 # print(tab.list)
 
 kable(tab.list[[1]], caption = "Optimal RMSE using DWT-based VT",
 booktabs = T, align = "c", digits = 3) %>%
 kable_styling("striped", position = "center", full_width = FALSE) %>%
 collapse_rows(columns = 1, valign = "middle")

Table 4.1: Table 4.2: Optimal RMSE using DWT-based VT
Sign of covariance	Variable	Std	VT	Optimal
auto	1	1.235	1.231	1.231
	2	1.235	1.232	1.232
	3	1.235	1.232	1.232
	4	1.021	1.010	1.010
	5	1.016	1.013	1.013
	6	1.013	1.008	1.008
	7	1.235	1.222	1.222
	8	1.235	1.231	1.231
	9	1.235	1.220	1.220
pos	1	1.235	1.231	1.231
	2	1.235	1.232	1.232
	3	1.235	1.232	1.232
	4	1.021	1.010	1.010
	5	1.016	1.013	1.013
	6	1.013	1.008	1.008
	7	1.235	1.222	1.222
	8	1.235	1.231	1.231
	9	1.235	1.220	1.220
neg	1	1.235	1.231	1.231
	2	1.235	1.232	1.232
	3	1.235	1.232	1.232
	4	1.021	1.010	1.010
	5	1.016	1.013	1.013
	6	1.013	1.008	1.008
	7	1.235	1.222	1.222
	8	1.235	1.231	1.231
	9	1.235	1.220	1.220

 kable(tab.list[[2]], caption = "Optimal RMSE using MODWT/AT-based VT",
 booktabs = T, align = "c", digits = 3) %>%
 kable_styling("striped", position = "center", full_width = FALSE) %>%
 collapse_rows(columns = 1, valign = "middle")

Table 4.3: Table 4.4: Optimal RMSE using MODWT/AT-based VT
Sign of covariance	Variable	Std	VT	Optimal
auto	1	1.235	1.235	1.235
	2	1.235	1.234	1.234
	3	1.235	1.235	1.235
	4	1.021	1.021	1.021
	5	1.016	1.022	1.022
	6	1.013	1.016	1.016
	7	1.235	1.221	1.221
	8	1.235	1.228	1.228
	9	1.235	1.234	1.234
pos	1	1.235	1.235	1.235
	2	1.235	1.234	1.234
	3	1.235	1.235	1.235
	4	1.021	1.021	1.021
	5	1.016	1.022	1.022
	6	1.013	1.016	1.016
	7	1.235	1.221	1.221
	8	1.235	1.228	1.228
	9	1.235	1.234	1.234
neg	1	1.235	1.235	1.235
	2	1.235	1.234	1.234
	3	1.235	1.235	1.235
	4	1.021	1.021	1.021
	5	1.016	1.022	1.022
	6	1.013	1.016	1.016
	7	1.235	1.221	1.221
	8	1.235	1.228	1.228
	9	1.235	1.234	1.234

4.2 Transformed predictor variables

 #-------------------------------------------------------------------
 if (TRUE) {
 set.seed(2020)
 ### synthetic example - Rossler
 sample <- 10000
 s <- 0.1
 ts.list <- list()
 for (i in seq_along(s)) {
 ts.r <- data.gen.Rossler(a = 0.2, b = 0.2, w = 5.7, start = c(-2, -10, 0.2), time = seq(0, 50, length.out = sample))
 
 # add noise
 ts.r$x <- ts(ts.r$x + rnorm(n = sample, mean = 0, sd = s[i]))
 ts.r$y <- ts(ts.r$y + rnorm(n = sample, mean = 0, sd = s[i]))
 ts.r$z <- ts(ts.r$z + rnorm(n = sample, mean = 0, sd = s[i]))
 
 ts.list[[i]] <- ts.r
 }
 
 data.list <- lapply(ts.list, function(ts) list(x = ts$z, dp = cbind(ts$x, ts$y)))
 
 lab.names <- c("x", "y")
 xlim<- c(0,n);ylim <- c(-55, 55)
} else {
 
 ### Real-world example
 data("obs.mon")
 data("rain.mon")
 
 #SPI.12 <- SPEI::spi(rain.mon[, 5], scale = 12)$fitted
 SPI.12 <- SPI.calc(window(rain.mon[, 5], start=c(1949,1), end=c(2009,12)),sc=12)
 x <- window(SPI.12, start = c(1950, 1), end = c(2009, 12))
 dp <- window(obs.mon, start = c(1950, 1), end = c(2009, 12))
 
 data.list <- list(list(x = x, dp = dp))
 sample <- length(x)
 
 lab.names <- colnames(obs.mon)
 xlim<- NULL;ylim <- NULL
}
 
 #-------------------------------------------------------------------
p.list <- list()
dp.list <- list()
 if (wf != "haar") mode.opts <- c("MRA", "MODWT", "AT")[1:3] else mode.opts <- c("MRA", "MODWT","AT")[1:2]
 for (mode in mode.opts) {
 cov.opt <- switch(1,
 "auto",
 "pos",
 "neg"
 )
 flag <- switch(1,
 "biased",
 "unbiased"
 )
 if (mode == "MRA") {
 method <- switch(1,
 "dwt",
 "modwt"
 )
 }
 
 # wavelet family, extension mode and package
 # wf <- switch(mode, "MRA"="haar", "MODWT"="haar", "AT"="haar")
 wf <- "d16"
 pad <- "zero"
 boundary <- "periodic"
 if (wf != "haar") v <- as.integer(parse_number(wf) / 2) else v <- 1
 
 # Maximum decomposition level J
 n <- sample
 J <- ceiling(log(n / (2 * v - 1)) / log(2)) - 1 # (Kaiser, 1994)
 # J <- floor(log(n/(2*v-1))/log(2))
 
 # variance transform - calibration
 if (mode == "MRA") {
 dwt.list <- lapply(data.list, function(x) dwt.vt(x, wf, J, method, pad, boundary, cov.opt, flag))
 } else if (mode == "MODWT") {
 dwt.list <- lapply(data.list, function(x) modwt.vt(x, wf, J, boundary, cov.opt, flag))
 } else {
 dwt.list <- lapply(data.list, function(x) at.vt(x, wf, J, boundary, cov.opt, flag))
 }
 
 for (j in 1:length(dwt.list)) {
 dwt <- dwt.list[[j]]
 
 par(
 mfrow = c(ncol(dwt$dp), 1), mar = c(0, 2.5, 2, 1),
 oma = c(2, 1, 0, 0), # move plot to the right and up
 mgp = c(1.5, 0.5, 0), # move axis labels closer to axis
 pty = "m", bg = "transparent",
 ps = 12
 )
 
 # plot(dwt$x, type="l", xlab=NA, ylab="SPI12", col="red")
 # plot(dwt$x, type="l", xlab=NA, ylab="Rain", col="red")
 for (i in 1:ncol(dwt$dp)) {
 ts.plot(cbind(dwt$dp[, i], dwt$dp.n[, i]),
 xlab = NA, ylab = paste0(lab.names[i]),
 xlim = xlim, ylim = ylim,
 col = c("black", "blue"), lwd = c(1, 2)
 )
 }
 
 p.list[[length(p.list) + 1]] <- recordPlot()
 
 dp.list[[length(dp.list) + 1]] <- dwt$dp.n
 }
}
 #> [1] "Difference between reconstructed and original series: 2.28840755511772e-08"
 #> [1] "Difference between reconstructed and original series: 2.18861104239743e-08"
 #> [1] "Difference between reconstructed and original series: 45963.8818333333"
 #> [1] "Difference between reconstructed and original series: 46239.1064980564"
 
 #----------------------------------------------------------------------------
 # plot and save
fig <- cowplot::plot_grid(plotlist = p.list, nrow = 1, labels = c("(a)", "(b)", "(c)"))
fig

Orignal and VT predictors. (a): DWT-MRA (b): MODWT/AT

Figure 4.1: Orignal and VT predictors. (a): DWT-MRA (b): MODWT/AT

5 Stepwise variance transformation

 #-------------------------------------------------------------------
 ### Real-world example
 data("obs.mon")
 data("rain.mon")
op <- par()
station.id <- 5
lab.names <- colnames(obs.mon)[c(1, 3, 4, 5, 7)]
 
 if (TRUE) { # SPI12 as response
 #SPI.12 <- SPEI::spi(rain.mon, scale = 12)$fitted
 SPI.12 <- SPI.calc(window(rain.mon, start=c(1949,1), end=c(2009,12)),sc=12)
 x <- window(SPI.12, start = c(1950, 1), end = c(2009, 12))
 dp <- window(obs.mon[, lab.names], start = c(1950, 1), end = c(2009, 12))
} else { # rainfall as response
 x <- window(rain.mon, start = c(1950, 1), end = c(2009, 12))
 dp <- window(obs.mon[, lab.names], start = c(1950, 1), end = c(2009, 12))
}
 
data.list <- lapply(station.id, function(id) list(x = x[, id], dp = dp))
 
 
ylim <- data.frame(
 GPH = c(700, 900), TDP700 = c(5, 25), TDP500 = c(5, 25), EPT = c(300, 330),
 UWND = c(-5, 25), VWND = c(-5, 10), MSLP = c(-1, 1)
)[c(1, 3, 4, 5, 7)]
 
 #-------------------------------------------------------------------
p.list <- list()
RMSE <- NULL
mode.opts <- c("MRA", "MODWT", "AT")[1:2]
 for (mode in mode.opts) {
 cov.opt <- switch(1,
 "auto",
 "pos",
 "neg"
 )
 if (mode == "MRA") {
 method <- switch(1,
 "dwt",
 "modwt"
 )
 }
 
 # wavelet family, extension mode and package
 wf <- switch(mode,
 "MRA" = "d4",
 "MODWT" = "haar",
 "AT" = "haar"
 )
 pad <- "zero"
 boundary <- "periodic"
 if (wf != "haar") v <- as.integer(parse_number(wf) / 2) else v <- 1
 
 # Maximum decomposition level J
 n <- nrow(x)
 J <- ceiling(log(n / (2 * v - 1)) / log(2)) - 1 # (Kaiser, 1994)
 
 # high order variance transformation
 dwt.list <- lapply(data.list, function(data) stepwise.VT(data, mode = mode, wf = wf, J=J))
 
 for (j in seq_len(length(dwt.list))) {
 dwt <- dwt.list[[j]]
 cpy <- dwt$cpy
 
 MSE <- NULL
 for (i in seq_len(length(cpy))) {
 m1 <- sqrt(FNN::knn.reg(train = dwt$dp[, 1:i], y = dwt$x)$PRESS / n)
 m2 <- sqrt(FNN::knn.reg(train = dwt$dp.n[, 1:i], y = dwt$x)$PRESS / n)
 
 MSE <- rbind(MSE, c(m1, m2))
 }
 
 RMSE <- rbind(RMSE, data.frame(mode, MSE))
 
 par(
 mfrow = c(length(cpy), 1), mar = c(0, 4, 2, 1),
 oma = c(2, 1, 0, 0), # move plot to the right and up
 mgp = c(1.5, 0.5, 0), # move axis labels closer to axis
 pty = "m", bg = "transparent",
 ps = 8
 )
 
 # plot(dwt$x, type="l", xlab=NA, ylab="SPI12", ylim=c(-3,3),col="red")
 # plot(dwt$x, type="l", xlab=NA, ylab="Rain", col="red")
 for (i in seq_len(length(cpy))) {
 ts.plot(dwt$dp[, i], dwt$dp.n[, i],
 xlab = NA, ylab = paste0(lab.names[cpy[i]]), # ylim=ylim[,i],
 col = c("black", "blue"), lwd = c(1, 2)
 )
 }
 
 p.list[[length(p.list) + 1]] <- recordPlot()
 }
}
 #> [1] "Variance difference between transformed and original series by percentage: 17.2789472670388"
 par(op)
 #-------------------------------------------------------------------
 # plot and save
cowplot::plot_grid(plotlist = p.list, nrow = 1, labels = c("(a)", "(b)", "(c)"))

Orignal and SVT predictors. (a): DWT-MRA (b): MODWT/AT

Figure 5.1: Orignal and SVT predictors. (a): DWT-MRA (b): MODWT/AT

 
 #-------------------------------------------------------------------
 # RMSE when more predictors are included
 #tab1 <- round(RMSE, 3)
 #tab1 <- cbind(1:nrow(tab1), tab1)
 #colnames(tab1) <- c("No. of Predictors", rep(c("Original", "Transformed"), length(mode.opts)))
 # kable(tab1, caption = "Comparison of prediction accuracy using Std and SVT", booktabs = T) %>%
 # kable_styling(latex_options = c("HOLD_position"), position = "center", full_width = FALSE) %>%
 # # add_header_above(c(" " = 1, "DWT-MRA" = 2, "MODWT" = 2, "AT" = 2))
 # add_header_above(c(" " = 1, "DWT-MRA" = 2, "MODWT/AT" = 2))
tab1 <- RMSE %>% group_by(mode) %>% mutate(id = row_number())
 colnames(tab1) <- c("Method","No. of Predictors","Original","Transformed")
 kable(tab1[,c(1,4,2,3)], caption = "Comparison of prediction accuracy using Std and SVT", booktabs = T, 
 digits = 3) %>%
 kable_styling(latex_options = c("HOLD_position"), position = "center", full_width = FALSE) %>%
 collapse_rows(columns = 1)

Table 5.1: Table 5.2: Comparison of prediction accuracy using Std and SVT
Method	Transformed	No. of Predictors	Original
MRA	1	1.133	0.995
	2	1.101	0.850
	3	1.101	0.835
	4	1.110	0.734
MODWT	1	1.121	0.999
	2	1.122	0.902
	3	1.068	0.826