ScaleSpikeSlab

Niloy Biswas

31/01/2022

ScaleSpikeSlab (S^3)

This package contains algorithms for Scalable Spike-and-Slab (S^3), a scalable Gibbs sampling implementation for high-dimensional Bayesian regression with the continuous spike-and-slab prior.

It is based on the article "Scalable Spike-and-Slab", by Niloy Biswas, Lester Mackey and Xiao-Li Meng. The folder inst contains scripts to reproduce the results of the article.

Installation

The package can be installed from R via:

 # install.packages("devtools")
devtools::install_github("niloyb/ScaleSpikeSlab/R_package")
 
 # Install dependencies Rcpp, RcppEigen
 install.packages(c("Rcpp", "RcppEigen"))
 # Install additional packages to help with parallel computation and plotting
 install.packages(c("doParallel", "doRNG", "foreach", "dplyr", "tidyr", 
 "ggplot2", "latex2exp", "reshape2", "ggpubr"))

A tutorial with Riboflavin GWAS data

Import data and select hyperparameters.

 set.seed(1)
 library(ScaleSpikeSlab)
 
 # Riboflavin linear regression dataset of Buhlmann et al. (2014) 
 data(riboflavin)
X <- riboflavin$x
Xt <- t(X)
y <- riboflavin$y
 
 # Choose hyperparamters
params <- spike_slab_params(n=nrow(X),p=ncol(X))

Run MCMC with S^3

 library(doParallel)
 registerDoParallel(cores = detectCores()-1)
 library(foreach)
 
no_chains <- 50
sss_chain_z_output <- 
 foreach(i = c(1:no_chains), .combine=rbind)%dopar%{
 sss_chain <- spike_slab_linear(chain_length=5e3,burnin=1e3,X=X,Xt=Xt,y=y,
 tau0=params$tau0,tau1=params$tau1,q=params$q,
 verbose=FALSE,store=FALSE)
 return(as.vector(sss_chain$z_ergodic_avg))
}

Plot Spike-and-Slab marginal posterior probabilities for variable selection

 library(dplyr)
 library(ggplot2)
 library(latex2exp)
 
riboflavin_df <- 
 data.frame(post_prob_mean=apply(sss_chain_z_output,2,mean),
 post_prob_sd=apply(sss_chain_z_output,2,sd),
 cov_index=c(1:ncol(X)), no_chains=no_chains) %>%
 arrange(desc(post_prob_mean)) %>%
 mutate(xaxis =1:n())
 
 
 ggplot(riboflavin_df, aes(x=xaxis, y=post_prob_mean)) + 
 geom_point(size=2) + 
 geom_errorbar(aes(ymax=(post_prob_mean+3*post_prob_sd/sqrt(no_chains)), 
 ymin=(post_prob_mean-3*post_prob_sd/sqrt(no_chains))),
 position=position_dodge(.9)) +
 xlab('Riboflavin Covariates') + 
 ylab(TeX('Marginal posterior probabilities')) +
 scale_x_continuous(trans='log10') + theme_classic(base_size = 12)

AltStyle によって変換されたページ (->オリジナル) /