user_sample_1

 library(ubair)

Load data for one station and run counterfactual

Investigate the effect of the 9-Euro ticket at the Köln station.

Prerequisites

Install the package as described in the README.

Steps

Adapt data_dir to your setup (on Windows, e.g. //de-inf-008/$/Eigene Dateien/ubair-master/data/).

 # Directory in which the input data is stored (adjust to your setup)
data_dir <- "../../Daten/user_sample_data/"
 # Load the data from data_dir; this might take a few seconds for large files
data <- load_uba_data_from_dir(data_dir = data_dir)

Set sample variables.

target <- "NO2" # copied into params$target below
station <- "DENW006" # station id handed to clean_data()
station_name <- "Köln"
meteo_variables <- c("TMP", "RFE", "WIG", "WIR", "LDR") # copied into params
 
 # dates for 9 Euro effect
application_start <- lubridate::ymd("20220301") # = start reference time
date_effect_start <- lubridate::ymd_hm("20220601 00:00") # = start effect time
application_end <- lubridate::ymd("20220831") # = end effect time
 
buffer <- 0 # number of data points to be ignored before effect
 
trend <- "linear" # passed as detrending_function to run_counterfactual()
model_type <- "rf" # passed as model_type to run_counterfactual()
 
window_size <- 14 # days of data to calculate the mean in prediction results

Create a params.yaml, either

by copying the default to your working directory, updating the new params.yaml file and loading it, or
by loading the default and adapting the params programmatically.

# Load the default parameters
params <- load_params()
 # adapt params programmatically: override target and meteo variables
params$target <- target
params$meteo_variables <- meteo_variables

Prepare data of station for training.

See function documentation for further details

# Clean the loaded data for the selected station.
env_data <- clean_data(data, station = station)

# Build the modelling table, then keep only the complete cases.
# NOTE(review): the single-index subset acts as a row filter only if
# dt_prepared is a data.table -- confirm against prepare_data_for_modelling().
dt_prepared <- prepare_data_for_modelling(env_data, params)
dt_prepared <- dt_prepared[complete.cases(dt_prepared)]

# Split the prepared data using the application window defined above.
split_data <- split_data_counterfactual(
  dt_prepared,
  application_start = application_start,
  application_end = application_end
)

Run counterfactual scenario (training and prediction)

# Train the model and predict the counterfactual scenario, using the trend
# and model type chosen in the sample variables.
res <- run_counterfactual(
  split_data,
  params,
  detrending_function = trend,
  model_type = model_type,
  alpha = 0.9,
  log_transform = FALSE
)

# Keep the prediction element of the result for plotting and evaluation.
predictions <- res$prediction

Plot counterfactual run and optionally save to data_dir

# Plot the counterfactual run.
# date_effect_start is passed by name: as a bare positional argument placed
# after a named one, it was matched purely by the order of the remaining
# formals, which is easy to misread and fragile.
counterfactual_plot <- plot_counterfactual(
  predictions,
  params,
  window_size = window_size,
  date_effect_start = date_effect_start,
  buffer = buffer
)

# Evaluate the object at top level so the plot is displayed.
counterfactual_plot

plot of chunk plot_counter_1

Evaluate model and effect

 # Performance metrics for the predictions, rounded to two decimals
 round(calc_performance_metrics(predictions,
 date_effect_start,
 buffer = buffer
), 2)
 #> RMSE MSE MAE MAPE Bias R2 
 #> 8.69 75.45 5.83 0.36 -2.16 0.67 
 #> Coverage lower Coverage upper Coverage Correlation MFB FGE 
 #> 1.00 0.98 0.98 0.84 -0.10 0.41
 # Summary statistics of true vs. predicted values, rounded to two decimals
 round(calc_summary_statistics(predictions,
 date_effect_start,
 buffer = buffer
), 2)
 #> true prediction
 #> min 0.63 -1.02
 #> max 92.58 57.05
 #> var 228.50 111.89
 #> mean 17.76 15.61
 #> 5-percentile 3.24 2.31
 #> 25-percentile 6.94 7.16
 #> median/50-percentile 11.91 13.02
 #> 75-percentile 24.65 22.92
 #> 95-percentile 48.59 34.90
 # estimate_effect_size() yields two values here, so paste() prints two strings
 # NOTE(review): presumably the absolute and the relative effect -- confirm
 paste("effect size:", estimate_effect_size(predictions,
 date_effect_start,
 buffer = buffer,
 verbose = FALSE
))
 #> [1] "effect size: -0.393826299501436" "effect size: -0.0288"