Worked Example 4 - Parameter Estimation Using Markov Chain Monte Carlo Functions • YEP

This worked example demonstrates how to use the package to estimate parameters of the yellow fever model from observed epidemiological data within a Bayesian framework using Markov chain Monte Carlo sampling.

We load input data, environmental data and the observed data to be used for comparison in the formats used in previous worked examples (see Guide 1 - Inputs for details of the input format, Guide 2 - Calculating Parameters From Environmental Data for details of how environmental data is formatted and used to generate parameter values, and Worked Example 3 - Generating Epidemiological Data Sets for details of observed data formats):

library(YEP)
input_data <- readRDS(file = paste(path.package("YEP"), 
                                   "/exdata/input_data_example.Rds", sep = ""))
enviro_data_const <- read.csv(file = paste(path.package("YEP"), 
                                     "/exdata/enviro_data_example.csv", sep = ""),
                        header = TRUE)
enviro_data_var <- NULL
n_env_vars = ncol(enviro_data_const)-1
obs_sero_data <- read.csv(file = paste(path.package("YEP"), 
                                       "/exdata/obs_sero_data_example.csv", 
                                       sep = ""), header = TRUE)
obs_case_data <- read.csv(file = paste(path.package("YEP"), 
                                       "/exdata/obs_case_data_example.csv", 
                                       sep = ""), header = TRUE)
# Values of environmental coefficients to use as initial parameter values
enviro_coeffs_ini <- as.numeric(read.csv(file=paste(path.package("YEP"),
                        "/exdata/enviro_coeffs_example.csv", sep = ""), header = TRUE))

We input parameter data including initial values and prior settings through the params_data data frame:

covar_names = colnames(enviro_data_const)[c(1:n_env_vars)+1]
add_est_param_names=c("vaccine_efficacy","p_rep_severe","p_rep_death")
params_data=mcmc_params_data_create(covar_names,add_est_param_names)
params_data$min[c(1:5)]=1.0e-11
params_data$initial[params_data$name=="vaccine_efficacy"]=0.975
params_data$min[params_data$name=="vaccine_efficacy"]=0.95
params_data$mean[params_data$name=="vaccine_efficacy"]=0.975
params_data$initial[params_data$name=="p_rep_severe"]=0.5
params_data$min[params_data$name=="p_rep_severe"]=0.01
params_data$max[params_data$name=="p_rep_severe"]=0.5
params_data$mean[params_data$name=="p_rep_severe"]=0.5
params_data$initial[params_data$name=="p_rep_death"]=0.5
params_data$min[params_data$name=="p_rep_death"]=0.01
params_data$max[params_data$name=="p_rep_death"]=0.5
params_data$mean[params_data$name=="p_rep_death"]=0.5
params_data$initial[c(1:(2*n_env_vars))]=enviro_coeffs_ini
checks <- mcmc_checks(params_data, covar_names, check_initial = TRUE)
params_data

We set additional values:

# Prefix to be used for .csv output files; a new file is created every 10,000 
# iterations with incremental number suffixes (Chain00.csv, Chain01.csv, etc.)
filename_prefix <- "Chain" 
#Number of iterations to run
Niter <- 10 
# Flag indicating how to set initial population immunity level
mode_start <- 1 
# Time increment in days
time_inc <- 5.0 
# Number of repetitions for which to run model and obtain average outputs
n_reps <- 1 
# True/false flag indicating whether or not to run model in deterministic mode
# (so that binomial calculations give average instead of randomized output)
deterministic = TRUE
# Type of time variation of FOI_spillover and R0; here set to 0 for constant values
mode_time <- 0
# Variable to set different modes for running on multiple processors simultaneously;
# here set to "none" so that parallel processing is not used
mode_parallel=FALSE

We then run the MCMC() function. Output data recorded at each iteration is saved to an output file every 10 iterations. Likelihood is calculated as described in Guide 4 [Link TBA].

mcmc_out <- MCMC(params_data, input_data, obs_sero_data, obs_case_data, filename_prefix, 
                 Niter, mode_start, time_inc, n_reps, enviro_data_const, enviro_data_var, 
                 deterministic,mode_time,mode_parallel,NULL)

The saved output of MCMC() takes the form of a data frame of values of the current likelihood (posterior_current), likelihood calculated from proposed parameter values (posterior_prop), current and proposed parameter values, a flag indicating whether the proposed parameter values were accepted (set to 1 if they were accepted, 0 if they were not), and the chain covariance.

chain_example <- read.csv(file = paste(path.package("YEP"), 
                                       "/exdata/Chain_example.csv", sep = ""),
                          header = TRUE)
head(chain_example, 1)