Get started • helios

This vignette is not up to date with latest changes to the model!

Introduction

helios is an R package which allows users to simulate the impact of far UVC interventions in curtailing the spread of infectious disease outbreaks.

This vignette serves to demonstrate basic use of the package. We start by loading the package, as well as other packages used in the vignette:

library(helios)
library(individual)
library(ggplot2)
library(dplyr, warn.conflicts = FALSE)
library(purrr)
library(tictoc)
theme_set(theme_minimal())

Parameters

First, we define some constant parameters to be used in the model. In this vignette we use the default options, aside from the human_population variable. To facilitate reproducibility, we also set the seed for random number generation:

parameters_list <- get_parameters(list(human_population = 50000, 
                                       number_initial_S = 40000,
                                       number_initial_E = 5000,
                                       number_initial_I = 4000,
                                       number_initial_R = 1000,
                                       simulation_time = 100,
                                       seed = 1))

The names of all parameters are as follows:

names(parameters_list)
#>  [1] "human_population"                            
#>  [2] "initial_proportion_child"                    
#>  [3] "initial_proportion_adult"                    
#>  [4] "initial_proportion_elderly"                  
#>  [5] "number_initial_S"                            
#>  [6] "number_initial_E"                            
#>  [7] "number_initial_I"                            
#>  [8] "number_initial_R"                            
#>  [9] "seed"                                        
#> [10] "mean_household_size"                         
#> [11] "workplace_prop_max"                          
#> [12] "workplace_a"                                 
#> [13] "workplace_c"                                 
#> [14] "school_prop_max"                             
#> [15] "school_meanlog"                              
#> [16] "school_sdlog"                                
#> [17] "school_student_staff_ratio"                  
#> [18] "leisure_prob_visit"                          
#> [19] "leisure_mean_number_settings"                
#> [20] "leisure_mean_size"                           
#> [21] "leisure_overdispersion_size"                 
#> [22] "leisure_prop_max"                            
#> [23] "duration_exposed"                            
#> [24] "duration_infectious"                         
#> [25] "beta_household"                              
#> [26] "beta_workplace"                              
#> [27] "beta_school"                                 
#> [28] "beta_leisure"                                
#> [29] "beta_community"                              
#> [30] "dt"                                          
#> [31] "simulation_time"                             
#> [32] "render_diagnostics"                          
#> [33] "household_distribution_country"              
#> [34] "school_distribution_country"                 
#> [35] "workplace_distribution_country"              
#> [36] "endemic_or_epidemic"                         
#> [37] "duration_immune"                             
#> [38] "prob_inf_external"                           
#> [39] "setting_specific_riskiness_workplace"        
#> [40] "setting_specific_riskiness_workplace_meanlog"
#> [41] "setting_specific_riskiness_workplace_sdlog"  
#> [42] "setting_specific_riskiness_workplace_min"    
#> [43] "setting_specific_riskiness_workplace_max"    
#> [44] "setting_specific_riskiness_school"           
#> [45] "setting_specific_riskiness_school_meanlog"   
#> [46] "setting_specific_riskiness_school_sdlog"     
#> [47] "setting_specific_riskiness_school_min"       
#> [48] "setting_specific_riskiness_school_max"       
#> [49] "setting_specific_riskiness_leisure"          
#> [50] "setting_specific_riskiness_leisure_meanlog"  
#> [51] "setting_specific_riskiness_leisure_sdlog"    
#> [52] "setting_specific_riskiness_leisure_min"      
#> [53] "setting_specific_riskiness_leisure_max"      
#> [54] "setting_specific_riskiness_household"        
#> [55] "setting_specific_riskiness_household_meanlog"
#> [56] "setting_specific_riskiness_household_sdlog"  
#> [57] "setting_specific_riskiness_household_min"    
#> [58] "setting_specific_riskiness_household_max"    
#> [59] "far_uvc_joint"                               
#> [60] "far_uvc_joint_coverage"                      
#> [61] "far_uvc_joint_coverage_target"               
#> [62] "far_uvc_joint_coverage_type"                 
#> [63] "far_uvc_joint_efficacy"                      
#> [64] "far_uvc_joint_timestep"                      
#> [65] "far_uvc_workplace"                           
#> [66] "far_uvc_workplace_coverage"                  
#> [67] "far_uvc_workplace_coverage_target"           
#> [68] "far_uvc_workplace_coverage_type"             
#> [69] "far_uvc_workplace_efficacy"                  
#> [70] "far_uvc_workplace_timestep"                  
#> [71] "far_uvc_school"                              
#> [72] "far_uvc_school_coverage"                     
#> [73] "far_uvc_school_coverage_target"              
#> [74] "far_uvc_school_coverage_type"                
#> [75] "far_uvc_school_efficacy"                     
#> [76] "far_uvc_school_timestep"                     
#> [77] "far_uvc_leisure"                             
#> [78] "far_uvc_leisure_coverage"                    
#> [79] "far_uvc_leisure_coverage_target"             
#> [80] "far_uvc_leisure_coverage_type"               
#> [81] "far_uvc_leisure_efficacy"                    
#> [82] "far_uvc_leisure_timestep"                    
#> [83] "far_uvc_household"                           
#> [84] "far_uvc_household_coverage"                  
#> [85] "far_uvc_household_coverage_target"           
#> [86] "far_uvc_household_coverage_type"             
#> [87] "far_uvc_household_efficacy"                  
#> [88] "far_uvc_household_timestep"                  
#> [89] "size_per_individual_workplace"               
#> [90] "size_per_individual_school"                  
#> [91] "size_per_individual_leisure"                 
#> [92] "size_per_individual_household"

Variables

Next, we define the model variables:

variables_list <- create_variables(parameters_list)
variables_list <- variables_list$variables_list
names(variables_list)
#> [1] "disease_state"    "age_class"        "workplace"        "school"          
#> [5] "household"        "leisure"          "specific_leisure"

Aside from leisure, each entry of variables_list is of the CategoricalVariable class:

map(variables_list, class)
#> $disease_state
#> [1] "CategoricalVariable" "R6"                 
#> 
#> $age_class
#> [1] "CategoricalVariable" "R6"                 
#> 
#> $workplace
#> [1] "CategoricalVariable" "R6"                 
#> 
#> $school
#> [1] "CategoricalVariable" "R6"                 
#> 
#> $household
#> [1] "CategoricalVariable" "R6"                 
#> 
#> $leisure
#> [1] "RaggedInteger" "R6"           
#> 
#> $specific_leisure
#> [1] "CategoricalVariable" "R6"

The leisure variable is a RagggedInteger allowing its elements to have different lengths.

Some variables change over time (disease_state, leisure, specific_leisure), while other remain fixed (age_class, workplace, school, household).

Disease states

The outbreak is implemented as individual-level SEIR compartmental model.

(disease_states <- variables_list$disease_state$get_categories())
#> [1] "S" "E" "I" "R"

The number of individuals initially exposed is controlled by parameters_list$number_initial_E and parameters_list$number_initial_I

parameters_list$number_initial_E
#> [1] 5000
parameters_list$number_initial_I
#> [1] 4000

Initially, all other individuals are placed into the susceptible disease state:

disease_state_counts <- purrr::map_vec(disease_states, function(x) variables_list$disease_state$get_size_of(values = x))

data.frame("State" = disease_states, "Count" = disease_state_counts) |>
  gt::gt()

State	Count
S	40000
E	5000
I	4000
R	1000

Age classes

There are three age classes: children, adults, and the elderly. The age of each individual remains fixed throughout the simulation.

parameters_list[c("initial_proportion_child", "initial_proportion_adult", "initial_proportion_elderly")]
#> $initial_proportion_child
#> [1] 0.2
#> 
#> $initial_proportion_adult
#> [1] 0.6
#> 
#> $initial_proportion_elderly
#> [1] 0.2

age_classes <- variables_list$age_class$get_categories()
age_class_counts <- purrr::map_vec(age_classes, function(x) variables_list$age_class$get_size_of(values = x))

data.frame(age_classes, age_class_counts) |>
  mutate(age_classes = forcats::fct_relevel(age_classes, "child", "adult", "elderly")) |>
  ggplot(aes(x = age_classes, y = age_class_counts)) +
  geom_col() +
  labs(x = "Age class", y = "Count") +
  coord_flip()

Schools

Each child attends a fixed school. Additionally, a small number of adults attend schools as staff:

schools <- variables_list$school$get_categories()
schools <- schools[schools != "0"]
school_sizes <- purrr::map_vec(schools, function(x) variables_list$school$get_size_of(values = x))

data.frame(school_sizes) |>
  ggplot(aes(x = school_sizes)) +
  geom_histogram() +
  labs(x = "School size", y = "Count")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Workplaces

Each adult (aside from those who are school staff) attends a fixed workplace:

workplaces <- variables_list$workplace$get_categories()
workplaces <- workplaces[workplaces != "0"]
workplace_sizes <- purrr::map_vec(workplaces, function(x) variables_list$workplace$get_size_of(values = x))

data.frame(workplace_sizes) |>
  ggplot(aes(x = log(workplace_sizes))) +
  geom_histogram() +
  labs(x = "log(Workplace size)", y = "Count")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Households

Children, adults and elderly people are grouped into fixed households:

households <- variables_list$household$get_categories()
household_sizes <- purrr::map_vec(households, function(x) variables_list$household$get_size_of(values = x))

table(household_sizes) |>
  data.frame() |>
  ggplot(aes(x = household_sizes, y = Freq)) +
  geom_col() +
  labs(x = "Household size", y = "Count")

The age distribution in households is as follows:

household_df <- purrr::map_df(households, function(x) {
  indices <- variables_list$household$get_index_of(values = x)$to_vector()
  if(length(indices > 0)) data.frame("individual" = indices, "household" = as.numeric(x))
})

age_classes <- variables_list$age_class$get_categories()
age_df <- purrr::map_df(age_classes, function(x) {
  indices <- variables_list$age_class$get_index_of(values = x)$to_vector()
  if(length(indices > 0)) data.frame("individual" = indices, "age_class" = x)
})

household_df |>
  left_join(age_df, by = "individual") |>
  group_by(household) |>
  summarise(
    child = sum(age_class == "child"),
    adult = sum(age_class == "adult"),
    elderly = sum(age_class == "elderly")
  ) |>
  group_by(child, adult, elderly) |>
  summarise(
    count = n()
  ) |>
  ungroup() |>
  arrange(desc(count)) |>
  head() |>
  gt::gt()
#> `summarise()` has grouped output by 'child', 'adult'. You can override using
#> the `.groups` argument.

child	adult	elderly	count
0	1	0	7078
0	2	0	5250
0	0	1	1585
1	2	0	1337
0	3	0	1135
2	2	0	936

Leisure

Each week, individuals attend leisure venues:

leisure_places <- variables_list$leisure$get_values()
number_leisure_places <- sapply(leisure_places, function(x) sum(x > 0))

table(number_leisure_places) |>
  data.frame() |>
  ggplot(aes(x = number_leisure_places, y = Freq)) +
  geom_col() +
  labs(x = "Number of leisure places attended in a week", y = "Count")

Events

Events govern the transition of individuals between disease states:

events_list <- create_events(variables_list = variables_list, parameters_list = parameters_list)
names(events_list)
#> [1] "EI_event" "IR_event"

Each event is in the TargetedEvent class:

lapply(events_list, class)
#> $EI_event
#> [1] "TargetedEvent" "EventBase"     "R6"           
#> 
#> $IR_event
#> [1] "TargetedEvent" "EventBase"     "R6"

Render

We use a fixed number of time-steps to simulate over:

timesteps <- round(parameters_list$simulation_time / parameters_list$dt)

The object renderer is of class Render, and stores output from the model at each timestep:

renderer <- individual::Render$new(timesteps)
class(renderer)
#> [1] "Render" "R6"

Processes

Processes enable queing of events:

variables_list <- create_variables(parameters_list)
parameters_list <- variables_list$parameters_list 
variables_list <- variables_list$variables_list
events_list <- create_events(variables_list = variables_list, parameters_list = parameters_list)
timesteps <- round(parameters_list$simulation_time/parameters_list$dt)
renderer <- individual::Render$new(timesteps)

processes_list <- create_processes(
  variables_list = variables_list,
  events_list = events_list,
  parameters_list = parameters_list,
  renderer = renderer
)

names(processes_list)
#> [1] "SE_process" "EI_process" "IR_process" "renderer"

Simulate

See individual::simulation_loop:

individual::simulation_loop(
  variables = variables_list,
  events = unlist(events_list),
  processes = processes_list,
  timesteps = timesteps,
)

The output of the simulation is contained within renderer, and can be accessed using the to_dataframe() method:

states <- renderer$to_dataframe()

states |>
  tidyr::pivot_longer(cols = ends_with("count"), names_to = "compartment", values_to = "value", names_pattern = "(.*)_count") |>
  mutate(
    compartment = forcats::fct_relevel(compartment, "S", "E", "I", "R")
  ) |>
  ggplot(aes(x = timestep, y = value, col = compartment)) +
  geom_line() +
  scale_color_manual(values = c("royalblue3", "firebrick3", "darkorchid3", "orange2")) +
  labs(x = "Time-step", y = "Count", col = "Compartment")