3) Plotting model outputs • cowflu

This vignette walks through using the model simulations generated in the previous vignette - “2) Simulating the model with fit parameters”, to plot the figures shown in the associated publication “A mathematical model of H5N1 influenza transmission in US dairy cattle”.

library(cowflu)
library(dust2)
library(monty)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(ggplot2)

Load the model simulations

We load a set of model simulations from the previous vignette. Note, the results plotted in the main manuscript are from a simulation of 20,000 iterations. For simplicity, those shown below use only 500 iterations.

# Path to the .rds file
file_path <- system.file("extdata", "example_sim_results.rds", package = "cowflu")
# Load the .rds file
Sim_results <- readRDS(file_path)

Figure 2

First we load the real world data that will appear in the figure.

## Load data:
###################

data_outbreaks <- cowflu:::outbreaks_data$weekly_outbreaks_data
data_outbreaks <- cowflu:::process_data_outbreak(data_outbreaks)
data_week <- dust2::dust_filter_data(data_outbreaks, time = "week")

extended_outbreaks_data <- data_week %>%
  tidyr::unnest_longer(outbreak_detected) %>%
  mutate(state = rep(cowflu:::usda_data$US_States, times = nrow(data_week)))
extended_outbreaks_data <- extended_outbreaks_data[,c(2,3,4)]
colnames(extended_outbreaks_data) <- c("Time", "outbreak_detected", "US_state")

## Add number of herds per state:
extended_outbreaks_data$total_herds <- 0
for(i in 1:nrow(extended_outbreaks_data)){
  extended_outbreaks_data$total_herds[i] <- cowflu:::usda_data$n_herds_per_region[which(cowflu:::usda_data$US_States == extended_outbreaks_data$US_state[i])]
}
##########################################
## And also extract the exact number of new outbreaks data
new_outbreaks_data <- cowflu:::process_data_incidence(cowflu:::outbreaks_data$weekly_outbreaks_data)
new_data_week <- dust2::dust_filter_data(new_outbreaks_data, time = "week")

new_outbreaks_data <- new_data_week %>%
  tidyr::unnest_longer(positive_tests) %>%
  mutate(state = rep(cowflu:::usda_data$US_States, times = nrow(new_data_week)))
new_outbreaks_data <- new_outbreaks_data[,c(2,3,4)]
colnames(new_outbreaks_data) <- c("Time", "new_outbreaks", "US_state")
## Add number of herds per state:
new_outbreaks_data$total_herds <- 0
for(i in 1:nrow(new_outbreaks_data)){
  new_outbreaks_data$total_herds[i] <- cowflu:::usda_data$n_herds_per_region[which(cowflu:::usda_data$US_States == new_outbreaks_data$US_state[i])]
}

##Stick the two together:
new_outbreaks_data <- merge(new_outbreaks_data,
                            extended_outbreaks_data,
                            by = c("Time", "US_state", "total_herds"),
                            all.x = TRUE)

Next, we extract and rearrange the simulation outputs into a dataframe. Specifically, three model outputs of interest:

Another for “true number of herds with infected cattle”
Another for “total number of declared outbreaks”
One for model fit to “time of first outbreak detection”

First, number (i):

## The number of US states
n_states <- length(cowflu:::usda_data$US_States)

## Extract the data across all chains and iterations
I_data <- Sim_results[,49:96,]
n_timepoints <- dim(I_data)[3]
sim_samples <- dim(I_data)[1]
n_samples <- sim_samples

## Initialize an empty list to store data frames for each state
data_list <- list()
## Loop over each state and calculate summary statistics
for (state in 1:n_states) {
  ## Extract data for the current state
  state_data <- I_data[,state, ]
  ## Calculate mean, lower, and upper CI across particles for each time point
  state_summary <- apply(state_data, 2, function(x) {
    mean_val <- mean(x)
    ci_low <- quantile(x, 0.025)
    ci_high <- quantile(x, 0.975)
    return(c(mean = mean_val, lower_ci = ci_low, upper_ci = ci_high))
  })
  ## Convert to a data frame
  state_df <- as.data.frame(t(state_summary))
  ## Add time and state information
  state_df$Time <- 0:(n_timepoints-1)
  state_df$US_state <- state
  ## Append to the list
  data_list[[state]] <- state_df
}

## Combine all state data frames into one data frame
result_df <- dplyr::bind_rows(data_list)

## Rename columns
colnames(result_df) <- c("mean_infected", "lower_ci_infected", "upper_ci_infected", "Time", "US_state")
## Add number of herds per state:
result_df$total_herds <- cowflu:::usda_data$n_herds_per_region[result_df$US_state]
## Replace the US_state numbers with actual names
result_df$US_state <- factor(result_df$US_state, levels = 1:n_states, labels = cowflu:::usda_data$US_States)
## Add a descriptor of what type of data this is:
result_df$Type <- "True Outbreaks"

#Sim_data will be our final data frame.
Sim_data <- result_df

Now, repeat this process, to add (ii) to the data frame:

## Extract the  data across all chains and iterations
I_data <- Sim_results[,1:48,]
n_timepoints <- dim(I_data)[3]
n_samples <- sim_samples
## Initialize an empty list to store data frames for each state
data_list <- list()
## Loop over each state and calculate summary statistics
for (state in 1:n_states) {
  ## Extract data for the current state
  state_data <- I_data[,state, ]
  ## Calculate mean, lower, and upper CI across particles for each time point
  state_summary <- apply(state_data, 2, function(x) {
    mean_val <- mean(x)
    ci_low <- quantile(x, 0.025)
    ci_high <- quantile(x, 0.975)
    return(c(mean = mean_val, lower_ci = ci_low, upper_ci = ci_high))
  })
  ## Convert to a data frame
  state_df <- as.data.frame(t(state_summary))
  ## Add time and state information
  state_df$Time <- 0:(n_timepoints-1)
  state_df$US_state <- state
  ## Append to the list
  data_list[[state]] <- state_df
}

## Combine all state data frames into one data frame
result_df <- dplyr::bind_rows(data_list)

## Rename columns
colnames(result_df) <- c("mean_infected", "lower_ci_infected", "upper_ci_infected", "Time", "US_state")
## Add number of herds per state:
result_df$total_herds <- cowflu:::usda_data$n_herds_per_region[result_df$US_state]
## Replace the US_state numbers with actual names
result_df$US_state <- factor(result_df$US_state, levels = 1:n_states, labels = cowflu:::usda_data$US_States)
## Add a descriptor of what type of data this is:
result_df$Type <- "Declared Outbreaks"

## Add this new data to the previously made dataframe
Sim_data <- rbind(Sim_data, result_df)

And lastly for (iii):

## We will need to reorder the data into a data frame:
real_outbreaks_data <- data_week %>%
  tidyr::unnest_longer(outbreak_detected) %>%
  mutate(state = rep(cowflu:::usda_data$US_States, times = nrow(data_week)))
real_outbreaks_data <- real_outbreaks_data[,c(2,3,4)]
colnames(real_outbreaks_data) <- c("Time", "outbreak_detected", "US_state")

## Extract the  data across all chains and iterations
I_data <- Sim_results[,1:48,]
n_timepoints <- dim(I_data)[3]

## Convert to the step function form:
## Loop through each region and each step
for (place in 1:dim(I_data)[2]) {      # Loop over the regions
  for (chain in 1:dim(I_data)[1]) {    # Loop over the iterations

    ## Get the time series for this place and chain
    infections <- I_data[chain, place, ]

    ## Find the first time where infections > 0
    first_infection <- which(infections > 0)[1]

    if (!is.na(first_infection)) {
      ## From the first infection onwards, set the step function to 1
      I_data[chain, place, first_infection:dim(I_data)[3]] <- 1

      ## Prior to the first infection, set the values to 0
      I_data[chain, place, 1:(first_infection-1)] <- 0
    } else {
      ## If there are no infections at all, set everything to 0
      I_data[chain, place, ] <- 0
    }
  }
}

## Initialize an empty list to store data frames for each state
data_list <- list()
## Loop over each state and calculate summary statistics
for (state in 1:n_states) {
  ## Extract data for the current state
  state_data <- I_data[,state, ]
  ## Calculate mean, lower, and upper CI across particles for each time point
  state_summary <- apply(state_data, 2, function(x) {
    mean_val <- mean(x)
    ci_low <- quantile(x, 0.025)
    ci_high <- quantile(x, 0.975)
    return(c(mean = mean_val, lower_ci = ci_low, upper_ci = ci_high))
  })
  ## Convert to a data frame
  state_df <- as.data.frame(t(state_summary))
  ## Add time and state information
  state_df$Time <- 0:(n_timepoints-1)
  state_df$US_state <- state
  ## Append to the list
  data_list[[state]] <- state_df
}

## Combine all state data frames into one data frame
result_df <- dplyr::bind_rows(data_list)

## Rename columns
colnames(result_df) <- c("mean_infected", "lower_ci_infected", "upper_ci_infected", "Time", "US_state")
## Add number of herds per state:
result_df$total_herds <- cowflu:::usda_data$n_herds_per_region[result_df$US_state]
## Replace the US_state numbers with actual names
result_df$US_state <- factor(result_df$US_state, levels = 1:n_states, labels = cowflu:::usda_data$US_States)
## Add a descriptor of what type of data this is:
result_df$Type <- "First Outbreak"

Sim_data <- rbind(Sim_data, result_df)

Before plotting, we do a little tidying up of the dataframe:

# Add week_beginning columns
# Define the starting date
start_date <- as.Date("2023-12-18")

# Add the Week_Beginning column to the data frame
Sim_data$Week_Beginning <- start_date + weeks(Sim_data$Time)
new_outbreaks_data$Week_Beginning <- start_date + weeks(new_outbreaks_data$Time)

## Change capitalisation of States:
## Custom function to capitalize the first letter of each word
capitalize <- function(text) {
  sapply(strsplit(text, " "), function(x) {
    paste(toupper(substring(x, 1, 1)), tolower(substring(x, 2)), sep = "")
  }, USE.NAMES = FALSE) |>
    sapply(paste, collapse = " ")
}

Sim_data$US_state <- as.character(Sim_data$US_state)
Sim_data$US_state <- capitalize(Sim_data$US_state)
new_outbreaks_data$US_state <- capitalize(new_outbreaks_data$US_state)

And now we produce the plots using the geofacet package.

library(geofacet)
continental_us_grid1 <- us_state_grid1[c(-2, -11, -51), ]
continental_us_grid2 <- us_state_grid2[c(-2, -11, -51), ]
## i) The "fit to data" plot.
#############################
plot_sim_data <- filter(Sim_data, Type == "First Outbreak")

## Create the plot
ggplot(plot_sim_data) +
  geom_line(
    aes(x = Week_Beginning, y = mean_infected, color = "Model simulation")) +
  geom_ribbon(aes(x = Week_Beginning, ymin = lower_ci_infected, ymax = upper_ci_infected,
                  fill = "Model simulation"), alpha = 0.2) +
  geom_point(data = new_outbreaks_data,
             aes(x = Week_Beginning, y = outbreak_detected, color = "Real data", fill = "Real data")) +
  theme_bw() +
  labs(x = "Week Beginning",
       y = "First Outbreak Detected",
       title = "Time of First Outbreak Detection",
       color = "Legend", # Legend title for line and points
       fill = "Legend" ) +       # Exclude a separate legend title for the ribbon
  theme(strip.text = element_text(size = 16),
        plot.title = element_text(size = 24),    # Title text size
        axis.title.x = element_text(size = 20), # X-axis label size
        axis.title.y = element_text(size = 20), # Y-axis label size
        axis.text.x = element_text(size = 12),  # X-axis tick label size
        axis.text.y = element_text(size = 12),
        legend.title = element_text(size = 24),
        legend.text = element_text(size = 24),
        legend.key.size = unit(1.5, "lines"),
        legend.position = c(0.9, 0.25), # Adjust to place legend inside the plot area
        legend.background = element_rect(fill = "white", color = "black") # Optional border for clarity
  ) +   # Y-axis tick label size
  facet_geo(~ US_state, grid = continental_us_grid1, label = "name") +
  scale_x_date(
    date_labels = "%b %y", # Only show abbreviated month names
    date_breaks = "2 month" # Ensure monthly ticks
  ) +
  scale_color_manual(values = c("Model simulation" = "black", "Real data" = "firebrick")) +  # Set colors manually
  scale_fill_manual(values = c("Model simulation" = "black", "Real data" = NA)) +  # Set fill color to match the line color
  guides(
    color = guide_legend(override.aes = list(size = 3)),  # Increase point size in the legend
    fill = guide_legend(override.aes = list(size = 3))    # Increase ribbon fill size in the legend
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
#> Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
#> 3.5.0.
#> ℹ Please use the `legend.position.inside` argument of `theme()` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
#> Warning: Duplicated `override.aes` is ignored.

The other model outputs can be produced in the same way, just editing the above code to filter for the specific Type in the dataframe.

Figure 3

Because Figure 3 is plotting a hypothetical infection burden to demonstrate the role of state-differing ascertainment, we do not need to load the previous simulations.

First, the heatmap is produced like so:

I_values <- seq(1, 200, by = 1)  # Sequence of I values
I_over_N_values <- seq(0.01, 1, by = 0.01)  # Sequence of I/N values
x <- 0.7  # scaling the percentage
y <- 150  # Scaling the number of infected

# Generate data
data <- expand.grid(I = I_values, I_over_N = I_over_N_values)
data$N <- data$I / data$I_over_N  # Calculate N
data$f <- data$I / ((x * data$N)^0.95) + (data$I/y)  # Calculate f
data$z <- 1 - exp(-data$f)  # Calculate 1 - exp(-f)

# Create the heatmap with a custom color scale
ggplot(data, aes(x = I, y = I_over_N, fill = z)) +
  geom_tile() +
  scale_fill_gradientn(
    colors = c("deepskyblue3", "yellow", "firebrick", "black"),
    values = c(0, 0.49, 0.5, 0.51, 0.9, 1),
    limits = c(0, 1),
    breaks = seq(0, 1, by = 0.1)
  ) +
  labs(title = "Baseline probability that a herd reports an outbreak", x = "Infected Cattle (I)", y = "Proportion of herd infected (I / N)", fill = "Prob. report") +
  scale_x_continuous(expand = c(0, 0)) +  # Remove x-axis gap
  scale_y_continuous(expand = c(0, 0)) + # Remove y-axis gap
  theme_classic()

Next, we produce the data for panels B and C:

# How many samples to use for this plot:
asc_draws <- 1000 
# And what proportion do we assume is infected.
infected_prop = 0.1
# Use the ascertainment rate posterior
# Load the posterior samples first
file_path <- system.file("extdata", "example_posterior_fits.rds", package = "cowflu")
# Load the .rds file
samples <- readRDS(file_path)
asc_rate_samples <- as.numeric(samples$pars[5, ,])

p2_data <- data.frame(State = cowflu:::usda_data$US_States,
                      declare_prob_mid = rep(NA, 48),
                      declare_prob_lower = rep(NA, 48),
                      declare_prob_upper = rep(NA, 48))

for(i in 1:48){
  index_start <- cowflu:::usda_data$region_start[i] + 1
  index_end <- cowflu:::usda_data$region_start[i+1]
  n_herds <- cowflu:::usda_data$n_herds_per_region[i]
  total_cows <- cowflu:::usda_data$n_cows_per_herd[index_start:index_end]
  mid_hold <- rep(NA,length(asc_draws) )
  for(j in 1:asc_draws){
    infected_cows <- total_cows*infected_prop
    prob_of_declaring <- infected_cows/((x * total_cows)^0.95) + (infected_cows/y)
    prob_of_declaring <- prob_of_declaring * sample(asc_rate_samples, 1, replace = TRUE)
    prob_of_declaring <- 1 - exp(-prob_of_declaring)
    mid_hold[j] <- mean(prob_of_declaring)
  }

  p2_data$declare_prob_mid[i] <- mean(mid_hold)
  p2_data$declare_prob_lower[i] <- quantile(mid_hold, 0.025)
  p2_data$declare_prob_upper[i] <- quantile(mid_hold, 0.975)
}

Now we can plot the midpoints as a map:

library(maps)
library(sf)
#> Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.4.0; sf_use_s2() is TRUE
us_states <- map_data("state")
# Get the coordinates of the outbreak state
state_coords <- us_states %>%
  group_by(region) %>%
  summarize(lat = mean(lat), long = mean(long))
# Load US states map data
us_states <- sf::st_as_sf(maps::map("state", plot = FALSE, fill = TRUE))
# Prepare state names to match the data, assuming Prob_df has state names in all caps
us_states$State <- toupper(us_states$ID)

# Merge probability data with the map data
map_data <- us_states %>%
  inner_join(p2_data, by = "State")
colnames(map_data) <- c("region"  ,     "State"  ,  "prob_mid"  ,   "prob_lower", "prob_upper", "geom"   )

map_data <- map_data %>%
  inner_join(state_coords, by = "region")

  # Create ggplot map
  ggplot(map_data) +
    geom_sf(aes(fill = prob_mid), color = "black") +  
    scale_fill_gradientn(
      colors = c("white", "darkgoldenrod1", "darkolivegreen"),  # Colors for the gradient
      values = c(0, 0.5, 1),  # Corresponding values for the colors
      limits = c(0, 0.5),
      name = "Probability"
    ) +
    labs(title = sprintf("State Average Probability of Reporting an Outbreak, Assuming %s%% of All Cattle Are Infected.", as.integer(infected_prop*100)),
         #subtitle = "95% Confidence Intervals Shown",
         x = "Longitude", y = "Latitude") +
    theme_void() +
    theme(axis.text = element_blank(),  # Remove axis labels
          axis.ticks = element_blank())

And lastly output the 95% CrIs for each state as a forest plot:

## Capitalize region:
  map_data$region <- capitalize(map_data$region)
  ## Also get two letter shorthand
  map_data$shorthand <- state.abb[match(map_data$region, state.name)]
  # Step 1: Order data by mean (descending)
  map_data <- map_data %>%
    arrange(desc(prob_mid)) %>%
    mutate(shorthand = factor(shorthand, levels = shorthand))  # Reorder factor levels

  # Step 2: Create the forest plot
  ggplot(map_data, aes(x = shorthand, y = prob_mid)) +
    geom_errorbar(aes(ymin = prob_lower, ymax = prob_upper), width = 0.2, color = "black") +  # Error bars
    geom_point(size = 3, color = "darkgoldenrod3") +  # Plot mean as points
    #coord_flip() +  # Flip axes for a horizontal forest plot
    labs(
      title = sprintf("Probability of Reporting an Outbreak, Assuming %s%% of Cattle are Infected", as.integer(infected_prop*100)),
      x = "US State",
      y = "Probability of Declaring Outbreak"
    ) +
    scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
    theme_minimal()

Figure 4

Figure 4 captures the probability of exported cattle testing positive for H5N1 at the state border.

First, we calculate and prepare the data in a dataframe:

Prob_data <- Sim_results[,97:144,]
n_timepoints <- dim(Prob_data)[3]
## Make a master data_frame:
Prob_df <- data.frame(State = rep(cowflu:::usda_data$US_States, n_timepoints),
                      Week = rep(0:50, each = 48),
                      mean = rep(NA, 48*n_timepoints),
                      lower_ci = rep(NA, 48*n_timepoints),
                      upper_ci = rep(NA, 48*n_timepoints))
## Populate the dataframe:
for(i in 1:n_timepoints){
  Prob_df$mean[(1:48) + (i-1)*48] <- apply(Prob_data[,,i], 2, mean)
  Prob_df$lower_ci[(1:48) + (i-1)*48] <- apply(Prob_data[,,i], 2, function(x) quantile(x, 0.025))
  Prob_df$upper_ci[(1:48) + (i-1)*48] <- apply(Prob_data[,,i], 2, function(x) quantile(x, 0.975))
}

##Fix floating point precision issue:
Prob_df$mean <- pmin(1, Prob_df$mean)  # Cap at 1
Prob_df$lower_ci <- pmin(1, Prob_df$lower_ci)  # Cap at 1
Prob_df$upper_ci <- pmin(1, Prob_df$upper_ci)  # Cap at 1

## Convert from "probability of passing test", to, "probability of a positive test at state border"
Prob_df$mean <- 1 - Prob_df$mean
Prob_df$lower_ci <- 1 - Prob_df$lower_ci
Prob_df$upper_ci <- 1 - Prob_df$upper_ci

Then, decide which week you want to output, filter the data, and plot the probability on a US map:

# Which week to plot?
timepoint <- 40

us_states <- map_data("state")
# Get the coordinates of the outbreak state
state_coords <- us_states %>%
  group_by(region) %>%
  summarize(lat = mean(lat), long = mean(long))
# Load US states map data
us_states <- sf::st_as_sf(maps::map("state", plot = FALSE, fill = TRUE))
# Prepare state names to match the data, assuming Prob_df has state names in all caps
us_states$State <- toupper(us_states$ID)

start_date <- as.Date("2023-12-18")

  # Calculate w/b date
  Week_Beginning <- start_date + weeks(timepoint)
  Week_Beginning <- format(Week_Beginning, "%d/%m/%Y")

  Week_data <- filter(Prob_df, Week == timepoint)

  # Merge probability data with the map data
  map_data <- us_states %>%
    inner_join(Week_data, by = "State")
  colnames(map_data) <- c("region"  ,     "State"  , "Week",  "mean"  ,   "lower_ci", "upper_ci", "geom"   )

  map_data <- map_data %>%
    inner_join(state_coords, by = "region")

  if(max(map_data$mean) < 0.05){
    ##smaller scale
    # Create ggplot map
    ggplot(map_data) +
      geom_sf(aes(fill = mean), color = "black") +  
      scale_fill_gradientn(
        colors = c("white", "coral2"),  # Colors for the gradient
        values = c(0, 1),  # Corresponding values for the colors
        limits = c(0, 0.05),
        name = "Mean \nProbability"
      ) +
      labs(title = sprintf("Mean Probability of H5N1 Positive Test When Exporting From State \n    Week Beginning %s", Week_Beginning),
           x = "Longitude", y = "Latitude") +
      theme_void() +
      theme(axis.text = element_blank(),  # Remove axis labels
            axis.ticks = element_blank()) 

  } else{
    ##larger scale
    # Create ggplot map
    ggplot(map_data) +
      geom_sf(aes(fill = mean), color = "black") +  
      scale_fill_gradientn(
        colors = c("white", "coral2", "darkslategrey"),  # Colors for the gradient
        values = c(0, 0.1, 1),  # Corresponding values for the colors
        limits = c(0, 0.5),
        name = "Mean \nProbability"
      ) +
      labs(title = sprintf("Mean Probability of H5N1 Positive Test When Exporting From State \n    Week Beginning %s", Week_Beginning),
           x = "Longitude", y = "Latitude") +
      theme_void() +
      theme(axis.text = element_blank(),  # Remove axis labels
            axis.ticks = element_blank())
  }

Figure 5

Finally, Figure 5 explores the counterfactual scenarios of different border testing start times and numbers of cows tested. To do this, we have to run simulations like from the previous vignette, for different choices of n_test and time_test parameters. The actual manuscript figure uses 20,000 simulations of each scenario. The below code only runs 50 for illustrative purposes.

First generate all the simulation data, and save it all in a dataframe.

# How many simulation steps to calculate?
sim_samples <- 50
## Load posteriors:
file_path <- system.file("extdata", "example_posterior_fits.rds", package = "cowflu")
samples <- readRDS(file_path)
samples <- samples$pars
## Collapse chains together:
dim(samples) <- c(5, dim(samples)[2]*dim(samples)[3])
set.seed(1)
##We can't use all of them, so sample from the number of times we want to sim the model:
samples <- samples[, sample(1:dim(samples)[2], sim_samples, replace = TRUE)]

## Pre-allocate arrays:
Realtime_infected_factual <- array(dim = c(dim(samples)[2], 51))
Confirmed_outbreaks_factual <- array(dim = c(dim(samples)[2], 51))
True_infected_herds_factual <- array(dim = c(dim(samples)[2], 51))
Total_infected_cattle_factual <- array(dim = c(dim(samples)[2], 51))

for(i in 1:dim(samples)[2]){
  ## Sample the model:
  pars <- cowflu:::cowflu_inputs(alpha = samples[1,i], beta = samples[2,i], gamma = samples[3,i],
                                 sigma = samples[4,i], asc_rate = samples[5,i],
                                 dispersion = 1,
                                 cowflu:::cowflu_fixed_inputs(p_region_export = cowflu:::movement$p_region_export,
                                                              p_cow_export = cowflu:::movement$p_cow_export,
                                                              movement_matrix = cowflu:::movement$movement_matrix,
                                                              time_test = 19,
                                                              n_herds_per_region = cowflu:::usda_data$n_herds_per_region,
                                                              n_cows_per_herd = cowflu:::usda_data$n_cows_per_herd,
                                                              start_herd = 26940, #26804 is where Texas starts.
                                                              start_count = 5,
                                                              condition_on_export = TRUE
                                 ))


  sys <- dust2::dust_system_create(cowflu:::cows(), pars, n_particles = 1, dt = 1)
  dust2::dust_system_set_state_initial(sys)

  index <- seq.int(pars$n_herds + 1, length.out = pars$n_regions) #This is the S_region outputs
  index <- c(outer(index, (pars$n_herds + pars$n_regions) * (0:4), "+")) #*0:3 will get the SEIR, but 0:4 will also include the number of outbreaks
  ##Also want the "true" # of infected herds, so add a number of regions on the end again.
  index <- c(index, index[length(index)] + 1:48 )

  s <- dust2::dust_system_simulate(sys, 0:50, index)
  s2 <- array(s, c(pars$n_regions, 6, 51))
  ## Sum over dim 1, we only want national totals:
  s2 <- apply(s2, c(2, 3), sum)
  ##Fill the vectors:
  Realtime_infected_factual[i,] <-  s2[3 ,]
  Confirmed_outbreaks_factual[i,] <-  s2[5 ,]
  True_infected_herds_factual[i,] <-  s2[6 ,]
  ## To calculate cumulative infections, we take the number of recovered, and add the current infected
  Total_infected_cattle_factual[i,] <- s2[4, ] + s2[3,]
}

##Save the plot quantiles from these vectors:
Weeks <- (1:(dim(Total_infected_cattle_factual)[2])) - 1
Factual_data1 <- data.frame(metric = rep(NA, length(Weeks)),
                            Week = rep(NA, length(Weeks)),
                            mean_value = rep(NA, length(Weeks)),
                            lower_value = rep(NA, length(Weeks)),
                            upper_value = rep(NA, length(Weeks)))
Factual_data2 <- data.frame(metric = rep(NA, length(Weeks)),
                            Week = rep(NA, length(Weeks)),
                            mean_value = rep(NA, length(Weeks)),
                            lower_value = rep(NA, length(Weeks)),
                            upper_value = rep(NA, length(Weeks)))
Factual_data3 <- data.frame(metric = rep(NA, length(Weeks)),
                            Week = rep(NA, length(Weeks)),
                            mean_value = rep(NA, length(Weeks)),
                            lower_value = rep(NA, length(Weeks)),
                            upper_value = rep(NA, length(Weeks)))
Factual_data4 <- data.frame(metric = rep(NA, length(Weeks)),
                            Week = rep(NA, length(Weeks)),
                            mean_value = rep(NA, length(Weeks)),
                            lower_value = rep(NA, length(Weeks)),
                            upper_value = rep(NA, length(Weeks)))
## Populate the data frame:
for(i in 1:length(Weeks)){
  Factual_data1$metric[i] <- "Currently_infected_cattle"
  Factual_data1$Week[i] <- Weeks[i]
  Factual_data1$mean_value[i] <- mean(Realtime_infected_factual[,i])
  Factual_data1$lower_value[i] <- quantile(Realtime_infected_factual[,i], 0.025)
  Factual_data1$upper_value[i] <- quantile(Realtime_infected_factual[,i], 0.975)

  Factual_data2$metric[i] <- "Total_infected_cattle"
  Factual_data2$Week[i] <- Weeks[i]
  Factual_data2$mean_value[i] <- mean(Total_infected_cattle_factual[,i])
  Factual_data2$lower_value[i] <- quantile(Total_infected_cattle_factual[,i], 0.025)
  Factual_data2$upper_value[i] <- quantile(Total_infected_cattle_factual[,i], 0.975)

  Factual_data3$metric[i] <- "Declared_outbreaks"
  Factual_data3$Week[i] <- Weeks[i]
  Factual_data3$mean_value[i] <- mean(Confirmed_outbreaks_factual[,i])
  Factual_data3$lower_value[i] <- quantile(Confirmed_outbreaks_factual[,i], 0.025)
  Factual_data3$upper_value[i] <- quantile(Confirmed_outbreaks_factual[,i], 0.975)

  Factual_data4$metric[i] <- "True_infected_herds"
  Factual_data4$Week[i] <- Weeks[i]
  Factual_data4$mean_value[i] <- mean(True_infected_herds_factual[,i])
  Factual_data4$lower_value[i] <- quantile(True_infected_herds_factual[,i], 0.025)
  Factual_data4$upper_value[i] <- quantile(True_infected_herds_factual[,i], 0.975)

}
Factual_data <- rbind(Factual_data1, Factual_data2, Factual_data3, Factual_data4)
rm(Factual_data1, Factual_data2, Factual_data3, Factual_data4,
   Total_infected_cattle_factual, Realtime_infected_factual, True_infected_herds_factual, Confirmed_outbreaks_factual)
Factual_data$scenario <- "Factual"

####################
#Do it again for Counterfactual 1 - The scenario with NO interventions
#Pre-allocate arrays:
Realtime_infected_counterfactual1 <- array(dim = c(dim(samples)[2], 51))
Confirmed_outbreaks_counterfactual1 <- array(dim = c(dim(samples)[2], 51))
True_infected_herds_counterfactual1 <- array(dim = c(dim(samples)[2], 51))
Total_infected_cattle_counterfactual1 <- array(dim = c(dim(samples)[2], 51))

for(i in 1:dim(samples)[2]){
  ## Sample the model:
  pars <- cowflu:::cowflu_inputs(alpha = samples[1,i], beta = samples[2,i], gamma = samples[3,i],
                                 sigma = samples[4,i], asc_rate = samples[5,i],
                                 dispersion = 1,
                                 cowflu:::cowflu_fixed_inputs(p_region_export = cowflu:::movement$p_region_export,
                                                              p_cow_export = cowflu:::movement$p_cow_export,
                                                              movement_matrix = cowflu:::movement$movement_matrix,
                                                              time_test = 190000,
                                                              n_herds_per_region = cowflu:::usda_data$n_herds_per_region,
                                                              n_cows_per_herd = cowflu:::usda_data$n_cows_per_herd,
                                                              start_herd = 26940, #26804 is where Texas starts.
                                                              start_count = 5,
                                                              condition_on_export = TRUE
                                 ))


  sys <- dust2::dust_system_create(cowflu:::cows(), pars, n_particles = 1, dt = 1)
  dust2::dust_system_set_state_initial(sys)

  index <- seq.int(pars$n_herds + 1, length.out = pars$n_regions) #This is the S_region outputs
  index <- c(outer(index, (pars$n_herds + pars$n_regions) * (0:4), "+")) #*0:3 will get the SEIR, but 0:4 will also include the number of outbreaks
  ##Also want the "true" # of infected herds, so add a number of regions on the end again.
  index <- c(index, index[length(index)] + 1:48 )

  s <- dust2::dust_system_simulate(sys, 0:50, index)
  s2 <- array(s, c(pars$n_regions, 6, 51))
  ## Sum over dim 1, we only want national totals:
  s2 <- apply(s2, c(2, 3), sum)
  ##Fill the vectors:
  Realtime_infected_counterfactual1[i,] <-  s2[3 ,]
  Confirmed_outbreaks_counterfactual1[i,] <-  s2[5 ,]
  True_infected_herds_counterfactual1[i,] <-  s2[6 ,]
  ## To calculate cumulative infections, we take the number of recovered, and add the current infected
  Total_infected_cattle_counterfactual1[i,] <- s2[4, ] + s2[3,]
}

##Save the plot quantiles from these vectors:
Weeks <- (1:(dim(Total_infected_cattle_counterfactual1)[2])) - 1
Counterfactual1_data1 <- data.frame(metric = rep(NA, length(Weeks)),
                                    Week = rep(NA, length(Weeks)),
                                    mean_value = rep(NA, length(Weeks)),
                                    lower_value = rep(NA, length(Weeks)),
                                    upper_value = rep(NA, length(Weeks)))
Counterfactual1_data2 <- data.frame(metric = rep(NA, length(Weeks)),
                                    Week = rep(NA, length(Weeks)),
                                    mean_value = rep(NA, length(Weeks)),
                                    lower_value = rep(NA, length(Weeks)),
                                    upper_value = rep(NA, length(Weeks)))
Counterfactual1_data3 <- data.frame(metric = rep(NA, length(Weeks)),
                                    Week = rep(NA, length(Weeks)),
                                    mean_value = rep(NA, length(Weeks)),
                                    lower_value = rep(NA, length(Weeks)),
                                    upper_value = rep(NA, length(Weeks)))
Counterfactual1_data4 <- data.frame(metric = rep(NA, length(Weeks)),
                                    Week = rep(NA, length(Weeks)),
                                    mean_value = rep(NA, length(Weeks)),
                                    lower_value = rep(NA, length(Weeks)),
                                    upper_value = rep(NA, length(Weeks)))
## Populate the data frame:
for(i in 1:length(Weeks)){
  Counterfactual1_data1$metric[i] <- "Currently_infected_cattle"
  Counterfactual1_data1$Week[i] <- Weeks[i]
  Counterfactual1_data1$mean_value[i] <- mean(Realtime_infected_counterfactual1[,i])
  Counterfactual1_data1$lower_value[i] <- quantile(Realtime_infected_counterfactual1[,i], 0.025)
  Counterfactual1_data1$upper_value[i] <- quantile(Realtime_infected_counterfactual1[,i], 0.975)

  Counterfactual1_data2$metric[i] <- "Total_infected_cattle"
  Counterfactual1_data2$Week[i] <- Weeks[i]
  Counterfactual1_data2$mean_value[i] <- mean(Total_infected_cattle_counterfactual1[,i])
  Counterfactual1_data2$lower_value[i] <- quantile(Total_infected_cattle_counterfactual1[,i], 0.025)
  Counterfactual1_data2$upper_value[i] <- quantile(Total_infected_cattle_counterfactual1[,i], 0.975)

  Counterfactual1_data3$metric[i] <- "Declared_outbreaks"
  Counterfactual1_data3$Week[i] <- Weeks[i]
  Counterfactual1_data3$mean_value[i] <- mean(Confirmed_outbreaks_counterfactual1[,i])
  Counterfactual1_data3$lower_value[i] <- quantile(Confirmed_outbreaks_counterfactual1[,i], 0.025)
  Counterfactual1_data3$upper_value[i] <- quantile(Confirmed_outbreaks_counterfactual1[,i], 0.975)

  Counterfactual1_data4$metric[i] <- "True_infected_herds"
  Counterfactual1_data4$Week[i] <- Weeks[i]
  Counterfactual1_data4$mean_value[i] <- mean(True_infected_herds_counterfactual1[,i])
  Counterfactual1_data4$lower_value[i] <- quantile(True_infected_herds_counterfactual1[,i], 0.025)
  Counterfactual1_data4$upper_value[i] <- quantile(True_infected_herds_counterfactual1[,i], 0.975)

}
Counterfactual1_data <- rbind(Counterfactual1_data1, Counterfactual1_data2, Counterfactual1_data3, Counterfactual1_data4)

## Stick the two together
Factual_data$scenario <- "True measures"
Counterfactual1_data$scenario <- "Counterfactual 1 -\n No measures"

Plot_data <- rbind(Factual_data, Counterfactual1_data)

###########################
#Do it again for Counterfactual 2 - STRONGER interventions
#test up to 100 cows, and start it week 15 (a month earlier)
#Pre-allocate arrays:
Realtime_infected_counterfactual2 <- array(dim = c(dim(samples)[2], 51))
Confirmed_outbreaks_counterfactual2 <- array(dim = c(dim(samples)[2], 51))
True_infected_herds_counterfactual2 <- array(dim = c(dim(samples)[2], 51))
Total_infected_cattle_counterfactual2 <- array(dim = c(dim(samples)[2], 51))

for(i in 1:dim(samples)[2]){
  ## Sample the model:
  pars <- cowflu:::cowflu_inputs(alpha = samples[1,i], beta = samples[2,i], gamma = samples[3,i],
                                 sigma = samples[4,i], asc_rate = samples[5,i],
                                 dispersion = 1,
                                 cowflu:::cowflu_fixed_inputs(p_region_export = cowflu:::movement$p_region_export,
                                                              p_cow_export = cowflu:::movement$p_cow_export,
                                                              movement_matrix = cowflu:::movement$movement_matrix,
                                                              time_test = 15,
                                                              n_test = 100,
                                                              n_herds_per_region = cowflu:::usda_data$n_herds_per_region,
                                                              n_cows_per_herd = cowflu:::usda_data$n_cows_per_herd,
                                                              start_herd = 26940, #26804 is where Texas starts.
                                                              start_count = 5,
                                                              condition_on_export = TRUE
                                 ))


  sys <- dust2::dust_system_create(cowflu:::cows(), pars, n_particles = 1, dt = 1)
  dust2::dust_system_set_state_initial(sys)

  index <- seq.int(pars$n_herds + 1, length.out = pars$n_regions) #This is the S_region outputs
  index <- c(outer(index, (pars$n_herds + pars$n_regions) * (0:4), "+")) #*0:3 will get the SEIR, but 0:4 will also include the number of outbreaks
  ##Also want the "true" # of infected herds, so add a number of regions on the end again.
  index <- c(index, index[length(index)] + 1:48 )

  s <- dust2::dust_system_simulate(sys, 0:50, index)
  s2 <- array(s, c(pars$n_regions, 6, 51))
  ## Sum over dim 1, we only want national totals:
  s2 <- apply(s2, c(2, 3), sum)
  ##Fill the vectors:
  Realtime_infected_counterfactual2[i,] <-  s2[3 ,]
  Confirmed_outbreaks_counterfactual2[i,] <-  s2[5 ,]
  True_infected_herds_counterfactual2[i,] <-  s2[6 ,]
  ## To calculate cumulative infections, we take the number of recovered, and add the current infected
  Total_infected_cattle_counterfactual2[i,] <- s2[4, ] + s2[3,]
}

##Save the plot quantiles from these vectors:
Weeks <- (1:(dim(Total_infected_cattle_counterfactual2)[2])) - 1
Counterfactual2_data1 <- data.frame(metric = rep(NA, length(Weeks)),
                                    Week = rep(NA, length(Weeks)),
                                    mean_value = rep(NA, length(Weeks)),
                                    lower_value = rep(NA, length(Weeks)),
                                    upper_value = rep(NA, length(Weeks)))
Counterfactual2_data2 <- data.frame(metric = rep(NA, length(Weeks)),
                                    Week = rep(NA, length(Weeks)),
                                    mean_value = rep(NA, length(Weeks)),
                                    lower_value = rep(NA, length(Weeks)),
                                    upper_value = rep(NA, length(Weeks)))
Counterfactual2_data3 <- data.frame(metric = rep(NA, length(Weeks)),
                                    Week = rep(NA, length(Weeks)),
                                    mean_value = rep(NA, length(Weeks)),
                                    lower_value = rep(NA, length(Weeks)),
                                    upper_value = rep(NA, length(Weeks)))
Counterfactual2_data4 <- data.frame(metric = rep(NA, length(Weeks)),
                                    Week = rep(NA, length(Weeks)),
                                    mean_value = rep(NA, length(Weeks)),
                                    lower_value = rep(NA, length(Weeks)),
                                    upper_value = rep(NA, length(Weeks)))
## Populate the data frame:
for(i in 1:length(Weeks)){
  Counterfactual2_data1$metric[i] <- "Currently_infected_cattle"
  Counterfactual2_data1$Week[i] <- Weeks[i]
  Counterfactual2_data1$mean_value[i] <- mean(Realtime_infected_counterfactual2[,i])
  Counterfactual2_data1$lower_value[i] <- quantile(Realtime_infected_counterfactual2[,i], 0.025)
  Counterfactual2_data1$upper_value[i] <- quantile(Realtime_infected_counterfactual2[,i], 0.975)

  Counterfactual2_data2$metric[i] <- "Total_infected_cattle"
  Counterfactual2_data2$Week[i] <- Weeks[i]
  Counterfactual2_data2$mean_value[i] <- mean(Total_infected_cattle_counterfactual2[,i])
  Counterfactual2_data2$lower_value[i] <- quantile(Total_infected_cattle_counterfactual2[,i], 0.025)
  Counterfactual2_data2$upper_value[i] <- quantile(Total_infected_cattle_counterfactual2[,i], 0.975)

  Counterfactual2_data3$metric[i] <- "Declared_outbreaks"
  Counterfactual2_data3$Week[i] <- Weeks[i]
  Counterfactual2_data3$mean_value[i] <- mean(Confirmed_outbreaks_counterfactual2[,i])
  Counterfactual2_data3$lower_value[i] <- quantile(Confirmed_outbreaks_counterfactual2[,i], 0.025)
  Counterfactual2_data3$upper_value[i] <- quantile(Confirmed_outbreaks_counterfactual2[,i], 0.975)

  Counterfactual2_data4$metric[i] <- "True_infected_herds"
  Counterfactual2_data4$Week[i] <- Weeks[i]
  Counterfactual2_data4$mean_value[i] <- mean(True_infected_herds_counterfactual2[,i])
  Counterfactual2_data4$lower_value[i] <- quantile(True_infected_herds_counterfactual2[,i], 0.025)
  Counterfactual2_data4$upper_value[i] <- quantile(True_infected_herds_counterfactual2[,i], 0.975)

}
Counterfactual2_data <- rbind(Counterfactual2_data1, Counterfactual2_data2, Counterfactual2_data3, Counterfactual2_data4)

## Stick the two together
Counterfactual2_data$scenario <- "Counterfactual 2 -\n Stronger measures"

Plot_data <- rbind(Plot_data, Counterfactual2_data)

And now, we can plot the outputs of interest like so:

start_date <- as.Date("2023-12-18")
# Add the Week_Beginning column to the data frame
Plot_data$Week_Beginning <- start_date + lubridate::weeks(Plot_data$Week)

## Declared outbreaks
ggplot(filter(Plot_data, metric == "Declared_outbreaks")) +
  geom_line(aes(x = Week_Beginning, y = mean_value, colour = scenario)) +
  geom_ribbon(aes(x= Week_Beginning, ymin = lower_value, ymax = upper_value, fill = scenario),
              alpha = 0.2) +
  theme_classic() +
  labs(x = "Time", y = "Reported Outbreaks", title = "Weekly National Reported Outbreaks",
       color = "Scenario", fill = "Scenario") +
  theme(strip.text = element_text(size = 8)) +
  theme(
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA),
    plot.title = element_text(size = 20),    # Title font size
    axis.title.x = element_text(size = 16),  # X-axis title font size
    axis.title.y = element_text(size = 16),  # Y-axis title font size
    axis.text.x = element_text(size = 12),   # X-axis labels font size
    axis.text.y = element_text(size = 12),   # Y-axis labels font size
    legend.text = element_text(size = 14),   # Legend text font size
    legend.title = element_text(size = 16),  # Legend title font size
    legend.position = c(0.3, 0.6), # Adjust to place legend inside the plot area
    legend.key.spacing.y = unit(0.5, 'cm')
  ) +
  scale_color_manual(values = c(
    "True measures" = "#362DD2",
    "Counterfactual 1 -\n No measures" = "#D2362D",
    "Counterfactual 2 -\n Stronger measures" = "#2DD236"
  )) +
  scale_fill_manual(values = c(
    "True measures" = "#362DD2",
    "Counterfactual 1 -\n No measures" = "#D2362D",
    "Counterfactual 2 -\n Stronger measures" = "#2DD236"
  ))

Other outputs can be produced by selecting a different filter option for metric in the first line of the ggplot() function.