10. Stochastic continuous time models (solutions)

Practical 1. Stochastic simulation with the Gillespie algorithm

library(ggplot2) ## for plotting
library(dplyr) ## for manipulation of results
library(tidyr) ## for storing multiple simulation runs in a data frame

## Function SIR_gillespie.
## This takes three arguments:
## - init_state: the initial state
##   (a named vector containing the number in S, I and R)
## - parms: the parameters
##   (a named vector containing the rates beta and gamma)
## - tf: the end time
SIR_gillespie <- function(init_state, parms, tf) {

  time <- 0 ## initialise time to 0

  ## assign parameters to easy-access variables
  beta <- parms[["beta"]]
  gamma <- parms[["gamma"]]

  ## assign states to easy-access variables
  S <- init_state[["S"]]
  I <- init_state[["I"]]
  R <- init_state[["R"]]
  N <- S + I + R

  ## create results data frame
  results_df <- data.frame(time = 0, t(init_state))

  ## loop until end time is reached
  while (time < tf) {
    ## update current rates
    rates <- c(
      infection = beta * S * I / N, 
      recovery = gamma * I
    )

    if (sum(rates) > 0) { ## check if any event can happen
      ## time of next event
      time <- time + rexp(n = 1, rate = sum(rates))
      ## check if next event is supposed to happen before end time
      if (time <= tf) {
        ## determine the type of the next event
        next_event <- sample(x = length(rates), size = 1, prob = rates)
        ## change to name
        next_event <- names(rates)[next_event]
        ## determine type of next event
        if (next_event == "infection") {
          ## infection
          S <- S - 1
          I <- I + 1
        } else if (next_event == "recovery") {
          ## recovery
          I <- I - 1
          R <- R + 1
        }
      } else { ## next event happens after end time
        time <- tf
      }
    } else { ## no event can happen - go straight to end time
      time <- tf
    }
    ## add new row to results data frame
    results_df <- rbind(results_df, c(time = time, S = S, I = I, R = R))
  }
  ## return results data frame
  return(results_df)
}

init.values <- c(S = 249, I = 1, R = 0) ## initial state
parms <- c(beta = 1, gamma = 0.5) ## parameter vector
tmax <- 20 ## end time

## run Gillespie simulation
r <- SIR_gillespie(init_state = init.values, parms = parms, tf = tmax)

## Plot the result
ggplot(r) +
    geom_line(aes(time, I, colour = "I"))
## Re-run this from the line `r <- SIR_gillespie(...)` a few times to 
## convince yourself the output is different every time.

## Run multiple simulation runs and plot a few of them
nsim <- 100 ## number of trial simulations

## We store the simulations in a data frame, traj, which
## contains the results from multiple simulation runs and an additional column
## that represents the simulation index
traj <- tibble(i = 1:nsim) %>%
  rowwise() %>%
  mutate(trajectory = list(as.data.frame(
           SIR_gillespie(init.values, parms, tmax)))) %>%
  unnest(trajectory)

## convert to long data frame
mlr <- traj %>%
  gather(compartment, value, 3:ncol(.))

## Next, plot the multiple simulation runs
ggplot(mlr,
       aes(x = time, y = value, group = i, color = compartment)) +
  geom_line() +
  facet_wrap(~compartment)
## Plot the distribution of overall outbreak sizes.

## create data frame of outbreak sizes
outbreak_sizes <- mlr %>%
  filter(compartment=="R") %>%
  group_by(i) %>%
  filter(time==max(time)) %>%
  select(i, size = value)

## plot it as a histogram
ggplot(outbreak_sizes, aes(x = size)) +
  geom_histogram()
## determine number of large outbreaks (defined as larger than 50)
outbreak_sizes %>%
  filter(size > 50) %>%
  nrow

## Extract the values of the trajectory at integer time points
timeTraj <- mlr %>%
  group_by(i, compartment) %>%
  summarise(traj = list(data.frame(
              time = seq(0, tmax, by = 0.1),
              value = approx(x = time, y = value, xout = seq(0, tmax, by = 0.1),
                           method="constant")$y))) %>%
  unnest(traj)

## Calculate summary trajectory with mean & sd of infectious people over time
sumTraj <- timeTraj %>%
  filter(compartment=="I") %>%
  group_by(time) %>%
  summarise(
    mean = mean(value),
            sd = sd(value))

## plot
ggplot(sumTraj, aes(x = time, y = mean, ymin = pmax(0, mean-sd), ymax = mean+sd)) +
  geom_line() +
  geom_ribbon(alpha = 0.3)
## Only consider trajectories that have not gone extinct:
sumTrajAll <- sumTraj %>%
  mutate(trajectories="all")

sumTrajGr0 <- timeTraj %>%
  filter(compartment=="I", value > 0) %>%
  group_by(time) %>%
  summarise(mean = mean(value),
            sd = sd(value)) %>%
  mutate(trajectories="greater_than_zero")

iTraj <- bind_rows(sumTrajAll, sumTrajGr0)

## plot
ggplot(iTraj, aes(x = time, y = mean, ymin = pmax(0, mean-sd), ymax = mean+sd,
                  colour = trajectories, fill = trajectories)) +
  geom_line() +
  geom_ribbon(alpha = 0.3) +
  scale_color_brewer(palette="Set1")
## We now compare the two averages to the deterministic trajectory

## Model function (from ODE practical)
library(deSolve)
SIR_model <- function(times, state, parms){
  ## Define variables
  S <- state["S"]
  I <- state["I"]
  R <- state["R"]
  N <- S + I + R
                                        # Extract parameters
  beta <- parms["beta"]
  gamma <- parms["gamma"]
                                        # Define differential equations
  dS <- - (beta * S * I) / N
  dI <- (beta * S * I) / N - gamma * I
  dR <- gamma * I
  res <- list(c(dS, dI, dR ))
  return(res)
}

ode_output_raw <-
  ode(y = init.values, times = seq(0, tmax), func = SIR_model, parms = parms,
      method = "rk4")
## Convert to data frame for easy extraction of columns
ode_output <- as.data.frame(ode_output_raw)

## Combine into one big data frame
allTraj <- ode_output %>%
  gather(compartment, value, 2:ncol(.)) %>% ## convert to long format
  filter(compartment=="I") %>%
  rename(mean = value) %>% ## in deterministic, mean = value
  mutate(trajectories="deterministic", ## label trajectories
         sd = 0) %>% ## in deterministic, sd = 0
  bind_rows(iTraj)

## plot
ggplot(allTraj, aes(x = time, y = mean, colour = trajectories)) +
  geom_line() +
  scale_color_brewer(palette="Set1")

Practical 2. The adaptivetau package

library(adaptivetau) ## for stochastic simulations

## Define transitions
transitions <- list(
  c(S = -1, I = +1),
  c(I = -1, R = +1))

## Specify rate function, giving rate for each transition
SIRrateF <- function(state, parms, time) {
  beta <- parms[["beta"]]
  gamma <- parms[["gamma"]]

  S <- state[["S"]]
  I <- state[["I"]]
  R <- state[["R"]]

  N <- S + I + R

  rates <- c(beta * S * I/N,
             gamma * I)

  return(rates)
}

## Initial values
init.values <- c(S = 249, ## number of susceptibles
                 I = 10, ## number infectious
                 R = 0) ## number immune

## Parameters
parms <- c(beta = 2, ## infection rate
           gamma = 1) ## recovery rate

## Run a trial simulation for 60 time steps
tmax <- 60 ## number of time steps to simulate

r <- ssa.adaptivetau(init.values, transitions, SIRrateF, parms, tf = tmax)

## Plot results
r_df <- as.data.frame(r)
plot(r_df$time, r_df$I, type = "l")
## Run the simulation multiple times
nsim <- 100 ## number of trial simulations

system.time(adaptivetau.traj <- tibble(i = 1:nsim) %>%
  rowwise() %>%
  mutate(trajectory = list(as.data.frame(
    ssa.adaptivetau(init.values, transitions, SIRrateF, parms, tf = tmax)
  ))) %>%
  unnest(trajectory))

## Plot the resulting trajectories
ggplot(adaptivetau.traj) +
    geom_line(aes(x = time, y = I, group = i, colour = i))
## Run Gillespie simulations again

system.time(gillespie.traj <- tibble(i = 1:nsim) %>%
  rowwise() %>%
  mutate(trajectory = list(as.data.frame(
           SIR_gillespie(init.values, parms, tmax)))) %>%
  unnest(trajectory)
)

Return to the practical here.