Gp Compare

Original go at nimble gp model, but this requires rather manual manipulation of parameter posteriors to get the GP posteriors.

library("MASS")
library("mcmc")
library("nimble")
library("ggplot2")
library("tidyr")
library("dplyr")
library("nonparametricbayes")
library("pdgControl")

sample data

set.seed(1234)
Tobs <- 40
f <- RickerAllee
sigma_g <- 0.05
z_g <- function() rlnorm(1, 0, sigma_g)
p <- c(2, 8, 5)
x_grid <- seq(0, 15, length=50)
h_grid <- x_grid
profit <- function(x,h) pmin(x, h)
delta <- 0.01
OptTime <- 50  # stationarity with unstable models is tricky thing
xT <- 0    # terminal condition
x0 <- 10   # simulation under policy starts from
MaxT <- 1000 # timeout for value iteration convergence
x <- numeric(Tobs)
x[1] <- 5.5
nz <- 1
for(t in 1:(Tobs-1))
  x[t+1] = z_g() * f(x[t], h=0, p=p)
obs <- data.frame(x = c(rep(0,nz), 
                        pmax(rep(0,Tobs-1), x[1:(Tobs-1)])), 
                  y = c(rep(0,nz), 
                        x[2:Tobs]))
ggplot(data.frame(time = 1:Tobs, x=x), aes(time,x)) + geom_line()

GP Model

code <- nimbleCode({

   l ~ dgamma(10, 1) 
   sigma.n ~ dunif(0, 1e5)
   sigma.k  ~ dunif(0, 1e5)
   Sigma[1:N, 1:N] <- sigma.k ^ 2 * exp(-0.5 * diff[1:N, 1:N] / l ^ 2) + 
                      sigma.n ^ 2 * I[1:N, 1:N]
   y[1:N] ~ dmnorm(Mu[1:N], cov = Sigma[1:N, 1:N])  

})
N <- length(obs$x)
diff <- outer(obs$x, obs$x, function(xi, xj) (xi-xj)^2)
constants = list(N = N, x = obs$x, Mu = rep(0,N), diff = diff, I = diag(1,N))
inits <- list(l = 1, sigma.n = 10, sigma.k = 1)
data <- data.frame(y = obs$y)
Rmodel <- nimbleModel(code = code, constants = constants, data = data, inits = inits)

Define our mcmc procedure in Nimble

Cmodel <- compileNimble(Rmodel)
mcmcspec <- configureMCMC(Rmodel, print=FALSE)
Rmcmc <- buildMCMC(mcmcspec)
Cmcmc <- compileNimble(Rmcmc, project = Rmodel)
system.time(
Cmcmc$run(1e6)
)
   user  system elapsed 
226.792   0.456 227.077 
samples <- as.data.frame(as.matrix(Cmcmc$mvSamples))
df <- gather(samples)

Posteriors

ggplot(df) + 
  geom_density(aes(value)) + 
  facet_wrap(~key, scale='free') + 
  scale_x_log10()

Calculating transition probabilities

Several ways we might go about this.

Technically all policies could be updated in response to new information, though this requires repeating the estimation process, or at least a Bayesian updating step (e.g. passive learning). Typically these steps are separated into estimating the policy and then implementing the policy separately, so we will focus on this case.

The transition probability is conditional

## Should be able to extract this from nimble...
predict <- 
function (posteriors, obs, x_predict) {
    out <- lapply(data.frame(t(posteriors)), 
      function(sample) {
      
        l <- sample[1]
        sigma.k <- sample[2]
        sigma.n <- sample[3]
       
        
        SE <- function (Xi, Xj, l) sigma.k * exp(- 0.5 * (Xi - Xj) ^ 2 / l ^ 2)
        cov <- function(X, Y) outer(X, Y, SE, l)
            
        cov_xx_inv <- solve(cov(obs$x, obs$x) + sigma.n^2 * diag(1, length(obs$x)))
        Ef <- cov(x_predict, obs$x) %*% cov_xx_inv %*% obs$y
        Cf <- cov(x_predict, x_predict) - cov(x_predict, obs$x) %*% 
            cov_xx_inv %*% cov(obs$x, x_predict)
        list(Ef = Ef, Cf = Cf, Vf = diag(Cf))
    })
    Ef_posterior <- sapply(out, `[[`, "Ef")
    Cf_posterior <- sapply(out, `[[`, "Cf")
    Vf_posterior <- sapply(out, `[[`, "Vf")
    E_Ef <- rowMeans(Ef_posterior)
    E_Cf <- matrix(apply(Cf_posterior, 1, sum)/dim(Cf_posterior)[2], 
        ncol = sqrt(dim(Cf_posterior)[1]))
    E_Vf <- diag(E_Cf)
    Cf_posterior <- lapply(out, `[[`, "Cf")
    list(Ef_posterior = Ef_posterior, Vf_posterior = Vf_posterior, 
        Cf_posterior = Cf_posterior, E_Ef = E_Ef, E_Cf = E_Cf, 
        E_Vf = E_Vf)
}

gp_posterior <- predict(sample_n(samples, 100), obs, x_grid)
matrices_gp <- gp_transition_matrix(gp_posterior$Ef_posterior, gp_posterior$Vf_posterior, x_grid, h_grid) 
opt_gp <- value_iteration(matrices_gp, x_grid, h_grid, MaxT, xT, profit, delta, reward)
Error: object 'reward' not found

Comparison

s2.p <- c(5,5)  
d.p = c(10, 1/0.1)
gp <- gp_mcmc(obs$x, y=obs$y, n=1e5, s2.p = s2.p, d.p = d.p)
gp_dat <- gp_predict(gp, x_grid, burnin=1e4, thin=300)
matrices_gp <- gp_transition_matrix(gp_dat$Ef_posterior, gp_dat$Vf_posterior, x_grid, h_grid) 
opt_gp <- value_iteration(matrices_gp, x_grid, h_grid, MaxT, xT, profit, delta)
matrices_true <- f_transition_matrix(f, p, x_grid, h_grid, sigma_g)
opt_true <- value_iteration(matrices_true, x_grid, h_grid, OptTime=MaxT, xT, profit, delta)
policy <- list(GP = opt_gp$D, exact = opt_true$D)
sets <- expand.grid(reps = 1:100, model = c("exact", "GP")) %>% group_by(reps, model)

sim_fn <- function(df){
  set.seed(df$reps)
  ForwardSimulate(f, p, x_grid, h_grid, x0, 
                  D = policy[[as.character(df$model)]], 
                  z_g, profit=profit, OptTime = OptTime)
  
}
sims <- sets %>% do(sim_fn(.))
colorkey <- c("#000000", "#E69F00", "#56B4E9", "#009E73", 
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
ggplot(sims) + 
  geom_line(aes(time, fishstock, group=interaction(reps,model), col=model), alpha=.1) + 
  facet_wrap(~model) +
  scale_colour_manual(values=colorkey, guide=FALSE)
policies_plot <- function(policy){
  policy_df <- 
    data.frame(model = c("GP", "exact"))  %>% 
    group_by(model) %>% 
    do(data.frame(stock = x_grid, 
                  escapement = x_grid - h_grid[policy[[as.character(.$model)]]]))

  ggplot(policy_df, aes(stock, escapement, color=model)) +
    geom_line() + 
    facet_wrap(~model) +
    xlab("stock size, x(t)") + 
    ylab("escapement, S(t)")  +
    scale_colour_manual(values=colorkey, guide=FALSE)
}

policies_plot(policy)
sims %>% mutate(net_profit = sum(profit)) -> sims