Mdptoolbox Ex

Testing this out using MDPtoolbox, but not working yet…

library(MDPtoolbox)
library(ggplot2)

f <- function(x, h, p){ 
  x <- x - h
  p[[1]] * x / {1 + p[[2]] * x}  
}
p <- c(a = 6, b = 0.05)

profit <- function(x, h) pmin(x,h) - 0.1 * h  # vectorized for x
x_grid <- seq(0, 150, length = 151)
h_grid <- c(0,50,100)
delta <- 0.001
sigma_g <- 0.01
z_g <- function() 1 + rlnorm(1, 0, sigma_g)
pdfn <- function(P, s) dlnorm(P, 0, s)

M <- pdgControl::determine_SDP_matrix(f, p, x_grid, h_grid, sigma_g, pdfn)
out <- pdgControl::value_iteration(M, x_grid, h_grid, 1, 0, profit, delta)

Building the transition probability matrix:

transition_matrix <- function(f, p, x_grid, h_grid, sigma_g,
                                 pdfn=function(P, s) dlnorm(P, 0, s)){
  n_x <- length(x_grid)
  n_h <- length(h_grid)
  SDP_matrix <-  array(0, c(length(x_grid), length(x_grid),length(h_grid)))
  
  for(h_i in 1:n_h){
    h <- h_grid[h_i]
    # Cycle over x values
    for(i in 1:n_x){ 
      ## Calculate the expected transition  
      x1 <- x_grid[i]
      x2_expected <- f(x1, h, p)
      ## If expected 0, go to 0 with probabilty 1
      if( x2_expected == 0) 
        SDP_matrix[i,1,h_i] <- 1  
      else {
        # relative probability of a transition to that state
        ProportionalChance <- x_grid / x2_expected
        Prob <- pdfn(ProportionalChance, sigma_g)
        
        if(sum(Prob) > 0)
        # Store normalized probabilities in row
          SDP_matrix[i,,h_i] <- Prob/sum(Prob)
        else 
          SDP_matrix[i,,h_i] <- c(1, numeric(n_x-1))
        
        if(anyNA(SDP_matrix[i,,h_i]))
          recover()
      }
    }
  }
  SDP_matrix
}

Building the reward matrix:

# [S, A] array

R <- t(sapply(x_grid, function(x)
  sapply(h_grid, function(h)
  profit(x,h)
)))

Note that in \(f\) we must define the harvesting take place before the population recruits.

P <- transition_matrix(f, p, x_grid, h_grid, sigma_g, pdfn)

x0 <- (p[[1]]-1)/p[[2]]
i <- which.min(abs(x_grid-x0))

V0 <- rep(0, length(x_grid))

mdp_check(P=P,R=R)

[1] ""

out <- mdp_value_iteration(P, R, discount = 1 / (1 + delta), epsilon=0.001, max_iter = 5e3, V0=V0)

[1] "MDP Toolbox: iterations stopped by maximum number of iteration condition"

policy <- out$policy

#out <- mdp_finite_horizon(P, R, discount=1 / (1 + delta), 10)
#policy <- out$policy[,1]

Whoops, this doesn’t look right:

plot(x_grid, h_grid[policy])