Model m10.02d

Load Julia packages (libraries) needed for the snippets in chapter 10

using DynamicHMCModels, Optim

snippet 10.4

# Read the chimpanzees dataset (semicolon-delimited CSV shipped with the project).
d = CSV.read(rel_path("..", "data", "chimpanzees.csv"), delim=';');
df = convert(DataFrame, d);
# Coerce the binary outcome and predictor columns to Int64.
# NOTE(review): `df[:col]` is pre-1.0 DataFrames indexing — consistent with the
# package versions this script targets; do not modernize in isolation.
df[:pulled_left] = convert(Array{Int64}, df[:pulled_left])
df[:prosoc_left] = convert(Array{Int64}, df[:prosoc_left])
first(df, 5)

"""
Data container for the m10.02d binary logistic-regression model:
outcomes `y`, design matrix `X` (intercept + prosoc_left), and
the number of observations `N`.
"""
struct m_10_02d{TY <: AbstractVector, TX <: AbstractMatrix}
    "Observations."
    y::TY
    "Covariates"
    X::TX
    "Number of observations"
    N::Int
end

Make the type callable with the parameters as a single argument.

"""
Evaluate the log posterior density of the model at the flat parameter
vector `θ`: Normal(0, 10) priors on the coefficients plus the Bernoulli
log-likelihood of each observation under a logistic link.
"""
function (problem::m_10_02d)(θ)
    @unpack y, X, N = problem            # observed data
    # NOTE(review): `trans` is a non-const global defined later in the file.
    @unpack β = trans(θ)                 # flat vector -> (β = [...],)
    lp = sum(logpdf.(Normal(0, 10), β))  # priors on a & bp
    for i in 1:N
        prob_i = logistic(dot(X[i, :], β))
        lp += loglikelihood(Binomial(1, prob_i), [y[i]])
    end
    return lp
end

Instantiate the model with data and inits.

# Build the design matrix (column of ones for the intercept, plus
# prosoc_left) and instantiate the model with the data.
N = size(df, 1)
X = hcat(ones(Int64, N), df[:prosoc_left]);
y = df[:pulled_left]
p = m_10_02d(y, X, N);
Main.ex-m10.02d1.m_10_02d{Array{Int64,1},Array{Int64,2}}([0, 1, 0, 0, 1, 1, 0, 0, 0, 0  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1 0; 1 0; … ; 1 0; 1 0], 504)

Function to convert from a single vector of parameters to a NamedTuple

# Transformation between a flat parameter vector and the NamedTuple (β = [...],),
# dimensioned by the number of columns of the design matrix.
# NOTE(review): this non-const global is referenced inside the m_10_02d functor.
trans = as( (β = as(Array, size(p.X, 2)), ));

# Sanity check: map a test point γ to the flat vector θ and evaluate the log density.
γ =  (β = [0.5, 1.0],)
θ = inverse(trans, γ)
p(θ)
-371.672843317438

Maximum a posteriori

# Maximum a posteriori estimate via box-constrained gradient descent.
x0 = θ;                # start from the test point above
lower = [-1.0, -1.0];  # box constraints on (a, bp)
upper = [1.0, 2.0];
ll(x) = -p(x)          # Optim minimizes, so negate the log density

inner_optimizer = GradientDescent()

optimize(ll, lower, upper, x0, Fminbox(inner_optimizer))
Results of Optimization Algorithm
 * Algorithm: Fminbox with Gradient Descent
 * Starting Point: [0.5,1.0]
 * Minimizer: [0.047708983716678,0.5573080392418944]
 * Minimum: 3.446925e+02
 * Iterations: 4
 * Convergence: true
   * |x - x'| ≤ 0.0e+00: false
     |x - x'| = 5.99e-09
   * |f(x) - f(x')| ≤ 0.0e+00 |f(x)|: true
     |f(x) - f(x')| = 0.00e+00 |f(x)|
   * |g(x)| ≤ 1.0e-08: true
     |g(x)| = 4.69e-09
   * Stopped by an increasing objective: true
   * Reached Maximum Number of Iterations: false
 * Objective Calls: 548
 * Gradient Calls: 548

Write a function to return properly dimensioned transformation.

# Return the transformation matching the model's parameter dimension:
# a flat vector mapped to (β = [...],) with length size(p.X, 2).
# (A stray duplicate expression `as( Vector, size(p.X, 2))` that was
# evaluated and discarded has been removed.)
problem_transformation(p::m_10_02d) =
    as( (β = as(Array, size(p.X, 2)), ) )

Wrap the problem with a transformation, then use ForwardDiff for the gradient.

# Wrap the log density with the transformation, differentiate with
# ForwardDiff, and reject (rather than error on) invalid densities.
P = TransformedLogDensity(problem_transformation(p), p)
∇P = LogDensityRejectErrors(ADgradient(:ForwardDiff, P));
LogDensityRejectErrors{InvalidLogDensityException,LogDensityProblems.ForwardDiffLogDensity{TransformedLogDensity{TransformVariables.ArrayTransform{TransformVariables.Identity,1},Main.ex-m10.02d1.m_10_02d{Array{Int64,1},Array{Int64,2}}},ForwardDiff.GradientConfig{ForwardDiff.Tag{getfield(LogDensityProblems, Symbol("##1#2")){TransformedLogDensity{TransformVariables.ArrayTransform{TransformVariables.Identity,1},Main.ex-m10.02d1.m_10_02d{Array{Int64,1},Array{Int64,2}}}},Float64},Float64,2,Array{ForwardDiff.Dual{ForwardDiff.Tag{getfield(LogDensityProblems, Symbol("##1#2")){TransformedLogDensity{TransformVariables.ArrayTransform{TransformVariables.Identity,1},Main.ex-m10.02d1.m_10_02d{Array{Int64,1},Array{Int64,2}}}},Float64},Float64,2},1}}}}(ForwardDiff AD wrapper for TransformedLogDensity of dimension 2, w/ chunk size 2)

Tune and sample.

# Tune the NUTS step size, then draw 1000 posterior samples.
chain, NUTS_tuned = NUTS_init_tune_mcmc(∇P, 1000);
MCMC, adapting ϵ (75 steps)
0.0011 s/step ...done
MCMC, adapting ϵ (25 steps)
0.0014 s/step ...done
MCMC, adapting ϵ (50 steps)
0.0014 s/step ...done
MCMC, adapting ϵ (100 steps)
0.00091 s/step ...done
MCMC, adapting ϵ (200 steps)
0.00074 s/step ...done
MCMC, adapting ϵ (400 steps)
0.00073 s/step ...done
MCMC, adapting ϵ (50 steps)
0.00098 s/step ...done
MCMC (1000 steps)
0.00071 s/step ...done
(NUTS_Transition{Array{Float64,1},Float64}[NUTS_Transition{Array{Float64,1},Float64}([0.034124116042762276, 0.6295785691103204], -347.2461941115505, 1, DynamicHMC.AdjacentTurn, 0.9126235540598593, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.05606517169141377, 0.6648584635501519], -345.18110094593743, 2, DynamicHMC.DoubledTurn, 0.9629974911366211, 3), NUTS_Transition{Array{Float64,1},Float64}([0.10871469220131577, 0.5334219753598195], -345.04295847276575, 2, DynamicHMC.DoubledTurn, 0.9988582356839549, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.03594501908973265, 0.5263276168272528], -347.74749480706015, 2, DynamicHMC.DoubledTurn, 0.7048858717551161, 3), NUTS_Transition{Array{Float64,1},Float64}([0.14878839389367415, 0.510288477354801], -346.53335854423835, 1, DynamicHMC.AdjacentTurn, 0.8692546450682586, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.25950894424006565, 0.6863396805696831], -348.59479683554605, 2, DynamicHMC.DoubledTurn, 0.7150944039846898, 3), NUTS_Transition{Array{Float64,1},Float64}([0.1799379207728461, 0.34322141832824254], -348.4893958035835, 2, DynamicHMC.DoubledTurn, 1.0, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.08357490033181128, 0.47169553803649794], -347.4455950018396, 2, DynamicHMC.DoubledTurn, 0.7915695344392312, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.13978226101488966, 0.599547514489062], -347.0688269782761, 2, DynamicHMC.DoubledTurn, 1.0, 3), NUTS_Transition{Array{Float64,1},Float64}([0.2832873649139311, 0.412840166545386], -347.8210580898942, 2, DynamicHMC.DoubledTurn, 0.9618705454915881, 3)  …  NUTS_Transition{Array{Float64,1},Float64}([-0.11286932466978525, 0.9805762635590581], -348.06076205035447, 1, DynamicHMC.DoubledTurn, 1.0, 1), NUTS_Transition{Array{Float64,1},Float64}([-0.07904277010192653, 0.6746965348203097], -348.03784081303036, 2, DynamicHMC.DoubledTurn, 0.9779367896893568, 3), NUTS_Transition{Array{Float64,1},Float64}([0.16445789618065604, 0.6581245650757779], -346.70306501557127, 2, 
DynamicHMC.DoubledTurn, 0.8808644203349146, 3), NUTS_Transition{Array{Float64,1},Float64}([0.08140921446421986, 0.42472396493859155], -346.35035203355415, 2, DynamicHMC.DoubledTurn, 1.0, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.09686555169282768, 0.8782727895230485], -350.24288071151466, 2, DynamicHMC.DoubledTurn, 0.5302036049521993, 3), NUTS_Transition{Array{Float64,1},Float64}([0.20935889494027662, 0.41041198960421166], -350.58556576138847, 2, DynamicHMC.DoubledTurn, 0.6544546732969668, 3), NUTS_Transition{Array{Float64,1},Float64}([0.04529500700260519, 0.48985416706758017], -345.89312214282916, 2, DynamicHMC.DoubledTurn, 0.9473678013822786, 3), NUTS_Transition{Array{Float64,1},Float64}([0.11469568628184287, 0.4921335357106115], -345.0624094834972, 2, DynamicHMC.AdjacentTurn, 0.9770142849884907, 7), NUTS_Transition{Array{Float64,1},Float64}([-0.004813909873152028, 0.743523431143264], -345.8760814326579, 2, DynamicHMC.DoubledTurn, 0.8494931371127983, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.12800353241302914, 0.7279894378124525], -346.37537336790217, 2, DynamicHMC.DoubledTurn, 0.88672084236871, 3)], NUTS sampler in 2 dimensions
  stepsize (ϵ) ≈ 0.922
  maximum depth = 10
  Gaussian kinetic energy, √diag(M⁻¹): [0.1266588195531402, 0.17861432282915035]
)

We use the transformation to obtain the posterior from the chain.

# Map raw chain positions back to the parameter space; Ref() keeps the
# transformation from being broadcast over.
posterior = TransformVariables.transform.(Ref(problem_transformation(p)), get_position.(chain));
posterior[1:5]
5-element Array{Array{Float64,1},1}:
 [0.034124116042762276, 0.6295785691103204]
 [-0.05606517169141377, 0.6648584635501519]
 [0.10871469220131577, 0.5334219753598195]
 [-0.03594501908973265, 0.5263276168272528]
 [0.14878839389367415, 0.510288477354801]

Extract the parameter posterior means: β,

# Posterior means of the two coefficients: first = a (intercept),
# last = bp (prosoc_left slope).
posterior_a = mean(first, posterior)
posterior_bp = mean(last, posterior)
0.5661896064241709

Effective sample sizes (of untransformed draws)

# Effective sample size per column of the (untransformed) draws matrix.
ess = mapslices(effective_sample_size, get_position_matrix(chain); dims = 1)
ess
1×2 Array{Float64,2}:
 918.114  1000.0

NUTS-specific statistics

# Sampler diagnostics: acceptance rate, termination kinds, tree depths.
NUTS_statistics(chain)
Hamiltonian Monte Carlo sample of length 1000
  acceptance rate mean: 0.91, min/25%/median/75%/max: 0.17 0.85 0.95 1.0 1.0
  termination: AdjacentTurn => 28% DoubledTurn => 72%
  depth: 1 => 21% 2 => 77% 3 => 2% 4 => 0%

CmdStan result

# Reference results from CmdStan for the same model, stored as a string
# for comparison with the DynamicHMC estimates above.
m_10_2s_result = "
Iterations = 1:1000
Thinning interval = 1
Chains = 1,2,3,4
Samples per chain = 1000

Empirical Posterior Estimates:
      Mean        SD       Naive SE       MCSE      ESS
 a 0.05103234 0.12579086 0.0019889282 0.0035186307 1000
bp 0.55711212 0.18074275 0.0028577937 0.0040160451 1000

Quantiles:
       2.5%        25.0%       50.0%      75.0%      97.5%
 a -0.19755400 -0.029431425 0.05024655 0.12978825 0.30087758
bp  0.20803447  0.433720250 0.55340400 0.67960975 0.91466915
";
"\nIterations = 1:1000\nThinning interval = 1\nChains = 1,2,3,4\nSamples per chain = 1000\n\nEmpirical Posterior Estimates:\n      Mean        SD       Naive SE       MCSE      ESS\n a 0.05103234 0.12579086 0.0019889282 0.0035186307 1000\nbp 0.55711212 0.18074275 0.0028577937 0.0040160451 1000\n\nQuantiles:\n       2.5%        25.0%       50.0%      75.0%      97.5%\n a -0.19755400 -0.029431425 0.05024655 0.12978825 0.30087758\nbp  0.20803447  0.433720250 0.55340400 0.67960975 0.91466915\n"

Extract the parameter posterior means: β,

# Compare with the CmdStan means above (a ≈ 0.051, bp ≈ 0.557).
[posterior_a, posterior_bp]
2-element Array{Float64,1}:
 0.044360884337988474
 0.5661896064241709

End of 10/m10.02d.jl

This page was generated using Literate.jl.