m10.02d1 · DynamicHMCModels.jl

Load Julia packages (libraries) needed for the snippets in chapter 0

using DynamicHMCModels, Optim

snippet 10.4

d = CSV.read(rel_path("..", "data", "chimpanzees.csv"), delim=';');
df = convert(DataFrame, d);
df[!, :pulled_left] = convert(Array{Int64}, df[!, :pulled_left])
df[!, :prosoc_left] = convert(Array{Int64}, df[!, :prosoc_left])
first(df, 5)

struct m_10_02d{TY <: AbstractVector, TX <: AbstractMatrix}
    "Observations."
    y::TY
    "Covariates"
    X::TX
    "Number of observations"
    N::Int
end

Make the type callable with the parameters as a single argument.

function (problem::m_10_02d)(θ)
    @unpack y, X, N = problem   # extract the data
    @unpack β = trans(θ)  # works on the named tuple too
    ll = 0.0
    ll += sum(logpdf.(Normal(0, 10), β)) # a & bp
    ll += sum([loglikelihood(Binomial(1, logistic(dot(X[i, :], β))), [y[i]]) for i in 1:N])
    ll
end

Instantiate the model with data and inits.

N = size(df, 1)
X = hcat(ones(Int64, N), df[!, :prosoc_left]);
y = df[!, :pulled_left]
p = m_10_02d(y, X, N);

Main.ex-m10.02d1.m_10_02d{Array{Int64,1},Array{Int64,2}}([0, 1, 0, 0, 1, 1, 0, 0, 0, 0  …  1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1 0; 1 0; … ; 1 0; 1 0], 504)

Function convert from a single vector of parms to parks NamedTuple

trans = as( (β = as(Array, size(p.X, 2)), ));

γ =  (β = [0.5, 1.0],)
θ = inverse(trans, γ)
p(θ)

-371.672843317438

Maximumapostrior

x0 = θ;
lower = [-1.0, -1.0];
upper = [1.0, 2.0];
ll(x) = -p(x)

inner_optimizer = GradientDescent()

optimize(ll, lower, upper, x0, Fminbox(inner_optimizer))

 * Status: success

 * Candidate solution
    Minimizer: [4.77e-02, 5.57e-01]
    Minimum:   3.446925e+02

 * Found with
    Algorithm:     Fminbox with Gradient Descent
    Initial Point: [5.00e-01, 1.00e+00]

 * Convergence measures
    |x - x'|               = 5.99e-09 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.07e-08 ≰ 0.0e+00
    |f(x) - f(x')|         = 0.00e+00 ≤ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 0.00e+00 ≤ 0.0e+00
    |g(x)|                 = 4.69e-09 ≤ 1.0e-08

 * Work counters
    Iterations:    4
    f(x) calls:    548
    ∇f(x) calls:   548

Write a function to return properly dimensioned transformation.

problem_transformation(p::m_10_02d) =
  as( Vector, size(p.X, 2)) #    as( (β = as(Array, size(p.X, 2)), ) )

problem_transformation (generic function with 1 method)

Wrap the problem with a transformation, then use Flux for the gradient.

P = TransformedLogDensity(problem_transformation(p), p)
∇P = LogDensityRejectErrors(ADgradient(:ForwardDiff, P));

LogDensityRejectErrors{InvalidLogDensityException,LogDensityProblems.ForwardDiffLogDensity{TransformedLogDensity{TransformVariables.ArrayTransform{TransformVariables.Identity,1},Main.ex-m10.02d1.m_10_02d{Array{Int64,1},Array{Int64,2}}},ForwardDiff.GradientConfig{ForwardDiff.Tag{getfield(LogDensityProblems, Symbol("##1#2")){TransformedLogDensity{TransformVariables.ArrayTransform{TransformVariables.Identity,1},Main.ex-m10.02d1.m_10_02d{Array{Int64,1},Array{Int64,2}}}},Float64},Float64,2,Array{ForwardDiff.Dual{ForwardDiff.Tag{getfield(LogDensityProblems, Symbol("##1#2")){TransformedLogDensity{TransformVariables.ArrayTransform{TransformVariables.Identity,1},Main.ex-m10.02d1.m_10_02d{Array{Int64,1},Array{Int64,2}}}},Float64},Float64,2},1}}}}(ForwardDiff AD wrapper for TransformedLogDensity of dimension 2, w/ chunk size 2)

Tune and sample.

chain, NUTS_tuned = NUTS_init_tune_mcmc(∇P, 1000);

(NUTS_Transition{Array{Float64,1},Float64}[NUTS_Transition{Array{Float64,1},Float64}([0.006401101423746752, 0.8494900898698133], -348.0391420280976, 2, DynamicHMC.DoubledTurn, 0.7669433429280383, 3), NUTS_Transition{Array{Float64,1},Float64}([0.04113361772807837, 0.7658122578011923], -346.50190486224295, 2, DynamicHMC.DoubledTurn, 1.0, 3), NUTS_Transition{Array{Float64,1},Float64}([0.139773695764526, 0.48086146782905875], -346.13279708011913, 2, DynamicHMC.AdjacentTurn, 0.9946704240354951, 7), NUTS_Transition{Array{Float64,1},Float64}([0.08780248528111687, 0.45023860520215075], -345.10797258353244, 2, DynamicHMC.DoubledTurn, 0.9964876221045372, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.03787699904017505, 0.7314664060113645], -345.2706028305615, 3, DynamicHMC.AdjacentTurn, 0.9709913432509133, 11), NUTS_Transition{Array{Float64,1},Float64}([0.09715464805884014, 0.421922384180495], -345.34501980746825, 2, DynamicHMC.AdjacentTurn, 0.9962687482736009, 7), NUTS_Transition{Array{Float64,1},Float64}([0.2358267500305552, 0.28569757261568524], -346.3681902093803, 2, DynamicHMC.DoubledTurn, 0.8853950313698015, 3), NUTS_Transition{Array{Float64,1},Float64}([0.054815171894018494, 0.5354390140392842], -346.46378224481083, 1, DynamicHMC.AdjacentTurn, 0.9583247839375573, 3), NUTS_Transition{Array{Float64,1},Float64}([0.049960293728120275, 0.44140992490349507], -345.16526419138756, 1, DynamicHMC.AdjacentTurn, 0.928965520144562, 3), NUTS_Transition{Array{Float64,1},Float64}([0.10832436775417126, 0.4906672618868469], -345.2401762085993, 2, DynamicHMC.DoubledTurn, 0.9775139886895344, 3)  …  NUTS_Transition{Array{Float64,1},Float64}([0.23092460678143717, 0.6247952871146852], -347.51611310067153, 2, DynamicHMC.DoubledTurn, 0.8988365545679432, 3), NUTS_Transition{Array{Float64,1},Float64}([0.2740567201003026, 0.1273339047283298], -349.89918058814186, 3, DynamicHMC.AdjacentTurn, 0.9300643357785148, 11), NUTS_Transition{Array{Float64,1},Float64}([-0.09339070222309373, 0.8444551556359425], -347.47636531369716, 3, DynamicHMC.AdjacentTurn, 0.9938305737217209, 15), NUTS_Transition{Array{Float64,1},Float64}([0.24232520043427908, 0.3103822822529658], -346.83996670601897, 2, DynamicHMC.DoubledTurn, 0.9485902208879207, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.0021145169470842273, 0.48371055267846863], -346.21749986587014, 2, DynamicHMC.DoubledTurn, 1.0, 3), NUTS_Transition{Array{Float64,1},Float64}([0.008423055679449426, 0.7651680799851497], -346.2307691749633, 3, DynamicHMC.AdjacentTurn, 0.9439777050813526, 11), NUTS_Transition{Array{Float64,1},Float64}([0.01888023657935249, 0.4568885167643989], -345.8299726479232, 2, DynamicHMC.DoubledTurn, 1.0, 3), NUTS_Transition{Array{Float64,1},Float64}([0.008586164014514258, 0.7811890069246579], -346.0168591285624, 2, DynamicHMC.DoubledTurn, 0.9489696006980272, 3), NUTS_Transition{Array{Float64,1},Float64}([-0.084220733938016, 0.8611873878577895], -346.3522453203515, 2, DynamicHMC.DoubledTurn, 0.9845346249861128, 3), NUTS_Transition{Array{Float64,1},Float64}([0.04772049821157803, 0.3983019664145397], -346.39713967504895, 2, DynamicHMC.DoubledTurn, 0.9880658039572238, 3)], NUTS sampler in 2 dimensions
  stepsize (ϵ) ≈ 0.903
  maximum depth = 10
  Gaussian kinetic energy, √diag(M⁻¹): [0.12045508311847805, 0.16741772514146783]
)

We use the transformation to obtain the posterior from the chain.

posterior = TransformVariables.transform.(Ref(problem_transformation(p)), get_position.(chain));
posterior[1:5]

5-element Array{Array{Float64,1},1}:
 [0.006401101423746752, 0.8494900898698133]
 [0.04113361772807837, 0.7658122578011923] 
 [0.139773695764526, 0.48086146782905875]  
 [0.08780248528111687, 0.45023860520215075]
 [-0.03787699904017505, 0.7314664060113645]

Extract the parameter posterior means: β,

posterior_a = mean(first, posterior)
posterior_bp = mean(last, posterior)

0.5468777351731422

Effective sample sizes (of untransformed draws)

ess = mapslices(effective_sample_size, get_position_matrix(chain); dims = 1)
ess

1×2 Array{Float64,2}:
 726.53  593.857

NUTS-specific statistics

NUTS_statistics(chain)

Hamiltonian Monte Carlo sample of length 1000
  acceptance rate mean: 0.91, min/25%/median/75%/max: 0.37 0.87 0.95 0.99 1.0
  termination: AdjacentTurn => 31% DoubledTurn => 69%
  depth: 1 => 21% 2 => 72% 3 => 6% 4 => 1%

CmdStan result

m_10_2s_result = "
Iterations = 1:1000
Thinning interval = 1
Chains = 1,2,3,4
Samples per chain = 1000

Empirical Posterior Estimates:
      Mean        SD       Naive SE       MCSE      ESS
 a 0.05103234 0.12579086 0.0019889282 0.0035186307 1000
bp 0.55711212 0.18074275 0.0028577937 0.0040160451 1000

Quantiles:
       2.5%        25.0%       50.0%      75.0%      97.5%
 a -0.19755400 -0.029431425 0.05024655 0.12978825 0.30087758
bp  0.20803447  0.433720250 0.55340400 0.67960975 0.91466915
";

"\nIterations = 1:1000\nThinning interval = 1\nChains = 1,2,3,4\nSamples per chain = 1000\n\nEmpirical Posterior Estimates:\n      Mean        SD       Naive SE       MCSE      ESS\n a 0.05103234 0.12579086 0.0019889282 0.0035186307 1000\nbp 0.55711212 0.18074275 0.0028577937 0.0040160451 1000\n\nQuantiles:\n       2.5%        25.0%       50.0%      75.0%      97.5%\n a -0.19755400 -0.029431425 0.05024655 0.12978825 0.30087758\nbp  0.20803447  0.433720250 0.55340400 0.67960975 0.91466915\n"

Extract the parameter posterior means: β,

[posterior_a, posterior_bp]

2-element Array{Float64,1}:
 0.05717559081641652
 0.5468777351731422

End of 10/m10.02d.jl

This page was generated using Literate.jl.