m5.6d · DynamicHMCModels.jl

Load Julia packages (libraries) needed for the snippets in chapter 0

using DynamicHMCModels

CmdStan uses a tmp directory to store the output of cmdstan

ProjDir = rel_path_d("..", "scripts", "05")
cd(ProjDir)

Read the milk data

wd = CSV.read(rel_path("..", "data", "milk.csv"), delim=';')
df = convert(DataFrame, wd);
dcc = filter(row -> !(row[:neocortex_perc] == "NA"), df)
dcc[!, :kcal_per_g] = convert(Vector{Float64}, dcc[!, :kcal_per_g])
dcc[!, :log_mass] = log.(convert(Vector{Float64}, dcc[!, :mass]))

17-element Array{Float64,1}:
  0.6678293725756554
  1.6582280766035324
  1.6808279085207734
  0.9202827531436925
 -0.3856624808119846
 -2.120263536200091 
 -0.7550225842780328
 -1.1394342831883648
  0.4382549309311553
  1.1755733298042381
  2.509599262378372 
  1.6808279085207734
  3.5689691574413787
  4.374876130645041 
  3.70721041079866  
  3.4998353515591547
  4.006423680849631

Show first 5 rows

first(dcc[!, [3, 7, 9]], 5)

5 rows × 3 columns

	kcal_per_g	mass	log_mass
	Float64	Float64	Float64
1	0.49	1.95	0.667829
2	0.47	5.25	1.65823
3	0.56	5.37	1.68083
4	0.89	2.51	0.920283
5	0.92	0.68	-0.385662

Define the model struct

struct m_5_6{TY <: AbstractVector, TX <: AbstractMatrix}
    "Observations."
    y::TY
    "Covariates"
    X::TX
end

Make the type callable with the parameters as a single argument.

function (problem::m_5_6)(θ)
    @unpack y, X, = problem   # extract the data
    @unpack β, σ = θ            # works on the named tuple too
    ll = 0.0
    ll += logpdf(Normal(0, 100), X[1]) # a = X[1]
    ll += logpdf(Normal(0, 1), X[2]) # b1 = X[2]
    ll += logpdf(TDist(1.0), σ)
    ll += loglikelihood(Normal(0, σ), y .- X*β)
    ll
end

Instantiate the model with data and inits.

N = size(dcc, 1)
X = hcat(ones(N), dcc[!, :log_mass]);
y = dcc[!, :kcal_per_g]
p = m_5_6(y, X);
p((β = [1.0, 2.0], σ = 1.0))

-242.8761844035513

Write a function to return properly dimensioned transformation.

problem_transformation(p::m_5_6) =
    as((β = as(Array, size(p.X, 2)), σ = asℝ₊))

problem_transformation (generic function with 1 method)

Wrap the problem with a transformation, then use Flux for the gradient.

P = TransformedLogDensity(problem_transformation(p), p)
∇P = ADgradient(:ForwardDiff, P);

ForwardDiff AD wrapper for TransformedLogDensity of dimension 3, w/ chunk size 3

Tune and sample.

chain, NUTS_tuned = NUTS_init_tune_mcmc(∇P, 1000);

(NUTS_Transition{Array{Float64,1},Float64}[NUTS_Transition{Array{Float64,1},Float64}([0.7462665039905438, -0.06756187909534059, -1.8619434447892902], -7.2598207647159345, 3, DynamicHMC.DoubledTurn, 0.8978513856588961, 7), NUTS_Transition{Array{Float64,1},Float64}([0.7524463411809885, -0.04526069729693541, -1.8389233936393201], -4.4149666155676215, 2, DynamicHMC.DoubledTurn, 1.0, 3), NUTS_Transition{Array{Float64,1},Float64}([0.6382298293704601, -0.009375393181532, -1.71593672265595], -3.9369004739582483, 3, DynamicHMC.DoubledTurn, 0.950877146316798, 7), NUTS_Transition{Array{Float64,1},Float64}([0.608088721824474, 0.020623543863125877, -1.699858062825136], -5.563678851982166, 2, DynamicHMC.DoubledTurn, 0.8891347418694403, 3), NUTS_Transition{Array{Float64,1},Float64}([0.6310424546352974, 0.03150916716301844, -1.5771847836851534], -7.762372734684922, 3, DynamicHMC.DoubledTurn, 0.9826327946943109, 7), NUTS_Transition{Array{Float64,1},Float64}([0.8162874560390045, -0.0996736523811119, -1.840876827920433], -8.301216056055575, 3, DynamicHMC.DoubledTurn, 0.953528831646004, 7), NUTS_Transition{Array{Float64,1},Float64}([0.8200572177362877, -0.08240191513616679, -1.5844684175934414], -9.027698959668037, 2, DynamicHMC.DoubledTurn, 1.0, 3), NUTS_Transition{Array{Float64,1},Float64}([0.8081504165740322, -0.07069045325374292, -1.8297293341388685], -6.769124694439065, 3, DynamicHMC.DoubledTurn, 0.9426865557888423, 7), NUTS_Transition{Array{Float64,1},Float64}([0.7449208279741782, -0.05923643556492308, -1.6660530508526692], -5.269885209890247, 3, DynamicHMC.DoubledTurn, 1.0, 7), NUTS_Transition{Array{Float64,1},Float64}([0.6719565190658527, -0.029101072362316713, -1.743665986808353], -3.8702255523992455, 2, DynamicHMC.AdjacentTurn, 0.9932038157079415, 7)  …  NUTS_Transition{Array{Float64,1},Float64}([0.7119660869773805, -0.028644114354484766, -1.6163983484895146], -3.6464294403847664, 3, DynamicHMC.DoubledTurn, 0.9958124514613053, 7), NUTS_Transition{Array{Float64,1},Float64}([0.6914899547549666, -0.0684776933141037, -1.673448120041311], -5.844097343157375, 2, DynamicHMC.AdjacentTurn, 0.8906211907088515, 5), NUTS_Transition{Array{Float64,1},Float64}([0.749049367781539, -0.05209124521164411, -1.6624420375267832], -5.471936403695484, 2, DynamicHMC.DoubledTurn, 1.0, 3), NUTS_Transition{Array{Float64,1},Float64}([0.6681420031605552, -0.018516992835107327, -1.7307483251877724], -6.288033863946183, 1, DynamicHMC.AdjacentTurn, 0.8809856417537762, 3), NUTS_Transition{Array{Float64,1},Float64}([0.6813254788093739, -0.04593118743800228, -1.4919827680841329], -4.781875831845424, 2, DynamicHMC.DoubledTurn, 0.8783510367521722, 3), NUTS_Transition{Array{Float64,1},Float64}([0.7183138885579228, -0.02472450740546496, -1.5210655412414018], -7.828089851162854, 2, DynamicHMC.AdjacentTurn, 0.8965888420104463, 7), NUTS_Transition{Array{Float64,1},Float64}([0.731054601177782, -0.0475199792908209, -1.8904454561470756], -5.362103041994445, 3, DynamicHMC.DoubledTurn, 0.9756590015337692, 7), NUTS_Transition{Array{Float64,1},Float64}([0.7213556684183221, -0.00037732201641914667, -1.4095692439716343], -8.150670235742682, 2, DynamicHMC.AdjacentTurn, 0.6757072379747723, 7), NUTS_Transition{Array{Float64,1},Float64}([0.608876643719785, -0.025579439687686143, -1.6071047895413166], -8.908519343536826, 2, DynamicHMC.AdjacentTurn, 0.9387967830815297, 7), NUTS_Transition{Array{Float64,1},Float64}([0.6174590276710058, -0.019938757081014478, -1.5325938140862743], -5.304245327383504, 2, DynamicHMC.DoubledTurn, 1.0, 3)], NUTS sampler in 3 dimensions
  stepsize (ϵ) ≈ 0.595
  maximum depth = 10
  Gaussian kinetic energy, √diag(M⁻¹): [0.05461946988141577, 0.02403680163251555, 0.1885036897668246]
)

We use the transformation to obtain the posterior from the chain.

posterior = TransformVariables.transform.(Ref(problem_transformation(p)), get_position.(chain));
posterior[1:5]

5-element Array{NamedTuple{(:β, :σ),Tuple{Array{Float64,1},Float64}},1}:
 (β = [0.7462665039905438, -0.06756187909534059], σ = 0.1553703830013402) 
 (β = [0.7524463411809885, -0.04526069729693541], σ = 0.15898850203224466)
 (β = [0.6382298293704601, -0.009375393181532], σ = 0.17979522354854754)  
 (β = [0.608088721824474, 0.020623543863125877], σ = 0.18270945547630585) 
 (β = [0.6310424546352974, 0.03150916716301844], σ = 0.20655577965022826)

Extract the parameter posterior means: β,

posterior_β = mean(first, posterior)

2-element Array{Float64,1}:
  0.7069489658527458  
 -0.032546402501349575

then σ:

posterior_σ = mean(last, posterior)

0.1823872281799675

Effective sample sizes (of untransformed draws)

ess = mapslices(effective_sample_size,
                get_position_matrix(chain); dims = 1)

1×3 Array{Float64,2}:
 579.939  607.738  648.78

NUTS-specific statistics

NUTS_statistics(chain)

Hamiltonian Monte Carlo sample of length 1000
  acceptance rate mean: 0.94, min/25%/median/75%/max: 0.1 0.92 0.97 1.0 1.0
  termination: AdjacentTurn => 18% DoubledTurn => 82%
  depth: 1 => 3% 2 => 46% 3 => 51%

cmdstan result

cmdstan_result = "
Iterations = 1:1000
Thinning interval = 1
Chains = 1,2,3,4
Samples per chain = 1000

Empirical Posterior Estimates:
          Mean         SD        Naive SE       MCSE      ESS
    a  0.70472876 0.057040655 0.00090189195 0.0011398893 1000
   bm -0.03150330 0.023642759 0.00037382484 0.0004712342 1000
sigma  0.18378372 0.039212805 0.00062000888 0.0011395979 1000

Quantiles:
          2.5%       25.0%       50.0%        75.0%       97.5%
    a  0.59112968  0.66848775  0.70444950  0.741410500 0.81915225
   bm -0.07729257 -0.04708425 -0.03104865 -0.015942925 0.01424901
sigma  0.12638780  0.15605950  0.17800600  0.204319250 0.27993590
";

"\nIterations = 1:1000\nThinning interval = 1\nChains = 1,2,3,4\nSamples per chain = 1000\n\nEmpirical Posterior Estimates:\n          Mean         SD        Naive SE       MCSE      ESS\n    a  0.70472876 0.057040655 0.00090189195 0.0011398893 1000\n   bm -0.03150330 0.023642759 0.00037382484 0.0004712342 1000\nsigma  0.18378372 0.039212805 0.00062000888 0.0011395979 1000\n\nQuantiles:\n          2.5%       25.0%       50.0%        75.0%       97.5%\n    a  0.59112968  0.66848775  0.70444950  0.741410500 0.81915225\n   bm -0.07729257 -0.04708425 -0.03104865 -0.015942925 0.01424901\nsigma  0.12638780  0.15605950  0.17800600  0.204319250 0.27993590\n"

Extract the parameter posterior means: [β, σ],

[posterior_β, posterior_σ]

2-element Array{Any,1}:
  [0.7069489658527458, -0.032546402501349575]
 0.1823872281799675

End of 05/5.6d.jl

This page was generated using Literate.jl.