On page 399 of Statistical Rethinking Edition 1, m13.2
is defined as
import CSV
import Random
import TuringModels
using DataFrames
Random.seed!(1)
data_path = joinpath(TuringModels.project_root, "data", "UCBadmit.csv")
df = CSV.read(data_path, DataFrame; delim=';')
12×5 DataFrame
Row │ dept gender admit reject applications
│ String1 String7 Int64 Int64 Int64
─────┼───────────────────────────────────────────────
1 │ A male 512 313 825
2 │ A female 89 19 108
3 │ B male 353 207 560
4 │ B female 17 8 25
5 │ C male 120 205 325
6 │ C female 202 391 593
7 │ D male 138 279 417
8 │ D female 131 244 375
9 │ E male 53 138 191
10 │ E female 94 299 393
11 │ F male 22 351 373
12 │ F female 24 317 341
using Turing
dept_map = Dict(key => idx for (idx, key) in enumerate(unique(df.dept)))
df.male = [g == "male" ? 1 : 0 for g in df.gender]
df.dept_id = [dept_map[de] for de in df.dept]
@model function m13_2(applications, dept_id, male, admit)
sigma_dept ~ truncated(Cauchy(0, 2), 0, Inf)
bm ~ Normal(0, 1)
a ~ Normal(0, 10)
a_dept ~ filldist(Normal(a, sigma_dept), 6)
logit_p = a_dept[dept_id] + bm*male
admit .~ BinomialLogit.(applications, logit_p)
end
chns = sample(
m13_2(df.applications, df.dept_id, df.male, df.admit),
NUTS(),
1000
)
Chains MCMC chain (1000×21×1 Array{Float64, 3}):
Iterations = 501:1:1500
Number of chains = 1
Samples per chain = 1000
Wall duration = 9.1 seconds
Compute duration = 9.1 seconds
parameters = sigma_dept, bm, a, a_dept[1], a_dept[2], a_dept[3], a_dept[4], a_dept[5], a_dept[6]
internals = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size
Summary Statistics
parameters mean std naive_se mcse ess rhat ess_per_sec
Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
sigma_dept 1.4562 0.5572 0.0176 0.0224 708.3248 1.0050 77.8293
bm -0.0922 0.0798 0.0025 0.0037 388.0852 1.0126 42.6420
a -0.6067 0.6206 0.0196 0.0221 770.8409 0.9999 84.6985
a_dept[1] 0.6706 0.0928 0.0029 0.0036 681.3253 1.0070 74.8627
a_dept[2] 0.6263 0.1193 0.0038 0.0052 601.5435 1.0076 66.0964
a_dept[3] -0.5852 0.0695 0.0022 0.0021 1047.1394 1.0023 115.0576
a_dept[4] -0.6182 0.0873 0.0028 0.0031 906.8329 1.0071 99.6410
a_dept[5] -1.0599 0.0976 0.0031 0.0024 1423.1180 1.0000 156.3694
a_dept[6] -2.6078 0.1555 0.0049 0.0050 1288.2488 1.0007 141.5503
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
sigma_dept 0.7463 1.0834 1.3206 1.6934 2.8494
bm -0.2491 -0.1473 -0.0936 -0.0416 0.0693
a -1.8714 -0.9708 -0.6026 -0.2258 0.6506
a_dept[1] 0.4914 0.6118 0.6684 0.7369 0.8475
a_dept[2] 0.3790 0.5486 0.6270 0.7062 0.8541
a_dept[3] -0.7207 -0.6341 -0.5837 -0.5387 -0.4515
a_dept[4] -0.7834 -0.6776 -0.6174 -0.5568 -0.4519
a_dept[5] -1.2433 -1.1266 -1.0561 -0.9960 -0.8678
a_dept[6] -2.9245 -2.7103 -2.6038 -2.4970 -2.3257
using StatsPlots
StatsPlots.plot(chns)
"/home/runner/work/TuringModels.jl/TuringModels.jl/__site/assets/models/multinomial-poisson/code/output/chns.svg"
"""
Inference for Stan model: 359c2483e3bdbf74fd0484be27c2909b.
3 chains, each with iter=4500; warmup=500; thin=1;
post-warmup draws per chain=4000, total post-warmup draws=12000.
mean se_mean sd 2.5% 25% 50% 75% 97.5%
a_dept[1] 0.67 0.00 0.10 0.48 0.61 0.67 0.74 0.87
a_dept[2] 0.63 0.00 0.12 0.40 0.55 0.63 0.71 0.85
a_dept[3] -0.58 0.00 0.08 -0.73 -0.63 -0.58 -0.53 -0.44
a_dept[4] -0.62 0.00 0.09 -0.79 -0.67 -0.62 -0.56 -0.45
a_dept[5] -1.06 0.00 0.10 -1.26 -1.13 -1.06 -0.99 -0.86
a_dept[6] -2.61 0.00 0.16 -2.92 -2.71 -2.60 -2.50 -2.31
a -0.59 0.01 0.66 -1.94 -0.97 -0.59 -0.21 0.73
bm -0.09 0.00 0.08 -0.25 -0.15 -0.09 -0.04 0.06
sigma_dept 1.49 0.01 0.62 0.78 1.09 1.35 1.71 3.01
lp__ -2602.27 0.04 2.27 -2607.74 -2603.48 -2601.90 -2600.61 -2598.95
n_eff Rhat
a_dept[1] 8191 1
a_dept[2] 8282 1
a_dept[3] 10974 1
a_dept[4] 10034 1
a_dept[5] 10783 1
a_dept[6] 10344 1
a 5839 1
bm 7137 1
sigma_dept 4626 1
lp__ 4200 1
""";