This is model 13.4 from Statistical Rethinking Edition 1.
import CSV
import Random
using DataFrames
using Turing
using TuringModels
Random.seed!(1)
data_path = joinpath(TuringModels.project_root, "data", "UCBadmit.csv")
df = CSV.read(data_path, DataFrame; delim=';')
dept_map = Dict(key => idx for (idx, key) in enumerate(unique(df.dept)))
df.male = [g == "male" ? 1 : 0 for g in df.gender]
df.dept_id = [dept_map[de] for de in df.dept]
df
12×7 DataFrame
Row │ dept gender admit reject applications male dept_id
│ String1 String7 Int64 Int64 Int64 Int64 Int64
─────┼───────────────────────────────────────────────────────────────
1 │ A male 512 313 825 1 1
2 │ A female 89 19 108 0 1
3 │ B male 353 207 560 1 2
4 │ B female 17 8 25 0 2
5 │ C male 120 205 325 1 3
6 │ C female 202 391 593 0 3
7 │ D male 138 279 417 1 4
8 │ D female 131 244 375 0 4
9 │ E male 53 138 191 1 5
10 │ E female 94 299 393 0 5
11 │ F male 22 351 373 1 6
12 │ F female 24 317 341 0 6
@model function m13_4(applications, dept_id, male, admit)
sigma_dept ~ truncated(Cauchy(0, 2), 0, Inf)
a ~ Normal(0, 10)
a_dept ~ filldist(Normal(a, sigma_dept), 6)
logit_p = a_dept[dept_id]
admit .~ BinomialLogit.(applications, logit_p)
end;
chns = sample(
m13_4(df.applications, df.dept_id, df.male, df.admit),
Turing.NUTS(),
5000
)
Chains MCMC chain (5000×20×1 Array{Float64, 3}):
Iterations = 1001:1:6000
Number of chains = 1
Samples per chain = 5000
Wall duration = 11.61 seconds
Compute duration = 11.61 seconds
parameters = sigma_dept, a, a_dept[1], a_dept[2], a_dept[3], a_dept[4], a_dept[5], a_dept[6]
internals = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size
Summary Statistics
parameters mean std naive_se mcse ess rhat ess_per_sec
Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
sigma_dept 1.4496 0.5571 0.0079 0.0094 3294.5325 1.0000 283.6935
a -0.6550 0.6212 0.0088 0.0096 3512.4987 0.9999 302.4626
a_dept[1] 0.5910 0.0688 0.0010 0.0010 5355.1578 1.0003 461.1347
a_dept[2] 0.5369 0.0845 0.0012 0.0012 4877.0565 0.9999 419.9653
a_dept[3] -0.6157 0.0706 0.0010 0.0012 4714.6794 0.9998 405.9829
a_dept[4] -0.6651 0.0752 0.0011 0.0010 4935.4210 0.9999 424.9910
a_dept[5] -1.0895 0.0953 0.0013 0.0016 4207.4434 0.9998 362.3046
a_dept[6] -2.6515 0.1510 0.0021 0.0020 5131.4029 0.9998 441.8671
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
sigma_dept 0.7564 1.0770 1.3341 1.6745 2.8615
a -1.8786 -1.0174 -0.6575 -0.2834 0.5888
a_dept[1] 0.4558 0.5442 0.5917 0.6361 0.7253
a_dept[2] 0.3719 0.4807 0.5367 0.5929 0.7044
a_dept[3] -0.7512 -0.6651 -0.6158 -0.5665 -0.4802
a_dept[4] -0.8151 -0.7153 -0.6649 -0.6147 -0.5165
a_dept[5] -1.2785 -1.1525 -1.0908 -1.0277 -0.9011
a_dept[6] -2.9639 -2.7491 -2.6481 -2.5501 -2.3665
using StatsPlots
StatsPlots.plot(chns)
"/home/runner/work/TuringModels.jl/TuringModels.jl/__site/assets/models/ignoring-gender-admit/code/output/chns.svg"
m_13_4_rethinking = """
Inference for Stan model: 0c5c36512c20c41995d1bd25be525138.
3 chains, each with iter=4500; warmup=500; thin=1;
post-warmup draws per chain=4000, total post-warmup draws=12000.
mean se_mean sd 2.5% 25% 50% 75% 97.5%
a_dept[1] 0.59 0.00 0.07 0.46 0.54 0.59 0.64 0.73
a_dept[2] 0.54 0.00 0.09 0.37 0.48 0.54 0.60 0.71
a_dept[3] -0.62 0.00 0.07 -0.75 -0.66 -0.62 -0.57 -0.49
a_dept[4] -0.67 0.00 0.08 -0.81 -0.72 -0.67 -0.62 -0.52
a_dept[5] -1.09 0.00 0.10 -1.28 -1.15 -1.09 -1.03 -0.90
a_dept[6] -2.65 0.00 0.15 -2.96 -2.75 -2.65 -2.55 -2.37
a -0.63 0.01 0.63 -1.89 -1.01 -0.64 -0.26 0.66
sigma_dept 1.45 0.01 0.57 0.77 1.08 1.33 1.68 2.90
lp__ -2602.37 0.03 2.08 -2607.45 -2603.49 -2602.01 -2600.86 -2599.35
n_eff Rhat
a_dept[1] 11285 1
a_dept[2] 11943 1
a_dept[3] 12707 1
a_dept[4] 11592 1
a_dept[5] 14598 1
a_dept[6] 13363 1
a 6965 1
sigma_dept 5643 1
lp__ 5322 1
""";