TuringModels

Ignoring gender for admittance

This is model 13.4 from Statistical Rethinking Edition 1.

  1. Data
  2. Model
  3. Output
  4. Original output

Data

import CSV
import Random

using DataFrames
using Turing
using TuringModels

Random.seed!(1)

data_path = joinpath(TuringModels.project_root, "data", "UCBadmit.csv")
df = CSV.read(data_path, DataFrame; delim=';')

dept_map = Dict(key => idx for (idx, key) in enumerate(unique(df.dept)))
df.male = [g == "male" ? 1 : 0 for g in df.gender]
df.dept_id = [dept_map[de] for de in df.dept]
df
12×7 DataFrame
 Row │ dept     gender   admit  reject  applications  male   dept_id
     │ String1  String7  Int64  Int64   Int64         Int64  Int64
─────┼───────────────────────────────────────────────────────────────
   1 │ A        male       512     313           825      1        1
   2 │ A        female      89      19           108      0        1
   3 │ B        male       353     207           560      1        2
   4 │ B        female      17       8            25      0        2
   5 │ C        male       120     205           325      1        3
   6 │ C        female     202     391           593      0        3
   7 │ D        male       138     279           417      1        4
   8 │ D        female     131     244           375      0        4
   9 │ E        male        53     138           191      1        5
  10 │ E        female      94     299           393      0        5
  11 │ F        male        22     351           373      1        6
  12 │ F        female      24     317           341      0        6

Model

@model function m13_4(applications, dept_id, male, admit)
    sigma_dept ~ truncated(Cauchy(0, 2), 0, Inf)
    a ~ Normal(0, 10)
    a_dept ~ filldist(Normal(a, sigma_dept), 6)

    logit_p = a_dept[dept_id]

    admit .~ BinomialLogit.(applications, logit_p)
end;

Output

chns = sample(
    m13_4(df.applications, df.dept_id, df.male, df.admit),
    Turing.NUTS(),
    5000
)
Chains MCMC chain (5000×20×1 Array{Float64, 3}):

Iterations        = 1001:1:6000
Number of chains  = 1
Samples per chain = 5000
Wall duration     = 11.61 seconds
Compute duration  = 11.61 seconds
parameters        = sigma_dept, a, a_dept[1], a_dept[2], a_dept[3], a_dept[4], a_dept[5], a_dept[6]
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
  parameters      mean       std   naive_se      mcse         ess      rhat   ess_per_sec
      Symbol   Float64   Float64    Float64   Float64     Float64   Float64       Float64

  sigma_dept    1.4496    0.5571     0.0079    0.0094   3294.5325    1.0000      283.6935
           a   -0.6550    0.6212     0.0088    0.0096   3512.4987    0.9999      302.4626
   a_dept[1]    0.5910    0.0688     0.0010    0.0010   5355.1578    1.0003      461.1347
   a_dept[2]    0.5369    0.0845     0.0012    0.0012   4877.0565    0.9999      419.9653
   a_dept[3]   -0.6157    0.0706     0.0010    0.0012   4714.6794    0.9998      405.9829
   a_dept[4]   -0.6651    0.0752     0.0011    0.0010   4935.4210    0.9999      424.9910
   a_dept[5]   -1.0895    0.0953     0.0013    0.0016   4207.4434    0.9998      362.3046
   a_dept[6]   -2.6515    0.1510     0.0021    0.0020   5131.4029    0.9998      441.8671

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5%
      Symbol   Float64   Float64   Float64   Float64   Float64

  sigma_dept    0.7564    1.0770    1.3341    1.6745    2.8615
           a   -1.8786   -1.0174   -0.6575   -0.2834    0.5888
   a_dept[1]    0.4558    0.5442    0.5917    0.6361    0.7253
   a_dept[2]    0.3719    0.4807    0.5367    0.5929    0.7044
   a_dept[3]   -0.7512   -0.6651   -0.6158   -0.5665   -0.4802
   a_dept[4]   -0.8151   -0.7153   -0.6649   -0.6147   -0.5165
   a_dept[5]   -1.2785   -1.1525   -1.0908   -1.0277   -0.9011
   a_dept[6]   -2.9639   -2.7491   -2.6481   -2.5501   -2.3665

using StatsPlots

StatsPlots.plot(chns)
"/home/runner/work/TuringModels.jl/TuringModels.jl/__site/assets/models/ignoring-gender-admit/code/output/chns.svg"

Original output

m_13_4_rethinking = """
Inference for Stan model: 0c5c36512c20c41995d1bd25be525138.
    3 chains, each with iter=4500; warmup=500; thin=1;
    post-warmup draws per chain=4000, total post-warmup draws=12000.

                   mean se_mean   sd     2.5%      25%      50%      75%    97.5%
    a_dept[1]      0.59    0.00 0.07     0.46     0.54     0.59     0.64     0.73
    a_dept[2]      0.54    0.00 0.09     0.37     0.48     0.54     0.60     0.71
    a_dept[3]     -0.62    0.00 0.07    -0.75    -0.66    -0.62    -0.57    -0.49
    a_dept[4]     -0.67    0.00 0.08    -0.81    -0.72    -0.67    -0.62    -0.52
    a_dept[5]     -1.09    0.00 0.10    -1.28    -1.15    -1.09    -1.03    -0.90
    a_dept[6]     -2.65    0.00 0.15    -2.96    -2.75    -2.65    -2.55    -2.37
    a             -0.63    0.01 0.63    -1.89    -1.01    -0.64    -0.26     0.66
    sigma_dept     1.45    0.01 0.57     0.77     1.08     1.33     1.68     2.90
    lp__       -2602.37    0.03 2.08 -2607.45 -2603.49 -2602.01 -2600.86 -2599.35
               n_eff Rhat
    a_dept[1]  11285    1
    a_dept[2]  11943    1
    a_dept[3]  12707    1
    a_dept[4]  11592    1
    a_dept[5]  14598    1
    a_dept[6]  13363    1
    a           6965    1
    sigma_dept  5643    1
    lp__        5322    1
""";