TuringModels

Ignoring gender for admittance

This is model 13.4 from Statistical Rethinking Edition 1.

  1. Data
  2. Model
  3. Output
  4. Original output

Data

import CSV
import Random

using DataFrames
using Turing
using TuringModels

Random.seed!(1)

data_path = joinpath(TuringModels.project_root, "data", "UCBadmit.csv")
df = CSV.read(data_path, DataFrame; delim=';')

dept_map = Dict(key => idx for (idx, key) in enumerate(unique(df.dept)))
df.male = [g == "male" ? 1 : 0 for g in df.gender]
df.dept_id = [dept_map[de] for de in df.dept]
df
12×7 DataFrame
 Row │ dept       gender     admit  reject  applications  male   dept_id
     │ InlineSt…  InlineSt…  Int64  Int64   Int64         Int64  Int64
─────┼───────────────────────────────────────────────────────────────────
   1 │ A          male         512     313           825      1        1
   2 │ A          female        89      19           108      0        1
   3 │ B          male         353     207           560      1        2
   4 │ B          female        17       8            25      0        2
   5 │ C          male         120     205           325      1        3
   6 │ C          female       202     391           593      0        3
   7 │ D          male         138     279           417      1        4
   8 │ D          female       131     244           375      0        4
   9 │ E          male          53     138           191      1        5
  10 │ E          female        94     299           393      0        5
  11 │ F          male          22     351           373      1        6
  12 │ F          female        24     317           341      0        6

Model

@model function m13_4(applications, dept_id, male, admit)
    sigma_dept ~ truncated(Cauchy(0, 2), 0, Inf)
    a ~ Normal(0, 10)
    a_dept ~ filldist(Normal(a, sigma_dept), 6)

    logit_p = a_dept[dept_id]

    admit .~ BinomialLogit.(applications, logit_p)
end;

Output

chns = sample(
    m13_4(df.applications, df.dept_id, df.male, df.admit),
    Turing.NUTS(),
    5000
)
Chains MCMC chain (5000×20×1 Array{Float64, 3}):

Iterations        = 1001:1:6000
Number of chains  = 1
Samples per chain = 5000
Wall duration     = 8.2 seconds
Compute duration  = 8.2 seconds
parameters        = a, a_dept[1], a_dept[4], a_dept[5], a_dept[2], sigma_dept, a_dept[6], a_dept[3]
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
  parameters      mean       std   naive_se      mcse         ess      rhat   ess_per_sec
      Symbol   Float64   Float64    Float64   Float64     Float64   Float64       Float64

  sigma_dept    1.4539    0.5758     0.0081    0.0109   3293.9732    0.9998      401.9491
           a   -0.6432    0.6322     0.0089    0.0110   3373.5155    0.9998      411.6553
   a_dept[1]    0.5898    0.0687     0.0010    0.0009   5952.8463    1.0007      726.3998
   a_dept[2]    0.5369    0.0874     0.0012    0.0009   7325.8361    0.9998      893.9397
   a_dept[3]   -0.6162    0.0713     0.0010    0.0010   6746.1081    0.9998      823.1981
   a_dept[4]   -0.6662    0.0741     0.0010    0.0008   6137.3194    0.9998      748.9102
   a_dept[5]   -1.0862    0.0945     0.0013    0.0010   6927.3429    0.9998      845.3134
   a_dept[6]   -2.6537    0.1489     0.0021    0.0017   6955.6970    0.9998      848.7733

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5%
      Symbol   Float64   Float64   Float64   Float64   Float64

  sigma_dept    0.7642    1.0643    1.3227    1.6828    2.9444
           a   -1.9242   -0.9910   -0.6327   -0.2856    0.6164
   a_dept[1]    0.4524    0.5441    0.5904    0.6346    0.7255
   a_dept[2]    0.3677    0.4786    0.5373    0.5949    0.7090
   a_dept[3]   -0.7540   -0.6631   -0.6161   -0.5700   -0.4738
   a_dept[4]   -0.8122   -0.7165   -0.6671   -0.6150   -0.5227
   a_dept[5]   -1.2777   -1.1479   -1.0846   -1.0227   -0.9024
   a_dept[6]   -2.9548   -2.7529   -2.6506   -2.5519   -2.3709

using StatsPlots

StatsPlots.plot(chns)

Original output

m_13_4_rethinking = """
Inference for Stan model: 0c5c36512c20c41995d1bd25be525138.
    3 chains, each with iter=4500; warmup=500; thin=1;
    post-warmup draws per chain=4000, total post-warmup draws=12000.

                   mean se_mean   sd     2.5%      25%      50%      75%    97.5%
    a_dept[1]      0.59    0.00 0.07     0.46     0.54     0.59     0.64     0.73
    a_dept[2]      0.54    0.00 0.09     0.37     0.48     0.54     0.60     0.71
    a_dept[3]     -0.62    0.00 0.07    -0.75    -0.66    -0.62    -0.57    -0.49
    a_dept[4]     -0.67    0.00 0.08    -0.81    -0.72    -0.67    -0.62    -0.52
    a_dept[5]     -1.09    0.00 0.10    -1.28    -1.15    -1.09    -1.03    -0.90
    a_dept[6]     -2.65    0.00 0.15    -2.96    -2.75    -2.65    -2.55    -2.37
    a             -0.63    0.01 0.63    -1.89    -1.01    -0.64    -0.26     0.66
    sigma_dept     1.45    0.01 0.57     0.77     1.08     1.33     1.68     2.90
    lp__       -2602.37    0.03 2.08 -2607.45 -2603.49 -2602.01 -2600.86 -2599.35
               n_eff Rhat
    a_dept[1]  11285    1
    a_dept[2]  11943    1
    a_dept[3]  12707    1
    a_dept[4]  11592    1
    a_dept[5]  14598    1
    a_dept[6]  13363    1
    a           6965    1
    sigma_dept  5643    1
    lp__        5322    1
""";