TuringModels

Multinomial Poisson regression

On page 399 of Statistical Rethinking Edition 1, m13.2 is defined as

AiBinomial(ni,pi)logit(pi)=αdept[i]+βmiαdeptNormal(α,σ)αNormal(0,10)βNormal(0,1)σHalfCauchy(0,2) \begin{aligned} A_i &\sim \text{Binomial}(n_i, p_i) \\ \text{logit}(p_i) &= \alpha_\text{dept}[i] + \beta m_i \\ \alpha_\text{dept} &\sim \text{Normal}(\alpha, \sigma) \\ \alpha &\sim \text{Normal}(0, 10) \\ \beta &\sim \text{Normal}(0, 1) \\ \sigma &\sim \text{HalfCauchy}(0, 2) \end{aligned}
  1. Data
  2. Model
  3. Output
  4. Original output

Data

import CSV
import Random
import TuringModels

using DataFrames

Random.seed!(1)

data_path = joinpath(TuringModels.project_root, "data", "UCBadmit.csv")
df = CSV.read(data_path, DataFrame; delim=';')
12×5 DataFrame
 Row │ dept     gender   admit  reject  applications
     │ String1  String7  Int64  Int64   Int64
─────┼───────────────────────────────────────────────
   1 │ A        male       512     313           825
   2 │ A        female      89      19           108
   3 │ B        male       353     207           560
   4 │ B        female      17       8            25
   5 │ C        male       120     205           325
   6 │ C        female     202     391           593
   7 │ D        male       138     279           417
   8 │ D        female     131     244           375
   9 │ E        male        53     138           191
  10 │ E        female      94     299           393
  11 │ F        male        22     351           373
  12 │ F        female      24     317           341

Model

using Turing

dept_map = Dict(key => idx for (idx, key) in enumerate(unique(df.dept)))
df.male = [g == "male" ? 1 : 0 for g in df.gender]
df.dept_id = [dept_map[de] for de in df.dept]

@model function m13_2(applications, dept_id, male, admit)
    sigma_dept ~ truncated(Cauchy(0, 2), 0, Inf)
    bm ~ Normal(0, 1)
    a ~ Normal(0, 10)
    a_dept ~ filldist(Normal(a, sigma_dept), 6)

    logit_p = a_dept[dept_id] + bm*male

    admit .~ BinomialLogit.(applications, logit_p)
end

chns = sample(
    m13_2(df.applications, df.dept_id, df.male, df.admit),
    NUTS(),
    1000
)
Chains MCMC chain (1000×21×1 Array{Float64, 3}):

Iterations        = 501:1:1500
Number of chains  = 1
Samples per chain = 1000
Wall duration     = 9.1 seconds
Compute duration  = 9.1 seconds
parameters        = sigma_dept, bm, a, a_dept[1], a_dept[2], a_dept[3], a_dept[4], a_dept[5], a_dept[6]
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
  parameters      mean       std   naive_se      mcse         ess      rhat   ess_per_sec
      Symbol   Float64   Float64    Float64   Float64     Float64   Float64       Float64

  sigma_dept    1.4562    0.5572     0.0176    0.0224    708.3248    1.0050       77.8293
          bm   -0.0922    0.0798     0.0025    0.0037    388.0852    1.0126       42.6420
           a   -0.6067    0.6206     0.0196    0.0221    770.8409    0.9999       84.6985
   a_dept[1]    0.6706    0.0928     0.0029    0.0036    681.3253    1.0070       74.8627
   a_dept[2]    0.6263    0.1193     0.0038    0.0052    601.5435    1.0076       66.0964
   a_dept[3]   -0.5852    0.0695     0.0022    0.0021   1047.1394    1.0023      115.0576
   a_dept[4]   -0.6182    0.0873     0.0028    0.0031    906.8329    1.0071       99.6410
   a_dept[5]   -1.0599    0.0976     0.0031    0.0024   1423.1180    1.0000      156.3694
   a_dept[6]   -2.6078    0.1555     0.0049    0.0050   1288.2488    1.0007      141.5503

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5%
      Symbol   Float64   Float64   Float64   Float64   Float64

  sigma_dept    0.7463    1.0834    1.3206    1.6934    2.8494
          bm   -0.2491   -0.1473   -0.0936   -0.0416    0.0693
           a   -1.8714   -0.9708   -0.6026   -0.2258    0.6506
   a_dept[1]    0.4914    0.6118    0.6684    0.7369    0.8475
   a_dept[2]    0.3790    0.5486    0.6270    0.7062    0.8541
   a_dept[3]   -0.7207   -0.6341   -0.5837   -0.5387   -0.4515
   a_dept[4]   -0.7834   -0.6776   -0.6174   -0.5568   -0.4519
   a_dept[5]   -1.2433   -1.1266   -1.0561   -0.9960   -0.8678
   a_dept[6]   -2.9245   -2.7103   -2.6038   -2.4970   -2.3257

Output

using StatsPlots

StatsPlots.plot(chns)
"/home/runner/work/TuringModels.jl/TuringModels.jl/__site/assets/models/multinomial-poisson/code/output/chns.svg"

Original output

"""
Inference for Stan model: 359c2483e3bdbf74fd0484be27c2909b.
    3 chains, each with iter=4500; warmup=500; thin=1;
    post-warmup draws per chain=4000, total post-warmup draws=12000.

                   mean se_mean   sd     2.5%      25%      50%      75%    97.5%
    a_dept[1]      0.67    0.00 0.10     0.48     0.61     0.67     0.74     0.87
    a_dept[2]      0.63    0.00 0.12     0.40     0.55     0.63     0.71     0.85
    a_dept[3]     -0.58    0.00 0.08    -0.73    -0.63    -0.58    -0.53    -0.44
    a_dept[4]     -0.62    0.00 0.09    -0.79    -0.67    -0.62    -0.56    -0.45
    a_dept[5]     -1.06    0.00 0.10    -1.26    -1.13    -1.06    -0.99    -0.86
    a_dept[6]     -2.61    0.00 0.16    -2.92    -2.71    -2.60    -2.50    -2.31
    a             -0.59    0.01 0.66    -1.94    -0.97    -0.59    -0.21     0.73
    bm            -0.09    0.00 0.08    -0.25    -0.15    -0.09    -0.04     0.06
    sigma_dept     1.49    0.01 0.62     0.78     1.09     1.35     1.71     3.01
    lp__       -2602.27    0.04 2.27 -2607.74 -2603.48 -2601.90 -2600.61 -2598.95
               n_eff Rhat
    a_dept[1]   8191    1
    a_dept[2]   8282    1
    a_dept[3]  10974    1
    a_dept[4]  10034    1
    a_dept[5]  10783    1
    a_dept[6]  10344    1
    a           5839    1
    bm          7137    1
    sigma_dept  4626    1
    lp__        4200    1
""";