Oceanic tool complexity

This is intentionally a "bad" model, since it uses the non-centered predictor log_pop. A centered version is available on the centered oceanic tool complexity page.

  1. Data
  2. Model
  3. Output
  4. Original output

Data

import CSV
import Random
import TuringModels

using DataFrames

Random.seed!(1)

# Read the Kline data set that ships with TuringModels.
delim = ';'
data_path = joinpath(TuringModels.project_root, "data", "Kline.csv")
df = CSV.read(data_path, DataFrame; delim)

# Log-transform population and encode contact as a 0/1 indicator.
df.log_pop = log.(df.population)
df.contact_high = [contact == "high" ? 1 : 0 for contact in df.contact]
df
10×7 DataFrame
 Row │ culture     population  contact  total_tools  mean_TU  log_pop   contact_high
     │ String15    Int64       String7  Int64        Float64  Float64   Int64
─────┼───────────────────────────────────────────────────────────────────────────────
   1 │ Malekula          1100  low               13      3.2   7.00307             0
   2 │ Tikopia           1500  low               22      4.7   7.31322             0
   3 │ Santa Cruz        3600  low               24      4.0   8.18869             0
   4 │ Yap               4791  high              43      5.0   8.47449             1
   5 │ Lau Fiji          7400  high              33      5.0   8.90924             1
   6 │ Trobriand         8000  high              19      4.0   8.9872              1
   7 │ Chuuk             9200  high              40      3.8   9.12696             1
   8 │ Manus            13000  low               28      6.6   9.4727              0
   9 │ Tonga            17500  high              55      5.4   9.76996             1
  10 │ Hawaii          275000  low               71      6.6  12.5245              0
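The centered variant mentioned in the note above differs only in this preprocessing step: it subtracts the mean of log_pop before fitting. A minimal sketch of that step (the column name log_pop_c is illustrative, not used elsewhere on this page):

using Statistics

# Centered predictor, as used by the centered version of this model.
df.log_pop_c = df.log_pop .- mean(df.log_pop)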

Model

using Turing

@model function m10_10stan(total_tools, log_pop, contact_high)
    # Priors: a wide prior on the intercept, standard normal priors on the slopes.
    α ~ Normal(0, 100)
    βp ~ Normal(0, 1)
    βc ~ Normal(0, 1)
    βpc ~ Normal(0, 1)

    # Poisson likelihood with a log link; βpc is the population × contact interaction.
    for i ∈ 1:length(total_tools)
        λ = exp(α + βp*log_pop[i] + βc*contact_high[i] +
            βpc*contact_high[i]*log_pop[i])
        total_tools[i] ~ Poisson(λ)
    end
end;
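The priors above can be inspected on their own by drawing parameter values from the model without letting the data inform them. A minimal sketch, assuming Turing's Prior sampler (prior_chns is an illustrative name):

# Draw parameter values from the priors only; the observations are ignored by the Prior sampler.
prior_chns = sample(
    m10_10stan(df.total_tools, df.log_pop, df.contact_high),
    Prior(),
    1000
)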

Output

chns = sample(
    m10_10stan(df.total_tools, df.log_pop, df.contact_high),
    NUTS(),
    1000
)
Chains MCMC chain (1000×16×1 Array{Float64, 3}):

Iterations        = 501:1:1500
Number of chains  = 1
Samples per chain = 1000
Wall duration     = 10.46 seconds
Compute duration  = 10.46 seconds
parameters        = α, βp, βc, βpc
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
  parameters      mean       std   naive_se      mcse        ess      rhat   ess_per_sec
      Symbol   Float64   Float64    Float64   Float64    Float64   Float64       Float64

           α    0.9405    0.3622     0.0115    0.0196   326.1608    1.0046       31.1877
          βp    0.2641    0.0350     0.0011    0.0019   320.7521    1.0046       30.6705
          βc   -0.0679    0.8847     0.0280    0.0500   272.4546    1.0028       26.0523
         βpc    0.0401    0.0968     0.0031    0.0055   274.0906    1.0033       26.2087

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5%
      Symbol   Float64   Float64   Float64   Float64   Float64

           α    0.2322    0.6806    0.9625    1.2130    1.5952
          βp    0.2010    0.2382    0.2627    0.2892    0.3355
          βc   -1.7875   -0.6647   -0.0906    0.5256    1.6761
         βpc   -0.1578   -0.0224    0.0420    0.1045    0.2285
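Because of the log link, the posterior means above translate into an expected tool count via exponentiation. A rough check for Trobriand (log_pop ≈ 8.99, high contact), using the posterior means from this run, so the exact value will vary between runs:

# Expected tool count at the posterior means for Trobriand (log_pop ≈ 8.99, contact_high = 1).
λ_hat = exp(0.9405 + 0.2641 * 8.99 + (-0.0679) * 1 + 0.0401 * 8.99 * 1)
# ≈ 36.9, compared with the observed 19 total tools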

using StatsPlots

StatsPlots.plot(chns)
"/home/runner/work/TuringModels.jl/TuringModels.jl/__site/assets/models/oceanic-tool-complexity/code/output/chns.svg"

Original output

m_10_10t_result = "
     mean   sd  5.5% 94.5% n_eff Rhat
 a    0.94 0.37  0.36  1.53  3379    1
 bp   0.26 0.04  0.21  0.32  3347    1
 bc  -0.08 0.84 -1.41  1.23  2950    1
 bpc  0.04 0.09 -0.10  0.19  2907    1
";