This is supposed to be a "bad" model since we take non-centered data for the predictor log_pop
. A centered version can be found at centered oceanic tool complexity.
import CSV
import Random
import TuringModels
using DataFrames
Random.seed!(1)
delim = ';'
data_path = joinpath(TuringModels.project_root, "data", "Kline.csv")
df = CSV.read(data_path, DataFrame; delim)
df.log_pop = log.(df.population)
df.contact_high = [contact == "high" ? 1 : 0 for contact in df.contact]
df
10×7 DataFrame
Row │ culture population contact total_tools mean_TU log_pop contact_high
│ String15 Int64 String7 Int64 Float64 Float64 Int64
─────┼───────────────────────────────────────────────────────────────────────────────
1 │ Malekula 1100 low 13 3.2 7.00307 0
2 │ Tikopia 1500 low 22 4.7 7.31322 0
3 │ Santa Cruz 3600 low 24 4.0 8.18869 0
4 │ Yap 4791 high 43 5.0 8.47449 1
5 │ Lau Fiji 7400 high 33 5.0 8.90924 1
6 │ Trobriand 8000 high 19 4.0 8.9872 1
7 │ Chuuk 9200 high 40 3.8 9.12696 1
8 │ Manus 13000 low 28 6.6 9.4727 0
9 │ Tonga 17500 high 55 5.4 9.76996 1
10 │ Hawaii 275000 low 71 6.6 12.5245 0
using Turing
@model function m10_10stan(total_tools, log_pop, contact_high)
α ~ Normal(0, 100)
βp ~ Normal(0, 1)
βc ~ Normal(0, 1)
βpc ~ Normal(0, 1)
for i ∈ 1:length(total_tools)
λ = exp(α + βp*log_pop[i] + βc*contact_high[i] +
βpc*contact_high[i]*log_pop[i])
total_tools[i] ~ Poisson(λ)
end
end;
chns = sample(
m10_10stan(df.total_tools, df.log_pop, df.contact_high),
NUTS(),
1000
)
Chains MCMC chain (1000×16×1 Array{Float64, 3}):
Iterations = 501:1:1500
Number of chains = 1
Samples per chain = 1000
Wall duration = 10.46 seconds
Compute duration = 10.46 seconds
parameters = α, βp, βc, βpc
internals = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size
Summary Statistics
parameters mean std naive_se mcse ess rhat ess_per_sec
Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64
α 0.9405 0.3622 0.0115 0.0196 326.1608 1.0046 31.1877
βp 0.2641 0.0350 0.0011 0.0019 320.7521 1.0046 30.6705
βc -0.0679 0.8847 0.0280 0.0500 272.4546 1.0028 26.0523
βpc 0.0401 0.0968 0.0031 0.0055 274.0906 1.0033 26.2087
Quantiles
parameters 2.5% 25.0% 50.0% 75.0% 97.5%
Symbol Float64 Float64 Float64 Float64 Float64
α 0.2322 0.6806 0.9625 1.2130 1.5952
βp 0.2010 0.2382 0.2627 0.2892 0.3355
βc -1.7875 -0.6647 -0.0906 0.5256 1.6761
βpc -0.1578 -0.0224 0.0420 0.1045 0.2285
using StatsPlots
StatsPlots.plot(chns)
"/home/runner/work/TuringModels.jl/TuringModels.jl/__site/assets/models/oceanic-tool-complexity/code/output/chns.svg"
m_10_10t_result = "
mean sd 5.5% 94.5% n_eff Rhat
a 0.94 0.37 0.36 1.53 3379 1
bp 0.26 0.04 0.21 0.32 3347 1
bc -0.08 0.84 -1.41 1.23 2950 1
bpc 0.04 0.09 -0.10 0.19 2907 1
";