Spatial autocorrelation in Oceanic tools

Data
Model
Output
Original output

Data

import CSV
import Random
import TuringModels

using DataFrames
using RData
using LinearAlgebra

Random.seed!(1)

data_dir = joinpath(TuringModels.project_root, "data")
kline_path = joinpath(data_dir, "Kline2.csv")
df = CSV.read(kline_path, DataFrame; delim=';')

dmat_path = joinpath(data_dir, "islandsDistMatrix.rda")
dmat = load(dmat_path)["islandsDistMatrix"]

df.society = 1:10;
df

10×10 DataFrame
 Row │ culture     population  contact  total_tools  mean_TU  lat      lon      lon2     logpop    society
     │ String15    Int64       String7  Int64        Float64  Float64  Float64  Float64  Float64   Int64
─────┼─────────────────────────────────────────────────────────────────────────────────────────────────────
   1 │ Malekula          1100  low               13      3.2    -16.3    167.5    -12.5   7.00307        1
   2 │ Tikopia           1500  low               22      4.7    -12.3    168.8    -11.2   7.31322        2
   3 │ Santa Cruz        3600  low               24      4.0    -10.7    166.0    -14.0   8.18869        3
   4 │ Yap               4791  high              43      5.0      9.5    138.1    -41.9   8.47449        4
   5 │ Lau Fiji          7400  high              33      5.0    -17.7    178.1     -1.9   8.90924        5
   6 │ Trobriand         8000  high              19      4.0     -8.7    150.9    -29.1   8.9872         6
   7 │ Chuuk             9200  high              40      3.8      7.4    151.6    -28.4   9.12696        7
   8 │ Manus            13000  low               28      6.6     -2.1    146.9    -33.1   9.4727         8
   9 │ Tonga            17500  high              55      5.4    -21.2   -175.2      4.8   9.76996        9
  10 │ Hawaii          275000  low               71      6.6     19.9   -155.6     24.4  12.5245        10

Model

using Turing

@model function m13_7(Dmat, society, logpop, total_tools)
    rhosq ~ truncated(Cauchy(0, 1), 0, Inf)
    etasq ~ truncated(Cauchy(0, 1), 0, Inf)
    bp ~ Normal(0, 1)
    a ~ Normal(0, 10)

    # GPL2
    SIGMA_Dmat = etasq * exp.(-rhosq * Dmat.^2)
    SIGMA_Dmat = SIGMA_Dmat + 0.01I
    SIGMA_Dmat = (SIGMA_Dmat' + SIGMA_Dmat) / 2
    g ~ MvNormal(zeros(10), SIGMA_Dmat)

    log_lambda = a .+ g[society] .+ bp * logpop

    total_tools .~ Poisson.(exp.(log_lambda))
end

chns = sample(
    m13_7(dmat, df.society, df.logpop, df.total_tools),
    NUTS(),
    5000
)

Chains MCMC chain (5000×26×1 Array{Float64, 3}):

Iterations        = 1001:1:6000
Number of chains  = 1
Samples per chain = 5000
Wall duration     = 51.58 seconds
Compute duration  = 51.58 seconds
parameters        = rhosq, etasq, bp, a, g[1], g[2], g[3], g[4], g[5], g[6], g[7], g[8], g[9], g[10]
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
  parameters      mean       std   naive_se      mcse         ess      rhat   ess_per_sec
      Symbol   Float64   Float64    Float64   Float64     Float64   Float64       Float64

       rhosq    1.4731    8.5127     0.1204    0.1803   2278.0414    1.0009       44.1609
       etasq    0.3302    0.4437     0.0063    0.0153    834.7156    1.0006       16.1814
          bp    0.2443    0.1120     0.0016    0.0034   1024.9052    1.0003       19.8683
           a    1.3205    1.1603     0.0164    0.0426    707.7427    1.0000       13.7199
        g[1]   -0.2750    0.4613     0.0065    0.0194    512.7885    0.9998        9.9407
        g[2]   -0.1250    0.4524     0.0064    0.0199    488.5482    0.9999        9.4707
        g[3]   -0.1696    0.4387     0.0062    0.0192    481.2797    1.0000        9.3298
        g[4]    0.2933    0.3877     0.0055    0.0166    503.0492    0.9999        9.7518
        g[5]    0.0273    0.3904     0.0055    0.0164    516.3554    0.9999       10.0098
        g[6]   -0.4585    0.4005     0.0057    0.0166    540.4004    1.0002       10.4759
        g[7]    0.0971    0.3800     0.0054    0.0157    535.0840    1.0000       10.3729
        g[8]   -0.2617    0.3885     0.0055    0.0157    551.1514    1.0003       10.6843
        g[9]    0.2360    0.3626     0.0051    0.0148    557.8682    1.0003       10.8145
       g[10]   -0.1137    0.4555     0.0064    0.0133    982.8920    1.0010       19.0538

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5%
      Symbol   Float64   Float64   Float64   Float64   Float64

       rhosq    0.0328    0.1618    0.4005    1.0053    8.0913
       etasq    0.0280    0.0995    0.1887    0.3792    1.4831
          bp    0.0207    0.1724    0.2441    0.3134    0.4769
           a   -1.0497    0.6079    1.3055    2.0368    3.7503
        g[1]   -1.3210   -0.4977   -0.2389   -0.0146    0.5371
        g[2]   -1.1549   -0.3389   -0.1002    0.1228    0.6713
        g[3]   -1.2174   -0.3683   -0.1408    0.0737    0.5947
        g[4]   -0.6123    0.1260    0.3054    0.4900    0.9674
        g[5]   -0.8993   -0.1416    0.0435    0.2301    0.7137
        g[6]   -1.4320   -0.6337   -0.4223   -0.2302    0.1960
        g[7]   -0.8001   -0.0617    0.1163    0.2888    0.7523
        g[8]   -1.2321   -0.4233   -0.2293   -0.0562    0.4184
        g[9]   -0.6445    0.0880    0.2562    0.4197    0.8593
       g[10]   -1.1174   -0.3486   -0.0919    0.1489    0.7420

Output

using StatsPlots

StatsPlots.plot(chns)

"/home/runner/work/TuringModels.jl/TuringModels.jl/__site/assets/models/spatial-autocorrelation-oceanic/code/output/chns.svg"

Original output

m_13_7_rethinking = """
Inference for Stan model: 6422d8042e9cdd08dae2420ad26842f1.
    4 chains, each with iter=10000; warmup=2000; thin=1;
    post-warmup draws per chain=8000, total post-warmup draws=32000.

            mean se_mean    sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
    g[1]   -0.27    0.01  0.44  -1.26  -0.50  -0.24  -0.01   0.53  3278    1
    g[2]   -0.13    0.01  0.43  -1.07  -0.34  -0.10   0.12   0.67  3144    1
    g[3]   -0.17    0.01  0.42  -1.10  -0.37  -0.14   0.07   0.59  3096    1
    g[4]    0.30    0.01  0.37  -0.51   0.12   0.30   0.50   1.02  3145    1
    g[5]    0.03    0.01  0.37  -0.77  -0.15   0.04   0.22   0.72  3055    1
    g[6]   -0.46    0.01  0.38  -1.31  -0.64  -0.42  -0.23   0.19  3306    1
    g[7]    0.10    0.01  0.36  -0.70  -0.07   0.11   0.29   0.78  3175    1
    g[8]   -0.26    0.01  0.37  -1.07  -0.43  -0.24  -0.06   0.40  3246    1
    g[9]    0.23    0.01  0.35  -0.52   0.07   0.25   0.42   0.89  3302    1
    g[10]  -0.12    0.01  0.45  -1.04  -0.36  -0.11   0.13   0.77  5359    1
    a       1.31    0.02  1.15  -0.98   0.61   1.31   2.00   3.69  4389    1
    bp      0.25    0.00  0.11   0.02   0.18   0.24   0.31   0.47  5634    1
    etasq   0.34    0.01  0.53   0.03   0.10   0.20   0.39   1.52  5643    1
    rhosq   1.52    0.09 11.82   0.03   0.16   0.39   0.96   7.96 15955    1
    lp__  925.98    0.03  2.96 919.16 924.20 926.34 928.14 930.67  7296    1
""";

CC BY-SA 4.0 Rob Goedman, Richard Torkar, Rik Huijzer, Martin Trapp and contributors. Last modified: December 15, 2022. Website built with Franklin.jl and the Julia programming language.