In [1]:
using Turing
using DataFrames
using CSV
using Random
using Distributions
using StatisticalRethinking
using StatisticalRethinking: link
using StatisticalRethinkingPlots
using ParetoSmooth
using StatsPlots
using Plots.PlotMeasures
using StatsBase
using FreqTables
using Logging

default(label=false);
Logging.disable_logging(Logging.Warn);

13.1 Example: multilevel tadpoles

Code 13.1

In [2]:
d = DataFrame(CSV.File("data/reedfrogs.csv"))
describe(d)
Out[2]:

5 rows × 7 columns

 Row │ variable  mean      min       median    max    nmissing  eltype
     │ Symbol    Union…    Any       Union…    Any    Int64     DataType
─────┼───────────────────────────────────────────────────────────────────
   1 │ density   23.3333   10        25.0      35     0         Int64
   2 │ pred                no                  pred   0         String7
   3 │ size                big                 small  0         String7
   4 │ surv      16.3125   4         12.5      35     0         Int64
   5 │ propsurv  0.721607  0.114286  0.885714  1.0    0         Float64

Code 13.2

In [3]:
d.tank = 1:nrow(d)

@model function m13_1(S, N, tank)
    tank_size = length(levels(tank))
    a ~ filldist(Normal(0, 1.5), tank_size)
    p = logistic.(a)
    @. S ~ Binomial(N, p)
end

Random.seed!(1)
m13_1_ch = sample(m13_1(d.surv, d.density, d.tank), NUTS(200, 0.65, init_ϵ=0.5), 1000)
m13_1_df = DataFrame(m13_1_ch);

Code 13.3

In [4]:
@model function m13_2(S, N, tank)
    tank_size = length(levels(tank))
    σ ~ Exponential()
    ā ~ Normal(0, 1.5)
    a ~ filldist(Normal(ā, σ), tank_size)
    p = logistic.(a)
    @. S ~ Binomial(N, p)
end

Random.seed!(1)
m13_2_ch = sample(m13_2(d.surv, d.density, d.tank), NUTS(200, 0.65, init_ϵ=0.2), 1000)
m13_2_df = DataFrame(m13_2_ch);

Code 13.4

In [5]:
link_fun = (r, dr) -> begin
    a = get(r, "a[$(dr.tank)]", 0)
    p = logistic(a)
    binomlogpdf(dr.density, p, dr.surv)
end

m1_ll = link(m13_1_df, link_fun, eachrow(d))
m1_ll = hcat(m1_ll...);

m2_ll = link(m13_2_df, link_fun, eachrow(d))
m2_ll = hcat(m2_ll...);

compare([m1_ll, m2_ll], :waic, mnames=["m13.1", "m13.2"])
Out[5]:

2 rows × 8 columns

 Row │ models  WAIC     lppd     SE       dWAIC    dSE      pWAIC    weight
     │ String  Float64  Float64  Float64  Float64  Float64  Float64  Float64
─────┼───────────────────────────────────────────────────────────────────────
   1 │ m13.2   201.2    158.41   7.47     0.0      0.0      21.4     1.0
   2 │ m13.1   214.7    163.28   4.68     13.5     3.99     25.71    0.0

Code 13.5

In [6]:
Random.seed!()
post = sample(resetrange(m13_2_ch), 10000)
post_df = DataFrame(post)

propsurv_est = [
    logistic(mean(post_df[:,"a[$i]"]))
    for i ∈ 1:nrow(d)
]

scatter(propsurv_est, mc=:white, xlab="tank", ylab="proportion survival", ylim=(-0.05, 1.05))
scatter!(d.propsurv, mc=:blue, ms=3)
hline!([mean(logistic.(post_df.ā))], ls=:dash, c=:black)
vline!([16.5, 32.5], c=:black)
annotate!([
        (8, 0, ("small tanks", 10)),
        (16+8, 0, ("medium tanks", 10)),
        (32+8, 0, ("large tanks", 10))
    ])
Out[6]:

Code 13.6

In [7]:
p1 = plot(xlim=(-3, 4), xlab="log-odds survive", ylab="Density")

for r ∈ first(eachrow(post_df), 100)
    plot!(Normal(r.ā, r.σ), c=:black, alpha=0.2)
end

sim_tanks = @. rand(Normal(post_df.ā[1:8000], post_df.σ[1:8000]));
p2 = plot(xlab="probability survive", ylab="Density", xlim=(-0.1, 1.1))
density!(logistic.(sim_tanks), lw=2)

plot(p1, p2, size=(800, 400), margin=2mm)
Out[7]:

13.2 Varying effects and the underfitting/overfitting trade-off

Code 13.7

In [8]:
ā = 1.5
σ = 1.5
nponds = 60
Ni = repeat([5, 10, 25, 35], inner=15);

Code 13.8

In [9]:
Random.seed!(5005)
a_pond = rand(Normal(ā, σ), nponds);

Code 13.9

In [10]:
dsim = DataFrame(pond=1:nponds, Ni=Ni, true_a=a_pond);

Code 13.10

The distinction this R snippet illustrates doesn't matter much in Julia, but for completeness:

In [11]:
typeof(1:3), typeof([1,2,3])
Out[11]:
(UnitRange{Int64}, Vector{Int64})

Code 13.11

In [12]:
Random.seed!(1)
dsim.Si = @. rand(Binomial(dsim.Ni, logistic(dsim.true_a)));

Code 13.12

In [13]:
dsim.p_nopool = dsim.Si ./ dsim.Ni;

Code 13.13

In [14]:
@model function m13_3(Si, Ni, pond)
    σ ~ Exponential()
    ā ~ Normal(0, 1.5)
    a_pond ~ filldist(Normal(ā, σ), nponds)
    p = logistic.(a_pond)
    @. Si ~ Binomial(Ni, p)
end

Random.seed!(1)
m13_3_ch = sample(m13_3(dsim.Si, dsim.Ni, dsim.pond), NUTS(), 1000)
m13_3_df = DataFrame(m13_3_ch);

Code 13.14

In [15]:
precis(m13_3_df)
┌────────────┬────────────────────────────────────────────────────────────┐
│      param     mean     std     5.5%      50%    94.5%       histogram │
├────────────┼────────────────────────────────────────────────────────────┤
│ a_pond[10] │  2.7871  1.3037   0.8877    2.668   5.0628       ▁▂▆█▇▃▁▁▁ │
│ a_pond[11] │  2.7634  1.2303    0.991   2.6744   4.9192       ▁▂▇█▇▃▂▁▁ │
│ a_pond[12] │ -0.0117  0.8542  -1.2945  -0.0151   1.2766        ▁▁▃██▃▁▁ │
│ a_pond[13] │  1.5263   0.988  -0.0357   1.5169   3.0946  ▁▁▂▄▇▇█▆▅▂▁▁▁▁ │
│ a_pond[14] │  2.7699   1.234   1.0144   2.7041   4.8799       ▁▂▇█▇▃▁▁▁ │
│ a_pond[15] │  -0.011  0.7978  -1.2401  -0.0285   1.2438    ▁▁▁▃▅██▅▃▁▁▁ │
│ a_pond[16] │  2.1949  0.9062   0.9032   2.1323   3.6539   ▁▁▃▆██▆▄▂▁▁▁▁ │
│ a_pond[17] │  2.1436  0.8338   0.9518   2.0839   3.6436     ▁▂▅▇█▅▃▂▁▁▁ │
│ a_pond[18] │  2.1927  0.9286   0.7956   2.1145   3.7073  ▁▁▃▆██▆▅▂▁▁▁▁▁ │
│ a_pond[19] │   2.175  0.8629   0.8435   2.1215   3.5511    ▁▁▃▅██▆▄▂▁▁▁ │
│  a_pond[1] │ -1.6247  1.0774  -3.5333  -1.5273  -0.0803        ▁▁▂▅█▆▁▁ │
│ a_pond[20] │  0.5822  0.6324   -0.382   0.5682   1.6015       ▁▁▅██▅▂▁▁ │
│ a_pond[21] │   3.128  1.1414   1.4743   3.0138   5.1205        ▁▄█▇▄▂▁▁ │
│ a_pond[22] │  2.1806  0.9028   0.8011   2.1086   3.7658  ▁▁▃▅██▆▄▂▁▁▁▁▁ │
│ a_pond[23] │   1.508  0.7347    0.451   1.4471   2.8261     ▁▁▂▆█▇▄▃▁▁▁ │
│ a_pond[24] │ -2.2068  0.9207  -3.8283  -2.0923  -0.8888  ▁▁▁▁▁▂▄▆██▆▃▁▁ │
│ a_pond[25] │ -1.5408  0.7909  -2.9362  -1.4729  -0.3867     ▁▁▁▂▃▅██▇▃▁ │
│ a_pond[26] │  0.5944  0.6518  -0.4036   0.5621   1.6623       ▁▁▅██▅▂▁▁ │
│ a_pond[27] │  3.1811  1.1907   1.4076   3.0804   5.1803       ▁▄██▅▂▁▁▁ │
│ a_pond[28] │   1.468  0.7848   0.3416   1.4274   2.7582   ▁▁▃▇██▄▃▁▁▁▁▁ │
│ a_pond[29] │  3.1396  1.0943   1.5897   3.0105   4.9926        ▁▃█▇▄▁▁▁ │
│  a_pond[2] │   2.756  1.1946   0.9952   2.6579   4.6886        ▂▆█▆▃▁▁▁ │
│ a_pond[30] │  2.1475  0.9018   0.8159   2.0692   3.6994   ▁▁▃▇██▇▄▂▁▁▁▁ │
│ a_pond[31] │   0.834  0.4247    0.187   0.8166   1.5212        ▁▁▄█▅▁▁▁ │
│ a_pond[32] │  -1.697  0.5355  -2.5934  -1.6737  -0.8806        ▁▁▂▅██▂▁ │
│ a_pond[33] │   0.826  0.4281   0.1572   0.8139   1.5343         ▁▁▄█▅▂▁ │
│ a_pond[34] │  0.3391  0.4084  -0.2986   0.3346    0.992         ▁▃█▆▁▁▁ │
│ a_pond[35] │  0.8264   0.403   0.1995   0.8195   1.4725   ▁▁▂▄▇██▆▅▃▁▁▁ │
│ a_pond[36] │  2.0323  0.5928   1.1086    1.992   3.0308        ▁▁▄█▇▄▂▁ │
│ a_pond[37] │  1.4378  0.4895   0.7066   1.3936   2.2765         ▁▄█▇▂▁▁ │
│ a_pond[38] │  2.9338  0.8079   1.7247   2.8616   4.2329     ▁▁▃▇█▇▅▂▁▁▁ │
│ a_pond[39] │  2.4147  0.6783   1.4143   2.3627   3.4993     ▁▂▆█▆▄▂▁▁▁▁ │
│  a_pond[3] │   2.736  1.2323   0.8996   2.6837   4.8082        ▁▂▇█▇▄▁▁ │
│ a_pond[40] │  1.4392  0.5257   0.6512     1.41   2.2673        ▁▁▄█▇▃▁▁ │
│ a_pond[41] │  1.6916  0.5244   0.9124   1.6683   2.5374        ▁▂██▆▂▁▁ │
│ a_pond[42] │  3.7426  1.0807    2.149   3.6661   5.4972        ▁▆█▇▃▁▁▁ │
│ a_pond[43] │  3.7672  1.0446    2.268   3.6938   5.5001   ▁▁▃▇██▇▅▃▂▁▁▁ │
│ a_pond[44] │   2.974  0.8327   1.7429   2.8818   4.4708    ▁▁▃▆█▆▄▃▂▁▁▁ │
│ a_pond[45] │ -0.3246  0.4108  -0.9746  -0.3269   0.3301          ▁▁▅█▄▁ │
│ a_pond[46] │  1.2722  0.4274   0.6247   1.2772   1.9499  ▁▁▁▂▅▆███▅▃▂▁▁ │
│ a_pond[47] │  2.3605  0.5572    1.544   2.3244   3.2767       ▁▂▅█▆▃▁▁▁ │
│ a_pond[48] │  0.4596  0.3525  -0.0991   0.4657   1.0399    ▁▁▁▃▅▇█▇▄▂▁▁ │
│ a_pond[49] │  0.4414  0.3381  -0.0893   0.4476   0.9788   ▁▁▁▃▆██▇▄▂▁▁▁ │
│  a_pond[4] │ -0.0099  0.8433  -1.3751   0.0001   1.3435  ▁▁▁▁▁▃▅██▆▂▁▁▁ │
│ a_pond[50] │ -0.1176  0.3398  -0.6709  -0.1086   0.4311    ▁▁▃▅▇██▄▃▁▁▁ │
│ a_pond[51] │ -0.4502  0.3454  -1.0103  -0.4489   0.0896    ▁▁▁▂▄▆██▆▃▁▁ │
│ a_pond[52] │  0.1215  0.3239  -0.3864   0.1219   0.6283     ▁▁▂▄██▇▅▂▁▁ │
│ a_pond[53] │  1.4367  0.4463   0.7709   1.4048   2.2121          ▁▃█▆▂▁ │
│ a_pond[54] │  3.2507    0.84   2.0457   3.1751   4.6379   ▁▂▅█▇▆▄▂▁▁▁▁▁ │
│ a_pond[55] │ -1.8379  0.4738  -2.6312  -1.8214  -1.1006          ▁▂▆█▄▁ │
│ a_pond[56] │  1.2708  0.4044    0.662   1.2434   1.9225  ▁▁▂▃▆█▇▆▄▃▁▁▁▁ │
│ a_pond[57] │  1.4354  0.4271   0.7706   1.4289   2.1378         ▁▃█▇▂▁▁ │
│ a_pond[58] │  1.8178  0.4508   1.1293   1.7849    2.573  ▁▂▃▅███▆▄▄▂▁▁▁ │
│ a_pond[59] │ -1.2641  0.3962  -1.9016  -1.2475  -0.6651         ▁▁▄█▄▁▁ │
│  a_pond[5] │  2.6734  1.2277   0.8274   2.6071   4.7021       ▁▂▇█▆▃▁▁▁ │
│ a_pond[60] │  3.9789  1.0396   2.4838   3.8705   5.8658        ▁▄█▆▃▁▁▁ │
│  a_pond[6] │  0.7089  0.8728  -0.6575   0.7054   2.1006   ▁▁▁▃▅▇██▄▂▁▁▁ │
│  a_pond[7] │  1.5054  0.9959  -0.0121   1.4492   3.2375   ▁▁▂▅███▇▄▂▁▁▁ │
│  a_pond[8] │  2.7786   1.293   0.8393   2.6775    4.926      ▁▂▆█▆▃▁▁▁▁ │
│  a_pond[9] │  0.7212  0.8901  -0.6507   0.7049   2.1479    ▁▁▂▆███▄▂▁▁▁ │
│          ā │   1.338  0.2493   0.9468   1.3273   1.7511        ▁▂▆█▇▃▁▁ │
│          σ │   1.719  0.2275   1.3806   1.6944   2.1208        ▁▂▆█▅▃▁▁ │
└────────────┴────────────────────────────────────────────────────────────┘

Code 13.15

In [16]:
dsim.p_partpool = [
    mean(logistic.(m13_3_df[:,"a_pond[$i]"]))
    for i ∈ 1:nponds
];

Code 13.16

In [17]:
dsim.p_true = logistic.(dsim.true_a);

Code 13.17

In [18]:
nopool_error = @. abs(dsim.p_nopool - dsim.p_true)
partpool_error = @. abs(dsim.p_partpool - dsim.p_true);

Code 13.18

In [19]:
scatter(nopool_error, xlab="pond", ylab="absolute error")
scatter!(partpool_error, mc=:white)
Out[19]:

Code 13.19

In [20]:
dsim.nopool_error = nopool_error
dsim.partpool_error = partpool_error

gb = groupby(dsim, :Ni)
nopool_avg = combine(gb, :nopool_error => mean)
partpool_avg = combine(gb, :partpool_error => mean);
nopool_avg, partpool_avg
Out[20]:
(4×2 DataFrame
 Row │ Ni     nopool_error_mean 
     │ Int64  Float64           
─────┼──────────────────────────
   1 │     5          0.147649
   2 │    10          0.0649205
   3 │    25          0.0689178
   4 │    35          0.0477076, 4×2 DataFrame
 Row │ Ni     partpool_error_mean 
     │ Int64  Float64             
─────┼────────────────────────────
   1 │     5            0.138281
   2 │    10            0.0502345
   3 │    25            0.0621743
   4 │    35            0.0450389)

Code 13.20

In [21]:
ā = 1.5
σ = 1.5
nponds = 60
Ni = repeat([5, 10, 25, 35], inner=15)
a_pond = rand(Normal(ā, σ), nponds)

dsim = DataFrame(pond=1:nponds, Ni=Ni, true_a=a_pond)
dsim.Si = @. rand(Binomial(dsim.Ni, logistic(dsim.true_a)))
dsim.p_nopool = dsim.Si ./ dsim.Ni

m13_3_ch = sample(m13_3(dsim.Si, dsim.Ni, dsim.pond), NUTS(), 1000)
m13_3_df = DataFrame(m13_3_ch)

dsim.p_partpool = [
    mean(logistic.(m13_3_df[:,"a_pond[$i]"]))
    for i ∈ 1:nponds
]
dsim.p_true = logistic.(dsim.true_a)
nopool_error = @. abs(dsim.p_nopool - dsim.p_true)
partpool_error = @. abs(dsim.p_partpool - dsim.p_true)

scatter(nopool_error, xlab="pond", ylab="absolute error")
scatter!(partpool_error, mc=:white)
Out[21]:

13.3 More than one type of cluster

Code 13.21

In [22]:
d = DataFrame(CSV.File("data/chimpanzees.csv", delim=';'));
d.treatment = 1 .+ d.prosoc_left .+ 2*d.condition;

Random.seed!(13)

@model function m13_4(pulled_left, actor, block_id, treatment)
    σ_a ~ Exponential()
    σ_g ~ Exponential()
    ā ~ Normal(0, 1.5)
    actors_count = length(levels(actor))
    blocks_count = length(levels(block_id))
    treats_count = length(levels(treatment))
    a ~ filldist(Normal(ā, σ_a), actors_count)
    g ~ filldist(Normal(0, σ_g), blocks_count)
    b ~ filldist(Normal(0, 0.5), treats_count)
    
    p = @. logistic(a[actor] + g[block_id] + b[treatment])
    @. pulled_left ~ Binomial(1, p)
end

m13_4_ch = sample(m13_4(d.pulled_left, d.actor, d.block, d.treatment), NUTS(), 4000)
m13_4_df = DataFrame(m13_4_ch);

Code 13.22

In [23]:
precis(m13_4_df)
coeftab_plot(m13_4_df, size=(800,600))
┌───────┬────────────────────────────────────────────────────────────┐
│ param     mean     std     5.5%      50%    94.5%       histogram │
├───────┼────────────────────────────────────────────────────────────┤
│  a[1] │ -0.3696  0.3736  -0.9527  -0.3614   0.2245         ▁▁▆█▃▁▁ │
│  a[2] │  4.6715  1.2414   3.0805   4.4924   6.9113     ▁▁▇█▅▃▁▁▁▁▁ │
│  a[3] │ -0.6737  0.3789  -1.2842  -0.6824  -0.0673          ▁▃█▅▁▁ │
│  a[4] │ -0.6797  0.3725  -1.2943  -0.6711  -0.1029          ▁▃█▅▁▁ │
│  a[5] │ -0.3664  0.3667  -0.9526  -0.3526   0.1959         ▁▁▆█▃▁▁ │
│  a[6] │  0.5701  0.3695  -0.0321   0.5718   1.1599          ▁▂▇█▃▁ │
│  a[7] │  2.1083  0.4609   1.3834      2.1    2.891         ▁▂▇█▄▁▁ │
│  b[1] │  -0.119  0.3015  -0.5939  -0.1217   0.3687   ▁▁▁▂▄▇█▆▃▂▁▁▁ │
│  b[2] │  0.4116  0.3054  -0.0736   0.4147   0.8785   ▁▁▁▃▆██▆▃▁▁▁▁ │
│  b[3] │  -0.474  0.2976  -0.9366  -0.4818   0.0123    ▁▁▁▁▄▇█▇▄▂▁▁ │
│  b[4] │  0.2868  0.2959  -0.1621    0.276   0.7579  ▁▁▁▂▄▇█▆▄▁▁▁▁▁ │
│  g[1] │ -0.1717  0.2207  -0.5788  -0.1181    0.078    ▁▁▁▁▁▂▄█▄▁▁▁ │
│  g[2] │  0.0433  0.1859  -0.2136   0.0232   0.3702    ▁▁▁▁▂▇█▃▁▁▁▁ │
│  g[3] │  0.0541  0.1859  -0.2017   0.0357   0.3567    ▁▁▁▁▆█▃▁▁▁▁▁ │
│  g[4] │   0.014  0.1865  -0.2621   0.0099   0.3147   ▁▁▁▁▂██▂▁▁▁▁▁ │
│  g[5] │ -0.0339  0.1851  -0.3451   -0.022   0.2323    ▁▁▁▁▁▃█▇▂▁▁▁ │
│  g[6] │  0.1165  0.2059  -0.1492   0.0795   0.4881          ▁▄█▁▁▁ │
│     ā │  0.5746  0.7472   -0.618   0.5849   1.7528   ▁▁▁▁▂▅▇█▅▃▁▁▁ │
│   σ_a │  2.0167  0.6475   1.2094   1.9068   3.1572        ▁█▆▂▁▁▁▁ │
│   σ_g │  0.2236  0.1785   0.0422   0.1829   0.5409    █▅▂▁▁▁▁▁▁▁▁▁ │
└───────┴────────────────────────────────────────────────────────────┘
Out[23]:

Code 13.23

In [24]:
Random.seed!(14)

@model function m13_5(pulled_left, actor, treatment)
    σ_a ~ Exponential()
    ā ~ Normal(0, 1.5)
    actors_count = length(levels(actor))
    treats_count = length(levels(treatment))
    a ~ filldist(Normal(ā, σ_a), actors_count)
    b ~ filldist(Normal(0, 0.5), treats_count)
    
    p = @. logistic(a[actor] + b[treatment])
    @. pulled_left ~ Binomial(1, p)
end

m13_5_ch = sample(m13_5(d.pulled_left, d.actor, d.treatment), NUTS(), 4000)
m13_5_df = DataFrame(m13_5_ch);

Code 13.24

In [25]:
l_fun = (r, dr) -> begin
    a = get(r, "a[$(dr.actor)]", 0)
    g = get(r, "g[$(dr.block)]", 0)
    b = get(r, "b[$(dr.treatment)]", 0)
    p = logistic(a + g + b)
    binomlogpdf(1, p, dr.pulled_left)
end

m13_4_ll = link(m13_4_df, l_fun, eachrow(d))
m13_4_ll = hcat(m13_4_ll...)

l_fun = (r, dr) -> begin
    a = get(r, "a[$(dr.actor)]", 0)
    b = get(r, "b[$(dr.treatment)]", 0)
    p = logistic(a + b)
    binomlogpdf(1, p, dr.pulled_left)
end

m13_5_ll = link(m13_5_df, l_fun, eachrow(d))
m13_5_ll = hcat(m13_5_ll...);

compare([m13_4_ll, m13_5_ll], :waic, mnames=["m4", "m5"])
Out[25]:

2 rows × 8 columns

 Row │ models  WAIC     lppd     SE       dWAIC    dSE      pWAIC    weight
     │ String  Float64  Float64  Float64  Float64  Float64  Float64  Float64
─────┼───────────────────────────────────────────────────────────────────────
   1 │ m5      531.1    514.03   19.2     0.0      0.0      8.51     0.63
   2 │ m4      532.2    510.79   19.43    1.1      1.72     10.72    0.37

Code 13.25

In [26]:
Random.seed!(15)

@model function m13_6(pulled_left, actor, block_id, treatment)
    σ_a ~ Exponential()
    σ_g ~ Exponential()
    σ_b ~ Exponential()
    ā ~ Normal(0, 1.5)
    actors_count = length(levels(actor))
    blocks_count = length(levels(block_id))
    treats_count = length(levels(treatment))
    a ~ filldist(Normal(ā, σ_a), actors_count)
    g ~ filldist(Normal(0, σ_g), blocks_count)
    b ~ filldist(Normal(0, σ_b), treats_count)
    
    p = @. logistic(a[actor] + g[block_id] + b[treatment])
    @. pulled_left ~ Binomial(1, p)
end

m13_6_ch = sample(m13_6(d.pulled_left, d.actor, d.block, d.treatment), NUTS(), 4000)
m13_6_df = DataFrame(m13_6_ch);

precis(m13_4_df[:,r"b"])
precis(m13_6_df[:,r"b"])
┌───────┬──────────────────────────────────────────────────────────┐
│ param    mean     std     5.5%      50%   94.5%       histogram │
├───────┼──────────────────────────────────────────────────────────┤
│  b[1] │ -0.119  0.3015  -0.5939  -0.1217  0.3687   ▁▁▁▂▄▇█▆▃▂▁▁▁ │
│  b[2] │ 0.4116  0.3054  -0.0736   0.4147  0.8785   ▁▁▁▃▆██▆▃▁▁▁▁ │
│  b[3] │ -0.474  0.2976  -0.9366  -0.4818  0.0123    ▁▁▁▁▄▇█▇▄▂▁▁ │
│  b[4] │ 0.2868  0.2959  -0.1621    0.276  0.7579  ▁▁▁▂▄▇█▆▄▁▁▁▁▁ │
└───────┴──────────────────────────────────────────────────────────┘
┌───────┬───────────────────────────────────────────────────────┐
│ param     mean     std     5.5%      50%   94.5%   histogram │
├───────┼───────────────────────────────────────────────────────┤
│  b[1] │ -0.1116  0.3607  -0.6868  -0.1029  0.4391   ▁▁▂█▅▁▁▁▁ │
│  b[2] │  0.3877  0.3609  -0.1153   0.3614  0.9866   ▁▁▂█▅▁▁▁▁ │
│  b[3] │ -0.4464  0.3655  -1.0599   -0.422  0.0814    ▁▁▁▆█▂▁▁ │
│  b[4] │  0.2806    0.36  -0.2171    0.246  0.8956    ▁▁▃█▃▁▁▁ │
│   σ_b │  0.5973  0.3767   0.2072   0.5049  1.2311  █▇▂▁▁▁▁▁▁▁ │
└───────┴───────────────────────────────────────────────────────┘

13.4 Divergent transitions and non-centered priors

Code 13.26

In [27]:
@model function m13_7(N)
    v ~ Normal(0, 3)
    x ~ Normal(0, exp(v))
end

Random.seed!(5)
m13_7_ch = sample(m13_7(1), NUTS(), 1000)
Out[27]:
Chains MCMC chain (1000×14×1 Array{Float64, 3}):

Iterations        = 501:1:1500
Number of chains  = 1
Samples per chain = 1000
Wall duration     = 3.24 seconds
Compute duration  = 3.24 seconds
parameters        = v, x
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
  parameters      mean        std   naive_se      mcse        ess      rhat   ⋯
      Symbol   Float64    Float64    Float64   Float64    Float64   Float64   ⋯

           v    4.1270     1.3028     0.0412    0.2004    10.3585    1.0110    ⋯
           x   30.6167   318.1477    10.0607   27.9820   141.9414    1.0026    ⋯
                                                                1 column omitted

Quantiles
  parameters        2.5%      25.0%     50.0%     75.0%      97.5% 
      Symbol     Float64    Float64   Float64   Float64    Float64 

           v      2.7433     3.0582    3.7470    4.9860     7.2314
           x   -475.4665   -14.7917   -2.4200   21.4529   766.0837

Code 13.27

In [28]:
@model function m13_7nc(N)
    v ~ Normal(0, 3)
    z ~ Normal()
    x = z * exp(v)
end

Random.seed!(5)
m13_7nc_ch = sample(m13_7nc(1), NUTS(), 1000)
Out[28]:
Chains MCMC chain (1000×14×1 Array{Float64, 3}):

Iterations        = 501:1:1500
Number of chains  = 1
Samples per chain = 1000
Wall duration     = 2.93 seconds
Compute duration  = 2.93 seconds
parameters        = v, z
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
  parameters      mean       std   naive_se      mcse        ess      rhat   e ⋯
      Symbol   Float64   Float64    Float64   Float64    Float64   Float64     ⋯

           v   -0.0217    2.9165     0.0922    0.1000   975.4488    1.0034     ⋯
           z   -0.0017    1.0110     0.0320    0.0324   928.7026    0.9995     ⋯
                                                                1 column omitted

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5% 
      Symbol   Float64   Float64   Float64   Float64   Float64 

           v   -5.8505   -1.9989    0.0482    1.8963    5.6774
           z   -1.9249   -0.6809   -0.0099    0.6728    1.9642

Code 13.28

The number of divergent transitions is not reported directly in the sampling summary, but their impact can be gauged by comparing ESS values between the two chains.
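
That said, the internals listed in the chain outputs above do include a numerical_error field, so a rough count can be attempted. A minimal sketch, assuming that internal flags iterations where the leapfrog integrator failed (a proxy for divergent transitions); the same call can be repeated on m13_4b_ch after the refit below:

# assumption: numerical_error marks iterations with an integrator failure
sum(m13_4_ch[:numerical_error])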

In [29]:
Random.seed!(13)
m13_4b_ch = sample(m13_4(d.pulled_left, d.actor, d.block, d.treatment), NUTS(0.95, init_ϵ=0.1), 4000)
Out[29]:
Chains MCMC chain (4000×32×1 Array{Float64, 3}):

Iterations        = 1001:1:5000
Number of chains  = 1
Samples per chain = 4000
Wall duration     = 22.22 seconds
Compute duration  = 22.22 seconds
parameters        = σ_a, σ_g, ā, a[1], a[2], a[3], a[4], a[5], a[6], a[7], g[1], g[2], g[3], g[4], g[5], g[6], b[1], b[2], b[3], b[4]
internals         = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size

Summary Statistics
  parameters      mean       std   naive_se      mcse         ess      rhat   ⋯
      Symbol   Float64   Float64    Float64   Float64     Float64   Float64   ⋯

         σ_a    2.0123    0.6448     0.0102    0.0154   1928.9317    0.9998    ⋯
         σ_g    0.2254    0.1768     0.0028    0.0072    655.0168    0.9998    ⋯
           ā    0.5905    0.7271     0.0115    0.0172   1668.0941    1.0004    ⋯
        a[1]   -0.3596    0.3647     0.0058    0.0095    982.0526    1.0025    ⋯
        a[2]    4.6656    1.2885     0.0204    0.0296   1481.5114    1.0000    ⋯
        a[3]   -0.6614    0.3712     0.0059    0.0092   1108.9323    1.0016    ⋯
        a[4]   -0.6603    0.3602     0.0057    0.0094   1006.5836    1.0025    ⋯
        a[5]   -0.3495    0.3711     0.0059    0.0088   1061.5711    1.0021    ⋯
        a[6]    0.5874    0.3676     0.0058    0.0089   1079.3178    1.0016    ⋯
        a[7]    2.1131    0.4571     0.0072    0.0109   1248.0935    1.0010    ⋯
        g[1]   -0.1777    0.2190     0.0035    0.0069   1138.3718    0.9998    ⋯
        g[2]    0.0429    0.1864     0.0029    0.0042   2022.7685    0.9998    ⋯
        g[3]    0.0554    0.1834     0.0029    0.0040   2201.4475    0.9999    ⋯
        g[4]    0.0107    0.1906     0.0030    0.0037   2323.5314    0.9998    ⋯
        g[5]   -0.0337    0.1789     0.0028    0.0036   2211.6014    0.9998    ⋯
        g[6]    0.1166    0.1983     0.0031    0.0052   1464.2777    0.9998    ⋯
        b[1]   -0.1316    0.2961     0.0047    0.0074   1053.2554    1.0014    ⋯
        b[2]    0.3919    0.3039     0.0048    0.0074    999.4859    1.0040    ⋯
        b[3]   -0.4786    0.3022     0.0048    0.0078   1044.5529    1.0006    ⋯
        b[4]    0.2745    0.3002     0.0047    0.0079   1003.2132    1.0016    ⋯
                                                                1 column omitted

Quantiles
  parameters      2.5%     25.0%     50.0%     75.0%     97.5% 
      Symbol   Float64   Float64   Float64   Float64   Float64 

         σ_a    1.0938    1.5534    1.9003    2.3560    3.5495
         σ_g    0.0257    0.1006    0.1860    0.2985    0.6869
           ā   -0.9008    0.1319    0.5915    1.0443    1.9871
        a[1]   -1.0718   -0.6056   -0.3566   -0.1153    0.3601
        a[2]    2.7792    3.7892    4.4519    5.3011    7.8573
        a[3]   -1.3976   -0.8984   -0.6568   -0.4202    0.0797
        a[4]   -1.3599   -0.9004   -0.6646   -0.4125    0.0514
        a[5]   -1.0684   -0.5981   -0.3589   -0.1002    0.3986
        a[6]   -0.1205    0.3389    0.5795    0.8366    1.3073
        a[7]    1.2231    1.7957    2.1121    2.4170    3.0189
        g[1]   -0.7116   -0.3000   -0.1309   -0.0192    0.1410
        g[2]   -0.3066   -0.0561    0.0255    0.1351    0.4609
        g[3]   -0.3032   -0.0439    0.0323    0.1527    0.4739
        g[4]   -0.3961   -0.0746    0.0062    0.0985    0.4263
        g[5]   -0.4315   -0.1180   -0.0176    0.0572    0.3120
        g[6]   -0.2165   -0.0089    0.0744    0.2209    0.5842
        b[1]   -0.7123   -0.3282   -0.1300    0.0674    0.4568
        b[2]   -0.2180    0.1924    0.3873    0.5893    0.9994
        b[3]   -1.0814   -0.6817   -0.4807   -0.2661    0.0956
        b[4]   -0.3056    0.0748    0.2815    0.4757    0.8592
In [30]:
t = ess_rhat(m13_4_ch)
ess_4 = t[:,:ess]
t = ess_rhat(m13_4b_ch)
ess_4b = t[:,:ess]

plot(ess_4, lw=2, label="ESS m13_4")
plot!(ess_4b, lw=2, label="ESS m13_4b")
Out[30]:

Code 13.29

In [31]:
Random.seed!(13)

@model function m13_4nc(pulled_left, actor, block_id, treatment)
    σ_a ~ Exponential()
    σ_g ~ Exponential()
    ā ~ Normal(0, 1.5)
    actors_count = length(levels(actor))
    blocks_count = length(levels(block_id))
    treats_count = length(levels(treatment))
    b ~ filldist(Normal(0, 0.5), treats_count)
    z ~ filldist(Normal(), actors_count)
    x ~ filldist(Normal(), blocks_count)
    a = @. ā + σ_a*z
    g = σ_g*x
    
    p = @. logistic(a[actor] + g[block_id] + b[treatment])
    @. pulled_left ~ Binomial(1, p)
end

m13_4nc_ch = sample(m13_4nc(d.pulled_left, d.actor, d.block, d.treatment), NUTS(), 4000);

Code 13.30

In [32]:
t = ess_rhat(m13_4_ch)
ess_4 = t[:,:ess]
t = ess_rhat(m13_4nc_ch)
ess_4nc = t[:,:ess]

lims = extrema(vcat(ess_4, ess_4nc)) .+ (-100, 100)
plot(xlim=lims, ylims=lims, xlab="n_eff (centered)", ylab="n_eff (non-centered)", size=(500,500))
scatter!(ess_4, ess_4nc)
plot!(identity, c=:gray, s=:dash)
Out[32]:

13.5 Multilevel posterior predictions

Code 13.31

In [33]:
chimp = 2
d_pred = DataFrame(
    actor = fill(chimp, 4),
    treatment = 1:4,
    block = fill(1, 4)
)

l_fun = (r, dr) -> begin
    a = get(r, "a[$(dr.actor)]", 0)
    g = get(r, "g[$(dr.block)]", 0)
    b = get(r, "b[$(dr.treatment)]", 0)
    logistic(a + g + b)
end

p = link(m13_4_df, l_fun, eachrow(d_pred))
p = hcat(p...)
p_μ = mean.(eachcol(p))
p_ci = PI.(eachcol(p));

Code 13.32

In [34]:
post = sample(resetrange(m13_4_ch), 2000)
post = DataFrame(post)
describe(post)
Out[34]:

20 rows × 7 columns

 Row │ variable  mean        min         median       max       nmissing  eltype
     │ Symbol    Float64     Float64     Float64      Float64   Int64     DataType
─────┼─────────────────────────────────────────────────────────────────────────────
   1 │ a[1]      -0.362603   -1.74662    -0.343984     1.16849   0        Float64
   2 │ a[2]       4.6697      1.99411     4.47706     11.9458    0        Float64
   3 │ a[3]      -0.658423   -1.95394    -0.672465     0.759445  0        Float64
   4 │ a[4]      -0.669022   -1.94008    -0.667599     0.702358  0        Float64
   5 │ a[5]      -0.36085    -1.61727    -0.360718     0.808689  0        Float64
   6 │ a[6]       0.582945   -0.568726    0.584092     1.77474   0        Float64
   7 │ a[7]       2.12467     0.73358     2.12324      3.41372   0        Float64
   8 │ b[1]      -0.125846   -1.19189    -0.123159     0.838821  0        Float64
   9 │ b[2]       0.406341   -0.594921    0.409354     1.60454   0        Float64
  10 │ b[3]      -0.483899   -1.71984    -0.487185     0.435965  0        Float64
  11 │ b[4]       0.283934   -0.838933    0.273036     1.73403   0        Float64
  12 │ g[1]      -0.166633   -1.37376    -0.117014     0.451782  0        Float64
  13 │ g[2]       0.0394322  -0.80255     0.0205989    0.77087   0        Float64
  14 │ g[3]       0.047988   -0.71706     0.033677     0.823652  0        Float64
  15 │ g[4]       0.00900076 -1.07557     0.00703865   1.28219   0        Float64
  16 │ g[5]      -0.0321617  -0.828938   -0.0175807    0.729183  0        Float64
  17 │ g[6]       0.108842   -0.519147    0.082075     1.17474   0        Float64
  18 │ ā          0.58869    -2.63128     0.607964     2.62595   0        Float64
  19 │ σ_a        2.01588     0.792844    1.91205      5.93771   0        Float64
  20 │ σ_g        0.21504     0.0285691   0.178079     2.2755    0        Float64

Code 13.33

In [35]:
density(post."a[5]")
Out[35]:

Code 13.34

In [36]:
p_link = (treatment, actor, block_id) -> begin
    logodds = 
        getproperty(post, "a[$actor]") + 
        getproperty(post, "b[$block_id]") + 
        getproperty(post, "g[$treatment]")
    logistic.(logodds)
end
Out[36]:
#15 (generic function with 1 method)

Code 13.35

In [37]:
p_raw = p_link.(1:4, 2, 1)
p_raw = hcat(p_raw...)
p_μ = mean.(eachcol(p_raw))
p_ci = PI.(eachcol(p_raw));

Code 13.36

In [38]:
p_link_abar = treatment -> begin
    logodds = 
        post.ā + getproperty(post, "b[$treatment]")
    logistic.(logodds)
end
Out[38]:
#17 (generic function with 1 method)

Code 13.37

In [39]:
p_raw = p_link_abar.(1:4)
p_raw = hcat(p_raw...)
p_μ = mean.(eachcol(p_raw))
p_ci = PI.(eachcol(p_raw))
p_ci = vcat(p_ci'...)

plot(xlab="treatment", ylab="proportion pulled left", title="average actor", ylim=(0, 1))
plot!(["R/N", "L/N", "R/P", "L/P"], [p_μ p_μ], fillrange=p_ci, fillalpha=0.2, c=:black, lw=1.5)
Out[39]:

Code 13.38

In [40]:
Random.seed!(1)
a_sim = rand.(Normal.(post.ā, post.σ_a))

p_link_asim = treatment -> begin
    logodds = 
        a_sim + getproperty(post, "b[$treatment]")
    logistic.(logodds)
end

p_raw_asim = p_link_asim.(1:4)
p_raw_asim = hcat(p_raw_asim...)
p_μ = mean.(eachcol(p_raw_asim))
p_ci = PI.(eachcol(p_raw_asim))
p_ci = vcat(p_ci'...)

plot(xlab="treatment", ylab="proportion pulled left", title="marginal of actor", ylim=(0, 1))
plot!(["R/N", "L/N", "R/P", "L/P"], [p_μ p_μ], fillrange=p_ci, fillalpha=0.2, c=:black, lw=1.5)
Out[40]:

Code 13.39

In [41]:
p = plot(xlab="treatment", ylab="proportion pulled left", title="simulated actors", ylim=(0, 1))

for r in first(eachrow(p_raw_asim), 100)
    plot!(["R/N", "L/N", "R/P", "L/P"], r, c=:black, alpha=0.2)
end
p
Out[41]: