In [1]:
using Turing
using DataFrames
using CSV
using Random
using Distributions
using StatisticalRethinking
using StatisticalRethinking: link
using StatisticalRethinkingPlots
using StatsPlots
using StatsBase
using Logging
using LinearAlgebra

default(label=false);
Logging.disable_logging(Logging.Warn);

Code 15.1

In [2]:
Random.seed!(2)

function sim_pancake()
    pancake = [[1, 1], [1, 0], [0, 0]]
    sides = sample(pancake)
    sample([sides, reverse(sides)])
end

pancakes = vcat([sim_pancake() for _ in 1:10_000]'...)
up = pancakes[:,1]
down = pancakes[:,2]

num_11_10 = sum(up .== 1)
num_11 = sum((up .== 1) .& (down .== 1))
num_11 / num_11_10
Out[2]:
0.6619385342789598

15.1 Measurement error

Code 15.2

In [3]:
d = DataFrame(CSV.File("data/WaffleDivorce.csv"))

scatter(d.MedianAgeMarriage, d.Divorce, xlab="Median age marriage", ylab="Divorse rate")
scatter!(d.MedianAgeMarriage, d.Divorce, yerror=d."Divorce SE", ms=0)
Out[3]:

Code 15.3

In [4]:
dlist = (
    D_obs = standardize(ZScoreTransform, d.Divorce),
    D_sd = d."Divorce SE" ./ std(d.Divorce),
    M = standardize(ZScoreTransform, d.Marriage),
    A = standardize(ZScoreTransform, d.MedianAgeMarriage),
    N = nrow(d),
)

@model function m15_1(D_obs, D_sd, M, A, N)
    a ~ Normal(0, 0.2)
    bA ~ Normal(0, 0.5)
    bM ~ Normal(0, 0.5)
    μ = @. a + bA * A + bM * M
    σ ~ Exponential()
    D_true ~ MvNormal(μ, σ)
    @. D_obs ~ Normal(D_true, D_sd)
end

Random.seed!(1)
m15_1_ch = sample(m15_1(dlist...), NUTS(), 1000)
m15_1_df = DataFrame(m15_1_ch);

Code 15.4

In [5]:
precis(m15_1_df)
┌────────────┬────────────────────────────────────────────────────────────┐
│      param     mean     std     5.5%      50%    94.5%       histogram │
├────────────┼────────────────────────────────────────────────────────────┤
│ D_true[10] │  -0.621   0.173  -0.8887  -0.6122  -0.3597    ▁▁▂▄▆▇█▆▃▁▁▁ │
│ D_true[11] │  0.7624  0.2994    0.305   0.7637   1.2305     ▁▁▁▃▆██▅▂▁▁ │
│ D_true[12] │ -0.5211  0.5113  -1.3393  -0.5269   0.2834         ▁▄██▃▁▁ │
│ D_true[13] │  0.1502  0.4773  -0.6217   0.1682   0.9289         ▁▁▂▆█▄▁ │
│ D_true[14] │ -0.8638  0.2135  -1.1956  -0.8584  -0.5373        ▁▁▂▅█▇▂▁ │
│ D_true[15] │  0.5593   0.293   0.1014   0.5529   1.0363     ▁▁▁▃▅██▄▂▁▁ │
│ D_true[16] │  0.2798  0.3782  -0.3492   0.2904   0.8596         ▁▁▄█▅▁▁ │
│ D_true[17] │  0.4957  0.4189  -0.1862   0.5133   1.1655          ▁▃██▂▁ │
│ D_true[18] │   1.238  0.3584   0.6892   1.2332   1.8434     ▁▂▃▆██▇▃▂▁▁ │
│ D_true[19] │  0.4156  0.3715  -0.1677   0.4065   0.9925  ▁▁▂▃▆██▆▄▂▁▁▁▁ │
│  D_true[1] │  1.1516  0.3774   0.5353   1.1559   1.7522   ▁▁▂▄▇██▆▄▁▁▁▁ │
│ D_true[20] │   0.403  0.5436  -0.4521   0.4054   1.2635        ▁▁▅█▇▃▁▁ │
│ D_true[21] │  -0.546  0.3205  -1.0308  -0.5538  -0.0157    ▁▁▁▂▆██▆▄▂▁▁ │
│ D_true[22] │ -1.0957  0.2813  -1.5296  -1.1059  -0.6473     ▁▁▁▄▇██▄▁▁▁ │
│ D_true[23] │ -0.2638  0.2736  -0.6867   -0.263   0.1595      ▁▁▃▇██▄▂▁▁ │
│ D_true[24] │ -0.9966  0.2894  -1.4577  -0.9901  -0.5235      ▁▁▂▅██▅▂▁▁ │
│ D_true[25] │  0.4019  0.4203  -0.2522   0.3908   1.0977          ▁▃█▆▂▁ │
│ D_true[26] │ -0.0291  0.3057  -0.5213  -0.0287   0.4641    ▁▁▂▃▇██▆▂▁▁▁ │
│ D_true[27] │ -0.0441  0.5093  -0.8817  -0.0215   0.7562         ▁▁▄██▃▁ │
│ D_true[28] │ -0.1701  0.3882   -0.764  -0.1717   0.4562   ▁▁▂▄▇██▇▄▃▁▁▁ │
│ D_true[29] │ -0.2743  0.4969  -1.0534  -0.2953   0.5025       ▁▁▂▆█▆▂▁▁ │
│  D_true[2] │  0.6714  0.5659  -0.2582   0.6898   1.5599       ▁▁▃▇█▆▂▁▁ │
│ D_true[30] │ -1.7987  0.2262  -2.1686   -1.787  -1.4542       ▁▁▁▄██▅▁▁ │
│ D_true[31] │  0.1662  0.4056  -0.4738   0.1453   0.8209   ▁▁▁▃▅▇█▇▅▄▂▁▁ │
│ D_true[32] │ -1.6648  0.1624  -1.9333  -1.6691  -1.3921     ▁▁▃▄██▇▃▂▁▁ │
│ D_true[33] │  0.1228  0.2384  -0.2683   0.1283   0.4928       ▁▁▂▅█▇▃▁▁ │
│ D_true[34] │ -0.0676  0.5222  -0.9399  -0.0571   0.7506        ▁▁▂▄██▃▁ │
│ D_true[35] │ -0.1328   0.233  -0.5078  -0.1322   0.2459        ▁▁▃▇█▅▂▁ │
│ D_true[36] │  1.2639  0.4344   0.5982   1.2352   2.0097         ▁▁▅█▅▂▁ │
│ D_true[37] │  0.2305  0.3605  -0.3347   0.2314   0.7965   ▁▁▂▃▆██▇▄▂▁▁▁ │
│ D_true[38] │ -1.0325  0.2209  -1.3859   -1.036  -0.6787         ▁▂▅█▇▃▁ │
│ D_true[39] │ -0.9114  0.5421  -1.7033  -0.9179  -0.0434      ▁▁▃▇█▄▁▁▁▁ │
│  D_true[3] │  0.4359  0.3457  -0.0985   0.4308   0.9936    ▁▁▁▃▆██▇▄▂▁▁ │
│ D_true[40] │ -0.6777  0.3512  -1.2349  -0.6804  -0.1297     ▁▁▂▅▆██▅▃▁▁ │
│ D_true[41] │  0.2565  0.5575  -0.6044   0.2442   1.1973       ▁▁▂▆█▅▂▁▁ │
│ D_true[42] │  0.7264  0.3459   0.1648   0.7287   1.2918         ▁▁▄█▃▁▁ │
│ D_true[43] │  0.1913  0.1847  -0.1012   0.1882   0.4839   ▁▁▁▂▄▇██▆▄▂▁▁ │
│ D_true[44] │  0.7896  0.4368   0.0922   0.7949   1.4719         ▁▁▅█▆▁▁ │
│ D_true[45] │ -0.4221  0.5272  -1.2463  -0.4081   0.4098      ▁▁▁▃▇█▄▁▁▁ │
│ D_true[46] │ -0.3769  0.2479  -0.7904  -0.3703   0.0146        ▁▂▄██▅▂▁ │
│ D_true[47] │  0.1457  0.3117  -0.3459   0.1531   0.6335     ▁▁▁▄▇██▅▂▁▁ │
│ D_true[48] │   0.557  0.4512  -0.1538   0.5678   1.2713          ▁▂▇█▃▁ │
│ D_true[49] │ -0.6367   0.287  -1.0779  -0.6339  -0.1883      ▁▁▄▆██▅▂▁▁ │
│  D_true[4] │  1.4067  0.4591   0.6659   1.4032   2.0949         ▁▁▄██▂▁ │
│ D_true[50] │  0.8623  0.5903   -0.126    0.894   1.7718         ▁▃▅██▄▁ │
│  D_true[5] │ -0.9001  0.1265  -1.0994  -0.8983  -0.6918      ▁▁▂▅██▅▂▁▁ │
│  D_true[6] │  0.6447  0.4016   0.0158   0.6324   1.2895         ▁▁▆█▄▁▁ │
│  D_true[7] │  -1.368  0.3699  -1.9534  -1.3572  -0.7894   ▁▁▂▃▅▇█▆▄▂▁▁▁ │
│  D_true[8] │ -0.3436  0.4608  -1.0237   -0.346   0.3602         ▁▂▇█▅▁▁ │
│  D_true[9] │ -1.8451   0.607  -2.7822  -1.8427  -0.8434       ▁▁▃▇█▅▂▁▁ │
│          a │ -0.0577   0.097  -0.2045  -0.0583   0.0982   ▁▁▁▂▅▆██▅▄▂▁▁ │
│         bA │ -0.6051  0.1629  -0.8687  -0.6087  -0.3347    ▁▁▁▃▆██▅▃▂▁▁ │
│         bM │  0.0608  0.1681     -0.2   0.0616   0.3301     ▁▁▂▅▇██▅▂▁▁ │
│          σ │  0.5855  0.1069   0.4239   0.5773   0.7609        ▁▄█▇▃▁▁▁ │
└────────────┴────────────────────────────────────────────────────────────┘

Code 15.5

In [6]:
dlist = (
    D_obs = standardize(ZScoreTransform, d.Divorce),
    D_sd = d."Divorce SE" ./ std(d.Divorce),
    M_obs = standardize(ZScoreTransform, d.Marriage),
    M_sd = d."Marriage SE" ./ std(d.Marriage),
    A = standardize(ZScoreTransform, d.MedianAgeMarriage),
    N = nrow(d),
)

@model function m15_2(D_obs, D_sd, M_obs, M_sd, A, N)
    a ~ Normal(0, 0.2)
    bA ~ Normal(0, 0.5)
    bM ~ Normal(0, 0.5)
    M_true ~ filldist(Normal(), N)
    
    μ = @. a + bA * A + bM * M_true
    σ ~ Exponential()
    D_true ~ MvNormal(μ, σ)
    @. D_obs ~ Normal(D_true, D_sd)
    @. M_obs ~ Normal(M_true, M_sd)
end

Random.seed!(1)
m15_2_ch = sample(m15_2(dlist...), NUTS(), 1000)
m15_2_df = DataFrame(m15_2_ch);

Code 15.6

In [7]:
D_true = [mean(m15_2_df[!, "D_true[$i]"]) for i  1:dlist.N]
M_true = [mean(m15_2_df[!, "M_true[$i]"]) for i  1:dlist.N]

p = scatter(dlist.M_obs, dlist.D_obs, mc=:black, xlab="marriage rate (std)", ylab="divorse rate (std)")
scatter!(M_true, D_true, mc=:white)

for i  1:dlist.N
    plot!([dlist.M_obs[i], M_true[i]], [dlist.D_obs[i], D_true[i]], c=:black)
end
p
Out[7]:

Code 15.7

In [9]:
N = 500
A = rand(Normal(), N)
M = rand.(Normal.(-A))
D = rand.(Normal.(A))
A_obs = rand.(Normal.(A));

15.2 Missing data

Code 15.8

In [10]:
N = 100
S = rand(Normal(), N)
H = rand.([BinomialLogit(10, l) for l in S]);

Code 15.9

In [11]:
D = rand(Bernoulli(), N)
Hm = Vector{Union{Missing,Int}}(H)
Hm[D .== 1] .= missing;

Code 15.10

In [12]:
D = S .> 0
Hm = Vector{Union{Missing,Int}}(H)
Hm[D .== 1] .= missing;

Code 15.11

In [13]:
Random.seed!(501)
N = 1000
X = rand(Normal(), N)
S = rand(Normal(), N)
H = rand.([BinomialLogit(10, l) for l in 2 .+ S .- 2X])
D = X .> 1
Hm = Vector{Union{Missing,Int}}(H)
Hm[D .== 1] .= missing;

Code 15.12

In [14]:
@model function m15_3(H, S)
    a ~ Normal()
    bS ~ Normal(0, 0.5)
    p = @. logistic(a + bS*S)
    @. H ~ Binomial(10, p)
end

Random.seed!(1)
m15_3_ch = sample(m15_3(H, S), NUTS(100, 0.65, init_ϵ=0.25), 1000)
m15_3_df = DataFrame(m15_3_ch)
precis(m15_3_df)
┌───────┬────────────────────────────────────────────────────┐
│ param    mean     std    5.5%     50%   94.5%   histogram │
├───────┼────────────────────────────────────────────────────┤
│     a │ 1.2133   0.024  1.1743  1.2129  1.2533  ▁▁▁▂▅█▇▃▁▁ │
│    bS │ 0.6832  0.0258  0.6431  0.6831   0.726  ▁▂▄██▆▂▁▁▁ │
└───────┴────────────────────────────────────────────────────┘

Code 15.13

Model is the same, no need to redefine it

In [15]:
Random.seed!(1)
m15_4_ch = sample(m15_3(H[D .== 0], S[D .== 0]), NUTS(100, 0.65, init_ϵ=0.25), 1000)
m15_4_df = DataFrame(m15_4_ch)
precis(m15_4_df)
┌───────┬───────────────────────────────────────────────────────┐
│ param    mean     std    5.5%     50%   94.5%      histogram │
├───────┼───────────────────────────────────────────────────────┤
│     a │ 1.9059  0.0372  1.8478  1.9035  1.9669  ▁▁▁▁▃▅█▇▆▄▃▁▁ │
│    bS │ 0.8202  0.0353  0.7648  0.8194  0.8777   ▁▁▂▃▆██▅▄▂▁▁ │
└───────┴───────────────────────────────────────────────────────┘

Code 15.14

In [16]:
D = abs.(X) .< 1;

Code 15.15

In [17]:
N = 100
S = rand(Normal(), N)
H = rand.([BinomialLogit(10, l) for l in S])
D = H .< 5
Hm = Vector{Union{Missing,Int}}(H)
Hm[D .== 1] .= missing;

Code 15.16

In [18]:
d = DataFrame(CSV.File("data/milk.csv",  missingstring="NA"))

# get rid of dots in column names
rename!(n -> replace(n, "." => "_"), d)

d.neocortex_prop = d.neocortex_perc ./ 100
d.logmass = log.(d.mass)

t = Vector{Union{Missing, Float64}}(missing, nrow(d))
present_mask = completecases(d, :neocortex_prop)
t[present_mask] .= standardize(ZScoreTransform, Vector{Float64}(d.neocortex_prop[present_mask]))

dat_list = (
    K = standardize(ZScoreTransform, d.kcal_per_g),
    B = t,
    M = standardize(ZScoreTransform, d.logmass),
);

Code 15.17

In [19]:
@model function m15_5(K, B, M)
    σ ~ Exponential()
    σ_B ~ Exponential()
    a ~ Normal(0, 0.5)
    ν ~ Normal(0, 0.5)
    bB ~ Normal(0, 0.5)
    bM ~ Normal(0, 0.5)
    
    N_missing = sum(ismissing.(B))
    B_impute ~ filldist(Normal(), N_missing)

    i_missing = 1
    for i in eachindex(B)
        if ismissing(B[i])
            B_impute[i_missing] ~ Normal(ν, σ_B)
            b = B_impute[i_missing]
            i_missing += 1
        else
            B[i] ~ Normal(ν, σ_B)
            b = B[i]
        end
        μ = a + bB * b + bM * M[i]
        K[i] ~ Normal(μ, σ)
    end
end

Random.seed!(1)
m15_5_ch = sample(m15_5(dat_list...), NUTS(), 1000);
m15_5_df = DataFrame(m15_5_ch);

Code 15.18

In [20]:
precis(m15_5_df)
┌──────────────┬────────────────────────────────────────────────────────────┐
│        param     mean     std     5.5%      50%    94.5%       histogram │
├──────────────┼────────────────────────────────────────────────────────────┤
│ B_impute[10] │ -0.4493   0.937  -1.8564   -0.498   0.9944       ▁▁▁▅█▅▁▁▁ │
│ B_impute[11] │ -0.2744  0.8988  -1.7309   -0.257   1.0971  ▁▁▁▂▅▇█▇▅▂▁▁▁▁ │
│ B_impute[12] │  0.1195  0.9545  -1.3751   0.1335   1.5797        ▁▁▃██▄▁▁ │
│  B_impute[1] │ -0.5521  0.8934     -2.0  -0.5796    0.886  ▁▁▂▃▆██▅▃▂▁▁▁▁ │
│  B_impute[2] │ -0.7539  0.8957  -2.1775  -0.7862     0.68   ▁▁▂▄▆█▆▄▂▁▁▁▁ │
│  B_impute[3] │ -0.7344  0.9837  -2.3531  -0.7162   0.8267        ▁▁▂▇█▄▁▁ │
│  B_impute[4] │ -0.3395  0.8973  -1.7759  -0.3676   1.0974   ▁▁▁▃▆██▆▅▂▁▁▁ │
│  B_impute[5] │  0.4948  0.8933  -0.9695   0.5018   1.8817   ▁▁▂▃▅██▆▃▁▁▁▁ │
│  B_impute[6] │ -0.1546  0.8967  -1.5207  -0.2084   1.2607        ▁▃█▇▂▁▁▁ │
│  B_impute[7] │  0.1963  0.8784  -1.2708    0.201   1.5409        ▁▂▆█▄▁▁▁ │
│  B_impute[8] │  0.3036  0.8823  -1.0939   0.2987   1.6281        ▁▁▆█▄▁▁▁ │
│  B_impute[9] │  0.5297  0.8957  -0.8859   0.5158    1.925       ▁▁▁▄█▅▁▁▁ │
│            a │  0.0292  0.1562  -0.2086   0.0255   0.2772    ▁▁▁▂▅██▇▄▁▁▁ │
│           bB │  0.4975  0.2287   0.1381   0.5011   0.8518       ▁▁▃▆█▆▂▁▁ │
│           bM │ -0.5457  0.2064  -0.8789   -0.544  -0.2248       ▁▁▃▇█▅▁▁▁ │
│            ν │ -0.0465  0.2129  -0.3867  -0.0327   0.2739        ▁▁▁▅██▃▁ │
│            σ │  0.8417  0.1454   0.6408   0.8254   1.1024      ▁▁▄██▅▃▂▁▁ │
│          σ_B │  1.0173  0.1715   0.7843    0.998   1.3414      ▁▃▇█▇▅▃▂▁▁ │
└──────────────┴────────────────────────────────────────────────────────────┘

Code 15.19

In [21]:
dat_list_obs = (
    K = dat_list.K[present_mask],
    B = Vector{Float64}(dat_list.B[present_mask]),
    M = dat_list.M[present_mask]
)

@model function m15_6(K, B, M)
    σ ~ Exponential()
    σ_B ~ Exponential()
    a ~ Normal(0, 0.5)
    ν ~ Normal(0, 0.5)
    bB ~ Normal(0, 0.5)
    bM ~ Normal(0, 0.5)
    
    @. B ~ Normal(ν, σ_B)
    μ = @. a + bB * B + bM * M
    @. K ~ Normal(μ, σ)
end

Random.seed!(1)
m15_6_ch = sample(m15_6(dat_list_obs...), NUTS(), 1000)
m15_6_df = DataFrame(m15_6_ch);
precis(m15_6_df)
┌───────┬───────────────────────────────────────────────────────────┐
│ param     mean     std     5.5%      50%   94.5%       histogram │
├───────┼───────────────────────────────────────────────────────────┤
│     a │  0.1072  0.1882  -0.2002   0.1037  0.4056        ▁▁▂▅█▆▂▁ │
│    bB │  0.5884  0.2734   0.1279   0.6015  1.0006      ▁▁▂▅██▅▂▁▁ │
│    bM │ -0.6323  0.2394  -1.0099  -0.6366  -0.253        ▁▂▅█▇▃▁▁ │
│     ν │ -0.0037  0.2481  -0.3922  -0.0089  0.3907       ▁▂▅██▅▂▁▁ │
│     σ │  0.8706  0.1706   0.6441   0.8461  1.1826   ▁▅██▆▄▂▁▁▁▁▁▁ │
│   σ_B │  1.0475  0.1945   0.7895   1.0167  1.4083  ▁▃▆██▆▄▂▂▁▁▁▁▁ │
└───────┴───────────────────────────────────────────────────────────┘

Code 15.20

In [23]:
coeftab_plot(m15_5_df, m15_6_df; pars=(:bB, :bM), names=("m15.5", "m15.6"))
Out[23]:

Code 15.21

In [28]:
N_missing = sum(ismissing.(dat_list.B))

B_impute_μ = [
    mean(m15_5_df[!,"B_impute[$i]"])
    for i  1:N_missing
]

B_impute_pi = [
    PI(m15_5_df[!,"B_impute[$i]"])
    for i  1:N_missing
]

p1 = scatter(dat_list.B, dat_list.K, xlab="neocortex percent (std)", ylab="kcal milk (std)")
miss_mask = ismissing.(dat_list.B)
Ki = dat_list.K[miss_mask]
scatter!(B_impute_μ, Ki, mc=:white)

err = (
    B_impute_μ .- first.(B_impute_pi), 
    last.(B_impute_pi) .- B_impute_μ
)

scatter!(B_impute_μ, Ki, xerr=err, ms=0)

p2 = scatter(dat_list.M, dat_list.B, ylab="neocortex percent (std)", xlab="log body mass (std)")
Mi = dat_list.M[miss_mask]
scatter!(Mi, B_impute_μ, mc=:white)
scatter!(Mi, B_impute_μ, yerr=err, ms=0)

plot(p1, p2, size=(800, 400))
Out[28]:

Code 15.22

In [29]:
@model function m15_7(K, MB, Mi)
    σ ~ Exponential()
    σ_BM ~ Exponential()
    a ~ Normal(0, 0.5)
    μB ~ Normal(0, 0.5)
    μM ~ Normal(0, 0.5)
    bB ~ Normal(0, 0.5)
    bM ~ Normal(0, 0.5)
    Rho_BM ~ LKJ(2, 2)
    
    Σ = (σ_BM .* σ_BM') .* Rho_BM
    
    # process complete cases
    for i  eachindex(MB)
        MB[i] ~ MvNormal([μM, μB], Σ)
    end
    
    # impute and process incomplete cases
    N_missing = length(Mi)
    B_impute ~ filldist(Normal(), N_missing)
    MBi = [
        [m, b]
        for (m, b)  zip(Mi, B_impute)
    ]

    for i  eachindex(MBi)
        MBi[i] ~ MvNormal([μM, μB], Σ)
    end

    # from both sets, build mean vector for K
    μ = [
        a + bB * b + bM * m
        for (m, b)  Iterators.flatten((MB, MBi))
    ]
    
    @. K ~ Normal(μ, σ)
end

Random.seed!(1)

# to improve stability and performance, need to separate full samples and samples need to be imputed
pres_mask =  @. !ismissing(dat_list.B)
miss_mask = ismissing.(dat_list.B)
MB = [
    [m, b]
    for (m, b)  zip(dat_list.M[pres_mask], Vector{Float64}(dat_list.B[pres_mask]))
]
Mi = dat_list.M[miss_mask]

# very important to reorder K values to match order of samples
KK = vcat(dat_list.K[pres_mask], dat_list.K[miss_mask])

m15_7_ch = sample(m15_7(KK, MB, Mi), NUTS(), 1000)
m15_7_df = DataFrame(m15_7_ch);
precis(m15_7_df[!,r"b|Rho.+1,2|Rho.+2,1"])
┌─────────────┬───────────────────────────────────────────────────────┐
│       param     mean     std     5.5%      50%    94.5%  histogram │
├─────────────┼───────────────────────────────────────────────────────┤
│ Rho_BM[1,2] │  0.6517  0.1281   0.4297   0.6627   0.8248  ▁▁▁▃▅██▃▁ │
│ Rho_BM[2,1] │  0.6517  0.1281   0.4297   0.6627   0.8248  ▁▁▁▃▅██▃▁ │
│          bB │   0.568  0.2604   0.1183   0.5808   0.9805  ▁▁▂▅██▄▂▁ │
│          bM │ -0.6192  0.2399  -0.9805  -0.6336  -0.2181   ▁▂▅█▇▄▁▁ │
└─────────────┴───────────────────────────────────────────────────────┘

Code 15.23

In [30]:
miss_mask = ismissing.(dat_list.B);

Code 15.24

In [32]:
d = DataFrame(CSV.File("data/Moralizing_gods.csv", missingstring="NA"))
describe(d)
Out[32]:

5 rows × 7 columns (omitted printing of 1 columns)

variablemeanminmedianmaxnmissing
SymbolUnion…AnyUnion…AnyInt64
1polityBig Island HawaiiYemeni Coastal Plain0
2year-1339.35-9600-600.019000
3population4.862461.408324.769148.530460
4moralizing_gods0.94940501.01528
5writing0.45949100.010

Code 15.25

In [33]:
countmap(d.moralizing_gods)
Out[33]:
Dict{Union{Missing, Int64}, Int64} with 3 entries:
  0       => 17
  missing => 528
  1       => 319

Code 15.26

In [34]:
symbol = map(g -> ismissing(g) ? :x : (g == 0 ? :circle : :+), d.moralizing_gods)
color = map(g -> ismissing(g) ? :black : :blue, d.moralizing_gods)
scatter(d.year, d.population, m=symbol, mc=color, xlab="Time (year)", ylab="Population size")
Out[34]:

Code 15.27

In [35]:
countmap(zip(d.moralizing_gods, d.writing))
Out[35]:
Dict{Tuple{Union{Missing, Int64}, Int64}, Int64} with 6 entries:
  (0, 0)       => 16
  (1, 1)       => 310
  (missing, 1) => 86
  (missing, 0) => 442
  (0, 1)       => 1
  (1, 0)       => 9

Code 15.28

In [36]:
d[d.polity .== "Big Island Hawaii", ["year", "writing", "moralizing_gods"]]
Out[36]:

9 rows × 3 columns

yearwritingmoralizing_gods
Int64Int64Int64?
110000missing
211000missing
312000missing
413000missing
514000missing
615000missing
716000missing
817000missing
9180001

15.3 Categorical errors and discrete absences

Code 15.29

In [37]:
Random.seed!(9)

N_houses = 100
α = 5
β = -3
k = 0.5
r = 0.2

cat = rand(Bernoulli(k), N_houses)
notes = rand.([Poisson(α + β * c) for c  cat])
R_C = rand(Bernoulli(r), N_houses)

cat_obs = Vector{Int}(cat)
cat_obs[R_C] .= -9

dat = (
    notes = notes,
    cat = cat_obs,
    RC = R_C,
    N = N_houses,
);

Code 15.30

In [38]:
@model function m15_8(notes, cat, RC, N)
    a ~ Normal()
    b ~ Normal(0, 0.5)
    k ~ Beta(2, 2)
    λ = @. logistic(a + b * cat)
    
    for i  eachindex(cat)
        if !RC[i]
            cat[i] ~ Bernoulli(k)
            notes[i] ~ Poisson(λ[i])
        else
            Turing.@addlogprob! log(k) + poislogpdf(exp(a+b), notes[i])
            Turing.@addlogprob! log(1-k) + poislogpdf(exp(a), notes[i])
        end
    end
end

m15_8_ch = sample(m15_8(dat...), NUTS(), 10000)
m15_8_df = DataFrame(m15_8_ch)
precis(m15_8_df)
┌───────┬──────────────────────────────────────────────────────────┐
│ param     mean     std     5.5%      50%    94.5%     histogram │
├───────┼──────────────────────────────────────────────────────────┤
│     a │    1.55  0.0861    1.413   1.5511   1.6872       ▁▁▅█▅▁▁ │
│     b │ -0.2694  0.1358  -0.4872  -0.2687  -0.0541  ▁▁▁▂▄▇█▆▃▁▁▁ │
│     k │  0.4639  0.0448   0.3926   0.4637   0.5344       ▁▂▇█▄▁▁ │
└───────┴──────────────────────────────────────────────────────────┘
In [ ]: