using Turing
using DataFrames
using CSV
using Random
using Distributions
using StatisticalRethinking
using StatisticalRethinking: link
using StatisticalRethinkingPlots
using StatsPlots
using StatsBase
using Logging
using LinearAlgebra
default(label=false);
Logging.disable_logging(Logging.Warn);
Code 15.1
Random.seed!(2)
function sim_pancake()
pancake = [[1, 1], [1, 0], [0, 0]]
sides = sample(pancake)
sample([sides, reverse(sides)])
end
pancakes = vcat([sim_pancake() for _ in 1:10_000]'...)
up = pancakes[:,1]
down = pancakes[:,2]
num_11_10 = sum(up .== 1)
num_11 = sum((up .== 1) .& (down .== 1))
num_11 / num_11_10
Code 15.2
d = DataFrame(CSV.File("data/WaffleDivorce.csv"))
scatter(d.MedianAgeMarriage, d.Divorce, xlab="Median age marriage", ylab="Divorse rate")
scatter!(d.MedianAgeMarriage, d.Divorce, yerror=d."Divorce SE", ms=0)
Code 15.3
dlist = (
D_obs = standardize(ZScoreTransform, d.Divorce),
D_sd = d."Divorce SE" ./ std(d.Divorce),
M = standardize(ZScoreTransform, d.Marriage),
A = standardize(ZScoreTransform, d.MedianAgeMarriage),
N = nrow(d),
)
@model function m15_1(D_obs, D_sd, M, A, N)
a ~ Normal(0, 0.2)
bA ~ Normal(0, 0.5)
bM ~ Normal(0, 0.5)
μ = @. a + bA * A + bM * M
σ ~ Exponential()
D_true ~ MvNormal(μ, σ)
@. D_obs ~ Normal(D_true, D_sd)
end
Random.seed!(1)
m15_1_ch = sample(m15_1(dlist...), NUTS(), 1000)
m15_1_df = DataFrame(m15_1_ch);
Code 15.4
precis(m15_1_df)
Code 15.5
dlist = (
D_obs = standardize(ZScoreTransform, d.Divorce),
D_sd = d."Divorce SE" ./ std(d.Divorce),
M_obs = standardize(ZScoreTransform, d.Marriage),
M_sd = d."Marriage SE" ./ std(d.Marriage),
A = standardize(ZScoreTransform, d.MedianAgeMarriage),
N = nrow(d),
)
@model function m15_2(D_obs, D_sd, M_obs, M_sd, A, N)
a ~ Normal(0, 0.2)
bA ~ Normal(0, 0.5)
bM ~ Normal(0, 0.5)
M_true ~ filldist(Normal(), N)
μ = @. a + bA * A + bM * M_true
σ ~ Exponential()
D_true ~ MvNormal(μ, σ)
@. D_obs ~ Normal(D_true, D_sd)
@. M_obs ~ Normal(M_true, M_sd)
end
Random.seed!(1)
m15_2_ch = sample(m15_2(dlist...), NUTS(), 1000)
m15_2_df = DataFrame(m15_2_ch);
Code 15.6
D_true = [mean(m15_2_df[!, "D_true[$i]"]) for i ∈ 1:dlist.N]
M_true = [mean(m15_2_df[!, "M_true[$i]"]) for i ∈ 1:dlist.N]
p = scatter(dlist.M_obs, dlist.D_obs, mc=:black, xlab="marriage rate (std)", ylab="divorse rate (std)")
scatter!(M_true, D_true, mc=:white)
for i ∈ 1:dlist.N
plot!([dlist.M_obs[i], M_true[i]], [dlist.D_obs[i], D_true[i]], c=:black)
end
p
Code 15.7
N = 500
A = rand(Normal(), N)
M = rand.(Normal.(-A))
D = rand.(Normal.(A))
A_obs = rand.(Normal.(A));
Code 15.8
N = 100
S = rand(Normal(), N)
H = rand.([BinomialLogit(10, l) for l in S]);
Code 15.9
D = rand(Bernoulli(), N)
Hm = Vector{Union{Missing,Int}}(H)
Hm[D .== 1] .= missing;
Code 15.10
D = S .> 0
Hm = Vector{Union{Missing,Int}}(H)
Hm[D .== 1] .= missing;
Code 15.11
Random.seed!(501)
N = 1000
X = rand(Normal(), N)
S = rand(Normal(), N)
H = rand.([BinomialLogit(10, l) for l in 2 .+ S .- 2X])
D = X .> 1
Hm = Vector{Union{Missing,Int}}(H)
Hm[D .== 1] .= missing;
Code 15.12
@model function m15_3(H, S)
a ~ Normal()
bS ~ Normal(0, 0.5)
p = @. logistic(a + bS*S)
@. H ~ Binomial(10, p)
end
Random.seed!(1)
m15_3_ch = sample(m15_3(H, S), NUTS(100, 0.65, init_ϵ=0.25), 1000)
m15_3_df = DataFrame(m15_3_ch)
precis(m15_3_df)
Code 15.13
Model is the same, no need to redefine it
Random.seed!(1)
m15_4_ch = sample(m15_3(H[D .== 0], S[D .== 0]), NUTS(100, 0.65, init_ϵ=0.25), 1000)
m15_4_df = DataFrame(m15_4_ch)
precis(m15_4_df)
Code 15.14
D = abs.(X) .< 1;
Code 15.15
N = 100
S = rand(Normal(), N)
H = rand.([BinomialLogit(10, l) for l in S])
D = H .< 5
Hm = Vector{Union{Missing,Int}}(H)
Hm[D .== 1] .= missing;
Code 15.16
d = DataFrame(CSV.File("data/milk.csv", missingstring="NA"))
# get rid of dots in column names
rename!(n -> replace(n, "." => "_"), d)
d.neocortex_prop = d.neocortex_perc ./ 100
d.logmass = log.(d.mass)
t = Vector{Union{Missing, Float64}}(missing, nrow(d))
present_mask = completecases(d, :neocortex_prop)
t[present_mask] .= standardize(ZScoreTransform, Vector{Float64}(d.neocortex_prop[present_mask]))
dat_list = (
K = standardize(ZScoreTransform, d.kcal_per_g),
B = t,
M = standardize(ZScoreTransform, d.logmass),
);
Code 15.17
@model function m15_5(K, B, M)
σ ~ Exponential()
σ_B ~ Exponential()
a ~ Normal(0, 0.5)
ν ~ Normal(0, 0.5)
bB ~ Normal(0, 0.5)
bM ~ Normal(0, 0.5)
N_missing = sum(ismissing.(B))
B_impute ~ filldist(Normal(), N_missing)
i_missing = 1
for i in eachindex(B)
if ismissing(B[i])
B_impute[i_missing] ~ Normal(ν, σ_B)
b = B_impute[i_missing]
i_missing += 1
else
B[i] ~ Normal(ν, σ_B)
b = B[i]
end
μ = a + bB * b + bM * M[i]
K[i] ~ Normal(μ, σ)
end
end
Random.seed!(1)
m15_5_ch = sample(m15_5(dat_list...), NUTS(), 1000);
m15_5_df = DataFrame(m15_5_ch);
Code 15.18
precis(m15_5_df)
Code 15.19
dat_list_obs = (
K = dat_list.K[present_mask],
B = Vector{Float64}(dat_list.B[present_mask]),
M = dat_list.M[present_mask]
)
@model function m15_6(K, B, M)
σ ~ Exponential()
σ_B ~ Exponential()
a ~ Normal(0, 0.5)
ν ~ Normal(0, 0.5)
bB ~ Normal(0, 0.5)
bM ~ Normal(0, 0.5)
@. B ~ Normal(ν, σ_B)
μ = @. a + bB * B + bM * M
@. K ~ Normal(μ, σ)
end
Random.seed!(1)
m15_6_ch = sample(m15_6(dat_list_obs...), NUTS(), 1000)
m15_6_df = DataFrame(m15_6_ch);
precis(m15_6_df)
Code 15.20
coeftab_plot(m15_5_df, m15_6_df; pars=(:bB, :bM), names=("m15.5", "m15.6"))
Code 15.21
N_missing = sum(ismissing.(dat_list.B))
B_impute_μ = [
mean(m15_5_df[!,"B_impute[$i]"])
for i ∈ 1:N_missing
]
B_impute_pi = [
PI(m15_5_df[!,"B_impute[$i]"])
for i ∈ 1:N_missing
]
p1 = scatter(dat_list.B, dat_list.K, xlab="neocortex percent (std)", ylab="kcal milk (std)")
miss_mask = ismissing.(dat_list.B)
Ki = dat_list.K[miss_mask]
scatter!(B_impute_μ, Ki, mc=:white)
err = (
B_impute_μ .- first.(B_impute_pi),
last.(B_impute_pi) .- B_impute_μ
)
scatter!(B_impute_μ, Ki, xerr=err, ms=0)
p2 = scatter(dat_list.M, dat_list.B, ylab="neocortex percent (std)", xlab="log body mass (std)")
Mi = dat_list.M[miss_mask]
scatter!(Mi, B_impute_μ, mc=:white)
scatter!(Mi, B_impute_μ, yerr=err, ms=0)
plot(p1, p2, size=(800, 400))
Code 15.22
@model function m15_7(K, MB, Mi)
σ ~ Exponential()
σ_BM ~ Exponential()
a ~ Normal(0, 0.5)
μB ~ Normal(0, 0.5)
μM ~ Normal(0, 0.5)
bB ~ Normal(0, 0.5)
bM ~ Normal(0, 0.5)
Rho_BM ~ LKJ(2, 2)
Σ = (σ_BM .* σ_BM') .* Rho_BM
# process complete cases
for i ∈ eachindex(MB)
MB[i] ~ MvNormal([μM, μB], Σ)
end
# impute and process incomplete cases
N_missing = length(Mi)
B_impute ~ filldist(Normal(), N_missing)
MBi = [
[m, b]
for (m, b) ∈ zip(Mi, B_impute)
]
for i ∈ eachindex(MBi)
MBi[i] ~ MvNormal([μM, μB], Σ)
end
# from both sets, build mean vector for K
μ = [
a + bB * b + bM * m
for (m, b) ∈ Iterators.flatten((MB, MBi))
]
@. K ~ Normal(μ, σ)
end
Random.seed!(1)
# to improve stability and performance, need to separate full samples and samples need to be imputed
pres_mask = @. !ismissing(dat_list.B)
miss_mask = ismissing.(dat_list.B)
MB = [
[m, b]
for (m, b) ∈ zip(dat_list.M[pres_mask], Vector{Float64}(dat_list.B[pres_mask]))
]
Mi = dat_list.M[miss_mask]
# very important to reorder K values to match order of samples
KK = vcat(dat_list.K[pres_mask], dat_list.K[miss_mask])
m15_7_ch = sample(m15_7(KK, MB, Mi), NUTS(), 1000)
m15_7_df = DataFrame(m15_7_ch);
precis(m15_7_df[!,r"b|Rho.+1,2|Rho.+2,1"])
Code 15.23
miss_mask = ismissing.(dat_list.B);
Code 15.24
d = DataFrame(CSV.File("data/Moralizing_gods.csv", missingstring="NA"))
describe(d)
Code 15.25
countmap(d.moralizing_gods)
Code 15.26
symbol = map(g -> ismissing(g) ? :x : (g == 0 ? :circle : :+), d.moralizing_gods)
color = map(g -> ismissing(g) ? :black : :blue, d.moralizing_gods)
scatter(d.year, d.population, m=symbol, mc=color, xlab="Time (year)", ylab="Population size")
Code 15.27
countmap(zip(d.moralizing_gods, d.writing))
Code 15.28
d[d.polity .== "Big Island Hawaii", ["year", "writing", "moralizing_gods"]]
Code 15.29
Random.seed!(9)
N_houses = 100
α = 5
β = -3
k = 0.5
r = 0.2
cat = rand(Bernoulli(k), N_houses)
notes = rand.([Poisson(α + β * c) for c ∈ cat])
R_C = rand(Bernoulli(r), N_houses)
cat_obs = Vector{Int}(cat)
cat_obs[R_C] .= -9
dat = (
notes = notes,
cat = cat_obs,
RC = R_C,
N = N_houses,
);
Code 15.30
@model function m15_8(notes, cat, RC, N)
a ~ Normal()
b ~ Normal(0, 0.5)
k ~ Beta(2, 2)
λ = @. logistic(a + b * cat)
for i ∈ eachindex(cat)
if !RC[i]
cat[i] ~ Bernoulli(k)
notes[i] ~ Poisson(λ[i])
else
Turing.@addlogprob! log(k) + poislogpdf(exp(a+b), notes[i])
Turing.@addlogprob! log(1-k) + poislogpdf(exp(a), notes[i])
end
end
end
m15_8_ch = sample(m15_8(dat...), NUTS(), 10000)
m15_8_df = DataFrame(m15_8_ch)
precis(m15_8_df)