using Turing
using DataFrames
using CSV
using Random
using Distributions
using StatisticalRethinking
using StatisticalRethinking: link
using StatisticalRethinkingPlots
using ParetoSmooth
using StatsPlots
using Plots.PlotMeasures
using StatsBase
using FreqTables
using Logging
default(label=false);
Logging.disable_logging(Logging.Warn);
Code 13.1
d = DataFrame(CSV.File("data/reedfrogs.csv"))
describe(d)
Code 13.2
d.tank = 1:nrow(d)
@model function m13_1(S, N, tank)
tank_size = length(levels(tank))
a ~ filldist(Normal(0, 1.5), tank_size)
p = logistic.(a)
@. S ~ Binomial(N, p)
end
Random.seed!(1)
m13_1_ch = sample(m13_1(d.surv, d.density, d.tank), NUTS(200, 0.65, init_ϵ=0.5), 1000)
m13_1_df = DataFrame(m13_1_ch);
Code 13.3
@model function m13_2(S, N, tank)
tank_size = length(levels(tank))
σ ~ Exponential()
ā ~ Normal(0, 1.5)
a ~ filldist(Normal(ā, σ), tank_size)
p = logistic.(a)
@. S ~ Binomial(N, p)
end
Random.seed!(1)
m13_2_ch = sample(m13_2(d.surv, d.density, d.tank), NUTS(200, 0.65, init_ϵ=0.2), 1000)
m13_2_df = DataFrame(m13_2_ch);
Code 13.4
link_fun = (r, dr) -> begin
a = get(r, "a[$(dr.tank)]", 0)
p = logistic(a)
binomlogpdf(dr.density, p, dr.surv)
end
m1_ll = link(m13_1_df, link_fun, eachrow(d))
m1_ll = hcat(m1_ll...);
m2_ll = link(m13_2_df, link_fun, eachrow(d))
m2_ll = hcat(m2_ll...);
compare([m1_ll, m2_ll], :waic, mnames=["m13.1", "m13.2"])
Code 13.5
Random.seed!()
post = sample(resetrange(m13_2_ch), 10000)
post_df = DataFrame(post)
propsurv_est = [
logistic(mean(post_df[:,"a[$i]"]))
for i ∈ 1:nrow(d)
]
scatter(propsurv_est, mc=:white, xlab="tank", ylab="proportion survival", ylim=(-0.05, 1.05))
scatter!(d.propsurv, mc=:blue, ms=3)
hline!([mean(logistic.(post_df.ā))], ls=:dash, c=:black)
vline!([16.5, 32.5], c=:black)
annotate!([
(8, 0, ("small tanks", 10)),
(16+8, 0, ("medium tanks", 10)),
(32+8, 0, ("large tanks", 10))
])
Code 13.6
p1 = plot(xlim=(-3, 4), xlab="log-odds survive", ylab="Density")
for r ∈ first(eachrow(post_df), 100)
plot!(Normal(r.ā, r.σ), c=:black, alpha=0.2)
end
sim_tanks = @. rand(Normal(post_df.ā[1:8000], post_df.σ[1:8000]));
p2 = plot(xlab="probability survive", ylab="Density", xlim=(-0.1, 1.1))
density!(logistic.(sim_tanks), lw=2)
plot(p1, p2, size=(800, 400), margin=2mm)
Code 13.7
ā = 1.5
σ = 1.5
nponds = 60
Ni = repeat([5, 10, 25, 35], inner=15);
Code 13.8
Random.seed!(5005)
a_pond = rand(Normal(ā, σ), nponds);
Code 13.9
dsim = DataFrame(pond=1:nponds, Ni=Ni, true_a=a_pond);
Code 13.10
Doesn't make much sense in Julia, but anyways
typeof(1:3), typeof([1,2,3])
Code 13.11
Random.seed!(1)
dsim.Si = @. rand(Binomial(dsim.Ni, logistic(dsim.true_a)));
Code 13.12
dsim.p_nopool = dsim.Si ./ dsim.Ni;
Code 13.13
@model function m13_3(Si, Ni, pond)
σ ~ Exponential()
ā ~ Normal(0, 1.5)
a_pond ~ filldist(Normal(ā, σ), nponds)
p = logistic.(a_pond)
@. Si ~ Binomial(Ni, p)
end
Random.seed!(1)
m13_3_ch = sample(m13_3(dsim.Si, dsim.Ni, dsim.pond), NUTS(), 1000)
m13_3_df = DataFrame(m13_3_ch);
Code 13.14
precis(m13_3_df)
Code 13.15
dsim.p_partpool = [
mean(logistic.(m13_3_df[:,"a_pond[$i]"]))
for i ∈ 1:nponds
];
Code 13.16
dsim.p_true = logistic.(dsim.true_a);
Code 13.17
nopool_error = @. abs(dsim.p_nopool - dsim.p_true)
partpool_error = @. abs(dsim.p_partpool - dsim.p_true);
Code 13.18
scatter(nopool_error, xlab="pond", ylab="absolute error")
scatter!(partpool_error, mc=:white)
Code 13.19
dsim.nopool_error = nopool_error
dsim.partpool_error = partpool_error
gb = groupby(dsim, :Ni)
nopool_avg = combine(gb, :nopool_error => mean)
partpool_avg = combine(gb, :partpool_error => mean);
nopool_avg, partpool_avg
Code 13.20
ā = 1.5
σ = 1.5
nponds = 60
Ni = repeat([5, 10, 25, 35], inner=15)
a_pond = rand(Normal(ā, σ), nponds)
dsim = DataFrame(pond=1:nponds, Ni=Ni, true_a=a_pond)
dsim.Si = @. rand(Binomial(dsim.Ni, logistic(dsim.true_a)))
dsim.p_nopool = dsim.Si ./ dsim.Ni
m13_3_ch = sample(m13_3(dsim.Si, dsim.Ni, dsim.pond), NUTS(), 1000)
m13_3_df = DataFrame(m13_3_ch)
dsim.p_partpool = [
mean(logistic.(m13_3_df[:,"a_pond[$i]"]))
for i ∈ 1:nponds
]
dsim.p_true = logistic.(dsim.true_a)
nopool_error = @. abs(dsim.p_nopool - dsim.p_true)
partpool_error = @. abs(dsim.p_partpool - dsim.p_true)
scatter(nopool_error, xlab="pond", ylab="absolute error")
scatter!(partpool_error, mc=:white)
Code 13.21
d = DataFrame(CSV.File("data/chimpanzees.csv", delim=';'));
d.treatment = 1 .+ d.prosoc_left .+ 2*d.condition;
Random.seed!(13)
@model function m13_4(pulled_left, actor, block_id, treatment)
σ_a ~ Exponential()
σ_g ~ Exponential()
ā ~ Normal(0, 1.5)
actors_count = length(levels(actor))
blocks_count = length(levels(block_id))
treats_count = length(levels(treatment))
a ~ filldist(Normal(ā, σ_a), actors_count)
g ~ filldist(Normal(0, σ_g), blocks_count)
b ~ filldist(Normal(0, 0.5), treats_count)
p = @. logistic(a[actor] + g[block_id] + b[treatment])
@. pulled_left ~ Binomial(1, p)
end
m13_4_ch = sample(m13_4(d.pulled_left, d.actor, d.block, d.treatment), NUTS(), 4000)
m13_4_df = DataFrame(m13_4_ch);
Code 13.22
precis(m13_4_df)
coeftab_plot(m13_4_df, size=(800,600))
Code 13.23
Random.seed!(14)
@model function m13_5(pulled_left, actor, treatment)
σ_a ~ Exponential()
ā ~ Normal(0, 1.5)
actors_count = length(levels(actor))
treats_count = length(levels(treatment))
a ~ filldist(Normal(ā, σ_a), actors_count)
b ~ filldist(Normal(0, 0.5), treats_count)
p = @. logistic(a[actor] + b[treatment])
@. pulled_left ~ Binomial(1, p)
end
m13_5_ch = sample(m13_5(d.pulled_left, d.actor, d.treatment), NUTS(), 4000)
m13_5_df = DataFrame(m13_5_ch);
Code 13.24
l_fun = (r, dr) -> begin
a = get(r, "a[$(dr.actor)]", 0)
g = get(r, "g[$(dr.block)]", 0)
b = get(r, "b[$(dr.treatment)]", 0)
p = logistic(a + g + b)
binomlogpdf(1, p, dr.pulled_left)
end
m13_4_ll = link(m13_4_df, l_fun, eachrow(d))
m13_4_ll = hcat(m13_4_ll...)
l_fun = (r, dr) -> begin
a = get(r, "a[$(dr.actor)]", 0)
b = get(r, "b[$(dr.treatment)]", 0)
p = logistic(a + b)
binomlogpdf(1, p, dr.pulled_left)
end
m13_5_ll = link(m13_5_df, l_fun, eachrow(d))
m13_5_ll = hcat(m13_5_ll...);
compare([m13_4_ll, m13_5_ll], :waic, mnames=["m4", "m5"])
Code 13.25
Random.seed!(15)
@model function m13_6(pulled_left, actor, block_id, treatment)
σ_a ~ Exponential()
σ_g ~ Exponential()
σ_b ~ Exponential()
ā ~ Normal(0, 1.5)
actors_count = length(levels(actor))
blocks_count = length(levels(block_id))
treats_count = length(levels(treatment))
a ~ filldist(Normal(ā, σ_a), actors_count)
g ~ filldist(Normal(0, σ_g), blocks_count)
b ~ filldist(Normal(0, σ_b), treats_count)
p = @. logistic(a[actor] + g[block_id] + b[treatment])
@. pulled_left ~ Binomial(1, p)
end
m13_6_ch = sample(m13_6(d.pulled_left, d.actor, d.block, d.treatment), NUTS(), 4000)
m13_6_df = DataFrame(m13_6_ch);
precis(m13_4_df[:,r"b"])
precis(m13_6_df[:,r"b"])
Code 13.26
@model function m13_7(N)
v ~ Normal(0, 3)
x ~ Normal(0, exp(v))
end
Random.seed!(5)
m13_7_ch = sample(m13_7(1), NUTS(), 1000)
Code 13.27
@model function m13_7nc(N)
v ~ Normal(0, 3)
z ~ Normal()
x = z * exp(v)
end
Random.seed!(5)
m13_7nc_ch = sample(m13_7nc(1), NUTS(), 1000)
Code 13.28
There is no way to get amount of divergent samples, but they could be estimated by comparing ess
values from the chain
Random.seed!(13)
m13_4b_ch = sample(m13_4(d.pulled_left, d.actor, d.block, d.treatment), NUTS(0.95, init_ϵ=0.1), 4000)
t = ess_rhat(m13_4_ch)
ess_4 = t[:,:ess]
t = ess_rhat(m13_4b_ch)
ess_4b = t[:,:ess]
plot(ess_4, lw=2, label="ESS m13_4")
plot!(ess_4b, lw=2, label="ESS m13_4b")
Code 13.29
Random.seed!(13)
@model function m13_4nc(pulled_left, actor, block_id, treatment)
σ_a ~ Exponential()
σ_g ~ Exponential()
ā ~ Normal(0, 1.5)
actors_count = length(levels(actor))
blocks_count = length(levels(block_id))
treats_count = length(levels(treatment))
b ~ filldist(Normal(0, 0.5), treats_count)
z ~ filldist(Normal(), actors_count)
x ~ filldist(Normal(), blocks_count)
a = @. ā + σ_a*z
g = σ_g*x
p = @. logistic(a[actor] + g[block_id] + b[treatment])
@. pulled_left ~ Binomial(1, p)
end
m13_4nc_ch = sample(m13_4nc(d.pulled_left, d.actor, d.block, d.treatment), NUTS(), 4000);
Code 13.30
t = ess_rhat(m13_4_ch)
ess_4 = t[:,:ess]
t = ess_rhat(m13_4nc_ch)
ess_4nc = t[:,:ess]
lims = extrema(vcat(ess_4, ess_4nc)) .+ (-100, 100)
plot(xlim=lims, ylims=lims, xlab="n_eff (centered)", ylab="n_eff (non-centered)", size=(500,500))
scatter!(ess_4, ess_4nc)
plot!(identity, c=:gray, s=:dash)
Code 13.31
chimp = 2
d_pred = DataFrame(
actor = fill(chimp, 4),
treatment = 1:4,
block = fill(1, 4)
)
l_fun = (r, dr) -> begin
a = get(r, "a[$(dr.actor)]", 0)
g = get(r, "g[$(dr.block)]", 0)
b = get(r, "b[$(dr.treatment)]", 0)
logistic(a + g + b)
end
p = link(m13_4_df, l_fun, eachrow(d_pred))
p = hcat(p...)
p_μ = mean.(eachcol(p))
p_ci = PI.(eachcol(p));
Code 13.32
post = sample(resetrange(m13_4_ch), 2000)
post = DataFrame(post)
describe(post)
Code 13.33
density(post."a[5]")
Code 13.34
p_link = (treatment, actor, block_id) -> begin
logodds =
getproperty(post, "a[$actor]") +
getproperty(post, "b[$block_id]") +
getproperty(post, "g[$treatment]")
logistic.(logodds)
end
Code 13.35
p_raw = p_link.(1:4, 2, 1)
p_raw = hcat(p_raw...)
p_μ = mean.(eachcol(p_raw))
p_ci = PI.(eachcol(p_raw));
Code 13.36
p_link_abar = treatment -> begin
logodds =
post.ā + getproperty(post, "b[$treatment]")
logistic.(logodds)
end
Code 13.37
p_raw = p_link_abar.(1:4)
p_raw = hcat(p_raw...)
p_μ = mean.(eachcol(p_raw))
p_ci = PI.(eachcol(p_raw))
p_ci = vcat(p_ci'...)
plot(xlab="treatment", ylab="proportion pulled left", title="average actor", ylim=(0, 1))
plot!(["R/N", "L/N", "R/P", "L/P"], [p_μ p_μ], fillrange=p_ci, fillalpha=0.2, c=:black, lw=1.5)
Code 13.38
Random.seed!(1)
a_sim = rand.(Normal.(post.ā, post.σ_a))
p_link_asim = treatment -> begin
logodds =
a_sim + getproperty(post, "b[$treatment]")
logistic.(logodds)
end
p_raw_asim = p_link_asim.(1:4)
p_raw_asim = hcat(p_raw_asim...)
p_μ = mean.(eachcol(p_raw_asim))
p_ci = PI.(eachcol(p_raw_asim))
p_ci = vcat(p_ci'...)
plot(xlab="treatment", ylab="proportion pulled left", title="marginal of actor", ylim=(0, 1))
plot!(["R/N", "L/N", "R/P", "L/P"], [p_μ p_μ], fillrange=p_ci, fillalpha=0.2, c=:black, lw=1.5)
Code 13.39
p = plot(xlab="treatment", ylab="proportion pulled left", title="simulated actors", ylim=(0, 1))
for r in first(eachrow(p_raw_asim), 100)
plot!(["R/N", "L/N", "R/P", "L/P"], r, c=:black, alpha=0.2)
end
p