In [1]:
using Turing
using DataFrames
using CSV
using Random
using Distributions
using StatisticalRethinking
using StatisticalRethinking: link
using StatisticalRethinkingPlots
using StatsPlots
using StatsBase
using Logging
using LinearAlgebra

default(label=false);
Logging.disable_logging(Logging.Warn);

16.1 Geometric people

Code 16.1

In [2]:
d = DataFrame(CSV.File("data/Howell1.csv"))
d.w = d.weight ./ mean(d.weight)
d.h = d.height ./ mean(d.height);

Code 16.2

In [3]:
@model function m16_1(w, h)
    σ ~ Exponential()
    k ~ Exponential(0.5)
    p ~ Beta(2, 18)
    μ = @. log(π * k * p^2 * h^3)
    @. w ~ LogNormal(μ, σ)
end

Random.seed!(1)
m16_1_ch = sample(m16_1(d.w, d.h), NUTS(), 1000)
m16_1_df = DataFrame(m16_1_ch);

Code 16.3

In [4]:
h_seq = range(0, maximum(d.h), length=30)

function rx_dist(r, x)
    μ = log(π * r.k * r.p^2 * x^3)
    LogNormal(μ, r.σ)
end

w_sim = simulate(m16_1_df, rx_dist, h_seq);
w_sim = hcat(w_sim...)'
w_μ = mean.(eachcol(w_sim))
w_CI = PI.(eachcol(w_sim))
w_CI = hcat(w_CI...)'

scatter(d.h, d.w, xlab="height (scaled)", ylab="weight (scaled)", 
    xlim=(0, maximum(d.h)+.1), ylim=(0, maximum(d.w)+.1))
plot!(h_seq, [w_μ w_μ], fillrange=w_CI, c=:black, fillalpha=0.2)
Out[4]:

16.2 Hidden minds and observed behaviour

Code 16.4

In [5]:
d = DataFrame(CSV.File("data/Boxes.csv"))
precis(d)
┌────────────────┬───────────────────────────────────────────────┐
│          param    mean     std  5.5%  50%  94.5%    histogram │
├────────────────┼───────────────────────────────────────────────┤
│              y │ 2.1208   0.728   1.0  2.0    3.0  ▄▁▁▁▁█▁▁▁▁▆ │
│         gender │ 1.5056  0.5004   1.0  2.0    2.0  █▁▁▁▁▁▁▁▁▁█ │
│            age │ 8.0302  2.4979   5.0  8.0   13.0  ▂█▅▇▆▆▅▃▃▃▁ │
│ majority_first │ 0.4849  0.5002   0.0  0.0    1.0  █▁▁▁▁▁▁▁▁▁█ │
│        culture │  3.752  1.9603   1.0  3.0    8.0     ▄▂█▃▃▃▁▂ │
└────────────────┴───────────────────────────────────────────────┘

Code 16.5

In [6]:
Dict(
    k => v / nrow(d)
    for (k, v) in countmap(d.y)
)
Out[6]:
Dict{Int64, Float64} with 3 entries:
  2 => 0.45628
  3 => 0.332273
  1 => 0.211447

Code 16.6

In [7]:
Random.seed!(3)
# number of children
N = 30 
# half are random
y₁ = rand(1:3, N >> 1)
# half follow majority
y₂ = fill(2, N >> 1)
# combine and shuffle y1 and y2
y = shuffle(vcat(y₁, y₂))

mean(y .== 2)
Out[7]:
0.6333333333333333

Code 16.7

In [8]:
@model function m16_2(N, y, majority_first)
    p ~ Dirichlet(5, 4)
    
    phi = [
        y .== 2,  # majority
        y .== 3,  # minority
        y .== 1,  # maverick
        fill(1/3, N),  # random
        [ 
            mf ? a : b 
            for (mf, a, b)  zip(majority_first .> 0, y .== 2, y .== 3)
        ]
    ]
    lphi = [
        log(p_v) .+ log.(phi_v)
        for (p_v, phi_v) in zip(p, phi)
    ]
    lphi = hcat(lphi...)
    Turing.@addlogprob! sum(map(logsumexp, eachrow(lphi)))
end
Out[8]:
m16_2 (generic function with 2 methods)

Code 16.8

In [9]:
Random.seed!(1)
m16_2_ch = sample(m16_2(nrow(d), d.y, d.majority_first), NUTS(200, 0.65, init_ϵ=0.5), 1000)
m16_2_df = DataFrame(m16_2_ch);

coeftab_plot(m16_2_df, pars_names=("1 Majority","2 Minority","3 Maverick","4 Random",
    "5 Follow First"))
Out[9]:

16.3 Ordinary differential nut cracking

Code 16.9

In [10]:
d = DataFrame(CSV.File("data/Panda_nuts.csv"));
describe(d)
Out[10]:

7 rows × 7 columns

variablemeanminmedianmaxnmissingeltype
SymbolUnion…AnyUnion…AnyInt64DataType
1chimpanzee11.261919.0220Int64
2age8.738138.0160Int64
3sexfm0String1
4hammerGwood0String7
5nuts_opened12.071405.0770Int64
6seconds25.81552.520.0135.00Float64
7helpNy0String1

Code 16.10

In [11]:
N = 10_000
ϕ = rand(LogNormal(log(1), 0.1), N)
k = rand(LogNormal(log(2), 0.25), N)
θ = rand(LogNormal(log(5), 0.25), N)

max_age = maximum(d.age)

p1 = plot(xlim=(0, 1.5*max_age), ylim=(0, 1), xlab="age", ylab="body mass")
p2 = plot(xlim=(0, 1.5*max_age), ylim=(0, 1.2), xlab="age", ylab="nuts per second")

for i  1:20
    plot!(p1, x -> 1 - exp(-k[i]*x/max_age), c=:gray)
    plot!(p2, x -> ϕ[i] * (1-exp(-k[i]*x/max_age))^θ[i], c=:gray)
end


plot(p1, p2)
Out[11]:

Code 16.11

In [12]:
dat_list = (
    n = d.nuts_opened,
    age = d.age ./ max_age,
    seconds = d.seconds,
)

@model function m16_4(n, age, seconds)
    ϕ ~ LogNormal(log(1), 0.1)
    k ~ LogNormal(log(2), 0.25)
    θ ~ LogNormal(log(5), 0.25)
    λ = @. seconds * ϕ * (1-exp(-k*age))^θ
    @. n ~ Poisson(λ)
end

Random.seed!(3)
m16_4_ch = sample(m16_4(dat_list...), NUTS(100, 0.65, init_ϵ=0.025), 10000)
m16_4_df = DataFrame(m16_4_ch);

Code 16.12

In [13]:
p = plot(xlim=(0, max_age), ylim=(-0.1, 1.5), xlab="age", ylab="nuts per second")

pts = dat_list.n ./ dat_list.seconds
pts_size = standardize(ZScoreTransform, dat_list.seconds) * 3
scatter!(d.age, pts, ms=pts_size)

for r  first(eachrow(m16_4_df), 30)
    plot!(x -> r.ϕ*(1-exp(-r.k*x/max_age))^r.θ, c=:gray)
end
p
Out[13]:

16.4 Population dynamics

Code 16.13

In [14]:
d = DataFrame(CSV.File("data/Lynx_Hare.csv"))

plot(xlab="year", ylab="thousands of pelts")
plot!(d.Year, d.Hare, lw=1.5, c=:black, lab="Lepus")
scatter!(d.Year, d.Hare, c=:black)
plot!(d.Year, d.Lynx, lw=1.5, c=:blue, lab="Lynx")
scatter!(d.Year, d.Lynx, c=:blue)
Out[14]:

Code 16.14

In [15]:
function sim_lynx_hare(n_steps, init, θ, dt=.002)
    L = Vector{Float64}()
    H = Vector{Float64}()
    l = init[1]
    h = init[2]
    for i  1:n_steps
        push!(L, l)
        push!(H, h)
        hh = h + dt * h * (θ[1] - θ[2] * l)
        ll = l + dt * l * (θ[3]*h - θ[4])
        l = ll
        h = hh
    end
    (L, H)
end
Out[15]:
sim_lynx_hare (generic function with 2 methods)

Code 16.15

In [16]:
θ = [0.5, 0.05, 0.025, 0.5]
z = sim_lynx_hare(10_000, [d.Lynx[1], d.Hare[1]], θ);

plot(z[1], lw=2, xlab="time", ylab="number (thousands)", c=:blue)
plot!(z[2], lw=2, c=:black)
Out[16]:

Code 16.16

In [17]:
N = 10_000
Ht = 10_000
p = rand(Beta(2, 18), N)
h = [rand(Binomial(Ht, pv)) for pv in p]

density(h ./ 1000, xlab="thousand of pelts", lw=2)
Out[17]:

Code 16.17

In [18]:
using DifferentialEquations
In [19]:
function lynx_hare!(dLH, LH, θ, t)
    L, H = LH
    bh, mh, bl, ml = θ

    dLH[1] = dL = (bl * H - ml) * L
    dLH[2] = dH = (bh - mh * L) * H
end


@model function lynx_hare_model(N, L, H)
    b_h ~ TruncatedNormal(1, 0.5, 0.0, Inf)
    b_l ~ TruncatedNormal(0.05, 0.05, 0.0, Inf)
    m_h ~ TruncatedNormal(0.05, 0.05, 0.0, Inf)
    m_l ~ TruncatedNormal(1, 0.5, 0.0, Inf)
    
    σ_h ~ Exponential()
    σ_l ~ Exponential()
    h₁ ~ LogNormal(log(10), 1)
    l₁ ~ LogNormal(log(10), 1)
    p_h ~ Beta(40, 200)
    p_l ~ Beta(40, 200)
    
    LH₁ = [l₁, h₁]
    θ = [b_h, m_h, b_l, m_l]
    prob = ODEProblem(lynx_hare!, LH₁, N-1, θ)
    sol = solve(prob, saveat=1)
    if length(sol.u) < N
        Turing.@addlogprob! -Inf
    else
        μ_l = map(first, sol.u[1:N]) .* p_l
        μ_h = map(last, sol.u[1:N]) .* p_h
        # sometime solution might end up with negative population :)
        if all(μ_l .> 0.0) && all(μ_h .> 0.0)
            logμ_l = log.(μ_l)
            logμ_h = log.(μ_h)
            @. L ~ LogNormal(logμ_l, σ_l)
            @. H ~ LogNormal(logμ_h, σ_h)
        else
            Turing.@addlogprob! -Inf
        end
    end
end

Code 16.18

In [20]:
Random.seed!(1)
m16_5_ch = sample(lynx_hare_model(nrow(d), d.Lynx, d.Hare), NUTS(), 4000)
m16_5_df = DataFrame(m16_5_ch);

Code 16.19

In [23]:
# from row of parameters, get population of Hare and Lynx
function row_to_population(r, N)
    LH₁ = [r.l₁, r.h₁]
    θ = [r.b_h, r.m_h, r.b_l, r.m_l]
    prob = ODEProblem(lynx_hare!, LH₁, N-1, θ)
    sol = solve(prob, saveat=1)
    L = map(first, sol.u[1:N])
    H = map(last, sol.u[1:N])
    L, H
end

p = plot(xlab="year", ylab="thousands of pelts")
plot!(d.Year, d.Hare, lw=1.5, c=:black, lab="Lepus")
scatter!(d.Year, d.Hare, c=:black)
plot!(d.Year, d.Lynx, lw=1.5, c=:blue, lab="Lynx")
scatter!(d.Year, d.Lynx, c=:blue)

for r in first(eachrow(m16_5_df), 21)
    L, H = row_to_population(r, nrow(d))
    L .*= r.p_l
    H .*= r.p_h
    
    plot!(d.Year, L, c=:blue, alpha=0.2)
    plot!(d.Year, H, c=:black, alpha=0.2)
end
p
Out[23]:

Code 16.20

In [24]:
p = plot(xlab="year", ylab="thousands animals")

for r in first(eachrow(m16_5_df), 21)
    L, H = row_to_population(r, nrow(d))
    plot!(d.Year, L, c=:blue, alpha=0.2)
    plot!(d.Year, H, c=:black, alpha=0.2)
end
p
Out[24]:
In [ ]: