In [1]:
using Turing
using DataFrames
using CSV
using Random
using Distributions
using StatisticalRethinking
using StatisticalRethinking: link
using StatisticalRethinkingPlots
using ParetoSmooth
using StatsPlots
using StatsBase
using FreqTables
using Logging

default(label=false);
#Logging.disable_logging(Logging.Warn);

12.1 Over-dispersed counts

Code 12.1

In [2]:
# Alias parameterizations by mean μ and concentration ν,
# see: https://en.wikipedia.org/wiki/Beta_distribution#Mean_and_sample_size
function Beta2(μ, ν)
    α = μ * ν
    β = (1 - μ) * ν
    Beta(α, β)
end

BetaBinomial2(n, μ, ν) = BetaBinomial(n, μ * ν, (1 - μ) * ν)

 = 0.5
θ = 5
plot(Beta2(, θ), xlab="probability", ylab="Density")
Out[2]:

Code 12.2

In [3]:
# Load UCB admissions data and add a gender index (1 = male, 2 = female)
ucb = CSV.File("data/UCBadmit.csv", skipto=2,
    header=[:id, :dept, :gender, :admit, :reject, :applications])
d = DataFrame(ucb)

d.gid = [g == "male" ? 1 : 2 for g in d.gender]

# Beta-binomial model of admissions with a per-gender intercept (SR model 12.1).
# A: admit counts, N: application counts, gid: gender index (1/2).
@model function m12_1(A, N, gid)
    a ~ MvNormal([0, 0], 1.5)
    # per-row mean admission probability
    # (the p̄ identifier was lost in the notebook export; restored here)
    p̄ = @. logistic(a[gid])
    ϕ ~ Exponential(1)
    θ = ϕ + 2  # concentration, shifted so that θ > 2
    @. A ~ BetaBinomial2(N, p̄, θ)
end

# Draw 1000 posterior samples with NUTS and collect them into a DataFrame
Random.seed!(1)
m12_1_model = m12_1(d.admit, d.applications, d.gid)
m12_1_ch = sample(m12_1_model, NUTS(), 1000)
m12_1_df = DataFrame(m12_1_ch);
┌ Info: Found initial step size
│   ϵ = 0.8
└ @ Turing.Inference /home/shmuma/.julia/packages/Turing/nfMhU/src/inference/hmc.jl:188
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
Sampling: 100%|█████████████████████████████████████████| Time: 0:00:00

Code 12.3

In [4]:
# Recover θ from ϕ and compute the gender contrast a[1] - a[2]
m12_1_df[!, :θ] = m12_1_df.ϕ .+ 2
m12_1_df[!, :da] = m12_1_df[!, "a[1]"] .- m12_1_df[!, "a[2]"]
precis(m12_1_df)
┌───────┬───────────────────────────────────────────────────────────┐
│ param     mean     std     5.5%      50%   94.5%       histogram │
├───────┼───────────────────────────────────────────────────────────┤
│  a[1] │ -0.4218  0.3935  -1.0357  -0.4374  0.2071  ▁▁▂▄▆█▇▆▄▂▁▁▁▁ │
│  a[2] │  -0.326  0.4014  -0.9482  -0.3262  0.3537          ▁▁▅█▃▁ │
│     ϕ │  1.0505  0.8115   0.1089   0.8704  2.4536     ██▆▄▂▁▁▁▁▁▁ │
│     θ │  3.0505  0.8115   2.1089   2.8704  4.4536     ██▆▄▂▁▁▁▁▁▁ │
│    da │ -0.0958  0.5832   -0.994  -0.0992  0.8353       ▁▁▂▆██▄▁▁ │
└───────┴───────────────────────────────────────────────────────────┘

Code 12.4

In [5]:
gid = 2
# posterior mean admission probability and concentration for this gender
# (the p̄ identifier was lost in the notebook export; restored here)
p̄ = m12_1_df[:, "a[$gid]"] .|> logistic |> mean
θ = mean(m12_1_df.θ)
plot(Beta2(p̄, θ), lw=2, c=:black, ylab="Density", 
    xlab="probability admit", ylims=(0, 3))

# overlay 50 posterior draws (the `in` keyword was lost in the export)
for (a, θ) in first(zip(m12_1_df[:, "a[$gid]"], m12_1_df.θ), 50)
    plot!(Beta2(logistic(a), θ), c=:black, alpha=0.2)
end
gender = gid == 2 ? "female" : "male"
title!("distribution of $gender admission rates")
Out[5]:

Code 12.5

In [6]:
Random.seed!(1)

adm_rate = d.admit ./ d.applications

# posterior-predictive sampler for one posterior row r and data pair (N, gid)
# (the p̄ identifier was lost in the notebook export; restored here)
fun = (r, (N, gid)) -> begin
    p̄ = logistic(get(r, "a[$gid]", 0))
    rand(BetaBinomial2(N, p̄, r.θ))
end

pred_adm = link(m12_1_df, fun, zip(d.applications, d.gid))
pred_rates = pred_adm ./ d.applications

μ_adm = mean.(pred_rates)
σ = std.(pred_rates) ./ 2
ci_adm = PI.(pred_rates)

scatter(adm_rate, xlab="case", ylab="A", title="Posterior validation check")
scatter!(μ_adm, mc=:white, yerr=σ)
scatter!(first.(ci_adm), shape=:cross, c=:black)
scatter!(last.(ci_adm), shape=:cross, c=:black)
Out[6]:

Code 12.6

In [7]:
# Load Kline tool data; standardize log-population and index contact level
d = CSV.File("data/Kline.csv") |> DataFrame
d.P = standardize(ZScoreTransform, log.(d.population))
d.contact_id = [c == "high" ? 2 : 1 for c in d.contact]

# Gamma-Poisson (negative binomial) model of tool counts (SR model 12.2).
# T: total tools, P: population, cid: contact index (1 = low, 2 = high).
@model function m12_2(T, P, cid)
    g ~ Exponential()
    ϕ ~ Exponential()
    a ~ MvNormal([1,1], 1)
    b₁ ~ Exponential()
    b₂ ~ Exponential()
    b = [b₁, b₂]
    # expected tool count from the innovation/loss model: exp(a)·P^b / g
    λ = @. exp(a[cid])*(P^b[cid]) / g
    # convert (λ, ϕ) to NegativeBinomial's (r, p) parameterization
    p = 1/(ϕ+1)
    r = λ/ϕ
    # clamp to keep the likelihood finite during HMC warmup
    clamp!(r, 0.01, Inf)
    p = clamp(p, 0.01, 1)
    @. T ~ NegativeBinomial(r, p)
end

# Sample the gamma-Poisson model and summarize the posterior
Random.seed!(1)
m12_2_model = m12_2(d.total_tools, d.population, d.contact_id)
m12_2_ch = sample(m12_2_model, NUTS(), 1000)
m12_2_df = m12_2_ch |> DataFrame
precis(m12_2_df)
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
┌ Info: Found initial step size
│   ϵ = 0.0001953125
└ @ Turing.Inference /home/shmuma/.julia/packages/Turing/nfMhU/src/inference/hmc.jl:188
Sampling:  22%|█████████▏                               |  ETA: 0:00:01┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
Sampling: 100%|█████████████████████████████████████████| Time: 0:00:01
┌───────┬────────────────────────────────────────────────────────┐
│ param    mean     std     5.5%     50%   94.5%      histogram │
├───────┼────────────────────────────────────────────────────────┤
│  a[1] │ 0.8888  0.7358  -0.3216  0.8937  1.9908   ▁▁▁▁▃▆█▇▆▂▁▁ │
│  a[2] │ 1.0038  0.9476  -0.5036  1.0289  2.4515  ▁▁▁▂▄▆██▇▄▂▁▁ │
│    b₁ │ 0.2572  0.0485   0.1801  0.2571  0.3335       ▁▃▇█▃▁▁▁ │
│    b₂ │ 0.2749   0.122   0.0721  0.2746  0.4691        ▃▅██▃▁▁ │
│     g │ 1.1112  0.7524   0.2547  0.9492   2.577     ▅█▆▃▂▁▁▁▁▁ │
│     ϕ │ 1.2824  0.9127   0.1805  1.0914  2.9921        █▇▃▁▁▁▁ │
└───────┴────────────────────────────────────────────────────────┘

12.2 Zero-inflated outcomes

Code 12.7

In [8]:
# define parameters
prob_drink = 0.2 # 20% of days
rate_work = 1    # average 1 manuscript per day

# sample one year of production
N = 365

# simulate days monks drink
Random.seed!(365)
drink = rand(Binomial(1, prob_drink), N)

# simulate manuscripts completed: zero on drinking days, Poisson otherwise
work = rand(Poisson(rate_work), N)
y = (1 .- drink) .* work;

Code 12.8

In [9]:
# Histogram of output; highlight the zeros that came from working days
p = histogram(y, xlab="manuscripts completed", ylab="Frequency")
zeros_drink = sum(drink)
zeros_work = sum((y .== 0) .& (drink .== 0))
bar!([0], [zeros_work], bar_width=0.3)
p
Out[9]:

Code 12.9

In [10]:
# Based on this discussion
# https://github.com/StatisticalRethinkingJulia/SR2TuringPluto.jl/issues/1
import Distributions: logpdf, rand

# Zero-inflated Poisson: with probability w the outcome is a structural
# zero, otherwise it is a Poisson(λ) draw.
struct ZIPoisson{T1,T2} <: DiscreteUnivariateDistribution
    λ::T1
    w::T2
end

# Log-density: a zero can come from either the zero process or the Poisson.
function logpdf(d::ZIPoisson, y::Int)
    log1w = log(1 - d.w)
    if y == 0
        return logsumexp([log(d.w), log1w - d.λ])
    end
    return log1w + logpdf(Poisson(d.λ), y)
end

# Single draw: structural zero with probability w, else a Poisson draw.
rand(d::ZIPoisson) = rand() <= d.w ? 0 : rand(Poisson(d.λ))

# N independent draws.
rand(d::ZIPoisson, N::Int) = [rand(d) for _ in 1:N]
Out[10]:
rand (generic function with 227 methods)
In [11]:
# Zero-inflated Poisson model of manuscript production (SR model 12.3).
@model function m12_3(y)
    ap ~ Normal(-1.5, 1)  # logit of the drinking (structural-zero) probability
    al ~ Normal(1, 0.5)   # log of the manuscript rate
    λ = exp(al)
    p = logistic(ap)
    y .~ ZIPoisson(λ, p)
end

# Sample the zero-inflated Poisson model and summarize the posterior
m12_3_ch = sample(m12_3(y), NUTS(), 1000)
m12_3_df = m12_3_ch |> DataFrame
precis(m12_3_df)
┌ Info: Found initial step size
│   ϵ = 0.2
└ @ Turing.Inference /home/shmuma/.julia/packages/Turing/nfMhU/src/inference/hmc.jl:188
Sampling: 100%|█████████████████████████████████████████| Time: 0:00:00
┌───────┬─────────────────────────────────────────────────────────┐
│ param     mean     std     5.5%      50%    94.5%    histogram │
├───────┼─────────────────────────────────────────────────────────┤
│    al │  -0.038  0.0813  -0.1715  -0.0334   0.0947  ▁▁▁▃▅▇█▇▃▂▁ │
│    ap │ -1.5905  0.4556  -2.4467   -1.504  -1.0205       ▁▁▃▆█▁ │
└───────┴─────────────────────────────────────────────────────────┘

Code 12.10

In [12]:
# posterior means on the natural scale: P(drink) and manuscripts per day
(mean(logistic.(m12_3_df.ap)), mean(exp.(m12_3_df.al)))
Out[12]:
(0.17834308374614505, 0.9658855358700561)

12.3 Ordered categorical outcomes

Code 12.12

In [13]:
# Load the trolley-problem survey data and inspect its columns
d = CSV.File("data/Trolley.csv") |> DataFrame
describe(d)
Out[13]:

12 rows × 7 columns

variablemeanminmedianmaxnmissingeltype
SymbolUnion…AnyUnion…AnyInt64DataType
1casecfaqunfrub0String7
2response4.199314.070Int64
3order16.5005116.5320Int64
4id96;43498;2990String7
5age37.48941036.0720Int64
6male0.57401801.010Int64
7eduBachelor's DegreeSome High School0String31
8action0.43333300.010Int64
9intention0.46666700.010Int64
10contact0.200.010Int64
11storyaquswi0String3
12action20.63333301.010Int64

Code 12.13

In [14]:
# distribution of the ordinal responses (1-7)
histogram(d.response, xlab="response")
Out[14]:

Code 12.14

In [15]:
# proportion of each response value, then its cumulative version
pr_k = counts(d.response) ./ length(d.response)
cum_pr_k = cumsum(pr_k)
plot(cum_pr_k, m=:o, xlab="response", ylab="cumulative proportion")
Out[15]:

Code 12.15

In [16]:
# log-cumulative-odds of each response (last is Inf: cumulative prob. = 1)
round.(logit.(cum_pr_k), digits=2)
Out[16]:
7-element Vector{Float64}:
 -1.92
 -1.27
 -0.72
  0.25
  0.89
  1.77
 Inf

Code 12.16

In [17]:
# Intercept-only ordered-logit model of responses (SR model 12.4).
@model function m12_4(R)
    # to ensure sorted cutpoints, use deltas
    Δ_cutpoints ~ filldist(Exponential(), 6)
    cutpoints = -2 .+ cumsum(Δ_cutpoints)
    R .~ OrderedLogistic(0, cutpoints)
end
Out[17]:
m12_4 (generic function with 2 methods)

Code 12.18

In [18]:
# Sample the intercept-only model and recover the cutpoints from the deltas
Random.seed!(1)
m12_4_ch = sample(m12_4(d.response), NUTS(), 1000)
m12_4_df = DataFrame(m12_4_ch)
precis(m12_4_df)
delta_means = mean.(eachcol(m12_4_df[!, r"Δ.*"]))
cutpoints = cumsum(delta_means) .- 2
┌ Info: Found initial step size
│   ϵ = 0.025
└ @ Turing.Inference /home/shmuma/.julia/packages/Turing/nfMhU/src/inference/hmc.jl:188
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
Sampling: 100%|█████████████████████████████████████████| Time: 0:00:15
┌────────────────┬───────────────────────────────────────────────────────┐
│          param    mean     std    5.5%     50%   94.5%      histogram │
├────────────────┼───────────────────────────────────────────────────────┤
│ Δ_cutpoints[1] │ 0.0832  0.0288  0.0366  0.0831  0.1298      ▁▂▅█▇▆▄▁▁ │
│ Δ_cutpoints[2] │ 0.6488  0.0211  0.6144  0.6488  0.6823       ▁▂▇█▆▂▁▁ │
│ Δ_cutpoints[3] │ 0.5486  0.0155  0.5224  0.5493  0.5736     ▁▁▃▆██▅▃▁▁ │
│ Δ_cutpoints[4] │  0.967  0.0177  0.9395  0.9668  0.9943  ▁▁▁▂▄███▆▃▁▁▁ │
│ Δ_cutpoints[5] │  0.643  0.0151  0.6189  0.6432  0.6657    ▁▁▂▄▇█▆▃▁▁▁ │
│ Δ_cutpoints[6] │ 0.8795  0.0231  0.8402    0.88  0.9175       ▁▂▄██▄▂▁ │
└────────────────┴───────────────────────────────────────────────────────┘
Out[18]:
6-element Vector{Float64}:
 -1.916848096044849
 -1.2680842467133162
 -0.7195018544090639
  0.2475040415196963
  0.8905126697478538
  1.7700339206951212

Code 12.19

In [19]:
# convert log-cumulative-odds cutpoints back to cumulative probabilities
round.(logistic.(cutpoints), digits=3)
Out[19]:
6-element Vector{Float64}:
 0.128
 0.22
 0.328
 0.562
 0.709
 0.854

Code 12.20

In [20]:
# implied probability of each response value at phi = 0
pk = pdf.(OrderedLogistic(0, cutpoints), 1:7)
round.(pk, digits=2)
Out[20]:
7-element Vector{Float64}:
 0.13
 0.09
 0.11
 0.23
 0.15
 0.15
 0.15

Code 12.21

In [21]:
# expected (mean) response value
sum(pk.*(1:7))
Out[21]:
4.199678568694903

Code 12.22

In [22]:
# shift all cutpoints by -0.5 (equivalent to increasing phi by 0.5)
pk = pdf.(OrderedLogistic(0, cutpoints .- 0.5), 1:7)
round.(pk, digits=2)
Out[22]:
7-element Vector{Float64}:
 0.08
 0.06
 0.08
 0.21
 0.16
 0.18
 0.22

Code 12.23

In [23]:
# expected response under the shifted cutpoints
sum(pk.*(1:7))
Out[23]:
4.729934288492506

Code 12.24

In [24]:
# Ordered-logit model with action (A), intention (I), contact (C) and the
# interactions of intention with action and contact (SR model 12.5).
@model function m12_5(R, A, I, C)
    # to ensure sorted cutpoints, use deltas
    Δ_cutpoints ~ filldist(Exponential(), 6)
    cutpoints = -3 .+ cumsum(Δ_cutpoints)
    
    bA ~ Normal(0, 0.5)
    bI ~ Normal(0, 0.5)
    bC ~ Normal(0, 0.5)
    bIA ~ Normal(0, 0.5)
    bIC ~ Normal(0, 0.5)
    # slope of intention depends on action and contact
    BI = @. bI + bIA*A + bIC*C
    phi = @. bA*A + bC*C + BI*I
    
    for i in eachindex(R)
        R[i] ~ OrderedLogistic(phi[i], cutpoints)
    end
end

# Sample the interaction model and summarize the posterior
Random.seed!(2)
m12_5_model = m12_5(d.response, d.action, d.intention, d.contact)
m12_5_ch = sample(m12_5_model, NUTS(), 1000)
m12_5_df = DataFrame(m12_5_ch)
precis(m12_5_df)
#cutpoints = -2 .+ cumsum(mean.(eachcol(m12_5_df[!,r"Δ.*"])))
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
┌ Info: Found initial step size
│   ϵ = 0.0125
└ @ Turing.Inference /home/shmuma/.julia/packages/Turing/nfMhU/src/inference/hmc.jl:188
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC /home/shmuma/.julia/packages/AdvancedHMC/HQHnm/src/hamiltonian.jl:47
Sampling: 100%|█████████████████████████████████████████| Time: 0:00:29
┌────────────────┬───────────────────────────────────────────────────────────┐
│          param     mean     std     5.5%      50%    94.5%      histogram │
├────────────────┼───────────────────────────────────────────────────────────┤
│             bA │ -0.4746  0.0567  -0.5595  -0.4731  -0.3832      ▁▁▂▆█▆▂▁▁ │
│             bC │ -0.3462   0.071  -0.4591  -0.3459  -0.2319    ▁▁▂▅██▅▃▁▁▁ │
│             bI │ -0.2916   0.061   -0.389  -0.2904  -0.1938       ▁▁▄██▅▂▁ │
│            bIA │ -0.4352  0.0824   -0.563  -0.4357  -0.3061  ▁▁▁▃▆███▄▂▁▁▁ │
│            bIC │ -1.2356  0.0952  -1.3861  -1.2322  -1.0772       ▁▁▁▅█▇▂▁ │
│ Δ_cutpoints[1] │  0.3634  0.0535   0.2762    0.364   0.4444       ▁▁▂▇█▅▁▁ │
│ Δ_cutpoints[2] │  0.6959   0.023   0.6608   0.6958   0.7332       ▁▂▅██▃▁▁ │
│ Δ_cutpoints[3] │  0.5947  0.0167   0.5689   0.5942   0.6209    ▁▁▂▅▇█▇▅▂▁▁ │
│ Δ_cutpoints[4] │  1.0347  0.0203   1.0025   1.0343   1.0664  ▁▁▂▃▆▇█▇▅▃▂▁▁ │
│ Δ_cutpoints[5] │  0.6708  0.0162   0.6443   0.6707    0.696   ▁▁▁▃▅██▆▃▁▁▁ │
│ Δ_cutpoints[6] │  0.9052  0.0223   0.8702   0.9042   0.9425       ▁▁▃██▆▂▁ │
└────────────────┴───────────────────────────────────────────────────────────┘

Code 12.25

In [25]:
# coefficient plot of the slope parameters
coeftab_plot(m12_5_df, pars=[:bIC, :bIA, :bC, :bI, :bA])
Out[25]:

Code 12.26

(This cell is not needed: the plot is created directly in Code 12.28 below.)

In [26]:
#p = plot(xlab="intention", ylab="probability", xlim=(0, 1), ylim=(0, 1))

Code 12.27

In [27]:
kA = 0      # value for action
kC = 0      # value for contact
kI = 0:1    # values for intention to calculate over

# map a posterior row r and an intention value i to the linear predictor phi
rI_to_phi = function (r, i)
    BI = r.bI + r.bIA * kA + r.bIC * kC
    return r.bA * kA + r.bC * kC + BI * i
end

phi = link(m12_5_df, rI_to_phi, kI);

Code 12.28

In [28]:
p = plot(xlab="intention", ylab="probability", xlim=(0, 1), ylim=(0, 1), title="action=$kA, contact=$kC")
# draw the cumulative response probabilities for 50 posterior rows
for ri in 1:50
    r = m12_5_df[ri,:]
    cutpoints = -3 .+ cumsum(r[r"Δ.*"])
    pk1 = cumsum(pdf.(OrderedLogistic(phi[1][ri], cutpoints), 1:6))
    pk2 = cumsum(pdf.(OrderedLogistic(phi[2][ri], cutpoints), 1:6))
    # the `in` keyword was lost in the notebook export; restored here
    for i in 1:6
        plot!(kI, [pk1[i], pk2[i]], c=:gray, alpha=0.2)
    end
end
p
Out[28]:

Code 12.29

In [29]:
kA = 0
kC = 1
kI = 0:1

# build the posterior ordered-logit distribution for row r and intention i
rI_to_dist = function (r, i)
    BI = r.bI + r.bIA * kA + r.bIC * kC
    phi = r.bA * kA + r.bC * kC + BI * i
    cutpoints = -3 .+ cumsum(r[r"Δ.*"])
    return OrderedLogistic(phi, cutpoints)
end

Random.seed!(1)
s = simulate(m12_5_df, rI_to_dist, kI)
histogram(first.(s), bar_width=0.5, label="I=0")
histogram!(last.(s), bar_width=0.2, label="I=1")
Out[29]:

12.4 Ordered categorical predictors

Code 12.30

In [30]:
# reload the data; note edu levels sort alphabetically, not by rank
d = DataFrame(CSV.File("data/Trolley.csv"))
levels(d.edu)
Out[30]:
8-element Vector{String31}:
 "Bachelor's Degree"
 "Elementary School"
 "Graduate Degree"
 "High School Graduate"
 "Master's Degree"
 "Middle School"
 "Some College"
 "Some High School"

Code 12.31

In [31]:
# ordinal coding of education levels, from least (1) to most (8)
edu_pairs = [
    "Elementary School" => 1,
    "Middle School" => 2,
    "Some High School" => 3,
    "High School Graduate" => 4,
    "Some College" => 5,
    "Bachelor's Degree" => 6,
    "Master's Degree" => 7,
    "Graduate Degree" => 8,
]
edu_l = Dict{String, Int}(edu_pairs)

d.edu_new = [edu_l[s] for s in d.edu];

Code 12.32

In [32]:
# 10 draws from the Dirichlet(7, 2) prior over the education deltas
Random.seed!(1805)
delta = rand(Dirichlet(7, 2), 10)
Out[32]:
7×10 Matrix{Float64}:
 0.198046   0.0815291  0.0676167  …  0.207756   0.123371   0.375937
 0.0870147  0.231992   0.21954       0.16657    0.25109    0.091998
 0.164311   0.0899774  0.195904      0.0776228  0.0819914  0.0634425
 0.204465   0.0635362  0.199463      0.0964028  0.150229   0.162127
 0.0541011  0.119333   0.0121268     0.0805194  0.201325   0.136166
 0.256943   0.310157   0.0639191  …  0.202942   0.145848   0.148425
 0.0351202  0.103475   0.24143       0.168186   0.0461463  0.0219053

Code 12.33

In [33]:
h = 3
p = plot(xlab="index", ylab="probability")
# plot each prior draw; highlight draw h
# (the `in`/`∈` keyword was lost in the notebook export; restored here)
for (idx, col) in enumerate(eachcol(delta))
    plot!(col, c=:black, alpha=0.7, lw=idx == h ? 3 : 1, m= idx == h ? :v : :o)
end
p
Out[33]:

Code 12.34

Could take 20-30 minutes...

In [34]:
# Ordered-logit model with an ordered categorical education predictor
# (SR model 12.6). E is the 1-8 education rank per respondent.
@model function m12_6(R, action, intention, contact, E)
    delta ~ Dirichlet(7, 2)
    # Prepend a zero (education level 1 contributes nothing) WITHOUT
    # mutating the sampled vector — the original used pushfirst!(delta, 0.0),
    # which mutates a value Turing/AD is tracking.
    delta_full = vcat(zero(eltype(delta)), delta)

    bA ~ Normal()
    bI ~ Normal()
    bC ~ Normal()
    bE ~ Normal()

    # sum all education's deltas: cumulative sums replace the original
    # per-row sum.(map(i -> delta[1:i], E)), avoiding a slice per observation
    sE = cumsum(delta_full)[E]
    phi = @. bE*sE + bA*action + bI*intention + bC*contact

    # use same cutpoints as before
    Δ_cutpoints ~ filldist(Exponential(), 6)
    cutpoints = -3 .+ cumsum(Δ_cutpoints)

    # the `in`/`∈` keyword was lost in the notebook export; restored here
    for i in eachindex(R)
        R[i] ~ OrderedLogistic(phi[i], cutpoints)
    end
end

# Sample m12_6; this is the slow cell (20-30 minutes on the original run)
model = m12_6(d.response, d.action, d.intention, d.contact, d.edu_new)

m12_6_ch = sample(model, NUTS(), 1000);
┌ Info: Found initial step size
│   ϵ = 0.0125
└ @ Turing.Inference /home/shmuma/.julia/packages/Turing/nfMhU/src/inference/hmc.jl:188
Sampling: 100%|█████████████████████████████████████████| Time: 0:31:46

Code 12.35

In [35]:
# posterior summary for m12_6
m12_6_df = DataFrame(m12_6_ch)
precis(m12_6_df)
┌────────────────┬────────────────────────────────────────────────────────────┐
│          param     mean     std     5.5%      50%    94.5%       histogram │
├────────────────┼────────────────────────────────────────────────────────────┤
│             bA │ -0.6981  0.0396    -0.76  -0.6987  -0.6359  ▁▁▁▁▂▄▇██▆▅▂▁▁ │
│             bC │  -0.949  0.0495   -1.027  -0.9497  -0.8697         ▁▄██▄▁▁ │
│             bE │ -0.1749  0.0868  -0.2907  -0.1828  -0.0304    ▁▁▂▅██▆▃▂▁▁▁ │
│             bI │ -0.7108  0.0365  -0.7684  -0.7115  -0.6542  ▁▁▁▃▆▇█▇▆▂▁▁▁▁ │
│       delta[1] │  0.1534  0.0967    0.028   0.1366    0.322   ▇██▇▆▄▂▁▁▁▁▁▁ │
│       delta[2] │   0.146  0.0918   0.0298    0.128   0.3089    ▅██▅▄▃▂▁▁▁▁▁ │
│       delta[3] │   0.177  0.1062   0.0405   0.1557   0.3613   ▃▇█▆▆▄▃▁▁▁▁▁▁ │
│       delta[4] │  0.1883  0.1056   0.0506   0.1781   0.3912    ▃▇██▆▅▃▂▂▁▁▁ │
│       delta[5] │  0.0641    0.06     0.01   0.0454    0.173       █▄▂▁▁▁▁▁▁ │
│       delta[6] │  0.1226  0.0758   0.0298     0.11   0.2548      ▅██▅▃▂▁▁▁▁ │
│       delta[7] │  0.1485  0.0859   0.0308   0.1347   0.3152       ▅██▇▄▃▂▁▁ │
│ Δ_cutpoints[1] │  0.0589  0.0497   0.0032   0.0478   0.1532   █▅▅▄▃▂▂▁▁▁▁▁▁ │
│ Δ_cutpoints[2] │  0.6787  0.0217   0.6444    0.679   0.7136        ▁▁▅██▄▁▁ │
│ Δ_cutpoints[3] │  0.5816  0.0174   0.5545   0.5815    0.609   ▁▁▁▃▅██▆▄▂▁▁▁ │
│ Δ_cutpoints[4] │  1.0187  0.0193   0.9892   1.0177   1.0504  ▁▁▁▂▄▇█▇▅▃▂▁▁▁ │
│ Δ_cutpoints[5] │   0.667  0.0172   0.6385   0.6668   0.6941     ▁▁▂▃▆█▇▄▂▁▁ │
│ Δ_cutpoints[6] │  0.9066  0.0233   0.8701   0.9065   0.9425       ▁▁▃▇█▅▂▁▁ │
└────────────────┴────────────────────────────────────────────────────────────┘

Code 12.36

In [36]:
# pairwise scatter plots of the posterior education deltas
delta_labels = ["Elem","MidSch","SHS","HSG","SCol","Bach","Mast","Grad"]
df = m12_6_df[!,r"delta.*"]
delta_mat = Matrix(df)

corrplot(delta_mat; seriestype=:scatter, bins=30, grid=false, ms=0.1, ma=0.8,
    size=(1000,800), label=delta_labels)
Out[36]:

Code 12.37

In [37]:
# rescale education rank to [0, 1] for the continuous-predictor comparison
d.edu_norm = standardize(UnitRangeTransform, Float64.(d.edu_new))

# Same as m12_6 but treating education as a continuous 0-1 predictor
# (SR model 12.7), for comparison with the ordered-category version.
@model function m12_7(R, action, intention, contact, edu_norm)
    bA ~ Normal()
    bI ~ Normal()
    bC ~ Normal()
    bE ~ Normal()

    phi = @. bE*edu_norm + bA*action + bI*intention + bC*contact

    # use same cutpoints as before
    Δ_cutpoints ~ filldist(Exponential(), 6)
    cutpoints = -3 .+ cumsum(Δ_cutpoints)

    # the `in`/`∈` keyword was lost in the notebook export; restored here
    for i in eachindex(R)
        R[i] ~ OrderedLogistic(phi[i], cutpoints)
    end
end

# Sample the continuous-education model and summarize the posterior
model = m12_7(d.response, d.action, d.intention, d.contact, d.edu_norm)

m12_7_ch = sample(model, NUTS(), 1000)
m12_7_df = m12_7_ch |> DataFrame
precis(m12_7_df)
┌ Info: Found initial step size
│   ϵ = 0.0125
└ @ Turing.Inference /home/shmuma/.julia/packages/Turing/nfMhU/src/inference/hmc.jl:188
Sampling: 100%|█████████████████████████████████████████| Time: 0:00:41
┌────────────────┬────────────────────────────────────────────────────────────┐
│          param     mean     std     5.5%      50%    94.5%       histogram │
├────────────────┼────────────────────────────────────────────────────────────┤
│             bA │ -0.6949  0.0458  -0.7636  -0.6963  -0.6167  ▁▁▁▂▆▆██▆▆▃▃▁▁ │
│             bC │ -0.9406  0.0565  -1.0292  -0.9431  -0.8547         ▁▁▄█▇▇▁ │
│             bE │ -0.0743    0.09  -0.2136  -0.0762   0.0596      ▁▁▃▆█▇▆█▃▁ │
│             bI │ -0.7156  0.0344  -0.7714  -0.7152  -0.6651    ▁▁▁▃▅▇█▇▄▂▁▁ │
│ Δ_cutpoints[1] │  0.1353  0.0941   0.0167    0.112   0.3148         ▆█▇▄▂▃▃ │
│ Δ_cutpoints[2] │   0.679   0.021   0.6488   0.6777   0.7136   ▁▁▂▆▇██▆▄▂▁▁▁ │
│ Δ_cutpoints[3] │  0.5787  0.0178    0.552   0.5766   0.6106     ▁▂▃▇█▆▅▃▂▁▁ │
│ Δ_cutpoints[4] │  1.0133  0.0236   0.9753   1.0147   1.0479   ▁▆▅▅███▇▆▂▁▁▁ │
│ Δ_cutpoints[5] │  0.6685  0.0158   0.6419   0.6677   0.6941     ▁▁▁▂▅█▇▄▂▁▁ │
│ Δ_cutpoints[6] │  0.9066  0.0205   0.8748   0.9073    0.939        ▁▂▅█▄▁▁▁ │
└────────────────┴────────────────────────────────────────────────────────────┘
In [ ]: