using Distributions
using StatsPlots
using StatsBase
using LaTeXStrings
using CSV
using DataFrames
using StatisticalRethinking
using StatisticalRethinking: link
using LinearAlgebra
using Logging
using Random
using Turing
# setting default attributes for plots
default(label=false)
# silence Warn-and-below log messages (e.g. sampler progress noise)
Logging.disable_logging(Logging.Warn);
Code 4.1
# Code 4.1: normality by addition — 1000 random walks of 16 uniform steps.
n = rand(Uniform(-1, 1), 1000, 16);
pos = [sum(step_row) for step_row in eachrow(n)];
density(pos)
Code 4.2
prod(1 .+ rand(Uniform(0, .1), 12))
1.9037047943702374
Code 4.3
# Code 4.3: products of many *small* effects are approximately normal.
u = Uniform(0, .1)
growth = [prod(1 .+ row) for row in eachrow(rand(u, 10000, 12))];
density(growth; label="density")
# overlay a normal with the sample's moments for comparison
μ = mean(growth)
σ = std(growth)
plot!(Normal(μ, σ); label="Normal")
Code 4.4
# Code 4.4: large multiplicative effects are skewed; tiny ones look normal.
# helper: 10000 products of 12 uniform growth factors with the given max rate
sim_growth(rate) = prod.(eachrow(1 .+ rand(Uniform(0, rate), 10000, 12)))
big = sim_growth(0.5);
small = sim_growth(0.01);
density([big, small]; layout=(2, 1))
Code 4.5
density(log.(big))
Code 4.6
# Code 4.6: grid-approximate posterior for the globe-tossing model
# (6 waters in 9 tosses, flat prior on p).
w = 6
n = 9
p_grid = range(0, 1; length=100)
bin_dens = pdf.(Binomial.(n, p_grid), w)
uni_dens = pdf.(Uniform(0, 1), p_grid);
posterior = bin_dens .* uni_dens
# normalize so the grid posterior sums to one
posterior /= sum(posterior);
Code 4.7
d = DataFrame(CSV.File("data/Howell1.csv"));
Code 4.8
describe(d)
4 rows × 7 columns
variable | mean | min | median | max | nmissing | eltype | |
---|---|---|---|---|---|---|---|
Symbol | Float64 | Real | Float64 | Real | Int64 | DataType | |
1 | height | 138.264 | 53.975 | 148.59 | 179.07 | 0 | Float64 |
2 | weight | 35.6106 | 4.25242 | 40.0578 | 62.9926 | 0 | Float64 |
3 | age | 29.3444 | 0.0 | 27.0 | 88.0 | 0 | Float64 |
4 | male | 0.472426 | 0 | 0.0 | 1 | 0 | Int64 |
Code 4.9
precis(d)
┌────────┬────────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├────────┼────────────────────────────────────────────────────────────┤ │ height │ 138.264 27.6024 81.1086 148.59 165.735 ▁▁▁▂▂▂▂▂▂██▆▁ │ │ weight │ 35.6106 14.7192 9.3607 40.0578 54.5029 ▁▃▄▄▃▂▃▆██▅▃▁ │ │ age │ 29.3444 20.7469 1.0 27.0 66.135 █▆▆▆▆▃▃▁▁ │ │ male │ 0.4724 0.4997 0.0 0.0 1.0 █▁▁▁▁▁▁▁▁▁█ │ └────────┴────────────────────────────────────────────────────────────┘
Code 4.10
d.height
544-element Vector{Float64}: 151.765 139.7 136.525 156.845 145.415 163.83 149.225 168.91 147.955 165.1 154.305 151.13 144.78 ⋮ 156.21 152.4 162.56 114.935 67.945 142.875 76.835 145.415 162.56 156.21 71.12 158.75
Code 4.11
d2 = d[d.age .>= 18,:];
Code 4.12
plot(Normal(178, 20); xlim=(100, 250))
Code 4.13
plot(Uniform(0, 50), xlim=(-10, 60), ylim=(0, 0.03))
Code 4.14
# Code 4.14: prior predictive simulation of heights.
# NOTE: `size` shadows Base.size; later cells must call Base.size explicitly.
size = 10_000
sample_μ = rand(Normal(178, 20), size)
sample_σ = rand(Uniform(0, 50), size);
# one simulated height per (μ, σ) prior draw
prior_h = rand.(Normal.(sample_μ, sample_σ));
p1 = density(sample_μ; title="μ")
p2 = density(sample_σ; title="σ")
p3 = density(prior_h; title="prior_h")
plot(p1, p2, p3, layout=@layout [a b; c])
Code 4.15
# Code 4.15: a much flatter prior on μ yields absurd prior heights.
sample_μ = rand(Normal(178, 100), size)
prior_h = rand.(Normal.(sample_μ, sample_σ));
density(prior_h)
# reference lines: 0 cm and 272 cm (tallest person ever recorded)
vline!([0, 272])
Code 4.16
# Code 4.16: grid approximation of the joint posterior of (μ, σ).
μ_list = range(150, 160; length=100)
σ_list = range(7, 9; length=100)
log_likelihood = [
    sum(h -> logpdf(Normal(μ, σ), h), d2.height)
    for μ ∈ μ_list, σ ∈ σ_list
]
log_prior = [
    logpdf(Normal(178, 20), μ) + logpdf(Uniform(0, 50), σ)
    for μ ∈ μ_list, σ ∈ σ_list
]
log_prod = log_likelihood .+ log_prior;
# subtract the maximum before exponentiating to avoid numerical underflow
max_prod = maximum(log_prod)
prob = @. exp(log_prod - max_prod);
Code 4.17
# note the transposition, that's due to Julia matrix order
# (rows of `prob` index μ, columns index σ; contour expects x across columns)
contour(μ_list, σ_list, prob')
Code 4.18
heatmap(μ_list, σ_list, prob')
Code 4.19
# Code 4.19: sample (μ, σ) grid cells in proportion to posterior probability.
indices = collect(Iterators.product(eachindex(μ_list), eachindex(σ_list)));
sample_idx = wsample(vec(indices), vec(prob), 10_000; replace=true)
sample_μ = [μ_list[i] for (i, _) in sample_idx]
sample_σ = [σ_list[j] for (_, j) in sample_idx];
Code 4.20
scatter(sample_μ, sample_σ; alpha=0.1)
Code 4.21
# marginal posterior densities of μ and σ
p1 = density(sample_μ)
p2 = density(sample_σ)
plot(p1, p2, layout=(2,1))
Code 4.22
# 89% highest-posterior-density intervals (alpha = 0.11)
println(hpdi(sample_μ, alpha=0.11))
println(hpdi(sample_σ, alpha=0.11))
[153.83838383838383, 155.15151515151516] [7.3232323232323235, 8.232323232323232]
Code 4.23
d3 = sample(d2.height, 20);
Code 4.24
# Code 4.24: repeat the grid approximation using only the 20-height subsample.
μ_list = range(150, 170; length=200)
σ_list = range(4, 20; length=200)
log_likelihood = [
    sum(h -> logpdf(Normal(μ, σ), h), d3)
    for μ ∈ μ_list, σ ∈ σ_list
]
log_prior = [
    logpdf(Normal(178, 20), μ) + logpdf(Uniform(0, 50), σ)
    for μ ∈ μ_list, σ ∈ σ_list
]
log_prod = log_likelihood .+ log_prior
# normalize by the max to avoid underflow when exponentiating
max_prod = maximum(log_prod)
prob2 = @. exp(log_prod - max_prod)
indices = collect(Iterators.product(eachindex(μ_list), eachindex(σ_list)));
sample2_idx = wsample(vec(indices), vec(prob2), 10_000; replace=true)
sample2_μ = [μ_list[i] for (i, _) in sample2_idx]
sample2_σ = [σ_list[j] for (_, j) in sample2_idx]
scatter(sample2_μ, sample2_σ; alpha=0.1)
Code 4.25
# Code 4.25: with few data the posterior of σ is right-skewed, not normal
density(sample2_σ)
μ = mean(sample2_σ)
σ = std(sample2_σ)
plot!(Normal(μ, σ); label="Normal")
Code 4.26
# reload the data and restrict to adults again
d = DataFrame(CSV.File("data/Howell1.csv"));
d2 = d[d.age .>= 18,:];
Code 4.27
# Gaussian model of height: h_i ~ Normal(μ, σ),
# priors μ ~ Normal(178, 20), σ ~ Uniform(0, 50).
@model function model_height(height)
μ ~ Normal(178, 20)
σ ~ Uniform(0, 50)
# vector observation against a univariate Normal ⇒ iid likelihood
height ~ Normal(μ, σ)
end
model_height (generic function with 2 methods)
Code 4.28
m4_1 = sample(model_height(d2.height), NUTS(), 1000)
Chains MCMC chain (1000×14×1 Array{Float64, 3}): Iterations = 501:1:1500 Number of chains = 1 Samples per chain = 1000 Wall duration = 7.05 seconds Compute duration = 7.05 seconds parameters = μ, σ internals = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size Summary Statistics parameters mean std naive_se mcse ess rhat ⋯ Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯ μ 154.6043 0.4035 0.0128 0.0123 710.9447 0.9994 ⋯ σ 7.7684 0.2914 0.0092 0.0112 585.2718 0.9998 ⋯ 1 column omitted Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% Symbol Float64 Float64 Float64 Float64 Float64 μ 153.8338 154.3286 154.6214 154.8762 155.3800 σ 7.1831 7.5921 7.7671 7.9385 8.3746
Code 4.29
display.(describe(m4_1; q=[0.055, 0.945]));
Summary Statistics parameters mean std naive_se mcse ess rhat ⋯ Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯ μ 154.6043 0.4035 0.0128 0.0123 710.9447 0.9994 ⋯ σ 7.7684 0.2914 0.0092 0.0112 585.2718 0.9998 ⋯ 1 column omitted
Quantiles parameters 5.5% 94.5% Symbol Float64 Float64 μ 153.9667 155.2413 σ 7.3161 8.2782
Code 4.30
# start the sampler at the data mean and std
# NOTE(review): `init_theta` is the keyword of older Turing releases; newer
# versions use `initial_params` — confirm against the installed version.
init_vals = [mean(d2.height), std(d2.height)]
chain = sample(model_height(d2.height), NUTS(), 1000, init_theta=init_vals)
Chains MCMC chain (1000×14×1 Array{Float64, 3}): Iterations = 501:1:1500 Number of chains = 1 Samples per chain = 1000 Wall duration = 0.69 seconds Compute duration = 0.69 seconds parameters = μ, σ internals = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size Summary Statistics parameters mean std naive_se mcse ess rhat ⋯ Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯ μ 154.6167 0.4157 0.0131 0.0188 487.9933 1.0076 ⋯ σ 7.7845 0.2950 0.0093 0.0105 705.7988 0.9991 ⋯ 1 column omitted Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% Symbol Float64 Float64 Float64 Float64 Float64 μ 153.8414 154.3242 154.6174 154.9156 155.4342 σ 7.2417 7.5880 7.7812 7.9781 8.4005
Code 4.31
# Same model with a very narrow prior on μ: the posterior for μ barely moves
# from 178, and σ inflates to compensate for the misfit.
@model function model_height(height)
μ ~ Normal(178, 0.1)
σ ~ Uniform(0, 50)
height ~ Normal(μ, σ)
end
m4_2 = sample(model_height(d2.height), NUTS(), 1000)
display.(describe(m4_2; q=[0.055, 0.945]));
Summary Statistics parameters mean std naive_se mcse ess rhat ⋯ Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯ μ 177.8577 0.0990 0.0031 0.0037 1053.3464 0.9990 ⋯ σ 24.6767 1.0103 0.0319 0.0408 768.8385 0.9992 ⋯ 1 column omitted
Quantiles parameters 5.5% 94.5% Symbol Float64 Float64 μ 177.7028 178.0147 σ 23.1493 26.3962
Code 4.32
cov(hcat(m4_1[:μ], m4_1[:σ]))
2×2 Matrix{Float64}: 0.162836 0.00108959 0.00108959 0.0848939
Code 4.33
# Code 4.33: correlation matrix of the posterior samples.
c = cov(hcat(m4_1[:μ], m4_1[:σ]))
# BUG FIX: StatsBase's cov2cor expects standard deviations, not variances —
# passing diag(c) (variances) produced an off-diagonal of cov/(var·var),
# not the correlation cov/(sd·sd).
cov2cor(c, sqrt.(diag(c)))
2×2 Matrix{Float64}: 1.0 0.0788197 0.0788197 1.0
Code 4.34
# resetrange is needed due to bug in MCMCChains: https://github.com/TuringLang/MCMCChains.jl/issues/324
# once it is fixed, direct sampling from the chain will be possible
samp_chain = sample(resetrange(m4_1), 10_000)
samp_df = DataFrame(samp_chain)
# show the first five resampled (μ, σ) draws
first(samp_df, 5)
5 rows × 2 columns
μ | σ | |
---|---|---|
Float64 | Float64 | |
1 | 154.107 | 7.73804 |
2 | 154.905 | 8.32489 |
3 | 154.86 | 7.34465 |
4 | 155.102 | 7.48781 |
5 | 154.062 | 7.57456 |
Code 4.35
precis(samp_df)
┌───────┬──────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼──────────────────────────────────────────────────────────┤ │ μ │ 154.605 0.4031 153.967 154.622 155.249 ▁▁▂▆█▃▁ │ │ σ │ 7.7689 0.2908 7.3161 7.7672 8.2777 ▁▁▁▂▅█▇▃▂▁▁▁ │ └───────┴──────────────────────────────────────────────────────────┘
Code 4.36
# Code 4.36: multivariate-normal (quadratic) approximation of the posterior.
data = hcat(m4_1[:μ], m4_1[:σ])
μ = mean(data, dims=1)
σ = cov(data)
mvn = MvNormal(vec(μ), σ)
post = rand(mvn, 10_000);
print(mvn)
FullNormal( dim: 2 μ: [154.6043025429419, 7.768408555866158] Σ: [0.16283634567175437 0.0010895880325130884; 0.0010895880325130884 0.08489387940800541] )
Code 4.37
# reload data; plot raw height against weight for adults
d = DataFrame(CSV.File("data/Howell1.csv"));
d2 = d[d.age .>= 18,:];
# fancy way of doing scatter(d2.weight, d2.height)
@df d2 scatter(:weight, :height)
Code 4.38
# Code 4.38: draw N candidate regression lines from the priors
# α ~ Normal(178, 20), β ~ Normal(0, 10)
Random.seed!(2971)
N = 100
a = rand(Normal(178, 20), N)
b = rand(Normal(0, 10), N);
Code 4.39
# Code 4.39: prior predictive lines — with β ~ Normal(0, 10) many lines are
# absurdly steep or negative. Horizontal guides at 0 cm and 272 cm.
# FIX: corrected the y-axis label typo "hegiht" → "height".
p = hline([0, 272]; ylims=(-100, 400), xlabel="weight", ylabel="height")
title!(L"\beta \sim \mathcal{N}(\mu=0,\sigma=10)")
x_mean = mean(d2.weight)
xlims = extrema(d2.weight) # getting min and max in one pass
for (α, β) ∈ zip(a, b)
    plot!(x -> α + β * (x - x_mean); xlims=xlims, c=:black, alpha=0.3)
end
display(p)
Code 4.40
# Code 4.40: a log-normal prior keeps the slope strictly positive
b = rand(LogNormal(0, 1), 10_000)
density(b, xlims=(0, 5), bandwidth=0.1)
Code 4.41
# redraw the prior lines with β ~ LogNormal(0, 1)
Random.seed!(2971)
N = 100
a = rand(Normal(178, 20), N)
b = rand(LogNormal(0, 1), N);
Code 4.42
# Code 4.42: linear regression of height on mean-centered weight.
d = DataFrame(CSV.File("data/Howell1.csv"));
d2 = d[d.age .>= 18,:]
xbar = mean(d2.weight)
@model function height_regr_model(weight, height)
a ~ Normal(178, 20)
b ~ LogNormal(0, 1)
μ = @. a + b * (weight - xbar)
σ ~ Uniform(0, 50)
# iid Normal(μ_i, σ) observations expressed as an isotropic MvNormal
height ~ MvNormal(μ, σ)
end
m4_3 = sample(height_regr_model(d2.weight, d2.height), NUTS(), 1000)
m4_3 = resetrange(m4_3);
Code 4.43
# Same regression with the slope parameterized on the log scale:
# log_b ~ Normal(0, 1) is equivalent to b ~ LogNormal(0, 1).
@model function height_regr_model_exp(weight, height)
a ~ Normal(178, 20)
log_b ~ Normal(0, 1)
μ = @. a + exp(log_b) * (weight - xbar)
σ ~ Uniform(0, 50)
height ~ MvNormal(μ, σ)
end
m4_3b = sample(height_regr_model_exp(d2.weight, d2.height), NUTS(), 1000);
Code 4.44
# posterior summary for a, b, σ
m4_3_df = DataFrame(m4_3)
precis(m4_3_df)
┌───────┬───────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼───────────────────────────────────────────────────────────┤ │ a │ 154.589 0.2605 154.163 154.592 155.006 ▁▁▂▅██▅▂▁ │ │ b │ 0.905 0.041 0.8402 0.9041 0.9717 ▁▁▂▄▆██▇▅▃▂▁▁ │ │ σ │ 5.098 0.1957 4.8058 5.0888 5.4277 ▁▁▂▅███▆▄▃▁▁ │ └───────┴───────────────────────────────────────────────────────────┘
Code 4.45
round.(cov(Matrix(m4_3_df)), digits=3)
3×3 Matrix{Float64}: 0.068 0.001 0.001 0.001 0.002 0.0 0.001 0.0 0.038
Code 4.46
# Code 4.46: posterior-mean regression line over the raw data.
p = @df d2 scatter(:weight, :height; alpha=0.3)
# NOTE: m4_3 was already resetrange'd above, so this is redundant but harmless
chain = resetrange(m4_3)
samples = sample(chain, 1000)
a_map = mean(samples[:a])
b_map = mean(samples[:b])
plot!(x -> a_map + b_map*(x-xbar))
Code 4.47
# first five posterior draws of (a, b, σ)
post = sample(m4_3, 1000)
post_df = DataFrame(post)
post_df[1:5,:]
5 rows × 3 columns
a | b | σ | |
---|---|---|---|
Float64 | Float64 | Float64 | |
1 | 154.465 | 0.91025 | 4.96922 |
2 | 154.209 | 0.869266 | 5.17996 |
3 | 154.998 | 0.892325 | 5.09778 |
4 | 154.501 | 0.9345 | 4.72493 |
5 | 154.423 | 0.853054 | 5.35971 |
Code 4.48
# Code 4.48: refit the regression on only the first N = 10 rows,
# centering weight on the subset's own mean.
N = 10
dN = d2[1:N,:]
@model function height_regr_model_N(weight, height)
    a ~ Normal(178, 20)
    b ~ LogNormal(0, 1)
    # center on the mean weight of the data actually passed in
    m_weight = mean(weight)
    μ = @. a + b * (weight - m_weight)
    σ ~ Uniform(0, 50)
    height ~ MvNormal(μ, σ)
end
# BUG FIX: sample the newly defined `height_regr_model_N` (centered on dN's
# own mean weight) — the original sampled the earlier `height_regr_model`,
# which centers on the full-data `xbar` instead.
mN = sample(height_regr_model_N(dN.weight, dN.height), NUTS(), 1000)
mN = resetrange(mN);
Code 4.49
# Code 4.49: overlay 20 posterior regression lines on the N-row subset.
post = sample(mN, 20)
post_df = DataFrame(post);
xlims = extrema(d2.weight)
ylims = extrema(d2.height)
p = @df dN scatter(:weight, :height; xlims=xlims, ylims=ylims)
title!("N = $N"; xlab="weight", ylab="height")
x_mean = mean(dN.weight)
for (a, b) ∈ zip(post_df.a, post_df.b)
plot!(x -> a + b * (x-x_mean); c="black", alpha=0.3)
end
display(p)
Code 4.50
# Code 4.50: posterior distribution of the expected height at weight = 50 kg
post = sample(m4_3, 1000)
post_df = DataFrame(post)
μ_at_50 = @. post_df.a + post_df.b * (50 - xbar);
Code 4.51
density(μ_at_50; lw=2, xlab=L"\mu|weight=50")
Code 4.52
PI(μ_at_50)
2-element Vector{Float64}: 158.57002789554085 159.6651868989757
Code 4.53
# Code 4.53: μ for every posterior sample at every observed weight.
μ = link(post_df, [:a :b], d2.weight, xbar);
μ = hcat(μ...);
# Base.size is required because the global `size` defined earlier shadows it
Base.size(μ), μ[1:5,1]
((1000, 352), [157.30402446200378, 156.8659470071084, 156.79036549722116, 157.15231115026998, 156.841321474765])
Code 4.54
# same computation on an evenly spaced grid of weights
weight_seq = 25:70
μ = link(post_df, [:a :b], weight_seq, xbar);
μ = hcat(μ...);
Base.size(μ), μ[1:5,1]
((1000, 46), [135.54235296248638, 136.8171208342461, 136.41803987120076, 137.15923430935746, 137.36985081806435])
Code 4.55
# Code 4.55: scatter μ values for the first 100 posterior samples.
p = plot()
for r in 1:100
    scatter!(weight_seq, view(μ, r, :); c=:blue, alpha=0.2)
end
display(p)
Code 4.56
# per-weight mean and 89% percentile interval of μ
μ_mean = mean.(eachcol(μ))
μ_PI = PI.(eachcol(μ))
# stack the per-column (lo, hi) bounds into a two-column matrix
μ_PI = vcat(μ_PI'...);
Code 4.57
# Code 4.57: data with the mean line and shaded 89% interval for μ
@df d2 scatter(:weight, :height; alpha=0.2, xlab="weight", ylab="height")
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=μ_PI, fillalpha=0.3)
Code 4.58
# Code 4.58: compute μ "by hand" instead of using link.
post = sample(m4_3, 1000)
post = DataFrame(post)
weight_seq = 25:70
μ = map(w -> post.a + post.b * (w - xbar), weight_seq)
μ = hcat(μ...)
μ_mean = mean.(eachcol(μ))
μ_CI = PI.(eachcol(μ));
Code 4.59
# simulate individual heights (includes the observation noise σ)
sim_height = simulate(post, [:a, :b, :σ], weight_seq .- xbar);
Base.size(sim_height), sim_height[1:5,1]
((1000, 46), [140.9740643278276, 130.13224080628405, 138.68393185320338, 134.3163947982685, 128.9830302969236])
Code 4.60
# 89% prediction interval per weight, stacked into a two-column matrix
height_PI = PI.(eachcol(sim_height))
height_PI = vcat(height_PI'...);
Code 4.61
# Code 4.61: data, narrow μ interval, and wide prediction interval
@df d2 scatter(:weight, :height; alpha=0.2, xlab="weight", ylab="height")
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=μ_PI, fillalpha=0.3)
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.62
# Code 4.62: same plot, with more posterior samples for smoother bounds.
post = sample(m4_3, 10_000)
post = DataFrame(post)
sim_height = simulate(post, [:a, :b, :σ], weight_seq .- xbar)
height_PI = PI.(eachcol(sim_height))
height_PI = vcat(height_PI'...);
@df d2 scatter(:weight, :height; alpha=0.2, xlab="weight", ylab="height")
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=μ_PI, fillalpha=0.3)
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.63
# Code 4.63: simulate predicted heights manually instead of with simulate().
post = sample(m4_3, 1000)
post = DataFrame(post)
# one simulated height per posterior draw, for each weight in the grid
sim_height = [
[
rand(Normal(a + b * (w - xbar), σ))
for (a, b, σ) ∈ zip(post.a, post.b, post.σ)
]
for w ∈ weight_seq
]
sim_height = hcat(sim_height...)
height_PI = PI.(eachcol(sim_height));
height_PI = vcat(height_PI'...);
@df d2 scatter(:weight, :height; alpha=0.2, xlab="weight", ylab="height")
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=μ_PI, fillalpha=0.3)
plot!(weight_seq, [μ_mean μ_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.64
# Code 4.64: full data (all ages) — the height/weight relation is curved
d = DataFrame(CSV.File("data/Howell1.csv"))
scatter(d.weight, d.height; alpha=0.3)
Code 4.65
# Code 4.65: quadratic polynomial regression on standardized weight.
d[!, :weight_s] = standardize(ZScoreTransform, d.weight)
d[!, :weight_s2] = d.weight_s.^2;
@model function height_regr_m2(weight_s, weight_s2, height)
a ~ Normal(178, 20)
b1 ~ LogNormal(0, 1)
b2 ~ Normal(0, 1)
μ = @. a + b1 * weight_s + b2 * weight_s2
σ ~ Uniform(0, 50)
height ~ MvNormal(μ, σ)
end
m4_5 = sample(height_regr_m2(d.weight_s, d.weight_s2, d.height), NUTS(), 1000)
m4_5 = resetrange(m4_5)
m4_5_df = DataFrame(m4_5);
Code 4.66
precis(m4_5_df)
┌───────┬───────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼───────────────────────────────────────────────────────────┤ │ a │ 146.044 0.3754 145.419 146.06 146.627 ▁▁▁▂▃▅▇█▇▄▂▁▁ │ │ b1 │ 21.7413 0.2826 21.2854 21.7375 22.1805 ▁▁▂▆█▇▄▂▁▁ │ │ b2 │ -7.7903 0.2673 -8.2011 -7.7943 -7.3547 ▁▂▅██▅▂▁▁▁ │ │ σ │ 5.8069 0.1767 5.5367 5.803 6.0976 ▁▁▁▄▆██▆▃▂▁▁▁ │ └───────┴───────────────────────────────────────────────────────────┘
Code 4.67
# Code 4.67: posterior mean curve and intervals for the quadratic model.
Random.seed!(1)
df = sample(m4_5_df, 1000)
weight_seq = range(-2.2, 2; length=30)
mu = link(df, (r, x) -> r.a + r.b1*x + r.b2*x^2, weight_seq)
mu = hcat(mu...)
mu_mean = mean.(eachcol(mu))
mu_PI = PI.(eachcol(mu))
mu_PI = vcat(mu_PI'...)
# predicted individual heights (adds observation noise r.σ)
sim_height = simulate(df, (r, x) -> Normal(r.a + r.b1*x + r.b2*x^2, r.σ), weight_seq)
sim_height = vcat(sim_height'...);
height_PI = PI.(eachcol(sim_height))
height_PI = vcat(height_PI'...);
Code 4.68
# Code 4.68: data with mean curve, μ interval and prediction interval
p_square = @df d scatter(:weight_s, :height; alpha=0.3, title="Square poly")
plot!(weight_seq, mu_mean; c=:black)
plot!(weight_seq, [mu_mean mu_mean]; c=:black, fillrange=mu_PI, fillalpha=0.3)
plot!(weight_seq, [mu_mean mu_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.69
# Code 4.69: cubic polynomial model.
d[!, :weight_s3] = d.weight_s.^3;
@model function height_regr_m3(weight_s, weight_s2, weight_s3, height)
a ~ Normal(178, 20)
b1 ~ LogNormal(0, 1)
b2 ~ Normal(0, 1)
b3 ~ Normal(0, 1)
μ = @. a + b1 * weight_s + b2 * weight_s2 + b3 * weight_s3
σ ~ Uniform(0, 50)
height ~ MvNormal(μ, σ)
end
m4_6 = sample(height_regr_m3(d.weight_s, d.weight_s2, d.weight_s3, d.height), NUTS(), 1000)
m4_6 = resetrange(m4_6)
m4_6_df = DataFrame(m4_6)
precis(m4_6_df)
┌───────┬─────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├───────┼─────────────────────────────────────────────────────────┤ │ a │ 146.39 0.3147 145.907 146.389 146.898 ▁▁▃▆██▅▃▁▁▁ │ │ b1 │ 15.2378 0.4896 14.4475 15.2349 16.0455 ▁▂▆█▅▂▁ │ │ b2 │ -6.2086 0.2688 -6.6249 -6.225 -5.757 ▁▁▂▅█▇▄▂▁ │ │ b3 │ 3.5726 0.2366 3.2018 3.5753 3.9548 ▁▂▅██▄▁▁ │ │ σ │ 4.8559 0.148 4.6266 4.8559 5.1009 ▁▁▄▆█▆▄▂▁▁ │ └───────┴─────────────────────────────────────────────────────────┘
# Posterior curves and intervals for the cubic model, then compare the
# quadratic and cubic fits side by side.
Random.seed!(1)
df = sample(m4_6_df, 1000)
weight_seq = range(-2.2, 2; length=30)
func = (r, x) -> r.a + r.b1*x + r.b2*x^2 + r.b3*x^3
mu = link(df, func, weight_seq)
mu = hcat(mu...)
mu_mean = mean.(eachcol(mu))
mu_PI = PI.(eachcol(mu))
mu_PI = vcat(mu_PI'...)
# predictive distribution including observation noise r.σ
func = (r, x) -> Normal(r.a + r.b1*x + r.b2*x^2 + r.b3*x^3, r.σ)
sim_height = simulate(df, func, weight_seq)
sim_height = vcat(sim_height'...);
height_PI = PI.(eachcol(sim_height))
height_PI = vcat(height_PI'...);
p_cubic = @df d scatter(:weight_s, :height; alpha=0.3, title="Cubic poly")
plot!(weight_seq, mu_mean; c=:black)
plot!(weight_seq, [mu_mean mu_mean]; c=:black, fillrange=mu_PI, fillalpha=0.3)
plot!(weight_seq, [mu_mean mu_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
plot(p_square, p_cubic; layout=(1, 2))
Code 4.70 and 4.71
Julia's Plots library does not appear to support the tick relabeling proposed in the book. A more natural approach is to remap the plotted values back to the original scale; an example of this is below.
# Map standardized weights back to the natural (kg) scale for plotting.
μ = mean(d.weight)
σ = std(d.weight)
w = @. d.weight_s * σ + μ
scatter(w, d.height; alpha=0.3)
w_s = @. weight_seq * σ + μ
plot!(w_s, mu_mean; c=:black)
plot!(w_s, [mu_mean mu_mean]; c=:black, fillrange=mu_PI, fillalpha=0.3)
plot!(w_s, [mu_mean mu_mean]; c=:black, fillrange=height_PI, fillalpha=0.3)
Code 4.72
# Code 4.72: cherry-blossom first-bloom day-of-year data
d = DataFrame(CSV.File("data/cherry_blossoms.csv", missingstring="NA"))
precis(d)
┌────────────┬─────────────────────────────────────────────────────────┐ │ param │ mean std 5.5% 50% 94.5% histogram │ ├────────────┼─────────────────────────────────────────────────────────┤ │ year │ 1408.0 350.885 867.77 1408.0 1948.23 ████████████▂ │ │ doy │ 104.54 6.407 94.43 105.0 115.0 ▁▂▆██▅▂▁ │ │ temp │ 6.1419 0.6636 5.15 6.1 7.2947 ▁▄▆█▄▂▁▁ │ │ temp_upper │ 7.1852 0.9929 5.8977 7.04 8.9023 ▂██▃▁▁▁▁ │ │ temp_lower │ 5.0989 0.8503 3.7876 5.145 6.37 ▁▁▁▂▇█▂▁ │ └────────────┴─────────────────────────────────────────────────────────┘
Code 4.73
# Drop rows with missing day-of-year; place 15 knots at quantiles of year.
d2 = d[completecases(d[!,[:doy]]),:]
d2 = disallowmissing(d2[!,[:year,:doy]])
num_knots = 15
knots_list = quantile(d2.year, range(0, 1; length=num_knots));
Code 4.74
using BSplines
basis = BSplineBasis(3, knots_list)
16-element BSplineBasis{Vector{Float64}}: order: 3 breakpoints: [812.0, 1036.0, 1174.0, 1269.0, 1377.0, 1454.0, 1518.0, 1583.0, 1650.0, 1714.0, 1774.0, 1833.0, 1893.0, 1956.0, 2015.0]
Code 4.75
# plot each basis function, with the knot positions marked on top
p1 = plot(basis)
scatter!(knots_list, repeat([1], num_knots); xlab="year", ylab="basis", legend=false)
Code 4.76
This way of calculating B-splines is slightly slower than the one shown in the book (with a pre-calculated matrix), but in my view it is much cleaner.
You can compare for yourself by precalculating the spline matrix outside of the model and doing matrix multiplication in the model instead of spline evaluation. An example of this is at code block 4.79.
# Spline regression: doy ~ Normal(a + Σ_k w_k B_k(year), σ).
@model function model_splines(year, doy)
# one weight per basis function; MvNormal(zeros(n), 1) ⇒ iid Normal(0, 1) priors
w ~ MvNormal(zeros(length(basis)), 1)
a ~ Normal(100, 10)
s = Spline(basis, w)
μ = a .+ s.(year)
σ ~ Exponential(1)
doy ~ MvNormal(μ, σ)
end
# a small initial step size keeps the first leapfrog steps stable here
m4_7 = sample(model_splines(d2.year, d2.doy), NUTS(0.65; init_ϵ = 9.765625e-5), 1000)
Chains MCMC chain (1000×30×1 Array{Float64, 3}): Iterations = 501:1:1500 Number of chains = 1 Samples per chain = 1000 Wall duration = 12.56 seconds Compute duration = 12.56 seconds parameters = w[1], w[2], w[3], w[4], w[5], w[6], w[7], w[8], w[9], w[10], w[11], w[12], w[13], w[14], w[15], w[16], a, σ internals = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size Summary Statistics parameters mean std naive_se mcse ess rhat ⋯ Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯ w[1] -0.6930 0.9362 0.0296 0.0215 1945.2764 0.9994 ⋯ w[2] -0.8597 0.8902 0.0282 0.0159 3252.6949 0.9990 ⋯ w[3] 0.8102 0.7776 0.0246 0.0199 1338.4246 0.9990 ⋯ w[4] 0.6773 0.7727 0.0244 0.0146 2310.0298 0.9996 ⋯ w[5] 0.5543 0.8216 0.0260 0.0212 1602.2965 1.0006 ⋯ w[6] -1.2216 0.7478 0.0236 0.0205 1509.1485 0.9994 ⋯ w[7] 0.2699 0.7394 0.0234 0.0195 1616.5799 0.9990 ⋯ w[8] 1.7919 0.7792 0.0246 0.0161 2409.3785 0.9990 ⋯ w[9] -0.0637 0.7701 0.0244 0.0211 1922.1638 0.9998 ⋯ w[10] 1.9593 0.7778 0.0246 0.0179 1329.8195 0.9992 ⋯ w[11] 0.7013 0.8161 0.0258 0.0221 2441.5417 0.9991 ⋯ w[12] 1.2512 0.7753 0.0245 0.0204 1531.8764 0.9990 ⋯ w[13] 1.2685 0.7700 0.0243 0.0222 1743.3661 1.0002 ⋯ w[14] -0.7327 0.8190 0.0259 0.0210 2040.4069 0.9991 ⋯ w[15] -2.8985 0.8731 0.0276 0.0231 1560.4025 0.9994 ⋯ w[16] -2.7200 0.9596 0.0303 0.0207 1672.6364 0.9993 ⋯ a 104.2733 0.3264 0.0103 0.0126 954.7923 0.9992 ⋯ σ 6.0944 0.1438 0.0045 0.0036 1973.5280 1.0007 ⋯ 1 column omitted Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% Symbol Float64 Float64 Float64 Float64 Float64 w[1] -2.6408 -1.3140 -0.6834 -0.0703 1.1979 w[2] -2.6857 -1.4305 -0.8427 -0.2744 0.9362 w[3] -0.7307 0.3000 0.8008 1.3413 2.3604 w[4] -0.8886 0.1325 0.6754 1.2188 2.1385 w[5] -0.9992 0.0130 0.5749 1.0825 2.1280 w[6] -2.6770 -1.7371 -1.2190 -0.7424 0.2481 w[7] -1.2163 -0.2452 0.2709 0.7686 1.7316 w[8] 0.2888 1.2534 
1.7828 2.3059 3.3314 w[9] -1.4970 -0.6270 -0.0828 0.4375 1.5278 w[10] 0.4547 1.4331 1.9742 2.5159 3.4421 w[11] -0.8953 0.1630 0.7291 1.2573 2.3180 w[12] -0.3171 0.7569 1.2381 1.7793 2.7183 w[13] -0.2086 0.7450 1.2663 1.7847 2.7973 w[14] -2.2305 -1.3187 -0.7611 -0.1507 0.9471 w[15] -4.6087 -3.4830 -2.8958 -2.3500 -1.2049 w[16] -4.6113 -3.3602 -2.7307 -2.0645 -0.9025 a 103.6368 104.0453 104.2708 104.5035 104.8985 σ 5.8227 6.0022 6.0911 6.1858 6.3761
Code 4.77
post = DataFrame(m4_7)
# convert columns w[*] into single column w
w_df = DataFrames.select(post, r"w")
post = DataFrames.select(post, Not(r"w"))
post[!,:w] = Vector.(eachrow(w_df))
# vector of 16 average w values
w_mean = mean.(eachcol(w_df))
# each basis function scaled by its mean posterior weight
p2 = plot(basis .* w_mean)
scatter!(knots_list, repeat([1], num_knots); xlab="year", ylab="basis × weight")
Code 4.78
# Posterior mean spline with its 89% interval, over the raw data.
μ = link(post, (r, x) -> r.a + Spline(basis, r.w)(x), d2.year)
μ = vcat(μ'...);
μ_PI = PI.(eachrow(μ))
μ_PI = vcat(μ_PI'...);
p3 = @df d2 scatter(:year, :doy; alpha=0.3)
# NOTE(review): μ_mean is the midpoint of the PI bounds, not the mean of μ
# itself — close in practice, but verify this is intended
μ_mean = mean.(eachrow(μ_PI))
plot!(d2.year, [μ_mean, μ_mean]; c=:black, fillrange=μ_PI, fillalpha=0.3, alpha=0)
plot(p1, p2, p3; layout=(3, 1))
Code 4.79
How to build the model with explicit spline matrix calculation
# Code 4.79: the same spline model with a pre-computed basis matrix B.
basis = BSplineBasis(3, knots_list)
# list of splines with 1 only at corresponding basis index
splines = [
Spline(basis, [float(idx == knot) for idx ∈ 1:length(basis)])
for knot ∈ 1:length(basis)
]
# calculate each spline for every year. Resulting matrix B is 827x16
B = [
map(s -> s(year), splines)
for year in d2.year
]
B = vcat(B'...);
# do not need years parameter anymore, all the information is in B matrix
@model function model_splines_matrix(doy)
w ~ MvNormal(zeros(length(basis)), 1)
a ~ Normal(100, 10)
# the spline evaluation reduces to a single matrix-vector product
μ = a .+ B * w
σ ~ Exponential(1)
doy ~ MvNormal(μ, σ)
end
m4_7alt = sample(model_splines_matrix(d2.doy), NUTS(0.65; init_ϵ = 0.0001953125), 1000)
Chains MCMC chain (1000×30×1 Array{Float64, 3}): Iterations = 501:1:1500 Number of chains = 1 Samples per chain = 1000 Wall duration = 4.67 seconds Compute duration = 4.67 seconds parameters = w[1], w[2], w[3], w[4], w[5], w[6], w[7], w[8], w[9], w[10], w[11], w[12], w[13], w[14], w[15], w[16], a, σ internals = lp, n_steps, is_accept, acceptance_rate, log_density, hamiltonian_energy, hamiltonian_energy_error, max_hamiltonian_energy_error, tree_depth, numerical_error, step_size, nom_step_size Summary Statistics parameters mean std naive_se mcse ess rhat ⋯ Symbol Float64 Float64 Float64 Float64 Float64 Float64 ⋯ w[1] -0.7241 0.8998 0.0285 0.0271 1697.9565 0.9997 ⋯ w[2] -0.8411 0.8831 0.0279 0.0221 2165.6782 0.9990 ⋯ w[3] 0.8419 0.8046 0.0254 0.0224 1451.9809 1.0014 ⋯ w[4] 0.6414 0.7654 0.0242 0.0206 1360.1771 0.9991 ⋯ w[5] 0.5549 0.7795 0.0246 0.0188 1462.0164 1.0004 ⋯ w[6] -1.2591 0.8080 0.0255 0.0155 1937.4765 0.9991 ⋯ w[7] 0.2666 0.7821 0.0247 0.0157 1432.7254 0.9994 ⋯ w[8] 1.7751 0.7513 0.0238 0.0148 1769.1459 1.0019 ⋯ w[9] -0.0465 0.7647 0.0242 0.0169 2060.6376 0.9996 ⋯ w[10] 1.9268 0.8347 0.0264 0.0189 1603.1755 0.9997 ⋯ w[11] 0.6937 0.7857 0.0248 0.0168 2697.3835 0.9991 ⋯ w[12] 1.2368 0.7708 0.0244 0.0168 1940.8865 0.9999 ⋯ w[13] 1.2566 0.7720 0.0244 0.0183 1674.4318 0.9990 ⋯ w[14] -0.7881 0.8238 0.0260 0.0173 2046.1462 1.0003 ⋯ w[15] -2.8762 0.8830 0.0279 0.0213 2048.4908 1.0006 ⋯ w[16] -2.7619 0.8669 0.0274 0.0176 1746.0300 0.9991 ⋯ a 104.2873 0.3497 0.0111 0.0104 799.9617 0.9992 ⋯ σ 6.1066 0.1547 0.0049 0.0032 2426.3254 1.0018 ⋯ 1 column omitted Quantiles parameters 2.5% 25.0% 50.0% 75.0% 97.5% Symbol Float64 Float64 Float64 Float64 Float64 w[1] -2.4726 -1.3282 -0.7051 -0.1193 1.0145 w[2] -2.5446 -1.4358 -0.8161 -0.2445 0.8577 w[3] -0.7099 0.3136 0.8121 1.4050 2.4401 w[4] -0.8142 0.1135 0.6205 1.1695 2.1878 w[5] -0.9944 0.0427 0.5319 1.0986 2.0761 w[6] -2.8611 -1.8353 -1.2889 -0.6767 0.2688 w[7] -1.2410 -0.2768 0.2756 0.7993 1.7621 w[8] 0.4084 1.2482 1.7673 
2.2857 3.2702 w[9] -1.5533 -0.5618 -0.0740 0.4554 1.4439 w[10] 0.3413 1.3430 1.9267 2.5319 3.5704 w[11] -0.8745 0.1957 0.6811 1.1667 2.2240 w[12] -0.2486 0.7130 1.2334 1.7426 2.7929 w[13] -0.2056 0.7516 1.2577 1.7624 2.7255 w[14] -2.4701 -1.3474 -0.7707 -0.2403 0.8671 w[15] -4.6227 -3.5115 -2.8558 -2.2575 -1.1556 w[16] -4.3563 -3.4059 -2.7802 -2.1510 -1.0782 a 103.6229 104.0482 104.2939 104.5386 104.9743 σ 5.7853 6.0027 6.1108 6.2076 6.4063