# Setup: locate the project root, configure CmdStanR output and printing
# options, and load the packages used throughout this notebook.
library(rprojroot)
root <- has_file(".Bayesian-Workflow-root")$make_fix_file()
library(cmdstanr)
# Fixed CmdStanR output directory so that Quarto caching works across renders
dir.create(root("world_cup", "stan_output"), showWarnings = FALSE)
options(cmdstanr_output_dir = root("world_cup", "stan_output"))
# Run 4 MCMC chains in parallel
options(mc.cores = 4)
library(posterior)
# Compact tibble printing for posterior summary tables
options(
tibble.print_max = 35,
pillar.neg = FALSE,
pillar.subtle = FALSE,
pillar.sigfig = 2
)
# Development version of bayesplot; use library(bayesplot) for the released one
devtools::load_all("~/proj/bayesplot")
## library(bayesplot)
library(ggplot2)
theme_set(bayesplot::theme_default(base_family = "sans", base_size=14))
library(arm)
library(loo)
library(dplyr)
library(readr)
# Print Stan model code for display.
# When knitting with chunk option results="asis", emit the code wrapped in a
# raw ```stan fence so Pandoc/Quarto applies Stan syntax highlighting;
# otherwise (interactive use) just write the lines to the console.
# @param code character vector of Stan code lines (e.g. model$code())
print_stan_code <- function(code) {
if (isTRUE(getOption("knitr.in.progress")) &&
identical(knitr::opts_current$get("results"), "asis")) {
# In render: emit as-is so Pandoc/Quarto does syntax highlighting
block <- paste0("```stan", "\n", paste(code, collapse = "\n"), "\n", "```")
knitr::asis_output(block)
} else {
# Interactive use: plain printing
writeLines(code)
}
}Debugging a model: World Cup football
This notebook includes the code for the Bayesian Workflow book Chapter 23 Debugging a model: World Cup football.
1 World Cup 2014 team performance analysis
We use an item-response model such as described in the previous chapter to fit a model to estimate the abilities of the teams in the 2014 football World Cup. We use score differentials as data (ignoring the shoot-outs).
The forthcoming Bayesian workflow book has more information about the modeling task, but this version of the code was made available early, as it includes an illustration of comparing the predictive performance of continuous and discrete models.
Game scores could be modelled with a discrete bivariate Poisson model, and score differences with Poisson difference models (Karlis and Ntzoufras 2003), which are commonly used in analysis of sports data. But we’ll start with continuous models and discretized continuous models to illustrate how those can be compared, and eventually build also models using naturally discrete data models.
Load packages
2 Data
Data include 64 game results from World Cup 2014 and the Soccer Power Index that was available on the internet a month before the tournament (Silver 2014). We took the rankings, with Brazil at the top (getting a score of 32) and Australia at the bottom (with a score of 1), and then for simplicity in interpretation of the parameters we rescaled these to have mean 0 and standard deviation 1/2, to get "prior scores" that ranged from -0.83 to +0.83.
# Pre-tournament Soccer Power Index; rescale the (reversed) ranking to
# mean 0 and sd 1/2 to use as prior team scores
# (rev() presumably puts the best team highest — confirm against data file).
powerindex <- read_csv(root("world_cup", "data","soccerpowerindex.csv")) |>
mutate(prior_score = as.vector(scale(rev(index))/2))
teamnames <- powerindex$team
# Game results; map team names to their row indices in powerindex
worldcup2014 <- read_csv(root("world_cup", "data", "worldcup2014.csv")) |>
mutate(team_1 = match(team1, teamnames),
team_2 = match(team2, teamnames))
N_games <- nrow(worldcup2014)
# Plot labels for the games, reversed to match reversed axis ordering in plots
gamenames <- with(worldcup2014,
rev(paste(teamnames[team_1], "vs.", teamnames[team_2])))2.1 Data for Stan
# Assemble the data list passed to all Stan models in this notebook
stan_data <- with(
worldcup2014,
list(
N_teams = nrow(powerindex),
N_games = N_games,
team_1 = team_1,
score_1 = score1,
team_2 = team_2,
score_2 = score2,
prior_score = powerindex$prior_score,
df = 7  # Student's t degrees of freedom
)
)3 First model
The first model uses Student's t for the square root of the score difference. It's not a particularly well-justified model, but that is what Andrew first thought of, and it turned out to be useful for illustration.
The model structure is as follows: if game i has teams j_1 and j_2 playing, and they score z_1 and z_2 goals, respectively, then the data point for this game is y_i = \mbox{sign}(z_1-z_2)*\sqrt{|z_1-z_2|}, and the data model is: y_i \sim \mbox{normal}(a_{j_1[i]}-a_{j_2[i]}, \sigma_y), where a_{j_1} and a_{j_2} are the ability parameters (to use psychometrics jargon) for the two teams and \sigma_y is a scale parameter estimated from the data.
y_i \sim \mbox{t}_{\nu}(a_{j_1[i]}-a_{j_2[i]}, \sigma_y), setting the degrees of freedom to \nu=7.
Stan model
model_1 <- cmdstan_model(stan_file = root("world_cup", "worldcup_first_try.stan"))print_stan_code(model_1$code())/* This program has a mistake in it, as will be explained later */
data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
real df;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
vector[N_games] sqrt_dif;
for (i in 1:N_games) {
sqrt_dif[i] = (step(dif[i]) - 0.5) * sqrt(abs(dif[i]));
}
}
parameters {
vector[N_teams] alpha;
real b;
real<lower=0> sigma_a;
real<lower=0> sigma_y;
}
transformed parameters {
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
sqrt_dif ~ student_t(df, a[team_1] - a[team_2], sigma_y);
}Fit the model and show the results
fit_1 <- model_1$sample(data = stan_data, refresh = 0)fit_1$summary(c("a", "b", "sigma_a", "sigma_y"))# A tibble: 35 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.28 0.29 0.14 0.14 0.043 0.49 1.0 3665. 3447.
2 a[2] 0.37 0.36 0.12 0.12 0.17 0.58 1.00 5804. 3285.
3 a[3] 0.49 0.47 0.16 0.16 0.25 0.77 1.0 1884. 3260.
4 a[4] 0.17 0.19 0.18 0.17 -0.15 0.42 1.0 2394. 2851.
5 a[5] 0.27 0.28 0.15 0.13 0.021 0.51 1.0 6204. 3089.
6 a[6] 0.30 0.29 0.14 0.12 0.082 0.53 1.00 4106. 2736.
7 a[7] 0.34 0.32 0.14 0.14 0.13 0.59 1.0 3690. 3404.
8 a[8] 0.14 0.16 0.16 0.14 -0.14 0.38 1.0 5035. 2812.
9 a[9] 0.036 0.059 0.17 0.17 -0.27 0.28 1.0 2491. 3070.
10 a[10] 0.20 0.19 0.13 0.12 -0.0040 0.43 1.0 4170. 2995.
11 a[11] 0.34 0.32 0.16 0.18 0.10 0.63 1.0 1539. 2902.
12 a[12] 0.045 0.060 0.15 0.13 -0.22 0.27 1.0 4380. 3058.
13 a[13] 0.051 0.062 0.15 0.13 -0.20 0.29 1.00 5524. 3063.
14 a[14] 0.033 0.041 0.15 0.12 -0.22 0.27 1.0 5295. 3279.
15 a[15] -0.024 -0.011 0.15 0.13 -0.27 0.20 1.0 4588. 2865.
16 a[16] -0.073 -0.054 0.15 0.13 -0.33 0.15 1.0 4263. 3134.
17 a[17] -0.049 -0.036 0.15 0.12 -0.31 0.19 1.0 5154. 3103.
18 a[18] 0.0062 -0.0031 0.14 0.12 -0.22 0.25 1.0 5455. 2912.
19 a[19] -0.024 -0.034 0.14 0.12 -0.24 0.21 1.0 5157. 3220.
20 a[20] 0.012 -0.0045 0.14 0.13 -0.20 0.26 1.0 3756. 3380.
21 a[21] -0.13 -0.13 0.15 0.13 -0.38 0.10 1.0 6040. 3084.
22 a[22] -0.12 -0.12 0.14 0.12 -0.34 0.12 1.0 5770. 2980.
23 a[23] -0.18 -0.17 0.15 0.13 -0.45 0.055 1.0 4969. 3242.
24 a[24] -0.15 -0.16 0.14 0.12 -0.38 0.087 1.0 5564. 3202.
25 a[25] -0.26 -0.25 0.15 0.13 -0.53 -0.027 1.0 5440. 2982.
26 a[26] -0.025 -0.036 0.18 0.19 -0.28 0.28 1.0 1727. 2992.
27 a[27] -0.29 -0.29 0.15 0.14 -0.55 -0.047 1.0 4937. 3189.
28 a[28] -0.41 -0.39 0.17 0.16 -0.72 -0.17 1.0 2228. 2924.
29 a[29] -0.30 -0.30 0.15 0.13 -0.54 -0.046 1.0 6154. 3503.
30 a[30] -0.42 -0.40 0.16 0.15 -0.70 -0.17 1.0 3527. 3247.
31 a[31] -0.21 -0.23 0.17 0.16 -0.45 0.083 1.0 2332. 2777.
32 a[32] -0.39 -0.39 0.14 0.13 -0.63 -0.15 1.0 5790. 2998.
33 b 0.45 0.45 0.10 0.099 0.28 0.61 1.0 3335. 2483.
34 sigma_a 0.17 0.17 0.075 0.075 0.035 0.29 1.0 839. 953.
35 sigma_y 0.42 0.42 0.053 0.052 0.34 0.51 1.0 1960. 2380.
# Team ability estimates from the first model: medians with outer intervals
# (prob = 0 hides the inner interval; outer interval is the mcmc_intervals
# default, presumably 90% — confirm against bayesplot docs)
fit_1$draws("a") |>
mcmc_intervals(prob = 0) +
scale_y_discrete(labels = rev(teamnames), limits = rev) +
labs(x = "Team quality estimate with 90% intervals")
#if (savefigs) ggsave(root("world_cup/figs","worldcup_1.pdf", height=7, width=8))3.1 Check fit of the first model
model_1_rep <- cmdstan_model(stan_file = root("world_cup", "worldcup_with_replication.stan"))print_stan_code(model_1_rep$code())/* This program has a mistake in it, as will be explained later */
data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
real df;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
vector[N_games] sqrt_dif;
for (i in 1:N_games) {
sqrt_dif[i] = (step(dif[i]) - 0.5)*sqrt(abs(dif[i]));
}
}
parameters {
vector[N_teams] alpha;
real b;
real<lower=0> sigma_a;
real<lower=0> sigma_y;
}
transformed parameters {
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
sqrt_dif ~ student_t(df, a[team_1] - a[team_2], sigma_y);
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] y_rep_original_scale;
for (n in 1:N_games) {
y_rep[n] = student_t_rng(df, a[team_1[n]] - a[team_2[n]], sigma_y);
}
y_rep_original_scale = y_rep .* abs(y_rep);
}fit_1_rep <- model_1_rep$sample(data = stan_data, refresh = 0)ppc_intervals(
y = stan_data$score_1 - stan_data$score_2,
yrep = fit_1_rep$draws("y_rep_original_scale", format = "draws_matrix"),
fatten = 0,
prob = 1e-12
) +
scale_x_continuous(labels = gamenames, breaks=1:64)+
labs(y="Game score differentials\ncompared to 90% predictive interval from model",
x="") +
coord_flip()
#if (savefigs) ggsave(root("world_cup/figs","worldcup_3.pdf", height=10, width=8))4 Second model without sqrt transformation
model_2 <- cmdstan_model(stan_file = root("world_cup", "worldcup_no_sqrt.stan"))print_stan_code(model_2$code())data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
real df;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
}
parameters {
vector[N_teams] alpha;
real b;
real<lower=0> sigma_a;
real<lower=0> sigma_y;
}
transformed parameters {
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
dif ~ student_t(df, a[team_1] - a[team_2], sigma_y);
}
generated quantities {
vector[N_games] y_rep;
for (n in 1:N_games) {
y_rep[n] = student_t_rng(df, a[team_1[n]] - a[team_2[n]], sigma_y);
}
}fit_2 <- model_2$sample(data = stan_data, refresh = 0)fit_2$summary(c("a", "b", "sigma_a", "sigma_y"))# A tibble: 35 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.81 0.83 0.38 0.35 0.15 1.4 1.0 4568. 3260.
2 a[2] 0.84 0.84 0.32 0.29 0.33 1.4 1.0 5728. 3274.
3 a[3] 1.1 1.0 0.41 0.38 0.51 1.8 1.0 2867. 3068.
4 a[4] 0.54 0.61 0.44 0.37 -0.29 1.1 1.0 2749. 3032.
5 a[5] 0.71 0.71 0.36 0.31 0.13 1.3 1.0 4517. 2659.
6 a[6] 0.81 0.78 0.36 0.31 0.29 1.4 1.0 3573. 2759.
7 a[7] 0.84 0.78 0.38 0.34 0.31 1.5 1.0 2775. 3393.
8 a[8] 0.35 0.40 0.38 0.32 -0.33 0.91 1.0 3479. 3055.
9 a[9] 0.23 0.30 0.40 0.33 -0.53 0.77 1.0 2736. 2686.
10 a[10] 0.43 0.41 0.32 0.26 -0.086 0.97 1.0 4932. 3058.
11 a[11] 0.70 0.62 0.42 0.39 0.16 1.5 1.0 1880. 3177.
12 a[12] 0.20 0.23 0.37 0.27 -0.46 0.77 1.0 3842. 2449.
13 a[13] 0.14 0.17 0.35 0.27 -0.49 0.70 1.0 4620. 2874.
14 a[14] 0.038 0.086 0.38 0.29 -0.64 0.61 1.0 4423. 2828.
15 a[15] -0.0095 0.038 0.36 0.28 -0.64 0.53 1.0 4571. 2995.
16 a[16] -0.095 -0.042 0.35 0.27 -0.71 0.42 1.0 3899. 2654.
17 a[17] -0.085 -0.056 0.36 0.28 -0.70 0.49 1.0 4810. 2966.
18 a[18] -0.019 -0.050 0.35 0.27 -0.57 0.59 1.0 4609. 2678.
19 a[19] -0.086 -0.12 0.33 0.25 -0.61 0.50 1.0 4478. 3362.
20 a[20] -0.034 -0.097 0.36 0.29 -0.53 0.63 1.0 3397. 3232.
21 a[21] -0.27 -0.28 0.35 0.27 -0.83 0.31 1.0 4596. 2510.
22 a[22] -0.36 -0.35 0.34 0.28 -0.94 0.20 1.0 5276. 2750.
23 a[23] -0.42 -0.41 0.37 0.28 -1.1 0.16 1.0 6043. 3247.
24 a[24] -0.41 -0.42 0.34 0.28 -0.94 0.20 1.0 4663. 3266.
25 a[25] -0.60 -0.57 0.37 0.29 -1.2 -0.043 1.0 4787. 2878.
26 a[26] -0.22 -0.30 0.42 0.40 -0.77 0.57 1.0 1760. 2919.
27 a[27] -0.73 -0.70 0.38 0.32 -1.4 -0.15 1.0 4544. 3144.
28 a[28] -0.97 -0.89 0.44 0.38 -1.8 -0.39 1.0 2988. 3071.
29 a[29] -0.76 -0.76 0.36 0.32 -1.3 -0.16 1.0 4558. 2623.
30 a[30] -1.0 -1.00 0.43 0.37 -1.8 -0.42 1.0 3089. 2565.
31 a[31] -0.61 -0.67 0.41 0.37 -1.2 0.15 1.0 2699. 3128.
32 a[32] -1.0 -0.99 0.38 0.34 -1.7 -0.40 1.0 5336. 3245.
33 b 1.1 1.1 0.27 0.26 0.68 1.6 1.0 3125. 2486.
34 sigma_a 0.37 0.36 0.21 0.22 0.050 0.72 1.0 925. 1418.
35 sigma_y 1.3 1.3 0.15 0.15 1.0 1.6 1.0 3327. 2333.
# Team ability estimates from the no-sqrt model: medians with outer intervals
# (prob = 0 hides the inner interval)
fit_2$draws("a") |>
mcmc_intervals(prob = 0) +
scale_y_discrete(labels = rev(teamnames), limits = rev) +
labs(x = "Team quality estimate with 90% intervals\n(model with no square root)")
#if (savefigs) ggsave(root("world_cup/figs","worldcup_4.pdf", height=7, width=8))4.1 Check fit of the second model to data
# Posterior predictive check for the no-sqrt model: observed score
# differentials, one row per game, against predictive intervals.
# fatten = 0 and prob = 1e-12 effectively hide the point and inner interval
# so that only the outer interval is drawn.
ppc_intervals(
  y = stan_data$score_1 - stan_data$score_2,
  yrep = fit_2$draws("y_rep", format = "draws_matrix"),
  fatten = 0,
  prob = 1e-12
) +
  scale_x_continuous(labels = gamenames, breaks = 1:64) +
  # Fixed axis label: the original had " ncompared" from a lost backslash
  # in the "\n" newline escape
  labs(y = "Game score differentials\ncompared to 90% predictive interval\n(model with no square root)",
       x = "") +
  coord_flip()
#if (savefigs) ggsave(root("world_cup/figs","worldcup_5.pdf", height=10, width=8))5 Fix the first model
model_3 <- cmdstan_model(stan_file = root("world_cup", "worldcup_fixed.stan"))print_stan_code(model_3$code())/* The error in the transformed data block has been fixed */
data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
real df;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
vector[N_games] sqrt_dif;
for (i in 1:N_games){
sqrt_dif[i] = 2 * (step(dif[i]) - 0.5) * sqrt(abs(dif[i]));
}
}
parameters {
vector[N_teams] alpha;
real b;
real<lower=0> sigma_a;
real<lower=0> sigma_y;
}
transformed parameters {
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
sqrt_dif ~ student_t(df, a[team_1] - a[team_2], sigma_y);
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] y_rep_original_scale;
vector[N_games] log_lik;
for (n in 1:N_games) {
y_rep[n] = student_t_rng(df, a[team_1[n]] - a[team_2[n]], sigma_y);
}
y_rep_original_scale = y_rep .* abs(y_rep);
}fit_3 <- model_3$sample(data = stan_data, refresh = 0)fit_3$summary(c("a", "b", "sigma_a", "sigma_y"))# A tibble: 35 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.57 0.59 0.28 0.27 0.093 1.0 1.0 3527. 3318.
2 a[2] 0.73 0.72 0.25 0.23 0.33 1.2 1.0 4707. 3064.
3 a[3] 0.96 0.94 0.32 0.32 0.49 1.5 1.0 1708. 2376.
4 a[4] 0.33 0.37 0.37 0.34 -0.33 0.85 1.00 2209. 2609.
5 a[5] 0.54 0.54 0.28 0.25 0.077 1.0 1.0 5735. 3142.
6 a[6] 0.60 0.58 0.28 0.25 0.17 1.1 1.0 2669. 1642.
7 a[7] 0.67 0.64 0.30 0.29 0.24 1.2 1.0 2802. 1998.
8 a[8] 0.27 0.31 0.30 0.28 -0.25 0.74 1.0 3354. 3364.
9 a[9] 0.059 0.11 0.34 0.33 -0.56 0.53 1.0 1958. 2966.
10 a[10] 0.40 0.38 0.26 0.24 0.0022 0.87 1.0 3857. 2249.
11 a[11] 0.66 0.64 0.32 0.35 0.21 1.2 1.0 1396. 2390.
12 a[12] 0.089 0.12 0.30 0.26 -0.45 0.54 1.00 3873. 3028.
13 a[13] 0.098 0.12 0.29 0.25 -0.43 0.56 1.0 4388. 2797.
14 a[14] 0.061 0.079 0.30 0.25 -0.43 0.54 1.00 4341. 2312.
15 a[15] -0.041 -0.010 0.29 0.26 -0.55 0.40 1.0 2894. 1934.
16 a[16] -0.15 -0.11 0.31 0.26 -0.69 0.29 1.0 2457. 2318.
17 a[17] -0.11 -0.085 0.30 0.26 -0.62 0.37 1.0 3653. 2840.
18 a[18] 0.0053 -0.020 0.28 0.24 -0.44 0.51 1.0 4025. 2820.
19 a[19] -0.048 -0.071 0.27 0.23 -0.48 0.42 1.0 3839. 2668.
20 a[20] 0.025 -0.014 0.29 0.26 -0.41 0.55 1.0 2044. 1768.
21 a[21] -0.26 -0.25 0.30 0.25 -0.76 0.24 1.0 3195. 1563.
22 a[22] -0.23 -0.25 0.28 0.24 -0.67 0.25 1.0 3692. 2502.
23 a[23] -0.34 -0.33 0.31 0.26 -0.85 0.16 1.0 3369. 2102.
24 a[24] -0.30 -0.31 0.28 0.23 -0.73 0.17 1.0 4320. 2224.
25 a[25] -0.52 -0.50 0.31 0.26 -1.1 -0.039 1.0 4251. 3243.
26 a[26] -0.044 -0.078 0.35 0.37 -0.54 0.57 1.0 1340. 2715.
27 a[27] -0.58 -0.56 0.29 0.25 -1.1 -0.12 1.0 4022. 1856.
28 a[28] -0.80 -0.76 0.35 0.32 -1.5 -0.30 1.0 2862. 2907.
29 a[29] -0.59 -0.59 0.28 0.26 -1.1 -0.13 1.0 4355. 2965.
30 a[30] -0.83 -0.80 0.32 0.30 -1.4 -0.36 1.00 3186. 3271.
31 a[31] -0.42 -0.45 0.33 0.33 -0.91 0.16 1.0 2126. 3054.
32 a[32] -0.76 -0.75 0.30 0.27 -1.3 -0.27 1.0 3698. 1529.
33 b 0.86 0.87 0.21 0.19 0.53 1.2 1.0 1335. 597.
34 sigma_a 0.33 0.34 0.15 0.15 0.072 0.58 1.0 665. 846.
35 sigma_y 0.84 0.83 0.11 0.11 0.68 1.0 1.0 1859. 2467.
# Team ability estimates from the corrected sqrt model: medians with outer
# intervals (prob = 0 hides the inner interval)
fit_3$draws("a") |>
mcmc_intervals(prob = 0) +
scale_y_discrete(labels = rev(teamnames), limits = rev) +
labs(x = "Team quality estimate with 90% intervals\n(corrected model)")
#if (savefigs) ggsave(root("world_cup/figs","worldcup_6.pdf", height=7, width=8))5.1 Check the fit of the fixed first model
# Posterior predictive check for the corrected sqrt model: observed score
# differentials (original scale), one row per game, against predictive
# intervals. fatten = 0 and prob = 1e-12 effectively hide the point and
# inner interval so that only the outer interval is drawn.
ppc_intervals(
  y = stan_data$score_1 - stan_data$score_2,
  yrep = fit_3$draws("y_rep_original_scale", format = "draws_matrix"),
  fatten = 0,
  prob = 1e-12
) +
  scale_x_continuous(labels = gamenames, breaks = 1:64) +
  # Fixed axis label: the original had " ncompared" from a lost backslash
  # in the "\n" newline escape
  labs(y = "Game score differentials\ncompared to 90% predictive interval\n(corrected model with square root)",
       x = "") +
  coord_flip()
#if (savefigs) ggsave(root("world_cup/figs","worldcup_7.pdf", height=10, width=8))5.2 Fit the same model without the powerindex prior
set b=0 in the data
model_3_no_prior <- cmdstan_model(stan_file = root("world_cup", "worldcup_no_prior.stan"))print_stan_code(model_3_no_prior$code())data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
real df;
real b; // To remove the prior score in the model, just set b=0 when running this program.
}
transformed data {
vector[N_games] dif = score_1 - score_2;
vector[N_games] sqrt_dif;
for (i in 1:N_games){
sqrt_dif[i] = 2 * (step(dif[i]) - 0.5) * sqrt(abs(dif[i]));
}
}
parameters {
vector[N_teams] alpha;
real<lower=0> sigma_a;
real<lower=0> sigma_y;
}
transformed parameters {
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
sqrt_dif ~ student_t(df, a[team_1] - a[team_2], sigma_y);
}
generated quantities {
vector[N_games] log_lik;
for (n in 1:N_games) {
log_lik[n] = student_t_lpdf(sqrt_dif[n] | df, a[team_1[n]] - a[team_2[n]], sigma_y);
}
}fit_3_no_prior <- model_3_no_prior$sample(data = c(stan_data, b = 0), refresh = 0)fit_3_no_prior$draws("a") |>
mcmc_intervals(prob = 0) +
scale_y_discrete(labels = rev(teamnames), limits = rev) +
labs(x = "Team quality estimate with 90% intervals\nModel without prior rankings")
#if (savefigs) ggsave(root("world_cup/figs","worldcup_2.pdf", height=7, width=8))6 Discrete models and LOO-CV comparison
6.1 Discrete model with explicit latent z sampled
model_discr_z <- cmdstan_model(stan_file = root("world_cup", "worldcup_discrete_z.stan"))print_stan_code(model_discr_z$code())data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
real df;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
}
parameters {
vector[N_teams] alpha;
real b;
real<lower=0> sigma_a;
real<lower=0> sigma_z;
vector<lower=dif-0.5, upper=dif+0.5>[N_games] z;
}
transformed parameters {
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_z ~ normal(0, 1);
z ~ normal(a[team_1] - a[team_2], sigma_z);
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] z_rep;
vector[N_games] log_lik;
for (n in 1:N_games) {
z_rep[n] = normal_rng(a[team_1[n]] - a[team_2[n]], sigma_z);
y_rep[n] = round(z_rep[n]);
log_lik[n] = log_diff_exp(normal_lcdf(dif[n]+0.5 | a[team_1[n]] - a[team_2[n]], sigma_z), normal_lcdf(dif[n]-0.5 | a[team_1[n]] - a[team_2[n]], sigma_z));
}
}fit_discr_z <- model_discr_z$sample(data = stan_data, refresh = 0)
loo_discr_z <- fit_discr_z$loo()fit_discr_z$summary(c("b", "sigma_a", "sigma_z"))# A tibble: 3 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b 1.1 1.1 0.32 0.31 0.53 1.6 1.0 3180. 2132.
2 sigma_a 0.51 0.52 0.23 0.22 0.12 0.87 1.0 768. 1035.
3 sigma_z 1.5 1.5 0.16 0.16 1.2 1.8 1.00 1531. 2207.
6.2 Discrete model with latent z integrated out
model_discr <- cmdstan_model(stan_file = root("world_cup", "worldcup_discrete.stan"))print_stan_code(model_discr$code())data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
}
parameters {
vector[N_teams] alpha;
real b;
real<lower=0> sigma_a;
real<lower=0> sigma_z;
}
transformed parameters {
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_z ~ normal(0, 1);
for (n in 1:N_games) {
target += log_diff_exp(normal_lcdf(dif[n]+0.5 | a[team_1[n]] - a[team_2[n]], sigma_z),
normal_lcdf(dif[n]-0.5 | a[team_1[n]] - a[team_2[n]], sigma_z));
}
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] z_rep;
vector[N_games] log_lik;
for (n in 1:N_games) {
z_rep[n] = normal_rng(a[team_1[n]] - a[team_2[n]], sigma_z);
y_rep[n] = round(z_rep[n]);
log_lik[n] = log_diff_exp(normal_lcdf(dif[n]+0.5 | a[team_1[n]] - a[team_2[n]], sigma_z), normal_lcdf(dif[n]-0.5 | a[team_1[n]] - a[team_2[n]], sigma_z));
}
}fit_discr <- model_discr$sample(data = stan_data, refresh = 0)
loo_discr <- fit_discr$loo(save_psis = TRUE)fit_discr$summary(c("a[1]", "a[32]", "b", "sigma_a", "sigma_z"))# A tibble: 5 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.49 0.51 0.44 0.44 -0.30 1.1 1.0 3127. 3362.
2 a[32] -1.0 -1.0 0.48 0.45 -1.8 -0.27 1.0 5880. 3316.
3 b 1.1 1.1 0.32 0.30 0.54 1.6 1.0 3312. 2552.
4 sigma_a 0.52 0.52 0.22 0.21 0.13 0.87 1.0 1025. 974.
5 sigma_z 1.5 1.5 0.16 0.16 1.2 1.8 1.0 1790. 2542.
6.3 Discrete model with no power score
model_discr_nopower <- cmdstan_model(stan_file = root("world_cup", "worldcup_discrete_nopower.stan"))print_stan_code(model_discr_nopower$code())data {
int N_teams;
int N_games;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
}
parameters {
vector[N_teams] alpha;
real<lower=0> sigma_a;
real<lower=0> sigma_z;
}
transformed parameters {
vector[N_teams] a = sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_z ~ normal(0, 1);
for (n in 1:N_games) {
target += log_diff_exp(normal_lcdf(dif[n]+0.5 | a[team_1[n]] - a[team_2[n]], sigma_z),
normal_lcdf(dif[n]-0.5 | a[team_1[n]] - a[team_2[n]], sigma_z));
}
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] z_rep;
vector[N_games] log_lik;
for (n in 1:N_games) {
real a_dif_n = a[team_1[n]] - a[team_2[n]];
z_rep[n] = normal_rng(a_dif_n, sigma_z);
y_rep[n] = round(z_rep[n]);
log_lik[n] = log_diff_exp(normal_lcdf(dif[n]+0.5 | a_dif_n, sigma_z),
normal_lcdf(dif[n]-0.5 | a_dif_n, sigma_z));
}
}fit_discr_nopower <- model_discr_nopower$sample(data = stan_data, refresh = 0)
loo_discr_nopower <- fit_discr_nopower$loo()fit_discr_nopower$summary(c("a[1]", "a[32]", "sigma_a", "sigma_z"))# A tibble: 4 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] -0.062 -0.050 0.48 0.47 -0.85 0.70 1.0 5679. 3024.
2 a[32] -0.71 -0.67 0.66 0.66 -1.9 0.30 1.0 2996. 3039.
3 sigma_a 0.76 0.76 0.24 0.22 0.35 1.1 1.0 888. 553.
4 sigma_z 1.5 1.5 0.19 0.19 1.2 1.8 1.0 1388. 1521.
6.4 Discrete model with power score only
model_discr_poweronly <- cmdstan_model(stan_file = root("world_cup", "worldcup_discrete_poweronly.stan"))print_stan_code(model_discr_poweronly$code())data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
}
parameters {
real b0;
real b;
real<lower=0> sigma_z;
}
transformed parameters {
vector[N_teams] a = b0 + b * prior_score;
}
model {
b0 ~ normal(0, 1);
b ~ normal(0, 1);
sigma_z ~ normal(0, 1);
for (n in 1:N_games) {
target += log_diff_exp(normal_lcdf(dif[n]+0.5 | a[team_1[n]] - a[team_2[n]], sigma_z),
normal_lcdf(dif[n]-0.5 | a[team_1[n]] - a[team_2[n]], sigma_z));
}
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] z_rep;
vector[N_games] log_lik;
for (n in 1:N_games) {
z_rep[n] = normal_rng(a[team_1[n]] - a[team_2[n]], sigma_z);
y_rep[n] = round(z_rep[n]);
log_lik[n] = log_diff_exp(normal_lcdf(dif[n]+0.5 | a[team_1[n]] - a[team_2[n]], sigma_z), normal_lcdf(dif[n]-0.5 | a[team_1[n]] - a[team_2[n]], sigma_z));
}
}fit_discr_poweronly <- model_discr_poweronly$sample(data = stan_data, refresh = 0)
loo_discr_poweronly <- fit_discr_poweronly$loo()fit_discr_poweronly$summary(c("b0", "b", "sigma_z"))# A tibble: 3 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b0 -0.0021 0.012 0.96 0.96 -1.6 1.6 1.0 3990. 2620.
2 b 1.1 1.1 0.27 0.27 0.64 1.5 1.0 4150. 3110.
3 sigma_z 1.6 1.6 0.14 0.14 1.4 1.9 1.0 3527. 3066.
6.5 Discrete model with no power score and pooled effect
model_discr_pool <- cmdstan_model(stan_file = root("world_cup", "worldcup_discrete_pooled.stan"))print_stan_code(model_discr_pool$code())data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
}
parameters {
real mu_z;
real<lower=0> sigma_z;
}
model {
mu_z ~ normal(0, 1);
sigma_z ~ normal(0, 1);
for (n in 1:N_games) {
target += log_diff_exp(normal_lcdf(dif[n]+0.5 | mu_z, sigma_z),
normal_lcdf(dif[n]-0.5 | mu_z, sigma_z));
}
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] z_rep;
vector[N_games] log_lik;
for (n in 1:N_games) {
z_rep[n] = normal_rng(mu_z, sigma_z);
y_rep[n] = round(z_rep[n]);
log_lik[n] = log_diff_exp(normal_lcdf(dif[n]+0.5 | mu_z, sigma_z), normal_lcdf(dif[n]-0.5 | mu_z, sigma_z));
}
}fit_discr_pool <- model_discr_pool$sample(data = stan_data, refresh = 0)
loo_discr_pool <- fit_discr_pool$loo()fit_discr_pool$summary(c("mu_z","sigma_z"))# A tibble: 2 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 mu_z -0.13 -0.13 0.23 0.23 -0.51 0.25 1.0 2964. 2346.
2 sigma_z 1.8 1.8 0.17 0.16 1.6 2.1 1.0 3217. 2602.
6.6 Model comparison
Now that we have implemented the discrete models, we compare various models with or without different components. The hierarchical model with power score is the best, but not much better than the power-score-only model or the hierarchical model without power score. Clearly the power score and the score differences are providing similar information. Looking at the posterior we do see that using them both does decrease posterior uncertainty, but as the match outcomes still have significant randomness, the difference in predictive performance is small.
# Compare predictive performance (PSIS-LOO elpd) across the discrete models
loo_compare(
list(
"Hier. w power score" = loo_discr,
"Power score only" = loo_discr_poweronly,
"Hier. w/o power score" = loo_discr_nopower,
"Pooled w/o power score" = loo_discr_pool
)
) elpd_diff se_diff
Hier. w power score 0.0 0.0
Power score only -1.3 1.8
Hier. w/o power score -3.3 2.8
Pooled w/o power score -8.4 3.4
7 Discretizing continuous models
7.1 Continuous model with log predictive probability using midpoint rule
model_cont_midp <- cmdstan_model(stan_file = root("world_cup", "worldcup_continuous_midpoint_ll.stan"))print_stan_code(model_cont_midp$code())data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
}
parameters {
vector[N_teams] alpha;
real b;
real<lower=0> sigma_a;
real<lower=0> sigma_y;
}
transformed parameters {
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
dif ~ normal(a[team_1] - a[team_2], sigma_y);
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] log_lik;
for (n in 1:N_games) {
y_rep[n] = normal_rng(a[team_1[n]] - a[team_2[n]], sigma_y);
log_lik[n] = normal_lpdf(dif[n] | a[team_1[n]] - a[team_2[n]], sigma_y);
}
}fit_cont_midp <- model_cont_midp$sample(data = stan_data, refresh = 0)
loo_cont_midp <- fit_cont_midp$loo()fit_cont_midp$summary(c("a[1]", "a[32]", "b", "sigma_a", "sigma_y"))# A tibble: 5 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.48 0.51 0.45 0.46 -0.28 1.2 1.0 3280. 3344.
2 a[32] -1.0 -0.99 0.49 0.46 -1.9 -0.25 1.0 5860. 2903.
3 b 1.1 1.1 0.31 0.31 0.59 1.6 1.0 4527. 3015.
4 sigma_a 0.51 0.52 0.22 0.22 0.12 0.87 1.0 877. 1252.
5 sigma_y 1.5 1.5 0.16 0.16 1.3 1.8 1.0 1952. 2682.
7.2 Continuous model with log predictive probability using exact integration
model_cont <- cmdstan_model(stan_file = root("world_cup", "worldcup_continuous.stan"))print_stan_code(model_cont$code())data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
}
parameters {
vector[N_teams] alpha; // latent team abilities
real b; // prior weight
real<lower=0> sigma_a; // scale of ability prior distribution
real<lower=0> sigma_y; // scale of score difference distribution
}
transformed parameters {
// team abilities as weighted sum of prior score and latent team ability
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
dif ~ normal(a[team_1] - a[team_2], sigma_y);
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] log_lik;
for (n in 1:N_games) {
y_rep[n] = normal_rng(a[team_1[n]] - a[team_2[n]], sigma_y);
log_lik[n] = log_diff_exp(normal_lcdf(dif[n]+0.5 | a[team_1[n]] - a[team_2[n]], sigma_y), normal_lcdf(dif[n]-0.5 | a[team_1[n]] - a[team_2[n]], sigma_y));
}
}fit_cont <- model_cont$sample(data = stan_data, refresh = 0)
loo_cont <- fit_cont$loo()fit_cont$summary(c("a[1]", "a[32]", "b", "sigma_a", "sigma_y"))# A tibble: 5 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.48 0.50 0.45 0.46 -0.28 1.2 1.0 2837. 2898.
2 a[32] -1.0 -1.0 0.48 0.44 -1.8 -0.26 1.0 5976. 3588.
3 b 1.1 1.1 0.31 0.31 0.57 1.6 1.0 3725. 2819.
4 sigma_a 0.51 0.52 0.22 0.22 0.12 0.87 1.0 864. 1031.
5 sigma_y 1.5 1.5 0.16 0.17 1.3 1.8 1.0 1818. 2354.
7.3 Model comparison
The elpd_diff’s between models are small enough to be caused by Monte Carlo variation
# Compare the discrete model against the continuous models whose log
# predictive densities use exact integration vs the midpoint rule
loo_compare(
list(
"Discrete model" = loo_discr,
"Continuous + midpoint log_lik" = loo_cont_midp,
"Continuous model" = loo_cont
)
) elpd_diff se_diff
Continuous model 0.0 0.0
Discrete model -0.6 0.4
Continuous + midpoint log_lik -0.6 0.3
7.4 More discretized continuous models
Continuous sqrt model with log predictive probability using midpoint rule and no Jacobian
model_sqrt_cont_noj <- cmdstan_model(stan_file = root("world_cup", "worldcup_sqrt_continuous_nojacobian.stan"))print_stan_code(model_sqrt_cont_noj$code())/* The error in the transformed data block has been fixed */
data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
real df;
}
transformed data {
vector[N_games] dif = score_1 - score_2;
vector[N_games] sqrt_dif;
for (i in 1:N_games){
sqrt_dif[i] = 2 * (step(dif[i]) - 0.5) * sqrt(abs(dif[i]));
}
}
parameters {
vector[N_teams] alpha;
real b;
real<lower=0> sigma_a;
real<lower=0> sigma_y;
}
transformed parameters {
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
sqrt_dif ~ normal(a[team_1] - a[team_2], sigma_y);
}
generated quantities {
vector[N_games] y_rep;
vector[N_games] y_rep_original_scale;
vector[N_games] log_lik;
real lprior;
for (n in 1:N_games) {
y_rep[n] = normal_rng(a[team_1[n]] - a[team_2[n]], sigma_y);
log_lik[n] = normal_lpdf(sqrt_dif[n] | a[team_1[n]] - a[team_2[n]], sigma_y);
}
y_rep_original_scale = y_rep .* abs(y_rep);
lprior = normal_lpdf(b | 0, 1) +
normal_lpdf(sigma_a | 0, 1) +
normal_lpdf(sigma_y | 0, 1);
real round15 = round(1.5);
real round25 = round(2.5);
}fit_sqrt_cont_noj <- model_sqrt_cont_noj$sample(data = stan_data, refresh = 0)
loo_sqrt_cont_noj <- fit_sqrt_cont_noj$loo()fit_sqrt_cont_noj$summary(c("a[1]", "a[32]", "b", "sigma_a", "sigma_y"))# A tibble: 5 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.49 0.50 0.27 0.27 0.015 0.90 1.0 3447. 3115.
2 a[32] -0.74 -0.73 0.32 0.30 -1.3 -0.23 1.0 5175. 3116.
3 b 0.82 0.83 0.21 0.20 0.48 1.2 1.0 3790. 2541.
4 sigma_a 0.34 0.34 0.15 0.14 0.071 0.57 1.0 802. 788.
5 sigma_y 0.92 0.92 0.10 0.10 0.76 1.1 1.0 1551. 2411.
Continuous sqrt model with log predictive probability using midpoint rule and Jacobian is not possible, as the Jacobian is infinite at the midpoint 0. Continuous sqrt model with log predictive probability using Jacobian and quadrature integration
model_sqrt_cont <- cmdstan_model(stan_file = root("world_cup", "worldcup_sqrt_continuous.stan"))print_stan_code(model_sqrt_cont$code())/* The error in the transformed data block has been fixed */
functions {
// Unnormalized predictive density of the integer score difference z under the
// signed-sqrt normal model, on the original (score-difference) scale. The
// change of variables from the sqrt scale contributes the log-Jacobian
// -log(|z|)/2 - log(2), i.e. |d(sign(z) sqrt(|z|))/dz| = 1/(2 sqrt(|z|)).
// theta = {sigma_y, a_diff}; X_i and y_i are unused but required by the
// integrate_1d signature.
real integrand(real z, real notused, array[] real theta,
array[] real X_i, array[] int y_i) {
real sigma_y = theta[1];
real a_diff = theta[2];
// signed square root: 2 * (step(z) - 0.5) == sign(z)
real sqrt_z = 2 * (step(z) - 0.5) * sqrt(abs(z));
real p = exp(normal_lpdf(sqrt_z | a_diff, sigma_y) - log(abs(z))/2 -log(2));
// the Jacobian diverges at z = 0; clamp non-finite values so the
// quadrature in integrate_1d does not fail
return (is_inf(p) || is_nan(p)) ? 0 : p;
}
}
data {
int N_teams;
int N_games;
vector[N_teams] prior_score; // prior ranking-based score for each team
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
real df; // NOTE(review): declared but unused here -- presumably kept so the same data list serves all model variants; confirm
}
transformed data {
vector[N_games] dif = score_1 - score_2;
vector[N_games] sqrt_dif;
// signed square root transform of the score differences
for (i in 1:N_games){
sqrt_dif[i] = 2 * (step(dif[i]) - 0.5) * sqrt(abs(dif[i]));
}
}
parameters {
vector[N_teams] alpha; // non-centered team ability deviations
real b; // coefficient of the prior score
real<lower=0> sigma_a; // sd of abilities around b * prior_score
real<lower=0> sigma_y; // residual sd on the sqrt scale
}
transformed parameters {
// team abilities: regression on prior score plus scaled deviations
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
// continuous normal model on the signed-sqrt score differences
sqrt_dif ~ normal(a[team_1] - a[team_2], sigma_y);
}
generated quantities {
vector[N_games] y_rep; // replicated outcomes on the sqrt scale
vector[N_games] y_rep_original_scale;
vector[N_games] log_lik;
real lprior;
for (n in 1:N_games) {
y_rep[n] = normal_rng(a[team_1[n]] - a[team_2[n]], sigma_y);
// Log predictive probability of the observed integer difference: integrate
// the continuous density (with Jacobian) over the unit-width bin around
// dif[n]. The dif == 0 bin is split at 0 because the integrand is
// singular there.
log_lik[n] = (dif[n] == 0)
? log(integrate_1d(integrand,
-0.5,
0,
append_array({sigma_y}, {a[team_1[n]] - a[team_2[n]]}),
{0}, // not used, but an empty array not allowed
{0}, // not used, but an empty array not allowed
1e-6) +
integrate_1d(integrand,
0,
0.5,
append_array({sigma_y}, {a[team_1[n]] - a[team_2[n]]}),
{0}, // not used, but an empty array not allowed
{0}, // not used, but an empty array not allowed
1e-6))
: log(integrate_1d(integrand,
dif[n]-0.5,
dif[n]+0.5,
append_array({sigma_y}, {a[team_1[n]] - a[team_2[n]]}),
{0}, // not used, but an empty array not allowed
{0}, // not used, but an empty array not allowed
1e-6));
}
// inverse of the signed sqrt transform: y * |y| maps back to the original scale
y_rep_original_scale = y_rep .* abs(y_rep);
// hyperparameter log-priors (alpha excluded), e.g. for prior sensitivity analysis
lprior = normal_lpdf(b | 0, 1) +
normal_lpdf(sigma_a | 0, 1) +
normal_lpdf(sigma_y | 0, 1);
}fit_sqrt_cont <- model_sqrt_cont$sample(data = stan_data, refresh = 0)
loo_sqrt_cont <- fit_sqrt_cont$loo(save_psis = TRUE)fit_sqrt_cont$summary(c("a[1]", "a[32]", "b", "sigma_a", "sigma_y"))# A tibble: 5 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.48 0.49 0.27 0.27 0.014 0.90 1.0 4421. 2790.
2 a[32] -0.74 -0.73 0.32 0.29 -1.3 -0.24 1.00 6450. 3272.
3 b 0.83 0.83 0.21 0.20 0.49 1.2 1.0 3810. 2938.
4 sigma_a 0.34 0.34 0.14 0.14 0.097 0.57 1.0 944. 1095.
5 sigma_y 0.92 0.91 0.10 0.10 0.76 1.1 1.0 1841. 2516.
Discrete sqrt model with log predictive probability using Jacobian and quadrature integration. The sampling is slow as the quadrature integration is done at each HMC/NUTS leapfrog step.
model_sqrt_discr <- cmdstan_model(stan_file = root("world_cup", "worldcup_sqrt_discrete.stan"))print_stan_code(model_sqrt_discr$code())/* The error in the transformed data block has been fixed */
functions {
// Unnormalized predictive density of the integer score difference z under the
// signed-sqrt normal model, including the change-of-variables log-Jacobian
// -log(|z|)/2 - log(2). theta = {sigma_y, a_diff, (df)}; only theta[1:2] are
// read, so a trailing df entry is silently ignored (see NOTE in the model
// block). X_i and y_i are unused but required by the integrate_1d signature.
real integrand(real z, real notused, array[] real theta,
array[] real X_i, array[] int y_i) {
real sigma_y = theta[1];
real a_diff = theta[2];
// signed square root: 2 * (step(z) - 0.5) == sign(z)
real sqrt_z = 2 * (step(z) - 0.5) * sqrt(abs(z));
real p = exp(normal_lpdf(sqrt_z | a_diff, sigma_y) - log(abs(z))/2 -log(2));
// the Jacobian diverges at z = 0; clamp non-finite values for the quadrature
return (is_inf(p) || is_nan(p)) ? 0 : p;
}
}
data {
int N_teams;
int N_games;
vector[N_teams] prior_score; // prior ranking-based score for each team
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
real df; // passed to the integrand below but never read there (see NOTE)
}
transformed data {
vector[N_games] dif = score_1 - score_2;
vector[N_games] sqrt_dif;
// signed square root transform of the score differences
for (i in 1:N_games){
sqrt_dif[i] = 2 * (step(dif[i]) - 0.5) * sqrt(abs(dif[i]));
}
}
parameters {
vector[N_teams] alpha; // non-centered team ability deviations
real b; // coefficient of the prior score
real<lower=0> sigma_a; // sd of abilities around b * prior_score
real<lower=0> sigma_y; // residual sd on the sqrt scale
}
transformed parameters {
// team abilities: regression on prior score plus scaled deviations
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
sigma_y ~ normal(0, 1);
// Discrete likelihood: the probability of each integer difference is the
// continuous density integrated over its unit-width bin (split at 0 for
// dif == 0 where the integrand is singular). This quadrature runs at every
// HMC/NUTS leapfrog step, hence the slow sampling; the looser 1e-5
// tolerance (vs 1e-6 in generated quantities) mitigates the cost.
// NOTE(review): {df} is appended to theta but the integrand never reads
// theta[3]; together with the commented-out student_t_lpdf in generated
// quantities this suggests a Student-t variant was intended -- confirm.
for (n in 1:N_games) {
target += (dif[n] == 0)
? log(integrate_1d(integrand,
-0.5,
0,
append_array(append_array({sigma_y}, {a[team_1[n]] - a[team_2[n]]}), {df}),
{0}, // not used, but an empty array not allowed
{0}, // not used, but an empty array not allowed
1e-5) +
integrate_1d(integrand,
0,
0.5,
append_array(append_array({sigma_y}, {a[team_1[n]] - a[team_2[n]]}), {df}),
{0}, // not used, but an empty array not allowed
{0}, // not used, but an empty array not allowed
1e-5))
: log(integrate_1d(integrand,
dif[n]-0.5,
dif[n]+0.5,
append_array(append_array({sigma_y}, {a[team_1[n]] - a[team_2[n]]}), {df}),
{0}, // not used, but an empty array not allowed
{0}, // not used, but an empty array not allowed
1e-5));
// integrate_1d_reltol));
}
}
generated quantities {
vector[N_games] y_rep; // replicated outcomes on the sqrt scale
vector[N_games] y_rep_original_scale;
vector[N_games] log_lik;
real lprior;
for (n in 1:N_games) {
y_rep[n] = normal_rng(a[team_1[n]] - a[team_2[n]], sigma_y);
// log_lik[n] = student_t_lpdf(sqrt_dif[n] | df, a[team_1[n]] - a[team_2[n]], sigma_y) + (dif[n]==0 ? 0 : -log(abs(dif[n]))/2 -log(2));
// Same binned integral as the model block, but with tighter tolerance
// (1e-6) and without appending {df} -- equivalent here, since the
// integrand ignores theta entries beyond the first two.
log_lik[n] = (dif[n] == 0)
? log(integrate_1d(integrand,
-0.5,
0,
append_array({sigma_y}, {a[team_1[n]] - a[team_2[n]]}),
{0}, // not used, but an empty array not allowed
{0}, // not used, but an empty array not allowed
1e-6) +
integrate_1d(integrand,
0,
0.5,
append_array({sigma_y}, {a[team_1[n]] - a[team_2[n]]}),
{0}, // not used, but an empty array not allowed
{0}, // not used, but an empty array not allowed
1e-6))
: log(integrate_1d(integrand,
dif[n]-0.5,
dif[n]+0.5,
append_array({sigma_y}, {a[team_1[n]] - a[team_2[n]]}),
{0}, // not used, but an empty array not allowed
{0}, // not used, but an empty array not allowed
1e-6));
// integrate_1d_reltol));
}
// inverse of the signed sqrt transform: y * |y| maps back to the original scale
y_rep_original_scale = y_rep .* abs(y_rep);
// hyperparameter log-priors (alpha excluded), e.g. for prior sensitivity analysis
lprior = normal_lpdf(b | 0, 1) +
normal_lpdf(sigma_a | 0, 1) +
normal_lpdf(sigma_y | 0, 1);
}fit_sqrt_discr <- model_sqrt_discr$sample(data = stan_data, refresh = 0)
loo_sqrt_discr <- fit_sqrt_discr$loo()fit_sqrt_discr$summary(c("a[1]", "a[32]", "b", "sigma_a", "sigma_y"))# A tibble: 5 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.48 0.49 0.28 0.27 -0.0067 0.91 1.0 4459. 3367.
2 a[32] -0.74 -0.73 0.32 0.29 -1.3 -0.23 1.0 5592. 3292.
3 b 0.83 0.83 0.20 0.20 0.49 1.2 1.0 3630. 2925.
4 sigma_a 0.36 0.36 0.15 0.14 0.091 0.60 1.0 840. 799.
5 sigma_y 0.90 0.89 0.11 0.11 0.73 1.1 1.0 1502. 2431.
7.5 Model comparison
Without taking Jacobian into account, the continuous model for square root of score difference looks like the best
loo_compare(
list(
"Discrete" = loo_discr,
"Discrete sqrt" = loo_sqrt_discr,
"Cont-sqrt + midp, -Jacobian" = loo_sqrt_cont_noj
)
) elpd_diff se_diff
Cont-sqrt + midp, -Jacobian 0.0 0.0
Discrete -32.5 5.2
Discrete sqrt -40.3 4.3
Taking Jacobian into account, the square root models are worse, and the difference between continuous and discrete square root model is small enough to be explained by Monte Carlo variation.
loo_compare(
list(
"Discrete" = loo_discr,
"Discrete sqrt" = loo_sqrt_discr,
"Continuous sqrt +Jacobian" = loo_sqrt_cont
)
) elpd_diff se_diff
Discrete 0.0 0.0
Continuous sqrt +Jacobian -7.2 5.5
Discrete sqrt -7.8 5.4
7.6 LOO-CV predictive checking
LOO-CV predictive checking with LOO-PIT for the discrete model looks fine
ppc_loo_pit_ecdf(
y = stan_data$score_1 - stan_data$score_2,
yrep = fit_discr$draws(format = "matrix", variables = "y_rep"),
psis_object = loo_discr$psis_object,
method = "correlated"
)LOO-CV predictive checking for the continuous model indicates slight miscalibration with too many low PIT values (left tail of the predictive distribution is shorter than expected)
ppc_loo_pit_ecdf(
y = stan_data$score_1 - stan_data$score_2,
yrep = fit_sqrt_cont$draws(format = "matrix", variables = "y_rep"),
psis_object = loo_sqrt_cont$psis_object,
method = "correlated"
)8 Bivariate Poisson and Poisson difference models
Bivariate Poisson model is commonly used for football scores (Karlis and Ntzoufras 2003). If we care only about the score difference we can also use Poisson difference model (Karlis and Ntzoufras 2003).
model_bipois <- cmdstan_model(stan_file = root("world_cup", "worldcup_bivariate_poisson.stan"))print_stan_code(model_bipois$code())functions {
// Log-pmf of the bivariate Poisson distribution at (score1, score2),
// parameterized on the log-rate scale; the latent shared component with rate
// exp(log_lambda_3) induces positive correlation between the two scores.
real bivariate_poisson_log_lpmf_(int score1, int score2,
real log_lambda_1, real log_lambda_2, real log_lambda_3) {
// bivariate Poisson with lambda_3 representing the correlation
// e.g. Karlis and Ntzoufras (2003). Analysis of sports data by using bivariate Poisson models
// Marginalize the latent shared count z (at most min(score1, score2))
// on the log scale via log_sum_exp for numerical stability.
int m = min(score1, score2);
vector[m + 1] log_terms;
for (z in 0 : m) {
int r1 = score1 - z;
int r2 = score2 - z;
log_terms[z + 1] = r1 * log_lambda_1 - lgamma(r1 + 1)
+ r2 * log_lambda_2 - lgamma(r2 + 1)
+ z * log_lambda_3 - lgamma(z + 1);
}
return -exp(log_lambda_1) - exp(log_lambda_2) - exp(log_lambda_3) + log_sum_exp(log_terms);
}
// Vectorized overload: sums the lpmf over N games given as array[2, N]
// (row 1 = first team's scores, row 2 = second team's scores);
// log_lambda_3 is shared by all games.
real bivariate_poisson_log_lpmf(array[,] int score,
vector log_lambda_1, vector log_lambda_2, real log_lambda_3) {
// bivariate Poisson lpmf for array[2,N] scores
array[2] int score_dims = dims(score);
real total = 0;
for (i in 1:score_dims[2]) {
total += bivariate_poisson_log_lpmf_(score[1,i], score[2,i],
log_lambda_1[i], log_lambda_2[i], log_lambda_3);
}
return total;
}
// Scalar overload for a single game's scores given as array[2].
real bivariate_poisson_log_lpmf(array[] int score,
real log_lambda_1, real log_lambda_2, real log_lambda_3) {
// bivariate Poisson lpmf for array[2] scores
return bivariate_poisson_log_lpmf_(score[1], score[2],
log_lambda_1, log_lambda_2, log_lambda_3);
}
}
data {
int N_teams;
int N_games;
vector[N_teams] prior_score; // prior ranking-based score for each team
array[N_games] int team_1;
array[N_games] int team_2;
array[N_games] int score_1;
array[N_games] int score_2;
}
parameters {
vector[N_teams] z_o; // non-centered offence deviations
vector[N_teams] z_d; // non-centered defence deviations
real a; // intercept (baseline log scoring rate)
real b_o; // effect of prior score on offence
real b_d; // effect of prior score on defence
real c; // log rate of the shared (correlation) component
real<lower=0> sigma_o; // sd of offence deviations
real<lower=0> sigma_d; // sd of defence deviations
}
transformed parameters {
// offence and defence abilities: regression on prior score plus deviations
vector[N_teams] o = b_o * prior_score + sigma_o * z_o;
vector[N_teams] d = b_d * prior_score + sigma_d * z_d;
}
model {
z_o ~ normal(0, 1);
z_d ~ normal(0, 1);
b_o ~ normal(0, 1);
b_d ~ normal(0, 1);
a ~ normal(0, 1);
// Prior centered on the 1991–1992 Italian Serie A estimate reported by
// Karlis and Ntzoufras (2003), with a very wide scale around it.
c ~ normal(-1.5, 20);
sigma_o ~ normal(0, 1);
sigma_d ~ normal(0, 1);
// vectorized bivariate Poisson likelihood (array[2, N] overload); each
// team's scoring rate mixes its offence with the opponent's defence
{score_1, score_2} ~ bivariate_poisson_log(a + o[team_1] + d[team_2],
a + o[team_2] + d[team_1],
c);
}
generated quantities {
array[N_games] int y_rep; // replicated score differences
array[N_games] int score_1_rep;
array[N_games] int score_2_rep;
vector[N_games] log_lik;
vector[N_games] log_lik2;
for (n in 1:N_games) {
// the same shared draw is added to both scores, inducing the correlation
int score_c = poisson_log_rng(c);
score_1_rep[n] = poisson_log_rng(a + o[team_1[n]] + d[team_2[n]]) + score_c;
score_2_rep[n] = poisson_log_rng(a + o[team_2[n]] + d[team_1[n]]) + score_c;
y_rep[n] = score_1_rep[n] - score_2_rep[n];
// log probability using the bivariate Poisson lpmf
log_lik2[n] = bivariate_poisson_log_lpmf({score_1[n], score_2[n]} |
a + o[team_1[n]] + d[team_2[n]],
a + o[team_2[n]] + d[team_1[n]],
c);
// log probability using the Poisson difference (Skellam) lpmf
// note that lambda3 cancels out in the difference
int dif = score_1[n] - score_2[n];
// NOTE(review): the intercept a is part of the model-block rates but is
// omitted here (l1 = exp(o + d) rather than exp(a + o + d)); unlike
// lambda3, a does not cancel in the difference distribution -- confirm.
real l1 = exp(o[team_1[n]] + d[team_2[n]]);
real l2 = exp(o[team_2[n]] + d[team_1[n]]);
// assumes modified_bessel_first_kind accepts a negative integer order
// (I_{-v} = I_v for integer v), so negative dif is handled -- confirm
log_lik[n] = -(l1 + l2) + (log(l1) - log(l2))*(dif/2.0) +
log(modified_bessel_first_kind(dif, 2.0*sqrt(l1*l2)));
}
}
}fit_bipois <- model_bipois$sample(data = stan_data, refresh = 0, adapt_delta = 0.95)fit_bipois$summary(c("a","o[1]","o[32]","d[1]","d[32]","b_o","b_d","sigma_o","sigma_d"))# A tibble: 9 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a 0.056 0.11 0.23 0.19 -0.41 0.33 1.0 558. 865.
2 o[1] 0.41 0.41 0.22 0.21 0.050 0.78 1.0 1673. 1678.
3 o[32] -0.42 -0.41 0.28 0.23 -0.89 0.014 1.0 1579. 1893.
4 d[1] -0.032 -0.056 0.31 0.33 -0.49 0.53 1.0 1364. 2459.
5 d[32] 0.46 0.44 0.27 0.25 0.027 0.90 1.0 3558. 2615.
6 b_o 0.53 0.52 0.23 0.23 0.19 0.94 1.0 1057. 1060.
7 b_d -0.45 -0.44 0.21 0.19 -0.78 -0.11 1.0 1219. 767.
8 sigma_o 0.18 0.16 0.13 0.13 0.016 0.41 1.0 909. 1365.
9 sigma_d 0.30 0.29 0.17 0.17 0.041 0.61 1.0 561. 1146.
(loo_bipois <- fit_bipois$loo(save_psis = TRUE))
Computed from 4000 by 64 log-likelihood matrix.
Estimate SE
elpd_loo -124.7 8.1
p_loo 12.9 3.2
looic 249.4 16.3
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.2, 1.2]).
Pareto k diagnostic values:
Count Pct. Min. ESS
(-Inf, 0.7] (good) 57 89.1% 175
(0.7, 1] (bad) 6 9.4% <NA>
(1, Inf) (very bad) 1 1.6% <NA>
See help('pareto-k-diagnostic') for details.
model_poisdif <- cmdstan_model(stan_file = root("world_cup", "worldcup_poisson_difference.stan"))print_stan_code(model_poisdif$code())data {
int N_teams;
int N_games;
vector[N_teams] prior_score;
array[N_games] int team_1;
array[N_games] int team_2;
vector[N_games] score_1;
vector[N_games] score_2;
}
transformed data {
vector[N_games] dif = score_1 - score_2; // observed score differences
}
parameters {
vector[N_teams] alpha; // non-centered team ability deviations
real b; // coefficient of the prior score
real<lower=0> sigma_a; // sd of abilities around b * prior_score
}
transformed parameters {
// team abilities: regression on prior score plus scaled deviations
vector[N_teams] a = b * prior_score + sigma_a * alpha;
}
model {
alpha ~ normal(0, 1);
b ~ normal(0, 1);
sigma_a ~ normal(0, 1);
for (n in 1:N_games) {
// log probability using the Poisson difference (Skellam) lpmf
// e.g. Karlis and Ntzoufras (2003). Analysis of sports data by using bivariate Poisson models
// l1 and l2 are the two Poisson rates; note l2 = 1 / l1 by construction
real l1 = exp(a[team_1[n]] - a[team_2[n]]);
real l2 = exp(a[team_2[n]] - a[team_1[n]]);
// assumes modified_bessel_first_kind accepts a negative integer order
// (I_{-v} = I_v for integer v), so negative dif is handled -- confirm
target += -(l1 + l2) + (log(l1) - log(l2))*(dif[n]/2.0) +
log(modified_bessel_first_kind(to_int(dif[n]), 2.0*sqrt(l1*l2)));
}
}
generated quantities {
vector[N_games] y_rep; // replicated score differences
vector[N_games] log_lik;
for (n in 1:N_games) {
// simulate each score from its own Poisson and take the difference,
// using the same rates as the likelihood below
int score_1_rep = poisson_log_rng(a[team_1[n]] - a[team_2[n]]);
int score_2_rep = poisson_log_rng(a[team_2[n]] - a[team_1[n]]);
y_rep[n] = score_1_rep - score_2_rep;
// log probability using the Poisson difference (Skellam) lpmf
// e.g. Karlis and Ntzoufras (2003). Analysis of sports data by using bivariate Poisson models
real l1 = exp(a[team_1[n]] - a[team_2[n]]);
real l2 = exp(a[team_2[n]] - a[team_1[n]]);
log_lik[n] = -(l1 + l2) + (log(l1) - log(l2))*(dif[n]/2.0) +
log(modified_bessel_first_kind(to_int(dif[n]), 2.0*sqrt(l1*l2)));
}
}fit_poisdif <- model_poisdif$sample(data = stan_data, refresh = 0)fit_poisdif$summary(c("a[1]", "a[32]", "b", "sigma_a"))# A tibble: 4 × 10
variable mean median sd mad q5 q95 rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a[1] 0.22 0.24 0.21 0.21 -0.13 0.53 1.0 3472. 3231.
2 a[32] -0.47 -0.46 0.21 0.19 -0.81 -0.12 1.0 5942. 3242.
3 b 0.52 0.52 0.14 0.14 0.28 0.75 1.0 2947. 2142.
4 sigma_a 0.24 0.24 0.092 0.085 0.078 0.39 1.0 1257. 994.
(loo_poisdif <- fit_poisdif$loo(save_psis = TRUE))
Computed from 4000 by 64 log-likelihood matrix.
Estimate SE
elpd_loo -122.9 7.6
p_loo 11.0 2.4
looic 245.8 15.2
------
MCSE of elpd_loo is 0.1.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.5, 1.6]).
All Pareto k estimates are good (k < 0.7).
See help('pareto-k-diagnostic') for details.
The data set in this case study is small, and we don't see a practical difference in the predictive performance.
loo_compare(
list(
"Discrete" = loo_discr,
"Bivariate Poisson" = loo_bipois,
"Poisson difference" = loo_poisdif
)
) elpd_diff se_diff
Poisson difference 0.0 0.0
Discrete -1.7 1.7
Bivariate Poisson -1.8 0.9
8.1 LOO-CV predictive checking
LOO-CV predictive checking with LOO-PIT for the bivariate Poisson model looks fine.
ppc_loo_pit_ecdf(
y = stan_data$score_1 - stan_data$score_2,
yrep = fit_bipois$draws(format = "matrix", variables = "y_rep"),
psis_object = loo_bipois$psis_object,
method = "correlated"
)LOO-CV predictive checking with LOO-PIT for the Poisson difference model looks fine.
ppc_loo_pit_ecdf(
y = stan_data$score_1 - stan_data$score_2,
yrep = fit_poisdif$draws(format = "matrix", variables = "y_rep"),
psis_object = loo_poisdif$psis_object,
method = "correlated"
)In this case study, we used many other models for illustration, but for real football score modeling, it is a good idea to start with the bivariate Poisson model. For real football analysis footBayes R package (Egidi, Macrì Demartino, and Palaskas 2025) has several different models including dynamic models allowing the latent performance to evolve in time.
References
Licenses
- Code © 2021–2025, Andrew Gelman, Aki Vehtari, licensed under BSD-3.
- Text © 2021–2025, Andrew Gelman, Aki Vehtari, licensed under CC-BY-NC 4.0.