Gibbs sampling

ggplot2 is used for plotting, tidyr for manipulating data frames

library(ggplot2)
theme_set(theme_minimal())
library(tidyr)
library(gganimate)
library(MASS)
library(posterior)
library(rprojroot)
root<-has_file(".BDA_R_demos_root")$make_fix_file()

Parameters of a normal distribution used as a toy target distribution

y1 <- 0
y2 <- 0
r <- 0.8
Sigma <- diag(2)
Sigma[1, 2] <- r
Sigma[2, 1] <- r

Sample from the toy distribution to visualize 90% HPD interval with ggplot’s stat_ellipse()

dft <- data.frame(mvrnorm(100000, c(0, 0), Sigma))

see BDA3 p. 85 for how to compute HPD for multivariate normal in 2d-case contour for 90% HPD is an ellipse, whose semimajor axes can be computed from the eigenvalues of the covariance matrix scaled by a value selected to get ellipse match the density at the edge of 90% HPD. Angle of the ellipse could be computed from the eigenvectors, but since the marginals are same we know that angle is pi/4 Starting value of the chain

t1 <- -2.5
t2 <- 2.5

Number of iterations.

M <- 2*2500

N.B. In this implementation one iteration updates only one parameter and one complete iteration updating both parameters takes two basic iterations. This implementation was used to make plotting of Gibbs sampler’s zig-zagging. In plots You can implement this also by saving only the final state of complete iteration updating all parameters. Insert your own Gibbs sampling here

# Allocate memory for the sample
tt <- matrix(rep(0, 2*M), ncol = 2)
tt[1,] <- c(t1, t2)    # Save starting point
# For demonstration load pre-computed values
# Replace this with your algorithm!
# tt is a M x 2 array, with M draws of both theta_1 and theta_2
load(root("demos_ch11","demo11_1.RData"))

The rest is for illustration Take the first 50 draws to illustrate how the sampler works

df100 <- data.frame(id=rep(1,100),
                    iter=1:100, 
                    th1 = tt[1:100, 1],
                    th2 = tt[1:100, 2],
                    th1l = c(tt[1, 1], tt[1:(100-1), 1]),
                    th2l = c(tt[1, 2], tt[1:(100-1), 2]))

Take the first 1000 observations

S <- 1000
dfs <- data.frame(th1 = tt[1:S, 1], th2 = tt[1:S, 2])

Remove warm-up period of 50 first draws later

warm <- 50

# labels and frame indices for the plot
labs1 <- c('Draws', 'Steps of the sampler', '90% HPD')
ind1 <- (1:50)*2-1
df100s <- df100
df100s[ind1+1,3:4]=df100s[ind1,3:4]
p1 <- ggplot() +
  geom_point(data = df100s,
             aes(th1, th2, group=id, color ='1')) +
  geom_segment(data = df100, aes(x = th1, xend = th1l, color = '2',
                                 y = th2, yend = th2l)) +
  stat_ellipse(data = dft, aes(x = X1, y = X2, color = '3'), level = 0.9) +
  coord_cartesian(xlim = c(-4, 4), ylim = c(-4, 4)) +
  labs(x = 'theta1', y = 'theta2') +
  scale_color_manual(values = c('red', 'forestgreen','blue'), labels = labs1) +
  guides(color = guide_legend(override.aes = list(
    shape = c(16, NA, NA), linetype = c(0, 1, 1)))) +
  theme(legend.position = 'bottom', legend.title = element_blank())

The following generates a gif animation of the steps of the sampler (might take 10 seconds).

anim <- animate(p1 +
          transition_reveal(along=iter) + 
          shadow_trail(0.01))

Show the animation

anim

Show only the end result as a static figure

p1

Highlight warm-up period of the 30 first draws with purple

p1 + geom_point(data = df100[ind1[1:30],],
                aes(th1, th2), color = 'green')