Linear least squares regression with a single predictor. See Chapters 6 and 7 in Regression and Other Stories.


Load packages

# Load packages: rprojroot locates the project root via a marker file,
# arm provides display() and fround() used below.
library("rprojroot")
root <- has_file(".ROS-Examples-root")$make_fix_file()
library("arm")

Fitting a regression using a data frame in R

Simulate fake data

# Simulate fake data from the model y = a + b*x + error,
# with errors drawn from normal(0, sigma).
x <- 1:20
n <- length(x)
a <- 0.2      # true intercept
b <- 0.3      # true slope
sigma <- 0.5  # residual standard deviation
# Set the random seed to get reproducible results;
# change the seed to experiment with variation due to random noise.
set.seed(2141)
y <- a + b * x + sigma * rnorm(n)
fake <- data.frame(x, y)

Linear least squares regression

# Fit linear least squares regression of y on x using the fake data frame;
# display() (from arm) prints a concise summary: coefficient estimates,
# standard errors, n, k, residual sd, and R-squared.
fit <- lm(y ~ x, data=fake)
display(fit)
lm(formula = y ~ x, data = fake)
            coef.est coef.se
(Intercept) 0.40     0.22   
x           0.28     0.02   
---
n = 20, k = 2
residual sd = 0.47, R-Squared = 0.93

Plot for the book

# Scatterplot of the fake data with the least-squares line and the
# fitted equation written alongside the line at the mean of x.
par(mar = c(3, 3, 1, 1), mgp = c(1.7, .5, 0), tck = -.01)
plot(fake$x, fake$y, main = "Data and fitted regression line", bty = "l",
     pch = 20, xlab = "x", ylab = "y")
coefs <- coef(fit)
a_hat <- coefs[1]
b_hat <- coefs[2]
abline(a_hat, b_hat)
x_bar <- mean(fake$x)
eq_label <- paste("   y =", round(a_hat, 2), "+", round(b_hat, 2), "* x")
text(x_bar, a_hat + b_hat * x_bar, eq_label, adj = 0)

Formulating comparisons as regression models

Simulate fake data

# Simulate n_0 draws from normal(mean 2, sd 5), rounded to one decimal.
n_0 <- 20
# Set the random seed to get reproducible results;
# change the seed to experiment with variation due to random noise.
set.seed(2141)
y_0 <- round(rnorm(n_0, 2, 5), 1)
y_0
 [1]  9.1  2.1  0.3  3.3  2.2 12.7 -2.3  1.8 -1.0  4.1 -5.1  7.0 -4.5 -0.8  0.4
[16]  4.0 -1.8 10.2 -0.9 -0.7
# Sample mean of y_0 (compare with the intercept of the regression on a
# constant term, shown below)
round(mean(y_0), 2)
[1] 2
# Standard error of the mean of y_0; divide by this group's own sample
# size n_0 (the original divided by n, which only coincidentally equals
# n_0 = 20 at this point in the script)
round(sd(y_0)/sqrt(n_0), 2)
[1] 1.06

Estimating the mean is the same as regressing on a constant term

# Regressing on a constant term (intercept only) reproduces the sample
# mean (2.00) and its standard error (1.06) as coefficient and coef.se
display(lm(y_0 ~ 1))
lm(formula = y_0 ~ 1)
            coef.est coef.se
(Intercept) 2.00     1.06   
---
n = 20, k = 1
residual sd = 4.76, R-Squared = 0.00

Simulate fake data

# Simulate a second group, then compare the two groups: the difference
# in means and its standard error, combining the per-group standard
# errors in quadrature.
n_1 <- 30
# Set the random seed to get reproducible results;
# change the seed to experiment with variation due to random noise.
set.seed(2141)
y_1 <- rnorm(n_1, 8, 5)
se_0 <- sd(y_0) / sqrt(n_0)
se_1 <- sd(y_1) / sqrt(n_1)
diff <- mean(y_1) - mean(y_0)
se <- sqrt(se_0^2 + se_1^2)
print(diff)
[1] 6.68748
# Standard error of the difference in means
print(se)
[1] 1.381859

Estimating a difference is the same as regressing on an indicator variable

# Stack the two groups and regress on a 0/1 group indicator:
# the intercept estimates mean(y_0) and the slope estimates the
# difference in means, matching the comparison computed above.
n <- n_0 + n_1
y <- c(y_0, y_1)
x <- rep(c(0, 1), c(n_0, n_1))
fit <- lm(y ~ x)
display(fit)
lm(formula = y ~ x)
            coef.est coef.se
(Intercept) 2.00     1.07   
x           6.69     1.39   
---
n = 50, k = 2
residual sd = 4.80, R-Squared = 0.33

Plot for the book

# Plot for the book: the stacked data, the fitted regression line, and
# dashed horizontal lines at the two group means.
par(mar = c(3, 3, 3, 2), mgp = c(1.7, .5, 0), tck = -.01)
plot(x, y, xlab = "Indicator, x", ylab = "y", bty = "l", xaxt = "n", main = "Regression on an indicator is the same\nas computing a difference in means",  pch = 19, cex = .5)
axis(1, c(0, 1))
ybar_0 <- mean(y[x == 0])
ybar_1 <- mean(y[x == 1])
abline(h = ybar_0, lty = 2, col = "gray50")
abline(h = ybar_1, lty = 2, col = "gray50")
abline(coef(fit)[1], coef(fit)[2])
text(.5, -1 + coef(fit)[1] + .5 * coef(fit)[2],
     paste("y =", fround(coef(fit)[1], 2), "+", fround(coef(fit)[2], 2), "x"),
     cex = .9, adj = 0)
# Label the group means with values computed from the data. The previous
# version hard-coded "2.68" and "8.38", which correspond to a different
# random seed; with seed 2141 the means are 2.00 and 8.69 (consistent
# with the displayed fit: intercept 2.00, slope 6.69).
text(.05, -1 + ybar_0, bquote(bar(y)[0] == .(fround(ybar_0, 2))),
     col = "gray30", cex = .9, adj = 0)
text(.95, 1 + ybar_1, bquote(bar(y)[1] == .(fround(ybar_1, 2))),
     col = "gray30", cex = .9, adj = 1)

LS0tCnRpdGxlOiAiUmVncmVzc2lvbiBhbmQgT3RoZXIgU3RvcmllczogU2ltcGxlIGxlYXN0IHNxdWFyZSByZWdyZXNzaW9uIgphdXRob3I6ICJBbmRyZXcgR2VsbWFuLCBKZW5uaWZlciBIaWxsLCBBa2kgVmVodGFyaSIKZGF0ZTogImByIGZvcm1hdChTeXMuRGF0ZSgpKWAiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgdGhlbWU6IHJlYWRhYmxlCiAgICB0b2M6IHRydWUKICAgIHRvY19kZXB0aDogMgogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCi0tLQpMaW5lYXIgbGVhc3Qgc3F1YXJlcyByZWdyZXNzaW9uIHdpdGggYSBzaW5nbGUgcHJlZGljdG9yLiBTZWUKQ2hhcHRlcnMgNiBhbmQgNyBpbiBSZWdyZXNzaW9uIGFuZCBPdGhlciBTdG9yaWVzLgoKLS0tLS0tLS0tLS0tLQoKCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQobWVzc2FnZT1GQUxTRSwgZXJyb3I9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIGNvbW1lbnQ9TkEpCiMgc3dpdGNoIHRoaXMgdG8gVFJVRSB0byBzYXZlIGZpZ3VyZXMgaW4gc2VwYXJhdGUgZmlsZXMKc2F2ZWZpZ3MgPC0gRkFMU0UKYGBgCgoqKkxvYWQgcGFja2FnZXMqKgoKYGBge3IgfQpsaWJyYXJ5KCJycHJvanJvb3QiKQpyb290PC1oYXNfZmlsZSgiLlJPUy1FeGFtcGxlcy1yb290IikkbWFrZV9maXhfZmlsZSgpCmxpYnJhcnkoImFybSIpCmBgYAoKIyMjIyBGaXR0aW5nIGEgcmVncmVzc2lvbiB1c2luZyBhIGRhdGEgZnJhbWUgaW4gUgoKKipTaW11bGF0ZSBmYWtlIGRhdGEqKgoKYGBge3IgfQp4IDwtIDE6MjAKbiA8LSBsZW5ndGgoeCkKYSA8LSAwLjIKYiA8LSAwLjMKc2lnbWEgPC0gMC41CiMgc2V0IHRoZSByYW5kb20gc2VlZCB0byBnZXQgcmVwcm9kdWNpYmxlIHJlc3VsdHMKIyBjaGFuZ2UgdGhlIHNlZWQgdG8gZXhwZXJpbWVudCB3aXRoIHZhcmlhdGlvbiBkdWUgdG8gcmFuZG9tIG5vaXNlCnNldC5zZWVkKDIxNDEpIAp5IDwtIGEgKyBiKnggKyBzaWdtYSpybm9ybShuKQpmYWtlIDwtIGRhdGEuZnJhbWUoeCwgeSkKYGBgCgoqKkxpbmVhciBsZWFzdCBzcXVhcmVzIHJlZ3Jlc3Npb24qKgoKYGBge3IgfQpmaXQgPC0gbG0oeSB+IHgsIGRhdGE9ZmFrZSkKZGlzcGxheShmaXQpCmBgYAoKKipQbG90IGZvciB0aGUgYm9vayoqCgpgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIHBkZihyb290KCJTaW1wbGVzdC9maWdzIiwic2ltcGxlLnBkZiIpLCBoZWlnaHQ9NCwgd2lkdGg9NS41KQpgYGAKYGBge3IgfQpwYXIobWFyPWMoMywzLDEsMSksIG1ncD1jKDEuNywuNSwwKSwgdGNrPS0uMDEpCnBsb3QoZmFrZSR4LCBmYWtlJHksIG1haW49IkRhdGEgYW5kIGZpdHRlZCByZWdyZXNzaW9uIGxpbmUiLCBidHk9ImwiLCBwY2g9MjAsCiAgICAgeGxhYiA9ICJ4IiwgeWxhYiA9ICJ5IikKYV9oYXQgPC0gY29lZihmaXQpWzFdCmJfaGF0IDwtIGNvZWYoZml0KVsyXQphYmxpbmUoYV9oYXQsIGJfaGF0KQp4
X2JhciA8LSBtZWFuKGZha2UkeCkKdGV4dCh4X2JhciwgYV9oYXQgKyBiX2hhdCp4X2JhciwgcGFzdGUoIiAgIHkgPSIsIHJvdW5kKGFfaGF0LCAyKSwgIisiLCByb3VuZChiX2hhdCwgMiksICIqIHgiKSwgYWRqPTApCmBgYApgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIGRldi5vZmYoKQpgYGAKCiMjIyMgRm9ybXVsYXRpbmcgY29tcGFyaXNvbnMgYXMgcmVncmVzc2lvbiBtb2RlbHMKCioqU2ltdWxhdGUgZmFrZSBkYXRhKioKCmBgYHtyIH0Kbl8wIDwtIDIwCiMgc2V0IHRoZSByYW5kb20gc2VlZCB0byBnZXQgcmVwcm9kdWNpYmxlIHJlc3VsdHMKIyBjaGFuZ2UgdGhlIHNlZWQgdG8gZXhwZXJpbWVudCB3aXRoIHZhcmlhdGlvbiBkdWUgdG8gcmFuZG9tIG5vaXNlCnNldC5zZWVkKDIxNDEpCnlfMCA8LSBybm9ybShuXzAsIDIsIDUpCnlfMCA8LSByb3VuZCh5XzAsIDEpCnJvdW5kKHlfMCwgMSkKcm91bmQobWVhbih5XzApLCAyKQpyb3VuZChzZCh5XzApL3NxcnQobiksIDIpCmBgYAoKKipFc3RpbWF0aW5nIHRoZSBtZWFuIGlzIHRoZSBzYW1lIGFzIHJlZ3Jlc3Npbmcgb24gYSBjb25zdGFudCB0ZXJtKioKCmBgYHtyIH0KZGlzcGxheShsbSh5XzAgfiAxKSkKYGBgCgoqKlNpbXVsYXRlIGZha2UgZGF0YSoqCgpgYGB7ciB9Cm5fMSA8LSAzMAojIHNldCB0aGUgcmFuZG9tIHNlZWQgdG8gZ2V0IHJlcHJvZHVjaWJsZSByZXN1bHRzCiMgY2hhbmdlIHRoZSBzZWVkIHRvIGV4cGVyaW1lbnQgd2l0aCB2YXJpYXRpb24gZHVlIHRvIHJhbmRvbSBub2lzZQpzZXQuc2VlZCgyMTQxKQp5XzEgPC0gcm5vcm0obl8xLCA4LCA1KQpkaWZmIDwtIG1lYW4oeV8xKSAtIG1lYW4oeV8wKQpzZV8wIDwtIHNkKHlfMCkvc3FydChuXzApCnNlXzEgPC0gc2QoeV8xKS9zcXJ0KG5fMSkKc2UgPC0gc3FydChzZV8wXjIgKyBzZV8xXjIpCnByaW50KGRpZmYpCnByaW50KHNlKQpgYGAKCioqRXN0aW1hdGluZyBhIGRpZmZlcmVuY2UgaXMgdGhlIHNhbWUgYXMgcmVncmVzc2luZyBvbiBhbiBpbmRpY2F0b3IgdmFyaWFibGUqKgoKYGBge3IgfQpuIDwtIG5fMCArIG5fMQp5IDwtIGMoeV8wLCB5XzEpCnggPC0gYyhyZXAoMCwgbl8wKSwgcmVwKDEsIG5fMSkpCmZpdCA8LSBsbSh5IH4geCkKZGlzcGxheShmaXQpCmBgYAoKKipQbG90IGZvciB0aGUgYm9vayoqCgpgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIHBkZihyb290KCJTaW1wbGVzdC9maWdzIiwic2ltcGxlc3RfMS5wZGYiKSwgaGVpZ2h0PTQsIHdpZHRoPTUpCmBgYApgYGB7ciB9CnBhcihtYXI9YygzLDMsMywyKSwgbWdwPWMoMS43LC41LDApLCB0Y2s9LS4wMSkKcGxvdCh4LCB5LCB4bGFiPSJJbmRpY2F0b3IsIHgiLCB5bGFiPSJ5IiwgYnR5PSJsIiwgeGF4dD0ibiIsIG1haW49IlJlZ3Jlc3Npb24gb24gYW4gaW5kaWNhdG9yIGlzIHRoZSBzYW1lXG5hcyBjb21wdXRpbmcgYSBkaWZmZXJlbmNlIGluIG1lYW5zIiwgIHBjaD0xOSwgY2V4
PS41KQpheGlzKDEsIGMoMCwgMSkpCmFibGluZShoPW1lYW4oeVt4PT0wXSksIGx0eT0yLCBjb2w9ImdyYXk1MCIpCmFibGluZShoPW1lYW4oeVt4PT0xXSksIGx0eT0yLCBjb2w9ImdyYXk1MCIpCmFibGluZShjb2VmKGZpdClbMV0sIGNvZWYoZml0KVsyXSkKdGV4dCguNSwgLTEgKyBjb2VmKGZpdClbMV0gKyAuNSpjb2VmKGZpdClbMl0sIHBhc3RlKCJ5ID0iLCBmcm91bmQoY29lZihmaXQpWzFdLCAyKSwgIisiLCBmcm91bmQoY29lZihmaXQpWzJdLCAyKSwgIngiKSwgY2V4PS45LCBhZGo9MCkKdGV4dCguMDUsIC0xICsgbWVhbih5W3g9PTBdKSwgZXhwcmVzc2lvbihwYXN0ZShiYXIoeSlbMF0sICIgPSAyLjY4IikpLCBjb2w9ImdyYXkzMCIsIGNleD0uOSwgYWRqPTApCnRleHQoLjk1LCAxICsgbWVhbih5W3g9PTFdKSwgZXhwcmVzc2lvbihwYXN0ZShiYXIoeSlbMV0sICIgPSA4LjM4IikpLCBjb2w9ImdyYXkzMCIsIGNleD0uOSwgYWRqPTEpCmBgYApgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIGRldi5vZmYoKQpgYGAKCg==