Linear least squares regression with a single predictor. See Chapters 6 and 7 in Regression and Other Stories.
Load packages
# Attach packages used throughout this example
library("rprojroot")
# Find the project root via its marker file and build a path-construction helper
root <- has_file(".ROS-Examples-root")$make_fix_file()
library("arm")
Fitting a regression using a data frame in R
Simulate fake data
# Simulate fake data from the straight line y = a + b*x plus Gaussian noise
x <- 1:20
n <- length(x)
a <- 0.2      # true intercept
b <- 0.3      # true slope
sigma <- 0.5  # true residual sd
# set the random seed to get reproducible results;
# change the seed to experiment with variation due to random noise
set.seed(2141)
y <- a + b * x + sigma * rnorm(n)
fake <- data.frame(x = x, y = y)
Linear least squares regression
# Fit the least-squares regression of y on x from the fake-data frame
fit <- lm(formula = y ~ x, data = fake)
# arm's display() prints a compact summary: coefficients, ses, n, k, sigma, R^2
display(fit)
lm(formula = y ~ x, data = fake)
coef.est coef.se
(Intercept) 0.40 0.22
x 0.28 0.02
---
n = 20, k = 2
residual sd = 0.47, R-Squared = 0.93
Plot for the book
# Scatterplot of the fake data with the fitted least-squares line
par(mar = c(3, 3, 1, 1), mgp = c(1.7, .5, 0), tck = -.01)
plot(fake$x, fake$y, main = "Data and fitted regression line", bty = "l",
     pch = 20, xlab = "x", ylab = "y")
est <- coef(fit)        # est[1] = intercept, est[2] = slope
abline(est[1], est[2])
# Annotate the fitted equation at the horizontal center of the data
x_center <- mean(fake$x)
text(x_center, est[1] + est[2] * x_center,
     paste(" y =", round(est[1], 2), "+", round(est[2], 2), "* x"),
     adj = 0)
Formulating comparisons as regression models
Simulate fake data
# Simulate one group of n_0 observations from N(2, 5^2), rounded to one decimal
n_0 <- 20
# set the random seed to get reproducible results
# change the seed to experiment with variation due to random noise
set.seed(2141)
y_0 <- round(rnorm(n_0, 2, 5), 1)
round(y_0, 1)
[1] 9.1 2.1 0.3 3.3 2.2 12.7 -2.3 1.8 -1.0 4.1 -5.1 7.0 -4.5 -0.8 0.4
[16] 4.0 -1.8 10.2 -0.9 -0.7
# sample mean of the simulated group
round(mean(y_0), 2)
[1] 2
# standard error of the mean of y_0; use the group size n_0, not n
# (the original sqrt(n) only gave the right answer because n happened to be 20)
round(sd(y_0)/sqrt(n_0), 2)
[1] 1.06
Estimating the mean is the same as regressing on a constant term
# Regressing on only a constant term recovers the sample mean and its se
display(lm(y_0 ~ 1))
lm(formula = y_0 ~ 1)
coef.est coef.se
(Intercept) 2.00 1.06
---
n = 20, k = 1
residual sd = 4.76, R-Squared = 0.00
Simulate fake data
# Simulate a second group of n_1 observations from N(8, 5^2)
n_1 <- 30
# set the random seed to get reproducible results
# change the seed to experiment with variation due to random noise
set.seed(2141)
y_1 <- rnorm(n_1, 8, 5)
# Difference in means and its standard error
# (renamed from `diff` to avoid masking base::diff)
diff_means <- mean(y_1) - mean(y_0)
se_0 <- sd(y_0)/sqrt(n_0)
se_1 <- sd(y_1)/sqrt(n_1)
se <- sqrt(se_0^2 + se_1^2)
print(diff_means)
[1] 6.68748
# standard error of the difference in means
print(se)
[1] 1.381859
Estimating a difference is the same as regressing on an indicator variable
# Stack the two groups; x is a 0/1 indicator for group membership
n <- n_0 + n_1
y <- c(y_0, y_1)
x <- rep(c(0, 1), times = c(n_0, n_1))
# The slope on the indicator equals the difference in group means
fit <- lm(y ~ x)
display(fit)
lm(formula = y ~ x)
coef.est coef.se
(Intercept) 2.00 1.07
x 6.69 1.39
---
n = 50, k = 2
residual sd = 4.80, R-Squared = 0.33
Plot for the book
# Plot the indicator-variable regression; dashed lines mark the group means
par(mar = c(3, 3, 3, 2), mgp = c(1.7, .5, 0), tck = -.01)
plot(x, y, xlab = "Indicator, x", ylab = "y", bty = "l", xaxt = "n",
     main = "Regression on an indicator is the same\nas computing a difference in means",
     pch = 19, cex = .5)
axis(1, c(0, 1))
ybar_0 <- mean(y[x == 0])
ybar_1 <- mean(y[x == 1])
abline(h = ybar_0, lty = 2, col = "gray50")
abline(h = ybar_1, lty = 2, col = "gray50")
abline(coef(fit)[1], coef(fit)[2])
text(.5, -1 + coef(fit)[1] + .5 * coef(fit)[2],
     paste("y =", fround(coef(fit)[1], 2), "+", fround(coef(fit)[2], 2), "x"),
     cex = .9, adj = 0)
# Label the group means computed from the data; the previous hard-coded
# values (2.68, 8.38) were stale and did not match this seed's fit
# (mean(y_0) = 2.00, mean(y_1) = 8.69 per the displayed regression)
text(.05, -1 + ybar_0, bquote(bar(y)[0] == .(fround(ybar_0, 2))),
     col = "gray30", cex = .9, adj = 0)
text(.95, 1 + ybar_1, bquote(bar(y)[1] == .(fround(ybar_1, 2))),
     col = "gray30", cex = .9, adj = 1)
LS0tCnRpdGxlOiAiUmVncmVzc2lvbiBhbmQgT3RoZXIgU3RvcmllczogU2ltcGxlIGxlYXN0IHNxdWFyZSByZWdyZXNzaW9uIgphdXRob3I6ICJBbmRyZXcgR2VsbWFuLCBKZW5uaWZlciBIaWxsLCBBa2kgVmVodGFyaSIKZGF0ZTogImByIGZvcm1hdChTeXMuRGF0ZSgpKWAiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgdGhlbWU6IHJlYWRhYmxlCiAgICB0b2M6IHRydWUKICAgIHRvY19kZXB0aDogMgogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCi0tLQpMaW5lYXIgbGVhc3Qgc3F1YXJlcyByZWdyZXNzaW9uIHdpdGggYSBzaW5nbGUgcHJlZGljdG9yLiBTZWUKQ2hhcHRlcnMgNiBhbmQgNyBpbiBSZWdyZXNzaW9uIGFuZCBPdGhlciBTdG9yaWVzLgoKLS0tLS0tLS0tLS0tLQoKCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQobWVzc2FnZT1GQUxTRSwgZXJyb3I9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIGNvbW1lbnQ9TkEpCiMgc3dpdGNoIHRoaXMgdG8gVFJVRSB0byBzYXZlIGZpZ3VyZXMgaW4gc2VwYXJhdGUgZmlsZXMKc2F2ZWZpZ3MgPC0gRkFMU0UKYGBgCgoqKkxvYWQgcGFja2FnZXMqKgoKYGBge3IgfQpsaWJyYXJ5KCJycHJvanJvb3QiKQpyb290PC1oYXNfZmlsZSgiLlJPUy1FeGFtcGxlcy1yb290IikkbWFrZV9maXhfZmlsZSgpCmxpYnJhcnkoImFybSIpCmBgYAoKIyMjIyBGaXR0aW5nIGEgcmVncmVzc2lvbiB1c2luZyBhIGRhdGEgZnJhbWUgaW4gUgoKKipTaW11bGF0ZSBmYWtlIGRhdGEqKgoKYGBge3IgfQp4IDwtIDE6MjAKbiA8LSBsZW5ndGgoeCkKYSA8LSAwLjIKYiA8LSAwLjMKc2lnbWEgPC0gMC41CiMgc2V0IHRoZSByYW5kb20gc2VlZCB0byBnZXQgcmVwcm9kdWNpYmxlIHJlc3VsdHMKIyBjaGFuZ2UgdGhlIHNlZWQgdG8gZXhwZXJpbWVudCB3aXRoIHZhcmlhdGlvbiBkdWUgdG8gcmFuZG9tIG5vaXNlCnNldC5zZWVkKDIxNDEpIAp5IDwtIGEgKyBiKnggKyBzaWdtYSpybm9ybShuKQpmYWtlIDwtIGRhdGEuZnJhbWUoeCwgeSkKYGBgCgoqKkxpbmVhciBsZWFzdCBzcXVhcmVzIHJlZ3Jlc3Npb24qKgoKYGBge3IgfQpmaXQgPC0gbG0oeSB+IHgsIGRhdGE9ZmFrZSkKZGlzcGxheShmaXQpCmBgYAoKKipQbG90IGZvciB0aGUgYm9vayoqCgpgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIHBkZihyb290KCJTaW1wbGVzdC9maWdzIiwic2ltcGxlLnBkZiIpLCBoZWlnaHQ9NCwgd2lkdGg9NS41KQpgYGAKYGBge3IgfQpwYXIobWFyPWMoMywzLDEsMSksIG1ncD1jKDEuNywuNSwwKSwgdGNrPS0uMDEpCnBsb3QoZmFrZSR4LCBmYWtlJHksIG1haW49IkRhdGEgYW5kIGZpdHRlZCByZWdyZXNzaW9uIGxpbmUiLCBidHk9ImwiLCBwY2g9MjAsCiAgICAgeGxhYiA9ICJ4IiwgeWxhYiA9ICJ5IikKYV9oYXQgPC0gY29lZihmaXQpWzFdCmJfaGF0IDwtIGNvZWYoZml0KVsyXQphYmxpbmUoYV9oYXQsIGJfaGF0KQp4
X2JhciA8LSBtZWFuKGZha2UkeCkKdGV4dCh4X2JhciwgYV9oYXQgKyBiX2hhdCp4X2JhciwgcGFzdGUoIiAgIHkgPSIsIHJvdW5kKGFfaGF0LCAyKSwgIisiLCByb3VuZChiX2hhdCwgMiksICIqIHgiKSwgYWRqPTApCmBgYApgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIGRldi5vZmYoKQpgYGAKCiMjIyMgRm9ybXVsYXRpbmcgY29tcGFyaXNvbnMgYXMgcmVncmVzc2lvbiBtb2RlbHMKCioqU2ltdWxhdGUgZmFrZSBkYXRhKioKCmBgYHtyIH0Kbl8wIDwtIDIwCiMgc2V0IHRoZSByYW5kb20gc2VlZCB0byBnZXQgcmVwcm9kdWNpYmxlIHJlc3VsdHMKIyBjaGFuZ2UgdGhlIHNlZWQgdG8gZXhwZXJpbWVudCB3aXRoIHZhcmlhdGlvbiBkdWUgdG8gcmFuZG9tIG5vaXNlCnNldC5zZWVkKDIxNDEpCnlfMCA8LSBybm9ybShuXzAsIDIsIDUpCnlfMCA8LSByb3VuZCh5XzAsIDEpCnJvdW5kKHlfMCwgMSkKcm91bmQobWVhbih5XzApLCAyKQpyb3VuZChzZCh5XzApL3NxcnQobiksIDIpCmBgYAoKKipFc3RpbWF0aW5nIHRoZSBtZWFuIGlzIHRoZSBzYW1lIGFzIHJlZ3Jlc3Npbmcgb24gYSBjb25zdGFudCB0ZXJtKioKCmBgYHtyIH0KZGlzcGxheShsbSh5XzAgfiAxKSkKYGBgCgoqKlNpbXVsYXRlIGZha2UgZGF0YSoqCgpgYGB7ciB9Cm5fMSA8LSAzMAojIHNldCB0aGUgcmFuZG9tIHNlZWQgdG8gZ2V0IHJlcHJvZHVjaWJsZSByZXN1bHRzCiMgY2hhbmdlIHRoZSBzZWVkIHRvIGV4cGVyaW1lbnQgd2l0aCB2YXJpYXRpb24gZHVlIHRvIHJhbmRvbSBub2lzZQpzZXQuc2VlZCgyMTQxKQp5XzEgPC0gcm5vcm0obl8xLCA4LCA1KQpkaWZmIDwtIG1lYW4oeV8xKSAtIG1lYW4oeV8wKQpzZV8wIDwtIHNkKHlfMCkvc3FydChuXzApCnNlXzEgPC0gc2QoeV8xKS9zcXJ0KG5fMSkKc2UgPC0gc3FydChzZV8wXjIgKyBzZV8xXjIpCnByaW50KGRpZmYpCnByaW50KHNlKQpgYGAKCioqRXN0aW1hdGluZyBhIGRpZmZlcmVuY2UgaXMgdGhlIHNhbWUgYXMgcmVncmVzc2luZyBvbiBhbiBpbmRpY2F0b3IgdmFyaWFibGUqKgoKYGBge3IgfQpuIDwtIG5fMCArIG5fMQp5IDwtIGMoeV8wLCB5XzEpCnggPC0gYyhyZXAoMCwgbl8wKSwgcmVwKDEsIG5fMSkpCmZpdCA8LSBsbSh5IH4geCkKZGlzcGxheShmaXQpCmBgYAoKKipQbG90IGZvciB0aGUgYm9vayoqCgpgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIHBkZihyb290KCJTaW1wbGVzdC9maWdzIiwic2ltcGxlc3RfMS5wZGYiKSwgaGVpZ2h0PTQsIHdpZHRoPTUpCmBgYApgYGB7ciB9CnBhcihtYXI9YygzLDMsMywyKSwgbWdwPWMoMS43LC41LDApLCB0Y2s9LS4wMSkKcGxvdCh4LCB5LCB4bGFiPSJJbmRpY2F0b3IsIHgiLCB5bGFiPSJ5IiwgYnR5PSJsIiwgeGF4dD0ibiIsIG1haW49IlJlZ3Jlc3Npb24gb24gYW4gaW5kaWNhdG9yIGlzIHRoZSBzYW1lXG5hcyBjb21wdXRpbmcgYSBkaWZmZXJlbmNlIGluIG1lYW5zIiwgIHBjaD0xOSwgY2V4
PS41KQpheGlzKDEsIGMoMCwgMSkpCmFibGluZShoPW1lYW4oeVt4PT0wXSksIGx0eT0yLCBjb2w9ImdyYXk1MCIpCmFibGluZShoPW1lYW4oeVt4PT0xXSksIGx0eT0yLCBjb2w9ImdyYXk1MCIpCmFibGluZShjb2VmKGZpdClbMV0sIGNvZWYoZml0KVsyXSkKdGV4dCguNSwgLTEgKyBjb2VmKGZpdClbMV0gKyAuNSpjb2VmKGZpdClbMl0sIHBhc3RlKCJ5ID0iLCBmcm91bmQoY29lZihmaXQpWzFdLCAyKSwgIisiLCBmcm91bmQoY29lZihmaXQpWzJdLCAyKSwgIngiKSwgY2V4PS45LCBhZGo9MCkKdGV4dCguMDUsIC0xICsgbWVhbih5W3g9PTBdKSwgZXhwcmVzc2lvbihwYXN0ZShiYXIoeSlbMF0sICIgPSAyLjY4IikpLCBjb2w9ImdyYXkzMCIsIGNleD0uOSwgYWRqPTApCnRleHQoLjk1LCAxICsgbWVhbih5W3g9PTFdKSwgZXhwcmVzc2lvbihwYXN0ZShiYXIoeSlbMV0sICIgPSA4LjM4IikpLCBjb2w9ImdyYXkzMCIsIGNleD0uOSwgYWRqPTEpCmBgYApgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIGRldi5vZmYoKQpgYGAKCg==