Hamermesh and Parker (2005) data on student evaluations of instructors’ beauty and teaching quality for several courses at the University of Texas. See Chapter 10 in Regression and Other Stories.
Hamermesh, D. S., and Parker, A. M. (2005). Beauty in the classroom: Instructors' pulchritude and putative pedagogical productivity. Economics of Education Review, 24:369-376.
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
library("ggplot2")
library("bayesplot")
theme_set(bayesplot::theme_default(base_family = "sans"))
beauty <- read.csv(root("Beauty/data","beauty.csv"))
head(beauty)
eval beauty female age minority nonenglish lower course_id
1 4.3 0.2015666 1 36 1 0 0 3
2 4.5 -0.8260813 0 59 0 0 0 0
3 3.7 -0.6603327 0 51 0 0 0 4
4 4.3 -0.7663125 1 40 0 0 0 2
5 4.4 1.4214450 1 31 0 0 0 0
6 4.2 0.5002196 0 62 0 0 0 0
par(mar=c(3,3,1,1), mgp=c(1.7, .5, 0), tck=-.01)
plot(beauty$beauty, beauty$eval)
fit_1 <- stan_glm(eval ~ beauty, data=beauty, refresh=0)
print(fit_1, digits=2)
stan_glm
family: gaussian [identity]
formula: eval ~ beauty
observations: 463
predictors: 2
------
Median MAD_SD
(Intercept) 4.01 0.03
beauty 0.13 0.03
Auxiliary parameter(s):
Median MAD_SD
sigma 0.55 0.02
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
# Labeling the axes
plot(beauty$beauty, beauty$eval, xlab="Beauty", ylab="Average teaching evaluation")
# Display the regression line, added onto the scatterplot (add=TRUE)
coefs <- coef(fit_1)
curve(coefs[1] + coefs[2]*x, add=TRUE)
# Add dotted lines to show +/- 1 standard deviation
sigma <- sigma(fit_1)
curve(coefs[1] + coefs[2]*x + sigma, lty=2, add=TRUE)
curve(coefs[1] + coefs[2]*x - sigma, lty=2, add=TRUE)
ggplot(data=beauty, aes(beauty, eval)) +
geom_point(size = 2, alpha = 0.75) +
geom_abline(
slope = rep(coefs[2], 3),
intercept = c(coefs[1], coefs[1] - sigma, coefs[1] + sigma),
linetype = c(1, 2, 2),
color = "darkgray",
size = 1
) +
labs(
x = "Beauty",
y = "Average teaching evaluation"
)
fit_2 <- stan_glm(eval ~ beauty + female, data=beauty, refresh=0)
print(fit_2, digits=2)
stan_glm
family: gaussian [identity]
formula: eval ~ beauty + female
observations: 463
predictors: 3
------
Median MAD_SD
(Intercept) 4.09 0.03
beauty 0.15 0.03
female -0.20 0.05
Auxiliary parameter(s):
Median MAD_SD
sigma 0.54 0.02
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
coefs2 <- coef(fit_2)
# Set up a 2x2 grid of plots
par(mfrow=c(2,2))
# Make separate plot for men, ...
plot(beauty$beauty[beauty$female==0], beauty$eval[beauty$female==0], xlim=range(beauty$beauty), ylim=range(beauty$eval),
xlab="Beauty", ylab="Average teaching evaluation", main="Men")
curve(coefs2[1] + coefs2[2]*x + coefs2[3]*0, add=TRUE)
# ... women, ...
plot(beauty$beauty[beauty$female==1], beauty$eval[beauty$female==1], xlim=range(beauty$beauty), ylim=range(beauty$eval),
xlab="Beauty", ylab="Average teaching evaluation", main="Women")
curve(coefs2[1] + coefs2[2]*x + coefs2[3]*1, add=TRUE)
# ... and both sexes on the same plot
# First make the plot with type="n" (which displays axes but does not plot
# the points), then plot the points and lines separately for each sex
plot(beauty$beauty, beauty$eval, xlab="Beauty", ylab="Average teaching evaluation",
main="Both sexes", type="n")
points(beauty$beauty[beauty$female==0], beauty$eval[beauty$female==0], col="blue")
curve(coefs2[1] + coefs2[2]*x + coefs2[3]*0, add=TRUE, col="blue")
points(beauty$beauty[beauty$female==1], beauty$eval[beauty$female==1], col="red")
curve(coefs2[1] + coefs2[2]*x + coefs2[3]*1, add=TRUE, col="red")
# Men
gg_male <-
ggplot(subset(beauty, female == 0), aes(beauty, eval)) +
geom_point() +
geom_abline(slope = coefs2[2], intercept = coefs2[1], color = "darkgray")
# Women
gg_female <-
ggplot(subset(beauty, female == 1), aes(beauty, eval)) +
geom_point() +
geom_abline(slope = coefs2[2], intercept = coefs2[1] + coefs2[3], color = "darkgray")
# Both
gg_both <-
ggplot(data=beauty, aes(beauty, eval)) +
geom_point(aes(color = factor(female)), show.legend = FALSE) +
scale_color_manual(values = c("red", "blue")) +
geom_abline(
slope = coefs2[2],
intercept = c(coefs2[1], coefs2[1] + coefs2[3]),
color = c("blue3", "red3"),
size = 1
)
# Put them in a grid
bayesplot_grid(
gg_male, gg_female, gg_both,
grid_args = list(ncol = 2),
xlim = range(beauty$beauty),
ylim = range(beauty$eval),
titles = c("Men", "Women", "Both sexes")
)
fit_3 <- stan_glm(eval ~ beauty + female + beauty*female, data=beauty, refresh=0)
print(fit_3, digits=2)
stan_glm
family: gaussian [identity]
formula: eval ~ beauty + female + beauty * female
observations: 463
predictors: 4
------
Median MAD_SD
(Intercept) 4.11 0.03
beauty 0.20 0.04
female -0.21 0.05
beauty:female -0.11 0.06
Auxiliary parameter(s):
Median MAD_SD
sigma 0.54 0.02
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
coefs3 <- coef(fit_3)
# Set up a new 1x2 grid of plots
par(mfrow=c(1,2))
# Display the parallel regression lines in gray and the non-parallel lines
# in heavy black
# Make separate plot for men ...
plot(beauty$beauty[beauty$female==0], beauty$eval[beauty$female==0], xlim=range(beauty$beauty), ylim=range(beauty$eval),
xlab="Beauty", ylab="Average teaching evaluation", main="Men")
curve(coefs2[1] + coefs2[2]*x + coefs2[3]*0,
lwd=.5, col="gray", add=TRUE)
curve(coefs3[1] + coefs3[2]*x + coefs3[3]*0 + coefs3[4]*x*0,
lwd=2, col="black", add=TRUE)
# ... and women
plot (beauty$beauty[beauty$female==1], beauty$eval[beauty$female==1], xlim=range(beauty$beauty), ylim=range(beauty$eval),
xlab="Beauty", ylab="Average teaching evaluation", main="Women")
curve(coefs2[1] +coefs2[2]*x +coefs2[3]*1,
lwd=.5, col="gray", add=TRUE)
curve(coefs3[1] + coefs3[2]*x + coefs3[3]*1 +coefs3[4]*x*1,
lwd=2, col="black", add=TRUE)
# we can add to the gg_male and gg_female plots we already made above
gg_male2 <- gg_male + geom_abline(intercept = coefs3[1], slope = coefs3[2], size = 1)
gg_female2 <- gg_female + geom_abline(intercept = coefs3[1] + coefs3[3], slope = coefs3[2] + coefs3[4], size = 1)
# Put them in a grid
bayesplot_grid(
gg_male2, gg_female2,
grid_args = list(ncol = 2),
xlim = range(beauty$beauty),
ylim = range(beauty$eval),
titles = c("Men", "Women")
)
fit_4 <- stan_glm(eval ~ beauty + female + age, data=beauty, refresh=0)
print(fit_4, digits=2)
stan_glm
family: gaussian [identity]
formula: eval ~ beauty + female + age
observations: 463
predictors: 4
------
Median MAD_SD
(Intercept) 4.22 0.14
beauty 0.14 0.03
female -0.21 0.05
age 0.00 0.00
Auxiliary parameter(s):
Median MAD_SD
sigma 0.54 0.02
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
fit_5 <- stan_glm(eval ~ beauty + female + minority, data=beauty, refresh=0)
print(fit_5, digits=2)
stan_glm
family: gaussian [identity]
formula: eval ~ beauty + female + minority
observations: 463
predictors: 4
------
Median MAD_SD
(Intercept) 4.10 0.03
beauty 0.15 0.03
female -0.19 0.05
minority -0.10 0.07
Auxiliary parameter(s):
Median MAD_SD
sigma 0.54 0.02
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
fit_6 <- stan_glm(eval ~ beauty + female + nonenglish, data=beauty, refresh=0)
print(fit_6, digits=2)
stan_glm
family: gaussian [identity]
formula: eval ~ beauty + female + nonenglish
observations: 463
predictors: 4
------
Median MAD_SD
(Intercept) 4.12 0.03
beauty 0.15 0.03
female -0.20 0.05
nonenglish -0.33 0.10
Auxiliary parameter(s):
Median MAD_SD
sigma 0.53 0.02
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
fit_7 <- stan_glm(eval ~ beauty + female + nonenglish + lower,
data=beauty, refresh=0)
print(fit_7, digits=2)
stan_glm
family: gaussian [identity]
formula: eval ~ beauty + female + nonenglish + lower
observations: 463
predictors: 5
------
Median MAD_SD
(Intercept) 4.08 0.04
beauty 0.15 0.03
female -0.19 0.05
nonenglish -0.31 0.11
lower 0.09 0.05
Auxiliary parameter(s):
Median MAD_SD
sigma 0.53 0.02
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg
fit_8 <- stan_glm(eval ~ beauty + factor(course_id), data=beauty, refresh=0)
print(fit_8, digits=2)
stan_glm
family: gaussian [identity]
formula: eval ~ beauty + factor(course_id)
observations: 463
predictors: 32
------
Median MAD_SD
(Intercept) 4.03 0.03
beauty 0.14 0.03
factor(course_id)1 0.37 0.23
factor(course_id)2 0.42 0.38
factor(course_id)3 -0.17 0.20
factor(course_id)4 -0.20 0.13
factor(course_id)5 0.02 0.26
factor(course_id)6 -0.13 0.22
factor(course_id)7 -0.32 0.27
factor(course_id)8 -0.14 0.38
factor(course_id)9 -0.43 0.19
factor(course_id)10 0.43 0.23
factor(course_id)11 -0.08 0.37
factor(course_id)12 0.03 0.30
factor(course_id)13 -0.08 0.30
factor(course_id)14 -0.52 0.30
factor(course_id)15 -1.44 0.36
factor(course_id)16 0.18 0.27
factor(course_id)17 0.34 0.19
factor(course_id)18 0.27 0.27
factor(course_id)19 -0.31 0.22
factor(course_id)20 0.45 0.24
factor(course_id)21 -0.39 0.15
factor(course_id)22 -0.29 0.16
factor(course_id)23 0.37 0.22
factor(course_id)24 -0.23 0.31
factor(course_id)25 -0.14 0.31
factor(course_id)26 0.24 0.30
factor(course_id)27 0.13 0.37
factor(course_id)28 0.43 0.28
factor(course_id)29 -0.08 0.38
factor(course_id)30 0.30 0.19
Auxiliary parameter(s):
Median MAD_SD
sigma 0.53 0.02
------
* For help interpreting the printed output see ?print.stanreg
* For info on the priors used see ?prior_summary.stanreg