Bootstrapping to simulate the sampling distribution. See Chapter 5 in Regression and Other Stories.
library("rprojroot")
# Build a path helper from the ".ROS-Examples-root" marker file, then use it
# to locate and load the earnings data set.
root <- has_file(".ROS-Examples-root")$make_fix_file()
earnings <- read.csv(file = root("Earnings/data", "earnings.csv"))
# Show the first few rows as a quick check of what was loaded.
head(earnings)
height weight male earn earnk ethnicity education mother_education
1 74 210 1 50000 50 White 16 16
2 66 125 0 60000 60 White 16 16
3 64 126 0 30000 30 White 16 16
4 65 200 0 25000 25 White 17 17
5 63 110 0 50000 50 Other 16 16
6 68 165 0 62000 62 Black 18 18
father_education walk exercise smokenow tense angry age
1 16 3 3 2 0 0 45
2 16 6 5 1 0 0 58
3 16 8 1 2 1 1 29
4 NA 8 1 2 0 0 57
5 16 5 6 2 0 0 91
6 18 1 1 2 2 2 54
# Pull the two columns used throughout into plain vectors.
earn <- earnings[["earn"]]
male <- earnings[["male"]]
# Observed statistic: median earnings where male == 0 divided by median
# earnings where male == 1.
print(
  median(earn[male == 0]) /
    median(earn[male == 1])
)
[1] 0.6
# One bootstrap replicate, step by step: resample row indices with
# replacement, apply the same indices to both vectors so each earn value
# stays paired with its male indicator, then recompute the ratio of medians.
n <- nrow(earnings)
boot <- sample(n, replace = TRUE)
earn_boot <- earn[boot]
male_boot <- male[boot]
ratio_boot <-
  median(earn_boot[male_boot == 0]) /
  median(earn_boot[male_boot == 1])
# Compute the ratio of medians for a single bootstrap resample.
#
# Args:
#   data: a data frame with columns `earn` and `male`.
#
# Returns:
#   The median of `earn` over resampled rows with male == 0 divided by the
#   median of `earn` over resampled rows with male == 1. Rows are drawn with
#   replacement, nrow(data) at a time, so the result is random.
Boot_ratio <- function(data) {
  # A single draw of row indices keeps earn and male paired within each row.
  idx <- sample(nrow(data), replace = TRUE)
  resampled_earn <- data$earn[idx]
  resampled_male <- data$male[idx]

  median_male0 <- median(resampled_earn[resampled_male == 0])
  median_male1 <- median(resampled_earn[resampled_male == 1])

  median_male0 / median_male1
}
# Repeat the bootstrap many times to approximate the sampling distribution
# of the ratio of medians.
n_sims <- 10000
output <- vapply(
  seq_len(n_sims),
  function(i) Boot_ratio(data = earnings),
  numeric(1)
)
# Plot the simulated ratios, then report their standard deviation, which
# serves as the bootstrap standard error of the ratio.
hist(output)
round(sd(output), digits = 2)
[1] 0.03