Health Expenditure - Discovery through graphs of data and models. See Chapter 2 in Regression and Other Stories.
Load packages
library("ggplot2")
library("bayesplot")
theme_set(bayesplot::theme_default(base_family = "sans"))
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
Load data
health <- read.table(root("HealthExpenditure/data","healthdata.txt"), header=TRUE)
head(health)
country spending lifespan
1 Australia 3357 81.4
2 Austria 3763 80.1
3 Belgium 3595 79.8
4 Canada 3895 80.7
5 Czech 1626 77.0
6 Denmark 3512 78.4
# Use red for USA and Mexico
color <- ifelse(health$country %in% c("USA","Mexico"), "red","black")
Scatterplot
(see ggplot versions at the end)
All countries:
par(mgp=c(1.7,.5,0), tck=-.01, mar=c(3,3,.1,.1))
plot(health$spending, health$lifespan, xlim=c(0,1.05*max(health$spending)), xaxs="i",
type="n", xlab="Health care spending (PPP US$)",
ylab="Life expectancy (years)")
text(health$spending, health$lifespan, health$country, col=color)
Plot scatterplot, excluding some countries for improved clarity
removec <- health$country %in% c("Netherlands", "Belgium", "Germany",
"Ireland", "Iceland", "Greece", "Italy", "Sweden", "UK")
par(mgp=c(2.5,.7,0), tck=-.01, mar=c(4,4,.1,.1))
plot(health$spending[!removec], health$lifespan[!removec], xlim=c(0,1.05*max(health$spending)),
xaxs="i", type="n", xlab="Health care spending (PPP US$)",
ylab="Life expectancy (years)", cex.axis=1.3, cex.lab=1.3, las=1, xaxt="n", bty="l")
axis(1, seq(0,8000,2000), cex.axis=1.3, cex.lab=1.3)
text(health$spending[!removec], health$lifespan[!removec], health$country[!removec],
col=color[!removec], cex=1.3)
for (x in seq(2000,6000,2000)) abline(v=x, col="gray", lwd=.5)
for (y in seq(74,82,2)) abline(y,0,col="gray", lwd=.5)
par(mgp=c(1.7,.5, 0), tck=-.01, mar=c(3,3,.1,.1))
plot(health$spending[!removec], health$lifespan[!removec], xlim=c(0,1.05*max(health$spending)),
xaxs="i", type="n", xlab="Health care spending (PPP US$)",
ylab="Life expectancy (years)", bty="l", xaxt="n")
axis(1, seq(0,6000,2000))
text(health$spending[!removec], health$lifespan[!removec], health$country[!removec], cex=.9)
ggplot versions
All countries:
# this could be further modified to include color in the dataframe
ggplot(data=health, aes(x=spending, y=lifespan, label=country)) +
geom_text(color=color) +
labs(x="Health care spending (PPP US$)",
y="Life expectancy (years)")
Selected countries:
# this could be further modified to include color in the dataframe
ggplot(data=subset(health, !removec),
aes(x=spending, y=lifespan, label=country)) +
geom_text(color=color[!removec]) +
labs(x="Health care spending (PPP US$)",
y="Life expectancy (years)")
LS0tCnRpdGxlOiAiUmVncmVzc2lvbiBhbmQgT3RoZXIgU3RvcmllczogSGVhbHRoIEV4cGVuZGl0dXJlIgphdXRob3I6ICJBbmRyZXcgR2VsbWFuLCBKZW5uaWZlciBIaWxsLCBBa2kgVmVodGFyaSIKZGF0ZTogImByIGZvcm1hdChTeXMuRGF0ZSgpKWAiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgdGhlbWU6IHJlYWRhYmxlCiAgICB0b2M6IHRydWUKICAgIHRvY19kZXB0aDogMgogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCi0tLQpIZWFsdGggRXhwZW5kaXR1cmUgLSBEaXNjb3ZlcnkgdGhyb3VnaCBncmFwaHMgb2YgZGF0YSBhbmQKbW9kZWxzLiBTZWUgQ2hhcHRlciAyIGluIFJlZ3Jlc3Npb24gYW5kIE90aGVyIFN0b3JpZXMuCgotLS0tLS0tLS0tLS0tCgoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmtuaXRyOjpvcHRzX2NodW5rJHNldChtZXNzYWdlPUZBTFNFLCBlcnJvcj1GQUxTRSwgd2FybmluZz1GQUxTRSwgY29tbWVudD1OQSkKIyBzd2l0Y2ggdGhpcyB0byBUUlVFIHRvIHNhdmUgZmlndXJlcyBpbiBzZXBhcmF0ZSBmaWxlcwpzYXZlZmlncyA8LSBGQUxTRQpgYGAKCiMjIyMgTG9hZCBwYWNrYWdlcwoKYGBge3IgfQpsaWJyYXJ5KCJnZ3Bsb3QyIikKbGlicmFyeSgiYmF5ZXNwbG90IikKdGhlbWVfc2V0KGJheWVzcGxvdDo6dGhlbWVfZGVmYXVsdChiYXNlX2ZhbWlseSA9ICJzYW5zIikpCmxpYnJhcnkoInJwcm9qcm9vdCIpCnJvb3Q8LWhhc19maWxlKCIuUk9TLUV4YW1wbGVzLXJvb3QiKSRtYWtlX2ZpeF9maWxlKCkKYGBgCgojIyMjIExvYWQgZGF0YQoKYGBge3IgfQpoZWFsdGggPC0gcmVhZC50YWJsZShyb290KCJIZWFsdGhFeHBlbmRpdHVyZS9kYXRhIiwiaGVhbHRoZGF0YS50eHQiKSwgaGVhZGVyPVRSVUUpCmhlYWQoaGVhbHRoKQojIFVzZSByZWQgZm9yIFVTQSBhbmQgTWV4aWNvCmNvbG9yIDwtIGlmZWxzZShoZWFsdGgkY291bnRyeSAlaW4lIGMoIlVTQSIsIk1leGljbyIpLCAicmVkIiwiYmxhY2siKQpgYGAKCiMjIyMgU2NhdHRlcnBsb3QKCihzZWUgZ2dwbG90IHZlcnNpb25zIGF0IHRoZSBlbmQpCgpBbGwgY291bnRyaWVzOgoKYGBge3IgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRX0KcG5nKHJvb3QoIkhlYWx0aEV4cGVuZGl0dXJlL2ZpZ3MiLCJoZWFsdGhzY2F0dGVyLnBuZyIpLCBoZWlnaHQ9NjAwLCB3aWR0aD03MDApCmBgYApgYGB7ciB9CnBhcihtZ3A9YygxLjcsLjUsMCksIHRjaz0tLjAxLCBtYXI9YygzLDMsLjEsLjEpKQpwbG90KGhlYWx0aCRzcGVuZGluZywgaGVhbHRoJGxpZmVzcGFuLCB4bGltPWMoMCwxLjA1Km1heChoZWFsdGgkc3BlbmRpbmcpKSwgeGF4cz0iaSIsCiAgICAgIHR5cGU9Im4iLCB4bGFiPSJIZWFsdGggY2FyZSBzcGVuZGluZyAoUFBQIFVTJCkiLAogICAgICB5bGFiPSJMaWZlIGV4cGVjdGFuY3kgKHllYXJzKSIpCnRleHQoaGVhbHRoJHNwZW5kaW5nLCBoZWFsdGgkbGlmZXNwYW4sIGhlYWx0aCRjb3VudHJ5LCBjb2w9Y29sb3IpCmBgYApgYGB7ciBldmFsPUZBTFNFLCBpbmNsdWRlPUZBTFNFfQppZiAoc2F2ZWZpZ3MpIGRldi5vZmYoKQpgYGAKCiMjIyMgUGxvdCAgc2NhdHRlcnBsb3QsIGV4Y2x1ZGluZyBzb21lIGNvdW50cmllcyBmb3IgaW1wcm92ZWQgY2xhcml0eQoKYGBge3IgfQpyZW1vdmVjIDwtIGhlYWx0aCRjb3VudHJ5ICVpbiUgYygiTmV0aGVybGFuZHMiLCAiQmVsZ2l1bSIsICJHZXJtYW55IiwKICAiSXJlbGFuZCIsICJJY2VsYW5kIiwgIkdyZWVjZSIsICJJdGFseSIsICJTd2VkZW4iLCAiVUsiKQpgYGAKYGBge3IgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRX0KcG5nKHJvb3QoIkhlYWx0aEV4cGVuZGl0dXJlL2ZpZ3MiLCJoZWFsdGhzY2F0dGVyMi5wbmciKSwgaGVpZ2h0PTYwMCwgd2lkdGg9NzAwKQpgYGAKYGBge3IgfQpwYXIobWdwPWMoMi41LC43LDApLCB0Y2s9LS4wMSwgbWFyPWMoNCw0LC4xLC4xKSkKcGxvdChoZWFsdGgkc3BlbmRpbmdbIXJlbW92ZWNdLCBoZWFsdGgkbGlmZXNwYW5bIXJlbW92ZWNdLCB4bGltPWMoMCwxLjA1Km1heChoZWFsdGgkc3BlbmRpbmcpKSwKICAgICAgeGF4cz0iaSIsICB0eXBlPSJuIiwgeGxhYj0iSGVhbHRoIGNhcmUgc3BlbmRpbmcgKFBQUCBVUyQpIiwKICAgICAgeWxhYj0iTGlmZSBleHBlY3RhbmN5ICh5ZWFycykiLCBjZXguYXhpcz0xLjMsIGNleC5sYWI9MS4zLCBsYXM9MSwgeGF4dD0ibiIsIGJ0eT0ibCIpCmF4aXMoMSwgc2VxKDAsODAwMCwyMDAwKSwgY2V4LmF4aXM9MS4zLCBjZXgubGFiPTEuMykKdGV4dChoZWFsdGgkc3BlbmRpbmdbIXJlbW92ZWNdLCBoZWFsdGgkbGlmZXNwYW5bIXJlbW92ZWNdLCBoZWFsdGgkY291bnRyeVshcmVtb3ZlY10sCiAgICAgIGNvbD1jb2xvclshcmVtb3ZlY10sIGNleD0xLjMpCmZvciAoeCBpbiBzZXEoMjAwMCw2MDAwLDIwMDApKSBhYmxpbmUodj14LCBjb2w9ImdyYXkiLCBsd2Q9LjUpCmZvciAoeSBpbiBzZXEoNzQsODIsMikpIGFibGluZSh5LDAsY29sPSJncmF5IiwgbHdkPS41KQpgYGAKYGBge3IgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRX0KaWYgKHNhdmVmaWdzKSBkZXYub2ZmICgpCgpgYGAKYGBge3IgZXZhbD1GQUxTRSwgaW5jbHVkZT1GQUxTRX0KaWYgKHNhdmVmaWdzKSBwZGYocm9vdCgiSGVhbHRoRXhwZW5kaXR1cmUvZmlncyIsImhlYWx0aHNjYXR0ZXIzLnBkZiIpLCBoZWlnaHQ9NCwgd2lkdGg9NS41KQpgYGAKYGBge3IgfQpwYXIobWdwPWMoMS43LC41LCAwKSwgdGNrPS0uMDEsIG1hcj1jKDMsMywuMSwuMSkpCnBsb3QoaGVhbHRoJHNwZW5kaW5nWyFyZW1vdmVjXSwgaGVhbHRoJGxpZmVzcGFuWyFyZW1vdmVjXSwgeGxpbT1jKDAsMS4wNSptYXgoaGVhbHRoJHNwZW5kaW5nKSksCiAgICAgIHhheHM9ImkiLCAgdHlwZT0ibiIsIHhsYWI9IkhlYWx0aCBjYXJlIHNwZW5kaW5nIChQUFAgVVMkKSIsCiAgICAgIHlsYWI9IkxpZmUgZXhwZWN0YW5jeSAoeWVhcnMpIiwgYnR5PSJsIiwgeGF4dD0ibiIpCmF4aXMoMSwgc2VxKDAsNjAwMCwyMDAwKSkKdGV4dChoZWFsdGgkc3BlbmRpbmdbIXJlbW92ZWNdLCBoZWFsdGgkbGlmZXNwYW5bIXJlbW92ZWNdLCBoZWFsdGgkY291bnRyeVshcmVtb3ZlY10sIGNleD0uOSkKYGBgCmBgYHtyIGV2YWw9RkFMU0UsIGluY2x1ZGU9RkFMU0V9CmlmIChzYXZlZmlncykgZGV2Lm9mZigpCmBgYAoKIyMjIyBnZ3Bsb3QgdmVyc2lvbnMKCkFsbCBjb3VudHJpZXM6CgpgYGB7ciB9CiMgdGhpcyBjb3VsZCBiZSBmdXJ0aGVyIG1vZGlmaWVkIHRvIGluY2x1ZGUgY29sb3IgaW4gdGhlIGRhdGFmcmFtZQpnZ3Bsb3QoZGF0YT1oZWFsdGgsIGFlcyh4PXNwZW5kaW5nLCB5PWxpZmVzcGFuLCBsYWJlbD1jb3VudHJ5KSkgKwogIGdlb21fdGV4dChjb2xvcj1jb2xvcikgKwogIGxhYnMoeD0iSGVhbHRoIGNhcmUgc3BlbmRpbmcgKFBQUCBVUyQpIiwKICAgICAgIHk9IkxpZmUgZXhwZWN0YW5jeSAoeWVhcnMpIikKYGBgCgpTZWxlY3RlZCBjb3VudHJpZXM6CgpgYGB7ciB9CiMgdGhpcyBjb3VsZCBiZSBmdXJ0aGVyIG1vZGlmaWVkIHRvIGluY2x1ZGUgY29sb3IgaW4gdGhlIGRhdGFmcmFtZQpnZ3Bsb3QoZGF0YT1zdWJzZXQoaGVhbHRoLCAhcmVtb3ZlYyksICAgICAgICAgICAgICAgICAgIAogICAgICAgYWVzKHg9c3BlbmRpbmcsIHk9bGlmZXNwYW4sIGxhYmVsPWNvdW50cnkpKSArCiAgZ2VvbV90ZXh0KGNvbG9yPWNvbG9yWyFyZW1vdmVjXSkgKwogIGxhYnMoeD0iSGVhbHRoIGNhcmUgc3BlbmRpbmcgKFBQUCBVUyQpIiwKICAgICAgIHk9IkxpZmUgZXhwZWN0YW5jeSAoeWVhcnMpIikKYGBgCgo=