# Example 2.12, Student Math Performance and the School Lunch Program
# Data set: meap93
# Load the example workspace. The statements below use two objects it is
# expected to provide: `data` (with columns math10 and lnchprg) and `desc`
# (variable names in column 1, their descriptions in column 2).
load("meap93.Rdata")

# Look up the description text stored in `desc` for the variable `name`.
# paste() coerces the matched cell(s) to character so cat() prints the text.
describe_variable <- function(name) {
  paste(desc[desc[, 1] == name, 2])
}

# Describe the model and summarize the variables
# The final "\n" terminates the description so that the summary tables
# printed next start on a line of their own.
cat(
  "Model to estimate: math10 = beta0 + beta1 * lnchprg + u",
  "\n where math10 is", describe_variable("math10"),
  "\n and lnchprg is", describe_variable("lnchprg"),
  "\n"
)
# Explicit print() so the summaries also appear when the script is run via
# source(), where top-level values are not auto-printed by default.
print(summary(data$math10))
print(summary(data$lnchprg))
# Estimate and show results
# Simple OLS regression of math10 on lnchprg.
model <- lm(math10 ~ lnchprg, data = data)

# Compute the summary once; it is printed here and reused for R^2 below.
# Explicit print() so the table also appears when the script is source()d.
model_summary <- summary(model)
print(model_summary)

# Unrounded estimates: element 1 is the intercept, element 2 the slope.
estimates <- coef(model)

# Print the fitted line with the slope's sign written as an explicit
# " + " / " - " operator followed by the slope's absolute value.
# nobs(model) reports the observations actually used in the fit, which can
# be fewer than nrow(data) if rows with missing values were dropped.
# The final "\n" terminates the line so later output does not run on.
cat(
  "The estimated regression line is\n",
  "math10hat = ", round(estimates[[1]], digits = 2),
  if (estimates[[2]] > 0) " + " else " - ",
  abs(round(estimates[[2]], digits = 3)), " * lnchprg\n",
  "n = ", nobs(model),
  ", R^2 = ", round(model_summary$r.squared, digits = 3),
  "\n",
  sep = ""
)
# Interpretation
# Reports the predicted change in math10 for a 10-point increase in lnchprg
# as 10 times the absolute value of the rounded slope. The wording
# ("DECREASE", "negative coefficient") is fixed text, i.e. it presumes the
# estimated slope is negative.
# Long sentences are split into adjacent pieces; with sep = "" they are
# concatenated unchanged. The final "\n" terminates the output.
cat(
  "When eligibility in the lunch program increases by 10 percentage points, ",
  "the math exam passing rate is predicted to DECREASE by ",
  10 * abs(round(coef(model)[[2]], digits = 3)),
  " percentage points, which is counterintuitive",
  "\nThe negative coefficient is unlikely to indicate a causal relationship. ",
  "Instead, the error term u in the model is likely to contain variables ",
  "such as school quality and resources, which are correlated with lnchprg.",
  "\nThe resulting correlation between lnchprg and u causes the zero ",
  "conditional mean assumption (SLR.4) to be violated, and the OLS ",
  "estimators to be biased",
  "\n",
  sep = ""
)