# Example 4.2, Student Performance and School Size
# Data set: meap93
# Functions for result reporting
source("_report.R")
# Bring the saved meap93 workspace into the session, then fit the
# level-level regression that the remainder of the script reports on.
load("meap93.Rdata")
model <- lm(math10 ~ totcomp + staff + enroll, data = data)
# Digit settings passed to the reporting helpers together with `model`.
dig <- c(3, 5, 3, 5, 4)
# Describe the model
# Each variable description is looked up in `desc`: column 1 is matched
# against the variable name and the matching entry of column 2 is printed.
cat(
  "We want to test the null hypothesis that school size has no effect on standardized test scores against the alternative that size has a negative effect",
  "\nModel to estimate: math10 = beta0 + beta1 * totcomp + beta2 * staff + beta3 * enroll + u",
  "\nwhere math10 is ",
  paste(desc[desc[, 1] == "math10", 2]),
  ", a proxy of student performance,",
  "\nenroll is ",
  paste(desc[desc[, 1] == "enroll", 2]),
  ", a proxy of school size,",
  "\ntotcomp is ",
  paste(desc[desc[, 1] == "totcomp", 2]),
  "\nand staff is ",
  paste(desc[desc[, 1] == "staff", 2]),
  sep = ""
)
# Report results
# The braces keep the lead-in sentence and the regression report together
# as a single top-level expression.
{
  cat("The estimated regression line is")
  reportreg(model, dig)
}
# Interpretation
# One-sided t test of H0: beta3 = 0 against H1: beta3 < 0. The rejection
# region is in the left tail, so the 5% critical value is negative
# (about -1.65) and H0 is rejected only when the t statistic falls below it.
cat(
  "The coefficient on enroll, ", printcoef(model, 4, dig[4]),
  ", has a negative sign and indicates that larger schools hamper performance. To assess its statistical significance, we conduct the test:",
  "\n\tH0: beta3 = 0 vs H1: beta3 < 0",
  # Residual degrees of freedom taken directly from the fitted model rather
  # than via `summary(model)$coef`, which depends on `$` partial matching.
  "\nWith df = ", df.residual(model),
  ", the 5% critical value is about -1.65. Since the t statistic on enroll is ",
  printcoef(model, 4, dig[4]), "/", printse(model, 4, dig[4]),
  " = ", printt(model, 4, dig[4]),
  " > -1.65, we fail to reject H0 at the 5% significance level, i.e. enroll is not statistically significant at the 5% level",
  sep = ""
)
# Estimate a level-log model
# Same dependent variable as before, with every regressor entered in logs.
model1 <- lm(math10 ~ ltotcomp + lstaff + lenroll, data = data)
# Digit settings passed to the reporting helpers together with `model1`.
dig1 <- c(2, 2, 2, 2, 4)
{
  # The two sentences are joined explicitly (sep = "") with a full stop and a
  # line break; with cat's default separator they ran together as
  # "... increases And the estimation result is".
  cat(
    "Now we estimate another model, with all the three independent variables in logarithmic form. This allows, for example, the school size effect to diminish as school size increases.",
    "\nAnd the estimation result is",
    sep = ""
  )
  reportreg(model1, dig1)
}
# Interpretation of the level-log model
# In a level-log model a 1% increase in x changes y by beta/100 units, so a
# 10% increase in enroll changes math10 by beta/100 * 10 percentage points.
cat(
  "The coefficient on lenroll is ", printcoef(model1, 4, dig1[4]),
  ", indicating that when enroll increases by 10%, math10 is predicted to decrease by ",
  # Magnitude computed from the fitted coefficient instead of a hard-coded
  # figure, so it always agrees with the formula printed next to it. The
  # direction of the effect is carried by the word "decrease".
  formatC(abs(coef(model1)[["lenroll"]]) / 100 * 10, format = "f", digits = 2),
  " ( = ", printcoef(model1, 4, dig1[4]), "/100 * 10) percentage points",
  # Sentence break added: with sep = "" the next sentence was glued onto
  # "percentage points".
  ".\nThe t statistic on lenroll is ", printt(model1, 4, dig1[4]),
  " < -1.65, so this time we reject H0: beta(lenroll) = 0 in favor of H1: beta(lenroll) < 0 at the 5% level",
  "\nWhile enrollment is not statistically significant in the level-level model, it is in the level-log model. This translates into a higher R-squared for the level-log model, which means we explain more of the variation in math10 by using enroll in logarithmic form. The level-log model is preferred because it more closely captures the relationship between math10 and enroll.",
  sep = ""
)