# Example 7.8, Effects of Law School Rankings on Starting Salaries
# Data set: lawsch85
# Function for result reporting
source("_report.R")
# Load the data and estimate the model in the background
load("lawsch85.Rdata")
data$r61_100=as.numeric((data$rank>=61)&(data$rank<=100)) # Create missing variable in data set
model=lm(lsalary~top10+r11_25+r26_40+r41_60+r61_100+LSAT+GPA+llibvol+lcost,data=data)
dig=c(2,3,3,3,3,3,4,3,3,4,3)
# Describe the model
cat("Model to estimate: lsalary = beta0 + beta1 * top10 + beta2 * r11_25 + beta3 * r26_40 + beta4 * r41_60 + beta5 * r61_100 + beta6 * LSAT + beta7 * GPA + beta8 * llibvol + beta9 * lcost + u",
"\nwhere LSAT is ", paste(desc[desc[,1]=="LSAT",2]),
"\nGPA is ", paste(desc[desc[,1]=="GPA",2]),
"\nllibvol is ", paste(desc[desc[,1]=="llibvol",2]), " (libvol: ", paste(desc[desc[,1]=="libvol",2]), ")",
"\nlcost is ", paste(desc[desc[,1]=="lcost",2]), " (cost: ", paste(desc[desc[,1]=="cost",2]), ")",
"\nand top10, r11_25, r26_40, r41_60 and r61_100 are dummy variables showing the range where a school's rank falls. Schools ranked below 100 are used as the base group",
sep="")
# Report results
{
cat("The estimated regression line is")
reportreg(model,dig,adj=T)
}
# Interpretation
cat("All the dummy variables are very statistically significant. The coefficient on r61_100 indicates that the median salary at a school ranked between 61 and 100 is predicted to be ",
100*as.numeric(printcoef(model,6,dig[6])), " higher than that at one ranked below 100. And for the percentage salary difference between a top 10 school and a below 100 school, the exact calculation gives exp(",
printcoef(model,2,dig[2]), ") - 1 = ", round(exp(as.numeric((printcoef(model,2,dig[2]))))-1,3),
", i.e. the median salary at a top 10 school is predicted to be ",
100*round(exp(as.numeric((printcoef(model,2,dig[2]))))-1,3), "% higher than that at a below 100 school",
"\nTo see whether dividing rank into different categories is an improvement in terms of goodness-of-fit over using only the rank variable, we compare the adjusted R^2 for both models: the former is ",
sprintf(paste("%.3f",sep=""),round(summary(model)$adj.r.squared,3)), ", and the latter ",
sprintf(paste("%.3f",sep=""),round(summary(lm(lsalary~rank+LSAT+GPA+llibvol+lcost,data=data))$adj.r.squared,3)),
", so our original model does perform better",
sep="")