# Example 6.5, Confidence Interval for Predicted College GPA # Data set: gpa2 # Function for result reporting source("_report.R") # Load the data, create new variables and estimate the models load("gpa2.Rdata") data$hsizesq=(data$hsize)^2 data$sat0=data$sat-1200 data$hsperc0=data$hsperc-30 data$hsize0=data$hsize-5 data$hsizesq0=data$hsizesq-25 model1=lm(colgpa~sat+hsperc+hsize+hsizesq,data=data) model2=lm(colgpa~sat0+hsperc0+hsize0+hsizesq0,data=data) dig=c(3,5,5,5,5,3) # Describe the model cat("First model to estimate: colgpa = beta0 + beta1 * sat + beta2 * hsperc + beta3 * hsize + beta4 * hsizesq + u", "\nwhere colgpa is ", paste(desc[desc[,1]=="colgpa",2]), "\nsat is ", paste(desc[desc[,1]=="sat",2]), "\nhsperc is ", paste(desc[desc[,1]=="hsperc",2]), "\nhsize is ", paste(desc[desc[,1]=="hsize",2]), "\nand hsizesq is a quadratic term in hsize", sep="") # Report results { cat("The estimated regression line is") reportreg(model1,dig,adj=T) cat(", sigmahat = ", sprintf("%.3f",round((summary(model1)$sigma),3)), sep="") } # Interpret model1 and describe model2 vars=c(1,1200,30,5,5^2) # Given values of independent variables coefs=as.numeric(printcoef(model1,1:5,dig[1:5])) cat("From this model, we can get the predicted colgpa when sat = 1200, hsperc = 30 and hsize = 5, which is ", round(sum(vars*coefs),2), ". However, we cannot directly get the confidence interval for the expected colgpa at the given values of the independent variables. To obtain the confidence interval, we can define a new set of variables:", "\nsat0 = sat - 1200\nhsperc0 = hsperc - 30\nhsize0 = hsize - 5\nand hsizesq0 = hsizesq - 25", "\nThen we estimate a new model: colgpa = beta0 + beta1 * sat0 + beta2 * hsperc0 + beta3 * hsize0 + beta4 * hsizesq0 + u", sep="") # Report results { cat("The new estimated regression line is") reportreg(model2,dig,adj=T) cat(", sigmahat = ", sprintf("%.3f",round((summary(model2)$sigma),3)), sep="") } # Interpret model2 cat("The only difference between the two regression results is the value of the intercept; the slope coefficients, the R^2, etc. are exactly the same in both models. Now we can use the intercept and its standard error to construct a 95% confidence interval for the expected colgpa, which is ", printcoef(model2,1,dig[1]), " ± 1.96 * ", printse(model2,1,dig[1]), ", or (", round(as.numeric(printcoef(model2,1,dig[1]))-1.96*as.numeric(printse(model2,1,dig[1])),2), ",", round(as.numeric(printcoef(model2,1,dig[1]))+1.96*as.numeric(printse(model2,1,dig[1])),2), ")", sep="")