# Example 3.3, Participation in 401(k) Pension Plans # Data set: 401k load("401k.Rdata") # Describe the model cat("Model to estimate: prate = beta0 + beta1 * mrate + beta2 * age + u", "\nwhere prate is ", paste(desc[desc[,1]=="prate",2]), ",\nmrate is ", paste(desc[desc[,1]=="mrate",2]), ",\nand age is ", paste(desc[desc[,1]=="age",2]), sep="") # Estimate and show results model=lm(prate~mrate+age, data=data) summary(model) cat("The estimated regression line is\n", "pratehat = ", if(model$coefficients[1]>0) "" else "- ", abs(round(model$coefficients[1],digits=2)), if(model$coefficients[2]>0) " + " else " - ", abs(round(model$coefficients[2],digits=2)), " * mrate", if(model$coefficients[3]>0) " + " else " - ", abs(round(model$coefficients[3],digits=3)), " * age\n", "n = ", nrow(data), sep="") # Compare with the simple regression model1=lm(prate~mrate, data=data) cat("If we do not control for age any more and estimate a simple regression model instead, the resulting estimate will be\n", "pratehat = ", if(model1$coefficients[1]>0) "" else "- ", abs(round(model1$coefficients[1],digits=2)), if(model1$coefficients[2]>0) " + " else " - ", abs(round(model1$coefficients[2],digits=2)), " * mrate\n", "The difference between the two estimates of the coefficient on mrate is not big (", abs(round(model$coefficients[2],digits=2)), " vs ", abs(round(model1$coefficients[2],digits=2)), "). This can be explained by the fact that the correlation between mrate and age, the omitted variable, is only ", round(cor(data$mrate, data$age), digits=2), sep="")