# Example 8.4, Heteroskedasticity in Housing Price Equations # Data set: hprice1 # Function for result reporting source("_report.R") # Load the data and estimate the models in the background load("hprice1.Rdata") model1=lm(price~lotsize+sqrft+bdrms,data=data) model2=lm(lprice~llotsize+lsqrft+bdrms,data=data) dig1=c(2,5,3,2,3) dig2=c(2,3,3,3,3) # Describe the model cat("Model to estimate: price = beta0 + beta1 * lotsize + beta2 * sqrft + beta3 * bdrms + u", "\nwhere price is ", paste(desc[desc[,1]=="price",2]), "\nlotsize is ", paste(desc[desc[,1]=="lotsize",2]), "\nsqrft is ", paste(desc[desc[,1]=="sqrft",2]), "\nand bdrms is ", paste(desc[desc[,1]=="bdrms",2]), sep="") # Report results { cat("The estimated regression line is") reportreg(model1,dig1) } # Test for HC m1=lm(model1$residuals^2~lotsize+sqrft+bdrms,data=data) F1=round(as.numeric(printr(m1,4))/(1-as.numeric(printr(m1,4)))*(nrow(model1$model)-nrow(summary(model1)$coef))/3,2) LM1=round(nrow(model1$model)*as.numeric(printr(m1,4)),2) cat("Regressing squared uhat on all the independent variables gives an R^2 of ", as.numeric(printr(m1,4)), ", and the F statistic for significance of the independent variables is given by [", as.numeric(printr(m1,4)), "/(1 - ", as.numeric(printr(m1,4)), ")](", nrow(model1$model)-nrow(summary(model1)$coef), "/3) = ", F1, ". The associated p-value is ", round(pf(F1,3,nrow(model1$model)-nrow(summary(model1)$coef),lower.tail=F),3), ", strong evidence against the null. The LM statistic is ", nrow(model1$model), "(", as.numeric(printr(m1,4)), ") = ", LM1, ", and with df = 3, the p-value is ", round(pchisq(LM1,3,lower.tail=F),4), ", giving essentially the same conclusion as the F statistic. Thus, heteroskedasticity is present in the error in the population model, and the usual standard errors reported are not reliable", sep="") # Describe and report the log model { cat("To reduce heteroskedasticity, we try a log model, where we use the logarithmic forms of price, lotsize and sqrft", "\nThe estimated regression line is") reportreg(model2,dig2) } # Test for HC again m2=lm(model2$residuals^2~llotsize+lsqrft+bdrms,data=data) F2=round(as.numeric(printr(m2,4))/(1-as.numeric(printr(m2,4)))*(nrow(model2$model)-nrow(summary(model2)$coef))/3,2) LM2=round(nrow(model2$model)*as.numeric(printr(m2,4)),2) cat("This time, regressing squared uhat on the independent variables gives an R^2 of ", printr(m2,4), ". The F statistic for significance of the independent variables is ", F2, ", with a p-value of ", round(pf(F2,3,nrow(model2$model)-nrow(summary(model2)$coef),lower.tail=F),3), ". The LM statistic is ", LM2, ", with a p-value of ", round(pchisq(LM2,3,lower.tail=F),3), ". Therefore, using the log model, we fail to reject the null hypothesis of homoskedasticity in the model", sep="")