# Example 8.7, Demand For Cigarettes
# Data set: smoke
# Function for result reporting
source("_report.R")
# Load the data and estimate the model in the background
load("smoke.Rdata")
model1=lm(cigs~lincome+lcigpric+educ+age+agesq+restaurn,data=data)
dig1=c(2,3,3,3,3,4,2,4)
# Describe the model
cat("Model to estimate: cigs = beta0 + beta1 * lincome + beta2 * lcigpric + beta3 * educ + beta4 * age + beta5 * agesq + beta6 * restaurn + u",
"\nwhere lincome is ", paste(desc[desc[,1]=="lincome",2]), " (income: ", paste(desc[desc[,1]=="income",2]), ")",
"\nlcigpric is ", paste(desc[desc[,1]=="lcigpric",2]), " (cigpric: ", paste(desc[desc[,1]=="cigpric",2]), ")",
"\neduc is ", paste(desc[desc[,1]=="educ",2]),
"\nage is ", paste(desc[desc[,1]=="age",2]),
"\nagesq is ", paste(desc[desc[,1]=="agesq",2]),
"\nand restaurn is ", paste(desc[desc[,1]=="restaurn",2]),
sep="")
# Report OLS results
{
cat("First we estimated the model using OLS, reporting only the usual OLS standard errors. The estimated regression line is")
reportreg(model1,dig1)
}
# Interpretation
cat("Neither lincome nor lcigpric is statistically significant. Their magnitudes are also small: for example, a 10% increase in income is predicted to raise cigs by ",
0.10*as.numeric(printcoef(model1,2,dig1[2])), " only. An increase of 1 year in educ is predicted to reduce cigs by ",
printabscoef(model1,4,dig1[4]), ", and the effect is statistically significant. Increase in age has a positive effect on cigs at earlier ages, and the opposite later. The cutoff age is ",
printcoef(model1,5,dig1[5]), "/[2(", printabscoef(model1,6,dig1[6]), ")] = ",
round(as.numeric(printcoef(model1,5,dig1[5]))/2/as.numeric(printabscoef(model1,6,dig1[6])),2),
", and both age and agesq are significant. The presence of a restriction on smoking in restaurants is predicted to decrease cigs by ",
printabscoef(model1,7,dig1[7]),
sep="")
# Test for HC
data$rhatsq=model1$residuals^2
m1=lm(rhatsq~lincome+lcigpric+educ+age+agesq+restaurn,data=data)
LM=nrow(model1$model)*as.numeric(printr(m1,3))
cat("Now we use the BP test and the LM statistic to test for heteroskedasticity in the model. Regress squared uhat on all the independent variables gives an R^2 of ",
printr(m1,3), ", and the LM statistic is ", nrow(model1$model), "(", printr(m1,3), ") = ",
LM, ". With df = 6, the p-value is ", sprintf("%.6f",round(pchisq(LM,6,lower.tail=F),6)), ", which is very strong evidence of heteroskedasticity",
sep="")
# Do FGLS
data$lrhatsq=log(data$rhatsq)
m2=lm(lrhatsq~lincome+lcigpric+educ+age+agesq+restaurn,data=data)
data$ghat=m2$fitted.values
data$hhat=exp(data$ghat)
model2=lm(cigs~lincome+lcigpric+educ+age+agesq+restaurn,weights=1/hhat,data=data)
dig2=c(2,2,2,3,3,4,2,4)
{
cat("Therefore, we estimate the model again using feasible GLS. The results are")
reportreg(model2,dig2)
}
# Interpretation
cat("The income effect is now statistically significant and larger in magnitude. The price effect is also notably bigger, but still statistically insignificant. The estimates on the other variables changed slightly")