# Example 3.5, Explaining Arrest Records
# Data set: crime1
load("crime1.Rdata")
# Describe the model
cat("The data set contains data on information during the year 1986 on 2,725 men. Each of them was arrested at least once prior to 1986\n",
"Model to estimate: narr86 = beta0 + beta1 * pcnv + beta2 * avgsen + beta3 * ptime86 + beta4 * qemp86 + u",
"\nwhere narr86 is ", paste(desc[desc[,1]=="narr86",2]),
",\npcnv is ", paste(desc[desc[,1]=="pcnv",2]),
",\navgsen is ", paste(desc[desc[,1]=="avgsen",2]),
",\nptime86 is ", paste(desc[desc[,1]=="ptime86",2]),
",\nand qemp86 is ", paste(desc[desc[,1]=="qemp86",2]),
sep="")
# Estimate the restricted model first
model=lm(narr86~pcnv+ptime86+qemp86, data=data)
summary(model)
cat("We first estimate the model without avgsen. The estimated regression line is\n",
"pratehat = ",
if(model$coefficients[1]>0) "" else "- ", abs(round(model$coefficients[1],digits=3)),
if(model$coefficients[2]>0) " + " else " - ", abs(round(model$coefficients[2],digits=3)), " * pcnv",
if(model$coefficients[3]>0) " + " else " - ", abs(round(model$coefficients[3],digits=3)), " * ptime86",
if(model$coefficients[4]>0) " + " else " - ", abs(round(model$coefficients[4],digits=3)), " * qemp86\n",
"n = ", nrow(data), ", R^2 = ", round(summary(model)$r.squared,digits=4), sep="")
# Interpretation of restricted model
cat("The R^2 indicate that pcnv, ptime86 and qemp86 together explains ", 100*round(summary(model)$r.squared,digits=4),
"% of the variation in narr86\n",
"The slope coefficients have anticipated signs:\n",
"Here pcnv is a proxy for the likelihood for being convicted of a crime; when it rises, narr86 is predicted to fall\n",
"When ptime86 rises, an individual spends more time in prison in 1986, and is less likely to be arrested for a crime out of prison; therefore, narr86 is predicted to fall\n",
"And qemp86 crudely captures labor market opportunities; a higher qemp86 reduces the chance of an individual committing a crime, and the predicted narr86 falls",
sep="")
# Now estimate the unrestricted model first
model1=lm(narr86~pcnv+avgsen+ptime86+qemp86, data=data)
summary(model1)
cat("Now we add avgsen to the model. The estimated regression line is\n",
"pratehat = ",
if(model1$coefficients[1]>0) "" else "- ", abs(round(model1$coefficients[1],digits=3)),
if(model1$coefficients[2]>0) " + " else " - ", abs(round(model1$coefficients[2],digits=3)), " * pcnv",
if(model1$coefficients[3]>0) " + " else " - ", abs(round(model1$coefficients[3],digits=4)), " * avgsen",
if(model1$coefficients[4]>0) " + " else " - ", abs(round(model1$coefficients[4],digits=3)), " * ptime86",
if(model1$coefficients[5]>0) " + " else " - ", abs(round(model1$coefficients[5],digits=3)), " * qemp86\n",
"n = ", nrow(data), ", R^2 = ", round(summary(model1)$r.squared,digits=4), sep="")
# Further interpretation
cat("Adding avgsen raises the R^2 from ", round(summary(model)$r.squared,digits=4), " to ",
round(summary(model1)$r.squared,digits=4), ", which is not a large increase. But it is worthnoting that a small R^2 does not necessarily mean the model is useless; the coefficients can still be reliable estimates of the ceteris paribus effects of each independent variable\n",
"The coefficient on avgsen, ", round(model1$coefficients[3],digits=4),
", has an unexpected sign, indicating that criminal activity actually rises as average sentence length becomes longer",
sep="")