# Example 4.7, Effect of Job Training on Firm Scrap Rates
# Data set: jtrain
# Function for result reporting
source("_report.R")
# Load the data and estimate the model in the background
load("jtrain.Rdata")
# Refine the data to year 1987, nonunionized firms.
# `index` is the row-wise logical condition; it is NA wherever year or union
# is NA. Subsetting through which() keeps only the rows where the condition is
# TRUE, so missing values cannot be turned into all-NA rows of `data`.
index <- (data$year == 1987) & (data$union == 0)
data <- data[which(index), ]
# Fit lscrap on hrsemp, lsales and lemploy by OLS on the restricted sample.
model <- lm(lscrap ~ hrsemp + lsales + lemploy, data = data)
# Digit settings handed to the reporting helpers below (dig[2] is the one used
# for the hrsemp coefficient and t statistic).
# NOTE(review): presumably one rounding precision per reported quantity --
# confirm against _report.R.
dig <- c(2, 3, 3, 3, 3)
# Describe the model
# Look up the description stored for one variable: the second column of the
# rows of `desc` whose first column equals `var_name`, passed through paste().
# NOTE(review): `desc` is not defined in this file -- presumably it comes from
# jtrain.Rdata; confirm.
describe_var <- function(var_name) {
  paste(desc[desc[, 1] == var_name, 2])
}

# Print the motivation, the regression equation, and the description of every
# variable in it (with the level variable behind each log variable in
# parentheses). All pieces are concatenated without a separator.
cat(
  "In this example, we would like to examine the effect of training on worker productivity, and we use the scrap rate of a firm as a proxy of productivity",
  "\nModel to estimate: lscrap = beta0 + beta1 * hrsemp + beta2 * lsales + beta3 * lemploy + u",
  "\nwhere lscrap is ", describe_var("lscrap"),
  " (scrap: ", describe_var("scrap"), ")",
  "\nhrsemp is ", describe_var("hrsemp"),
  "\nlsales is ", describe_var("lsales"),
  " (sales: ", describe_var("sales"), ")",
  "\nand lemploy is ", describe_var("lemploy"),
  " (employ: ", describe_var("employ"), ")",
  sep = ""
)
# Report results
# Emit the lead-in sentence and the regression report as one grouped
# expression. reportreg() is defined outside this file (presumably in
# _report.R); it receives the fitted model and the digit settings.
{
  cat("The estimated regression line is")
  reportreg(model, dig)
}
# Interpretation
# Interpret the hrsemp coefficient and its one-sided significance.
# - The residual degrees of freedom come from df.residual() instead of being
#   rebuilt from nrow() differences and a second summary() call.
# - The 5% and 10% one-sided critical values are derived from that same df via
#   qt() and shown as magnitudes with two decimals, so they cannot drift from
#   the reported df (for df = 25 they print as 1.71 and 1.32).
# - printabscoef() and printt() are defined outside this file; the first is
#   converted to numeric and scaled by 100 to express the effect in percent.
# NOTE(review): the verdict wording ("not statistically significant",
# "almost significant") is still fixed text written for the textbook sample.
cat(
  "When hrsemp increases by 1 hour, scrap is predicted to decrease by ",
  as.numeric(printabscoef(model, 2, dig[2])) * 100,
  "%. Considering hrsemp is annual hours of training, this is a rather large effect",
  "\nHowever, the statistical significance is a different matter. With df = ",
  df.residual(model),
  ", the 5% one-sided critical value is ",
  formatC(qt(0.95, df.residual(model)), format = "f", digits = 2),
  ". Since the t statistic on hrsemp is ",
  printt(model, 2, dig[2]),
  " > -",
  formatC(qt(0.95, df.residual(model)), format = "f", digits = 2),
  ", hrsemp is not statistically significant at the 5% level. The 10% critical value is ",
  formatC(qt(0.90, df.residual(model)), format = "f", digits = 2),
  ", so hrsemp is almost significant at the 10% level",
  sep = ""
)