# Example 7.11, Effects of Race on Baseball Player Salaries
# Data set: mlb1
# Function for result reporting
source("_report.R")
# Load the data, create new variables and estimate the model
# NOTE(review): load() is expected to bring `data` (used below) and `desc`
# (used when describing the model) into the workspace -- confirm against
# mlb1.Rdata.
load("mlb1.Rdata")
# Keep only the rows where all four race-related columns are non-missing.
index <- complete.cases(data[c("black", "hispan", "percblck", "perchisp")])
data <- data[index, ]
# Interaction terms: black x percblck and hispan x perchisp.
data[["black.percblck"]] <- with(data, black * percblck)
data[["hispan.perchisp"]] <- with(data, hispan * perchisp)
# Regress lsalary on the eight productivity measures, the two race
# variables and the two interaction terms.
model <- lm(
  lsalary ~ years + gamesyr + bavg + hrunsyr + rbisyr + runsyr + fldperc +
    allstar + black + hispan + black.percblck + hispan.perchisp,
  data = data
)
# Digits handed to the reporting helpers: entry i is used with coefficient i
# (1 = intercept, ..., 13 = hispan.perchisp); entry 14 is used for R^2.
dig <- c(2, 4, 4, 5, 4, 4, 4, 4, 4, 3, 3, 4, 4, 3)
# Describe the model
# Prints the population equation, then one line per variable with the text
# looked up in `desc` (first column matched against the variable name, second
# column printed). local() keeps the lookup helper out of the workspace.
local({
  # Description text recorded in `desc` for one variable name.
  label_of <- function(variable) paste(desc[desc[, 1] == variable, 2])

  cat(
    "Model to estimate: lsalary = beta0 + beta1 * years + beta2 * gamesyr + beta3 * bavg + beta4 * hrunsyr + beta5 * rbisyr + beta6 * runsyr + beta7 * fldperc + beta8 * allstar + beta9 * black + beta10 * hispan + beta11 * black.percblck + beta12 * hispan.perchisp + u",
    "\nwhere lsalary is ", label_of("lsalary"),
    " (salary: ", label_of("salary"), ")",
    "\nyears is ", label_of("years"),
    "\ngamesyr is ", label_of("gamesyr"),
    "\nbavg is ", label_of("bavg"),
    "\nhrunsyr is ", label_of("hrunsyr"),
    "\nrbisyr is ", label_of("rbisyr"),
    "\nrunsyr is ", label_of("runsyr"),
    "\nfldperc is ", label_of("fldperc"),
    "\nallstar is ", label_of("allstar"),
    "\nblack is ", label_of("black"),
    "\nhispan is ", label_of("hispan"),
    # The interaction terms have no entry of their own; describe them through
    # the percentage variable they are built from.
    "\nblack.percblck is an interaction term between black and percblck (",
    label_of("percblck"), ")",
    "\nand hispan.perchisp is an interaction term between hispan and perchisp (",
    label_of("perchisp"), ")",
    sep = ""
  )
})
# Report results
# The braces group the heading and the report into a single expression so
# they are evaluated together. reportreg() comes from _report.R; it receives
# the fitted model and the digits vector.
{
  cat("The estimated regression line is")
  reportreg(model, dig)
}
# Interpretation
# Part 1: joint F test of the four race terms (black, hispan and their two
# interactions) in R-squared form,
#   F = [(R2_ur - R2_r) / q] / [(1 - R2_ur) / df_ur],  with q = 4.
# Both R^2 values are taken through printr() with the same digits (dig[14]),
# so F is computed from the reported figures rather than full-precision ones.
# Part 2: walk through the estimated race effects at chosen percblck values
# and solve for the perchisp level at which the Hispanic differential is zero.
# NOTE(review): printr/printcoef/printabscoef come from _report.R and are
# assumed to be pure formatters (no output of their own) -- confirm.

# R^2 of the restricted model (race terms dropped), same sample as `model`.
r1 <- as.numeric(
  printr(
    lm(
      lsalary ~ years + gamesyr + bavg + hrunsyr + rbisyr + runsyr + fldperc +
        allstar,
      data = data
    ),
    dig[14]
  )
)
# R^2 of the unrestricted model.
r2 <- as.numeric(printr(model, dig[14]))
# Residual df of the unrestricted model: observations used minus coefficients.
denomdf <- nrow(model$model) - nrow(summary(model)$coef)
# F statistic for the 4 exclusion restrictions, rounded to 2 decimals.
f <- round(((r2 - r1) / (1 - r2)) / (4 / denomdf), 2)

# local() keeps the intermediate values below out of the workspace.
local({
  # Coefficients as displayed (text) and as numbers; positions follow the
  # order of terms in `model`: 10 = black, 11 = hispan, 12 = black.percblck,
  # 13 = hispan.perchisp.
  black_txt <- printcoef(model, 10, dig[10])
  hispan_txt <- printcoef(model, 11, dig[11])
  black_pct_txt <- printcoef(model, 12, dig[12])
  hispan_pct_txt <- printcoef(model, 13, dig[13])
  b_black <- as.numeric(black_txt)
  b_black_pct <- as.numeric(black_pct_txt)

  # Black-white differential when percblck is 10 and 20.
  diff_at_10 <- round(b_black + 10 * b_black_pct, 3)
  diff_at_20 <- round(b_black + 20 * b_black_pct, 3)
  # perchisp value solving hispan + hispan.perchisp * perchisp = 0.
  cutoff <- round(-as.numeric(hispan_txt) / as.numeric(hispan_pct_txt), 2)

  # NOTE(review): the "significant at the 5% level" conclusion and the
  # "less"/"MORE" wording are fixed text; they are not derived from the
  # computed p-value or coefficient signs.
  cat(
    "We first test the joint significance of the four race variables. Dropping them from the regression gives an R^2 of ",
    r1, ", while the R^2 of the unrestricted model is ", r2,
    ". With numerator df = 4 and denominator df = ", denomdf, ", the F statistic is ",
    f, ", and the p-value is ", round(pf(f, 4, denomdf, lower.tail = FALSE), 3), ". Therefore, the four variables are joint significant at the 5% level",
    "\nNow, holding all productivity factors fixed, we examine the effect of race on salary:",
    "\nFirst, holding perchisp fixed, we consider the effect of being black. When in a city with no blacks (percblck = 0), a black player is predicted to earn ",
    100 * as.numeric(printabscoef(model, 10, dig[10])), "% less than a comparable white player. As percblck increases, the salary of blacks increases relative to that for whites. When percblck = 10, the percentage difference becomes ",
    black_txt, " + ", black_pct_txt, "(10) = ",
    diff_at_10,
    ", i.e. black players are predicted to earn ",
    100 * abs(diff_at_10),
    "% less than whites. When percblck = 20, black players are predicted to earn ",
    100 * diff_at_20,
    "% MORE than whites\nSimilarly, Hispanics earn less than whites in cities with a low perchisp. We can find the cutoff value of perchisp:\n\t",
    hispan_txt, " + ", hispan_pct_txt, " perchisp = 0\n, which gives perchisp = ",
    cutoff,
    ". Holding percblck fixed, Hispanics are predicted to earn less than whites in cities where the percentage of Hispanics is less than ",
    cutoff,
    "%, and the opposite is true if the percentage of Hispanics is above that percentage",
    sep = ""
  )
})