# Example 7.11, Effects of Race on Baseball Player Salaries
# Data set: mlb1
#
# Estimates a log-salary regression with race dummies and race-by-city-
# composition interactions, prints the model description and the fitted
# regression, then prints an F test of the four race variables and an
# interpretation of the race coefficients.
#
# NOTE(review): `reportreg`, `printr`, `printcoef` and `printabscoef` are
# presumably defined in "_report.R", and `data` / `desc` presumably come from
# "mlb1.Rdata" -- neither file is visible here, confirm before relying on it.

# Function for result reporting
source("_report.R")

# Load the data, create new variables and estimate the model
load("mlb1.Rdata")

# Refine the data, keep those with racial statistics
race_columns <- c("black", "hispan", "percblck", "perchisp")
index <- complete.cases(data[, race_columns])
data <- data[index, ]

# Interaction terms: race dummy times the matching city-composition variable.
data$black.percblck <- data$black * data$percblck
data$hispan.perchisp <- data$hispan * data$perchisp

model <- lm(
  lsalary ~ years + gamesyr + bavg + hrunsyr + rbisyr + runsyr + fldperc +
    allstar + black + hispan + black.percblck + hispan.perchisp,
  data = data
)

# Rounding digits handed to the reporting helpers. Positions 1-13 are used
# below for the coefficients (intercept first) and position 14 for R^2.
dig <- c(2, 4, 4, 5, 4, 4, 4, 4, 4, 3, 3, 4, 4, 3)

# Look up the text stored in column 2 of `desc` for the row whose first
# column equals `name`. paste() keeps the original coercion to character.
describe <- function(name) {
  paste(desc[desc[, 1] == name, 2])
}

# Describe the model
cat(
  "Model to estimate: lsalary = beta0 + beta1 * years + beta2 * gamesyr + ",
  "beta3 * bavg + beta4 * hrunsyr + beta5 * rbisyr + beta6 * runsyr + ",
  "beta7 * fldperc + beta8 * allstar + beta9 * black + beta10 * hispan + ",
  "beta11 * black.percblck + beta12 * hispan.perchisp + u",
  "\nwhere lsalary is ", describe("lsalary"),
  " (salary: ", describe("salary"), ")",
  "\nyears is ", describe("years"),
  "\ngamesyr is ", describe("gamesyr"),
  "\nbavg is ", describe("bavg"),
  "\nhrunsyr is ", describe("hrunsyr"),
  "\nrbisyr is ", describe("rbisyr"),
  "\nrunsyr is ", describe("runsyr"),
  "\nfldperc is ", describe("fldperc"),
  "\nallstar is ", describe("allstar"),
  "\nblack is ", describe("black"),
  "\nhispan is ", describe("hispan"),
  "\nblack.percblck is an interaction term between black and percblck (",
  describe("percblck"), ")",
  "\nand hispan.perchisp is an interaction term between hispan and perchisp (",
  describe("perchisp"), ")",
  sep = ""
)

# Report results
{
  cat("The estimated regression line is")
  reportreg(model, dig)
}

# Interpretation

# Restricted model: the same regression without the four race variables.
restricted <- lm(
  lsalary ~ years + gamesyr + bavg + hrunsyr + rbisyr + runsyr + fldperc +
    allstar,
  data = data
)

# Both R^2 values are taken from printr() at the same precision (dig[14]),
# so the F statistic is built from the rounded figures that get printed.
r1 <- as.numeric(printr(restricted, dig[14]))
r2 <- as.numeric(printr(model, dig[14]))

# F test of the four race variables: numerator df is the number of dropped
# regressors, denominator df is observations minus estimated coefficients.
numdf <- 4
denomdf <- nrow(model$model) - nrow(summary(model)$coefficients)
f <- round(((r2 - r1) / (1 - r2)) / (numdf / denomdf), 2)
p_value <- round(pf(f, numdf, denomdf, lower.tail = FALSE), 3)

# Coefficient text as formatted by printcoef(), plus numeric copies for the
# arithmetic. Rows 10-13 are black, hispan, black.percblck, hispan.perchisp.
# Each value is fetched once; this assumes printcoef() has no side effects.
black_txt <- printcoef(model, 10, dig[10])
hispan_txt <- printcoef(model, 11, dig[11])
black_pct_txt <- printcoef(model, 12, dig[12])
hispan_pct_txt <- printcoef(model, 13, dig[13])

black_num <- as.numeric(black_txt)
hispan_num <- as.numeric(hispan_txt)
black_pct_num <- as.numeric(black_pct_txt)
hispan_pct_num <- as.numeric(hispan_pct_txt)

# Black/white log-salary gap at percblck = 10 and percblck = 20.
gap_at_10 <- round(black_num + 10 * black_pct_num, 3)
gap_at_20 <- round(black_num + 20 * black_pct_num, 3)

# perchisp at which the Hispanic/white gap is zero.
cutoff <- round(-hispan_num / hispan_pct_num, 2)

# NOTE(review): the wording below ("joint significant at the 5% level",
# "% less", "% MORE") is fixed text; it is not derived from the computed
# p-value or from the signs of the estimates.
cat(
  "We first test the joint significance of the four race variables. ",
  "Dropping them from the regression gives an R^2 of ", r1,
  ", while the R^2 of the unrestricted model is ", r2,
  ". With numerator df = 4 and denominator df = ", denomdf,
  ", the F statistic is ", f,
  ", and the p-value is ", p_value,
  ". Therefore, the four variables are joint significant at the 5% level",
  "\nNow, holding all productivity factors fixed, ",
  "we examine the effect of race on salary:",
  "\nFirst, holding perchisp fixed, we consider the effect of being black. ",
  "When in a city with no blacks (percblck = 0), ",
  "a black player is predicted to earn ",
  100 * as.numeric(printabscoef(model, 10, dig[10])),
  "% less than a comparable white player. ",
  "As percblck increases, the salary of blacks increases relative to that ",
  "for whites. When percblck = 10, the percentage difference becomes ",
  black_txt, " + ", black_pct_txt, "(10) = ", gap_at_10,
  ", i.e. black players are predicted to earn ", 100 * abs(gap_at_10),
  "% less than whites. ",
  "When percblck = 20, black players are predicted to earn ",
  100 * gap_at_20,
  "% MORE than whites\nSimilarly, Hispanics earn less than whites in cities ",
  "with a low perchisp. We can find the cutoff value of perchisp:\n\t",
  hispan_txt, " + ", hispan_pct_txt,
  " perchisp = 0\n, which gives perchisp = ", cutoff,
  ". \nHolding percblck fixed, Hispanics are predicted to earn less than ",
  "whites in cities where the percentage of Hispanics is less than ", cutoff,
  "%, and the opposite is true if the percentage of Hispanics is above ",
  "that percentage",
  sep = ""
)