# Empirical Bayes Example with National Hockey League (NHL) Data # More specifically, trying to model the winning percentage of # goalies in the NHL #Open the Dataset d<-read.csv(file.choose()) d<-d[,c(2,7,8)] head(d) #Compute Games Played (gp) from Wins + Losses d$gp <- d$W + d$L head(d) #Compute Winning Percentage (wp) d$wp <- d$W/d$gp d<-d[complete.cases(d),] head(d) #"Prior" Distribution on Winning Percentage using the Beta Distribution #Use players who have gp > 10 and to estimate the (hyper)parameters of #the Beta distribution d2<-subset(d, gp > 20) m <- MASS::fitdistr(d2$wp, dbeta, start = list(shape1 = 2, shape2 = 2)) # Shape Parameters (alpha, beta) (alpha <- m$estimate[1]) (beta <- m$estimate[2]) # E(wp) alpha/(alpha+beta) # How well does the Beta prior distribution fit the (subsetted) data? p = seq(0, 1, length=1000) plot(p,dbeta(p, alpha, beta)) lines(density(d2$wp)) # Empirical Bayes Estimates of the Winning Percentage # Note: E(x) = alpha/(alpha + beta) so: # EB(x) = (Wins + alpha)/(gp + alpha + beta) d$wp_EB <- (d$W + alpha)/(d$gp + alpha + beta) head(d, n = 10) tail(d, n = 10) # Empirical Bayes Estimates for Players With a Large wp d[d$wp > .8,] # Empirical Bayes Estimates for a New Goalie with 2 wins in 6 games (2 + alpha)/(6 + alpha + beta) # Multileve Model Approach library(lme4) mlm <- glmer(cbind(W, L) ~ 1 + (1|Name), data = d, family = binomial) d$wp_MLM <- fitted(mlm) head(d) tail(d) # Bayesian Approach library(brms) bayes <- brm(W|trials(gp) ~ 1 + (1|Name), data = d, family = binomial, iter = 1000, thin = 10) d$wp_bayes <- fitted(bayes)[,1] / d$gp head(d) tail(d)