# Empirical Bayes Example with National Hockey League (NHL) Data

# More specifically, trying to model the winning percentage of 
# goalies in the NHL

#Open the Dataset
d<-read.csv(file.choose())
d<-d[,c(2,7,8)]
head(d)


#Compute Games Played (gp) from Wins + Losses
d$gp <- d$W + d$L
head(d)

#Compute Winning Percentage (wp)
d$wp <- d$W/d$gp
d<-d[complete.cases(d),]
head(d)

#"Prior" Distribution on Winning Percentage using the Beta Distribution

#Use players who have gp > 10 and to estimate the (hyper)parameters of 
#the Beta distribution
d2<-subset(d, gp > 20)
m <- MASS::fitdistr(d2$wp, dbeta,
                    start = list(shape1 = 2, shape2 = 2))

# Shape Parameters (alpha, beta)
(alpha <- m$estimate[1])
(beta <- m$estimate[2])
# E(wp)
alpha/(alpha+beta)

# How well does the Beta prior distribution fit the (subsetted) data?
p = seq(0, 1, length=1000)
plot(p,dbeta(p, alpha, beta))
lines(density(d2$wp))

# Empirical Bayes Estimates of the Winning Percentage
# Note: E(x) = alpha/(alpha + beta) so:
# EB(x) = (Wins + alpha)/(gp + alpha + beta)
d$wp_EB <- (d$W + alpha)/(d$gp + alpha + beta)
head(d, n = 10)
tail(d, n = 10)

# Empirical Bayes Estimates for Players With a Large wp
d[d$wp > .8,]

# Empirical Bayes Estimates for a New Goalie with 2 wins in 6 games
(2 + alpha)/(6 + alpha + beta)



# Multileve Model Approach
library(lme4)
mlm <- glmer(cbind(W, L) ~ 1 + (1|Name), 
                         data = d,
                         family = binomial)
d$wp_MLM <- fitted(mlm)
head(d)
tail(d)


# Bayesian Approach

library(brms)
bayes <- brm(W|trials(gp) ~ 1 + (1|Name), 
                 data = d,
                 family = binomial,
                 iter = 1000,
                 thin = 10)
d$wp_bayes <- fitted(bayes)[,1] / d$gp
head(d)
tail(d)