#Open a Dataset - 
#Real Data 
#Online Intervention for Perfectionism

dat<-read.csv(file.choose())
names(dat)

#We will be using a couple pretest variables
#Predictor:
#mpsfpre.cm (concern over mistakes)
hist(dat$mpsfpre.cm)

#Outcome:
#cesdpre (depression)
hist(dat$cesdpre.total)

#Our interest is in predicting depression 
#from concern over mistakes
cor(dat$cesdpre.total,dat$mpsfpre.cm)
library(ggplot2)
ggplot(dat,aes(x=mpsfpre.cm, y=cesdpre.total)) +
  geom_point() + geom_smooth()

#Traditional Regression Model
m<-lm(cesdpre.total ~ mpsfpre.cm, data=dat)
summary(m)
confint(m)

#Interpretation

#p-value
#The probability of observing a test 
#statistic this extreme, if Ho:b*=0 is true, 
#is .00016

#Confidence Interval
#If we were to repeat the study many times,
#95% of the CIs are expected to contain b*


#Bayesian Regression (all defaults)

#4 chains, 2000 samples per chain

#Burn-in (warm-up): 
# 1st 1000 samples of each chain

library(rstanarm)
mb<-stan_glm(cesdpre.total ~ mpsfpre.cm, 
              data=dat)
#Equivalent Model (specifying number of 
#  chains and iterations):
mb<-stan_glm(cesdpre.total ~ mpsfpre.cm,
             chains=4, iter=2000,
             data=dat)
#Priors

#What priors are being used
prior_summary(mb)

#Intercept Prior Calculation
#Mean
mean(dat$cesdpre.total)
#SD
sd(dat$cesdpre.total)*2.5

#Plot the intercept prior
x<-seq(-75,125,length=500)
y<-dnorm(x, mean=mean(dat$cesdpre.total),
         sd=sd(dat$cesdpre.total)*2.5)
plot(x,y)

#Regression Coefficient Prior Calculation
#Mean (mean = 0)
#SD
sd(dat$cesdpre.total)/sd(dat$mpsfpre.cm)*2.5

#Plot the regression coefficient prior
x<-seq(-15,15,length=500)
y<-dnorm(x, mean=0,
     sd=sd(dat$cesdpre.total)/sd(dat$mpsfpre.cm)*2.5)
plot(x,y)

#Sigma Prior (SD of the residuals)
#Exponential Rate
1/sd(dat$cesdpre.total)


#MCMC Progress/Convergence

library(bayesplot)
#Trace plot for chains by parameter
mcmc_trace(mb)
mcmc_trace(mb,window=c(300,325))

#Plot of rhat
#Comparison of between and within chain variances)
mcmc_rhat(rhat(mb)) + yaxis_text(hjust = 1)

summary(mb) #mean_ppd, n_eff, mcse 
mean(dat$cesdpre.total) #compare against mean_ppd
neff_ratio(mb)

#Posterior Predictive Check
pp_check(mb, nreps = 100)


#Results Summary
summary(mb)
coef(m)
posterior_vs_prior(mb,pars = "beta")

#Distribution of the Parameters
library(bayesplot)
mcmc_dens(mb)
mcmc_dens(mb,pars=c("mpsfpre.cm"))

library(bayestestR)
describe_posterior(mb,
            parameters = "mpsfpre.cm",
            test="none", ci_method="HDI")

#CI Interpretation: There is a 95% chance
#that the true parameter falls between
#.31 and .92 (Much improved!)

#Explore the posterior (for our reg coef)
post<-as.matrix(mb)
sum(post[,"mpsfpre.cm"]<0)/4000

#Interpretation: There is 0% chance that
#the regression parameter falls below
#0

min(post[,"mpsfpre.cm"])

#Equivalence Test (Ho: b*<=-.3|b*>.3)
describe_posterior(mb, rope_range=c(-.3,.3),
               parameters = "mpsfpre.cm",
               test=c("rope","equitest"))

#Bayes Factors (Effect Size?)
bayesfactor_parameters(mb)
bayesfactor_parameters(mb, null=c(-.3,.3))

#Change Prior (More informative)
x<-seq(-15,15,length=500)
y<-dnorm(x, mean=0,
         sd=sd(dat$cesdpre.total)/sd(dat$mpsfpre.cm)*2.5)
plot(x,y,ylim=c(0,.5))
y1<-dnorm(x, mean=.5, sd=1.5)
lines(x,y1,col="red")

mb2<-stan_glm(cesdpre.total ~ mpsfpre.cm, 
     prior = normal(.5, 1.5, autoscale=FALSE),
     data=dat)
prior_summary(mb2)
describe_posterior(mb2,
       parameters = "mpsfpre.cm",
       test="none", ci_method="HDI")