#Open a Dataset - #Real Data #Online Intervention for Perfectionism dat<-read.csv(file.choose()) names(dat) #We will be using a couple pretest variables #Predictor: #mpsfpre.cm (concern over mistakes) hist(dat$mpsfpre.cm) #Outcome: #cesdpre (depression) hist(dat$cesdpre.total) #Our interest is in predicting depression #from concern over mistakes cor(dat$cesdpre.total,dat$mpsfpre.cm) library(ggplot2) ggplot(dat,aes(x=mpsfpre.cm, y=cesdpre.total)) + geom_point() + geom_smooth() #Traditional Regression Model m<-lm(cesdpre.total ~ mpsfpre.cm, data=dat) summary(m) confint(m) #Interpretation #p-value #The probability of observing a test #statistic this extreme, if Ho:b*=0 is true, #is .00016 #Confidence Interval #If we were to repeat the study many times, #95% of the CIs are expected to contain b* #Bayesian Regression (all defaults) #4 chains, 2000 samples per chain #Burn-in (warm-up): # 1st 1000 samples of each chain library(rstanarm) mb<-stan_glm(cesdpre.total ~ mpsfpre.cm, data=dat) #Equivalent Model (specifying number of # chains and iterations): mb<-stan_glm(cesdpre.total ~ mpsfpre.cm, chains=4, iter=2000, data=dat) #Priors #What priors are being used prior_summary(mb) #Intercept Prior Calculation #Mean mean(dat$cesdpre.total) #SD sd(dat$cesdpre.total)*2.5 #Plot the intercept prior x<-seq(-75,125,length=500) y<-dnorm(x, mean=mean(dat$cesdpre.total), sd=sd(dat$cesdpre.total)*2.5) plot(x,y) #Regression Coefficient Prior Calculation #Mean (mean = 0) #SD sd(dat$cesdpre.total)/sd(dat$mpsfpre.cm)*2.5 #Plot the regression coefficient prior x<-seq(-15,15,length=500) y<-dnorm(x, mean=0, sd=sd(dat$cesdpre.total)/sd(dat$mpsfpre.cm)*2.5) plot(x,y) #Sigma Prior (SD of the residuals) #Exponential Rate 1/sd(dat$cesdpre.total) #MCMC Progress/Convergence library(bayesplot) #Trace plot for chains by parameter mcmc_trace(mb) mcmc_trace(mb,window=c(300,325)) #Plot of rhat #Comparison of between and within chain variances) mcmc_rhat(rhat(mb)) + yaxis_text(hjust = 1) summary(mb) #mean_ppd, n_eff, mcse mean(dat$cesdpre.total) #compare against mean_ppd neff_ratio(mb) #Posterior Predictive Check pp_check(mb, nreps = 100) #Results Summary summary(mb) coef(m) posterior_vs_prior(mb,pars = "beta") #Distribution of the Parameters library(bayesplot) mcmc_dens(mb) mcmc_dens(mb,pars=c("mpsfpre.cm")) library(bayestestR) describe_posterior(mb, parameters = "mpsfpre.cm", test="none", ci_method="HDI") #CI Interpretation: There is a 95% chance #that the true parameter falls between #.31 and .92 (Much improved!) #Explore the posterior (for our reg coef) post<-as.matrix(mb) sum(post[,"mpsfpre.cm"]<0)/4000 #Interpretation: There is 0% chance that #the regression parameter falls below #0 min(post[,"mpsfpre.cm"]) #Equivalence Test (Ho: b*<=-.3|b*>.3) describe_posterior(mb, rope_range=c(-.3,.3), parameters = "mpsfpre.cm", test=c("rope","equitest")) #Bayes Factors (Effect Size?) bayesfactor_parameters(mb) bayesfactor_parameters(mb, null=c(-.3,.3)) #Change Prior (More informative) x<-seq(-15,15,length=500) y<-dnorm(x, mean=0, sd=sd(dat$cesdpre.total)/sd(dat$mpsfpre.cm)*2.5) plot(x,y,ylim=c(0,.5)) y1<-dnorm(x, mean=.5, sd=1.5) lines(x,y1,col="red") mb2<-stan_glm(cesdpre.total ~ mpsfpre.cm, prior = normal(.5, 1.5, autoscale=FALSE), data=dat) prior_summary(mb2) describe_posterior(mb2, parameters = "mpsfpre.cm", test="none", ci_method="HDI")