# New Statistics Short Course
# Exercise 1: Intro to NHST

## Differences Between Males and Females in
## Math Ability

# Load the exercise data (ex1_fisher_neyman_pearson) through an
# interactive file picker, then take a first look at it
csv_path <- file.choose()
dat <- read.csv(csv_path)
names(dat) # column names
head(dat)  # first few rows

# Q1: Fisher
# Mean math score within each sex group
with(dat, tapply(math, sex, mean))
# Ho: mu1 = mu2
# One-sided test: alternative = "greater" asks whether the FIRST level of
# `sex` has the larger mean, so the direction depends on the level order
# (check with levels(factor(dat$sex)))
t.test(math ~ sex, data = dat, alternative = "greater")
# Conclusion: Small p-value, i.e. evidence against Ho;
# Females better than males in Math
# (note: the p-value by itself does not say how large the difference is)

# Q2: Neyman-Pearson
# Hypotheses
# Ho: mu1 = mu2,  i.e. mu(males) = mu(females)
# Ha: mu1 != mu2, i.e. mu(males) != mu(females)

# Power analysis for a two-sample t-test; alpha and power are fixed up
# front and shared by both scenarios below
library(pwr)
alpha_level <- .05
target_power <- .95

# Scenario 1: MMES = d = .8
pow1 <- pwr.t.test(
  d = .8,
  sig.level = alpha_level,
  power = target_power,
  type = "two.sample"
)
print(pow1)
plot(pow1)

# Scenario 2: what if the MMES was d = .3 (pow1 is overwritten)
pow1 <- pwr.t.test(
  d = .3,
  sig.level = alpha_level,
  power = target_power,
  type = "two.sample"
)
print(pow1)
plot(pow1)

# Plot the potential distributions (difference in population means)
# to show power, using n = 42 per group from the d = .8 analysis above.
# Both curves are simulated draws of the standardized mean difference:
# one centred on 0 (null) and one centred on the effect size (alternate).
library(ggplot2)

set.seed(42)              # fixed seed so the figure is reproducible
n_per_group <- 42         # per-group sample size from the power analysis
effect_size <- .8         # MMES (d) used for the alternate distribution
n_draws <- 10000          # simulated draws per distribution
# Standard error of the difference between two group means (SD units)
se_diff <- sqrt(1 / n_per_group + 1 / n_per_group)

pop1 <- rnorm(n_draws, mean = 0, sd = se_diff)
pop2 <- rnorm(n_draws, mean = effect_size, sd = se_diff)

# Upper two-sided critical value at alpha = .05, taken from the exact
# null distribution rather than the simulated sample's quantile, so the
# reference line carries no Monte Carlo noise
cv <- qnorm(.975, mean = 0, sd = se_diff)

dat2 <- data.frame(
  dens = c(pop1, pop2),
  lines = rep(c("a", "b"), each = n_draws)
)

# ".05" labels the part of the alternate curve left of the critical value;
# ".025" labels the upper tail of the null curve beyond it
ggplot(dat2, aes(x = dens, fill = lines)) +
  geom_density(alpha = 0.5) +
  geom_vline(xintercept = cv) +
  annotate("text", x = .34, y = .1, label = ".05") +
  annotate("text", x = 0, y = 1, label = "Null") +
  annotate("text", x = effect_size, y = 1, label = "Alternate") +
  annotate("text", x = .52, y = .05, label = ".025")

# Test statistic / p-value
# Two-sided two-sample t-test of math by sex. Written with `data = dat`
# (as in the Q1 call) instead of `dat$` inside the formula; the test
# itself is unchanged, only the printed data label differs.
t.test(math ~ sex, data = dat)
# Decision rule: reject Ho when p < .05 (the sig.level used in the
# power analysis)
# Conclusion: Reject Ho in favor of Ha