#Example 1
# Testing a hypothesis about two population means when the
# population variances are unknown

#Set working directory
setwd("~/Dropbox/ISI SCB/Data sets")

# read in csv file
rd <-read.csv("reading.csv", TRUE)

#assign the list of reading scores to the variable x 
#for "Standard" method
x <- rd$Reading.score[rd$Method=="Standard"]

#assign the list of reading scores to the variable y 
#for "New" method
y <- rd$Reading.score[rd$Method=="New"]


#1:standard method
#2:new method

#H0: mu1 -mu2 = 0 , H1: mu1 - mu2 < 0 
#t-test
t.test(x,y, alternative="less", var.equal = FALSE)
#var.equal = TRUE, when population variances 
#are assumed to be equal.
#var.equal = FALSE is the default when population variances
#are assumed to be unequal.

# this will enable us to get a 95% two-sided confidence iterval
#H0: mu1-mu2 = 0 , H1: mu1-mu2 not= 0 
t.test(x,y, alternative="two.sided", var.equal = FALSE)


#check for outliers
boxplot(x,y)

#assess normality assumptions of x and y
qqnorm(x, ylab = "x Sample Quantiles")
shapiro.test(x)
qqnorm(y, ylab = "y Sample Quantiles")
shapiro.test(y)

#removing outliers

sortx <- sort(x)
xx <- sortx[2:9]

# checks after removal of outliers
boxplot(xx,y)
qqnorm(xx, ylab = "xx Sample Quantiles")
shapiro.test(xx)

t.test(xx,y, alternative="less", var.equal = FALSE)

 # to get 95% confidence intervals
t.test(xx,y, alternative="two.sided", var.equal = FALSE)

var.test(x,y)