#----------------------------------------------------------------------
#             Answers to exercises of Chapter 5
#                     One sample tests
#----------------------------------------------------------------------

#----------------------------------------------------------------------
#                      Lab session 1
#----------------------------------------------------------------------

#----------------------------------------------------------------------
#                       Exercise 1.1
#----------------------------------------------------------------------

#----------------------- question 1 -----------------------------------
# z-test of H0: mu = mu0 when the standard deviation is known.
n <- 10                                     # sample size
sig <- 20                                   # known standard deviation
mu0 <- 1000                                 # mean under the null hypothesis
xbar <- 1011                                # empirical mean
Test <- sqrt(n) * (xbar - mu0) / sig        # test statistic
qnorm(c(0.005, 0.995))                      # limits for test statistic
# abs() keeps the two-sided p-value valid (never above 1) for either
# sign of the test statistic
2 * pnorm(abs(Test), lower.tail = FALSE)    # p-value two-sided
#----------------------- question 2 -----------------------------------
qnorm(0.99)                                 # limit for test statistic
pnorm(Test, lower.tail = FALSE)             # p-value one-sided (upper tail)
#----------------------- question 3 -----------------------------------
# same tests with a larger sample and a smaller observed deviation
n <- 100                                    # sample size
xbar <- 1005                                # empirical mean
Test <- sqrt(n) * (xbar - mu0) / sig        # test statistic
2 * pnorm(abs(Test), lower.tail = FALSE)    # p-value two-sided
pnorm(Test, lower.tail = FALSE)             # p-value one-sided (upper tail)
 
#----------------------------------------------------------------------
#                       Exercise 1.2
#----------------------------------------------------------------------

#----------------------- question 1 -----------------------------------
n <- 4                                      # sample size
sig <- 5                                    # known standard deviation
mu0 <- 150                                  # null hypothesis
# Test <- sqrt(n)*(xbar-mu0)/sig            # test statistic
# reject H0 (declare patient not at risk) if Test smaller than
qnorm(0.05)                                 # limit for test statistic
#----------------------- question 2 -----------------------------------
X <- c(140, 133, 148, 144)                  # sample
xbar <- mean(X)                             # empirical mean
Test <- sqrt(n) * (xbar - mu0) / sig        # test statistic
Test
pnorm(Test)                                 # p-value (lower tail)
# patient not at risk
#----------------------- question 3 -----------------------------------
# ss2 is the empirical standard deviation (divisor n, not n-1), so the
# Student statistic uses sqrt(n-1):
# Test <- sqrt(n-1)*(xbar-mu0)/ss2          # test statistic
# reject H0 (declare patient not at risk) if Test smaller than
qt(0.05, df = n - 1)                        # limit for test statistic
#----------------------- question 4 -----------------------------------
# centred form: same value as sqrt(mean(X^2) - mean(X)^2) but without
# subtracting two large, nearly equal numbers
ss2 <- sqrt(mean((X - xbar)^2))             # empirical standard deviation
Test <- sqrt(n - 1) * (xbar - mu0) / ss2    # test statistic
pt(Test, df = n - 1)                        # p-value (lower tail)
# patient not at risk

#----------------------------------------------------------------------
#                       Exercise 1.3
#----------------------------------------------------------------------

#----------------------- question 1 -----------------------------------
# read the semicolon-separated file and pull out the three columns used
bosson <- read.table(file = "data/bosson.csv", header = TRUE, sep = ";")
B <- bosson[, "bmi"]                        # column "bmi"
G <- bosson[, "gender"]                     # column "gender"
C <- bosson[, "country"]                    # column "country"
boxplot(B ~ G)                              # bmi by gender
boxplot(B ~ C)                              # bmi by country
#----------------------- question 2 -----------------------------------
mean(B)                                     # 22.76071
# one-sided Student tests on the whole sample
t.test(B, mu = 23, alternative = "less")
# No it is not significantly smaller than 23
t.test(B, mu = 22, alternative = "greater")
# Yes it is significantly larger than 22
#----------------------- question 3 -----------------------------------
# one-sided Student tests within each country
t.test(B[C == "France"], mu = 25, alternative = "greater")
# Yes, it is significantly larger than 25
t.test(B[C == "Vietnam"], mu = 22, alternative = "less")
# Yes, it is significantly smaller than 22


#----------------------------------------------------------------------
#                      Lab session 2
#----------------------------------------------------------------------

#----------------------------------------------------------------------
#                       Exercise 2.1
#----------------------------------------------------------------------

#----------------------- question 1 -----------------------------------
# goodness of fit of the observed counts to the model (0.16, 0.48, 0.36)
chisq.test(
  x = c(1600, 4900, 3500),
  p = c(0.16, 0.48, 0.36)
)
# p-value=0.08799>0.05: accept H0, the theoretical model is acceptable
# same proportions, sample ten times larger
chisq.test(
  x = c(16000, 49000, 35000),
  p = c(0.16, 0.48, 0.36)
)
# p-value=2.781e-11<0.05: reject H0, the theoretical model is not acceptable

#----------------------------------------------------------------------
#                       Exercise 2.2
#----------------------------------------------------------------------

tab <- c(687, 1986, 1762, 565)              # observed counts for 0, 1, 2, 3
#----------------------- question 1 -----------------------------------
# compare with binomial distribution with parameters:
n <- 3; p <- 0.5
chisq.test(tab, p = dbinom(0:n, n, p))
# p-value=1.342e-05<0.05, the goodness-of-fit is rejected
#----------------------- question 2 -----------------------------------
# compare with binomial distribution with parameters:
n <- 3; p <- 1 / (1 + 1.05); p
chisq.test(tab, p = dbinom(0:n, n, p))
# p-value=0.1422, the goodness-of-fit is accepted
#----------------------- question 3 -----------------------------------
# the total number of children is (n children per observed unit):
N <- n * sum(tab)
# the relative frequency of girls is:
phat <- sum(tab * (0:n)) / N; phat
# the chi-squared distance is:
dist <- chisq.test(tab, p = dbinom(0:n, n, phat))$statistic
# the p-value is computed by hand because one parameter (phat) was
# estimated from the data: df = number of classes - 1 - 1
pchisq(dist, df = length(tab) - 2, lower.tail = FALSE)
# p-value=0.351, the goodness-of-fit is accepted

#----------------------------------------------------------------------
#                       Exercise 2.3
#----------------------------------------------------------------------
#----------------------- question 1 -----------------------------------
# load the data, show the first rows and build the sub-samples used below
Bo <- read.table("data/bosson.csv", header = TRUE, sep = ";")
Bo[1:10, ]
B <- Bo[, "bmi"]
C <- Bo[, "country"]
G <- Bo[, "gender"]

Bv <- B[C == "Vietnam"]                     # bmi, Vietnam
Bf <- B[C == "France"]                      # bmi, France
Bfw <- B[(C == "France") & (G == "F")]      # bmi, France, gender "F"
Bfm <- B[(C == "France") & (G == "M")]      # bmi, France, gender "M"
R <- Bo[, "risk"]
summary(B)
summary(Bv)
summary(Bf)
summary(Bfw)
summary(Bfm)
summary(R)
#----------------------- question 2 -----------------------------------
# Kolmogorov-Smirnov test against a fully specified normal distribution,
# then empirical cdf with the theoretical cdf overlaid in red
ks.test(Bf, pnorm, mean = 26, sd = 3)
plot(ecdf(Bf))
curve(pnorm(x, mean = 26, sd = 3), add = TRUE, col = "red")
ks.test(Bv, pnorm, mean = 20, sd = 2.7)
plot(ecdf(Bv))
curve(pnorm(x, mean = 20, sd = 2.7), add = TRUE, col = "red")
#----------------------- question 3 -----------------------------------
# normal Q-Q plot followed by the Shapiro-Wilk normality test
qqnorm(B)
shapiro.test(B)
qqnorm(Bv)
shapiro.test(Bv)
qqnorm(Bf)
shapiro.test(Bf)
#----------------------- question 4 -----------------------------------
# one-sided Student's t-tests of the mean against 25
t.test(Bfw, mu = 25, alternative = "less")
t.test(Bfm, mu = 25, alternative = "greater")
#----------------------- question 5 -----------------------------------
# observed relative frequencies of R
tR <- table(R)
round(tR / length(R), 3)
# binomial(5, 0.3) probabilities for the values 0..5
p <- dbinom(0:5, 5, 0.3)
round(p, 3)
chisq.test(tR, p = p)                       # chi-squared test
# counts in 4 classes
# NOTE(review): presumably the risk counts regrouped by hand - confirm
tRr <- c(40, 81, 61, 27)
# binomial(3, 0.3) probabilities for the values 0..3
pr <- dbinom(0:3, 3, 0.3)
round(pr, 3)
chisq.test(tRr, p = pr)
chisq.test(tRr, p = c(0.2, 0.4, 0.3, 0.1))

