################################################################################ ## Chapter 10 ## April 30, 2009 ## Alan T. Arnholt options(width=65) library(PASWR) ################################################################################ # Example 10.1 attach(Phone) SIGN.test(call.time,md=2.1) detach(Phone) ################################################################################ # Example 10.3 PH <- c(7.2,7.3,7.3,7.4) # Enter data DIFF <- PH-7.25 # Create differences (DIFF) absD <- abs(DIFF) # Absolute value of DIFF (absD) rankabsD <- rank(absD) # Rank the absD values signD <- sign(DIFF) # Store the signs of DIFF signrank <- rankabsD*signD # Create a vector of signed ranks tp <- sum(signrank[signrank>0]) # Calculate t+ tp # n <- length(DIFF) signs <- as.matrix(expand.grid(rep(list(0:1),n))) signs # 1s represent positive ranks mat <- matrix(rankabsD) # Put rankabsD in matrix form mat # Tp <- signs%*%mat # (16X4)*(4X1) = 16X1 vector of T+ Tp <- sort(Tp) # Sort the distribution of T+ SampDist <- table(Tp)/2^n SampDist # Sampling distribution of T+ # p.value <- sum(Tp>=tp)/2^n # Calculate p-value p.value # wilcoxE.test(PH, mu=7.25, alternative="greater") ################################################################################ # Example 10.4 attach(Wait) wilcox.test(wt,mu=6,alternative="less") # n2means <- apply(SRS(wt,2),1,mean) #Computing the n choose 2 means WalshAverages <- c(wt,n2means) # qsignrank(0.05,15) # psi <- round(psignrank(28:33,15),3) names(psi) <- 28:33 # SWA <- sort(WalshAverages) SWA[90] # wilcox.test(wt,mu=6,alternative="less",conf.int=TRUE) # wilcoxE.test(wt,mu=6,alternative="less") detach(Wait) ################################################################################ # Example 10.5 attach(Aggression) # part a. EDA(violence-noviolence) # part b. wilcox.test(violence,noviolence,paired=TRUE,alternative="greater") # part c. wilcoxE.test(violence,noviolence,paired=TRUE,alternative="greater") # part d. PD <- violence-noviolence n2means <- apply(SRS(PD,2),1,mean) #Computing the n choose 2 means SWA <- sort(c(PD,n2means)) #Sorted Walsh averages n <- length(PD) k <- 0.5 + n*(n+1)/4 + qnorm(.05)*sqrt(n*(n+1)*(2*n+1)/24) k <- floor(k) k SWA[k] #Walsh average k # Or ADD <- outer(PD, PD, "+") SWA2 <- sort(ADD[!lower.tri(ADD)])/2 SWA2[k] #Walsh average k detach(Aggression) ################################################################################ # Example 10.6 x <- c(2,5) y <- c(9,12,14) n <- length(x) m <- length(y) N <- n + m r <- rank(c(x, y)) u <- sum(r[seq(along = x)])- n*(n + 1)/2 # observed u value w <- sum(r[seq(along = x)]) # observed w value val <- SRS(r,n) # possible rankings for X W <- apply(val,1,sum) # W values U <- W - n*(n + 1)/2 # U values display <- cbind(val,W,U) # X rankings with W and U display # table(W)/choose(5,2) # Sampling distribution of W # # dwilcox(3:9,2,3) # Produces distribution of W in S-PLUS # table(U)/choose(5,2) # Sampling distribution of U dwilcox(0:6,2,3) # Produces distribution of U in R ################################################################################ # Example 10.7 x <- c(7.2,7.2,7.3,7.3) y <- c(7.3,7.3,7.4,7.4) n <- length(x) m <- length(y) N <- n + m r <- rank(c(x, y)) w <- sum(r[seq(along = x)]) # observed w value w val <- SRS(r,n) # possible rankings W <- apply(SRS(r,n),1,sum) # W values table(W)/choose(8,4) # p.value <- 2*(sum(W <= w)/choose(N,n)) p.value # wilcoxE.test(x, y) ################################################################################ # Example 10.8 library(lattice) # Use to get "trellis" graphs in R A <- c(1.2, 1.5, 2.3, 4.3) B <- c(4.5, 5.7, 6.1, 8.6) n <- length(A) m <- length(B) r <- c(A,B) f <- factor(c(rep("A",n), rep("B",m))) graph1 <- bwplot(f~r, xlab="", ylab="Diets", main="Weight Gain") graph2 <- dotplot(f~r, xlab="", ylab="Diets", main="Weight Gain") print(graph1, split=c(1,1,2,1), more=TRUE) print(graph2, split=c(2,1,2,1), more=FALSE) # pwilcox(1:(n*m),n,m) # R only # pwilcox( (1+n*(n+1)/2):(n*m+n*(n+1)/2), n, m) # S-PLUS only # pwil <- pwilcox(1:(n*m),n,m) # For R which(pwil >= 0.05)[1] # k <- 2 diffs <- matrix(sort(outer(A,B,"-")),byrow=FALSE,nrow=4) diffs CL <- 1-2*pwilcox((k-1),n,m) CL CI <- c(diffs[k],diffs[n*m-k+1]) CI # wilcox.test(A, B, conf.int=TRUE, conf.level=.90) # conf.int=TRUE: only R # wilcoxE.test(A, B) ################################################################################ # Example 10.9 attach(Swimtimes) wilcox.test(highfat,lowfat) # library(coin) # needed for wilcox_test() GR <- factor(c(rep("lowfat",14),rep("highfat",14))) wilcox_test(c(lowfat,highfat)~GR,distribution="exact", conf.int=TRUE,conf.level=.90) # Only R # part d. n <- length(highfat) m <- length(lowfat) N <- n + m diffs <-sort(outer(highfat,lowfat,"-")) k <- 0.5 + n*m/2 + qnorm(.05)*sqrt(n*m*(N+1)/12) # k for 90% CI k <- floor(k) k CI <- c(diffs[k],diffs[n*m-k+1]) #90% CI CI detach(Swimtimes) ################################################################################ # Example 10.10 library(lattice) # Only for R Method1 <- c(6,1,2,0,0,1,1,3,1,2,1,2,4,2,1,1,1,3,7,1) Method2 <- c(3,2,1,2,1,6,2,1,1,2,1,1,2,3,2,2,3,2,5,2) Method3 <- c(2,1,2,3,2,2,4,3,2,3,2,5,1,1,3,7,6,2,2,2) Method4 <- c(2,1,1,3,1,2,1,6,1,1,0,1,1,1,1,2,2,1,5,4) n1 <- length(Method1); n2 <- length(Method2) n3 <- length(Method3); n4 <- length(Method4) NumberFT <- c(Method1,Method2,Method3,Method4) g <- as.factor(c(rep("Method1",n1),rep("Method2",n2), rep("Method3",n3),rep("Method4",n4))) # Methods # Equivalently, g could be created using g <- factor(rep(c("Method1","Method2","Method3","Method4"),rep(20,4))) A <- bwplot(g~NumberFT,xlab="Free Throws",xlim=c(-1,9)) B <- densityplot(~NumberFT|g,layout=c(1,4), xlab="Free Throws",xlim=c(-1,9)) print(A,split=c(1,1,2,1),more=TRUE) print(B,split=c(2,1,2,1),more=FALSE) # Test # RR <- qchisq(.95,3) # Rejection region RR RKs <- rank(NumberFT) # Ranks for all MRKs <- unstack(RKs,RKs~g) # Only R not S-PLUS MRKs[1:5,] # Show first five rows RK <- apply(MRKs,2,mean) # Treatment ranks names(RK) <- c("MRKT1","MRKT2","MRKT3","MRKT4") RK MRK <- mean(RK) # Overall mean rank MRK N <- length(RKs) Hobs <- 12*(n1*(RK[1] - MRK)^2 + n2*(RK[2] - MRK)^2 + n3*(RK[3] - MRK)^2 + n4*(RK[4] - MRK)^2)/(N*(N+1)) names(Hobs) <- "statistic" Hobs tj <- table(RKs) tj CF <- 1-(sum(tj^3 - tj)/(N^3-N)) # correction factor Hc <- Hobs/CF # corrected statistic hs <- c(Hobs,Hc) hs pval <- 1-pchisq(hs,3) names(pval) <- c("p.value","p.value") pval # kruskal.test(NumberFT~g) # For S-PLUS change ~ to , ################################################################################ # Multiple Comparisons a <- 4 # Four methods Method1 <- c(6,1,2,0,0,1,1,3,1,2,1,2,4,2,1,1,1,3,7,1) Method2 <- c(3,2,1,2,1,6,2,1,1,2,1,1,2,3,2,2,3,2,5,2) Method3 <- c(2,1,2,3,2,2,4,3,2,3,2,5,1,1,3,7,6,2,2,2) Method4 <- c(2,1,1,3,1,2,1,6,1,1,0,1,1,1,1,2,2,1,5,4) n1 <- length(Method1); n2 <- length(Method2) n3 <- length(Method3); n4 <- length(Method4) NumberFT <- c(Method1,Method2,Method3,Method4) N <- length(NumberFT) RKs <- rank(NumberFT) # Ranks for all MRKs <- unstack(RKs,RKs~g) # Only R not S-PLUS RK <- apply(MRKs,2,mean) # Treatment ranks names(RK) <- c("MRKT1","MRKT2","MRKT3","MRKT4") alpha <- 0.20 Z12 <- abs(RK[1]-RK[2])/sqrt((N*(N+1)/12)*(1/n1 + 1/n2)) Z13 <- abs(RK[1]-RK[3])/sqrt((N*(N+1)/12)*(1/n1 + 1/n3)) Z14 <- abs(RK[1]-RK[4])/sqrt((N*(N+1)/12)*(1/n1 + 1/n4)) Z23 <- abs(RK[2]-RK[3])/sqrt((N*(N+1)/12)*(1/n2 + 1/n3)) Z24 <- abs(RK[2]-RK[4])/sqrt((N*(N+1)/12)*(1/n2 + 1/n4)) Z34 <- abs(RK[3]-RK[4])/sqrt((N*(N+1)/12)*(1/n3 + 1/n4)) Zij <- round(c(Z12,Z13,Z14,Z23,Z24,Z34),2) names(Zij) <- c("Z12","Z13","Z14","Z23","Z24","Z34") CV <- round(qnorm(1- alpha/(a*(a-1))),2) Zij CV which(Zij > CV) ################################################################################ # Example 10.11 attach(HSwrestler) HSwrestler[1:5,] FAT <- c(HWFAT,TANFAT,SKFAT) GROUP <- factor(rep(c("HWFAT","TANFAT","SKFAT"),rep(78,3))) BLOCK <- factor(rep(1:78,3)) # used later library(lattice) # Only for R A <- bwplot(GROUP~FAT,xlab="% Fat") B <- densityplot(~FAT|GROUP,layout=c(1,3),xlab="% Fat") print(A,split=c(1,1,2,1),more=TRUE) print(B,split=c(2,1,2,1),more=FALSE) # cfat <- cbind(HWFAT,TANFAT,SKFAT) RK <- t(apply(cfat,1,rank)) OBSandRK <- cbind(cfat,RK) OBSandRK[1:5,] Rj <- apply(RK,2,sum) b <- length(HWFAT) k <- dim(cfat)[2] S <- (12/(b*k*(k+1)))*sum(Rj^2)-3*b*(k+1) S pval <- 1-pchisq(S,k-1) pval # friedman.test(FAT~GROUP|BLOCK) # R syntax # friedman.test(FAT, GROUP, BLOCK) # For S-PLUS # # Multiple comparisons alpha <- 0.20 ZR12 <- abs(Rj[1]-Rj[2])/sqrt(b*k*(k+1)/6) ZR13 <- abs(Rj[1]-Rj[3])/sqrt(b*k*(k+1)/6) ZR23 <- abs(Rj[2]-Rj[3])/sqrt(b*k*(k+1)/6) CV <- round(qnorm(1- alpha/(k*(k-1))),2) ZRij <- round(c(ZR12,ZR13,ZR23),2) names(ZRij) <- c("ZR12","ZR13","ZR23") ZRij CV which(ZRij > CV) # detach(HSwrestler) ################################################################################ # Example 10.12 attach(Soccer) table(Goals) # PX <- c(dpois(0:5,2.5),1-ppois(5,2.5)) PX # EX <- 232*PX OB <- c(19,49,60,47,32,18,7) ans <- cbind(PX,EX,OB) row.names(ans) <- c(" X=0", " X=1", " X=2", " X=3", " X=4", " X=5", "X>=6") ans # # Step 3 chi.obs <- sum((OB-EX)^2/EX) chi.obs # Step 4 p.val <- 1-pchisq(chi.obs,7-1) p.val # chisq.test(x=OB,p=PX) # hist(Goals, breaks=c((-.5+0):(8+.5)), col=13, ylab="", freq=TRUE, main="") x <- 0:8 fx <- (dpois(0:8, lambda=2.5))*232 lines(x, fx, type="h") lines(x, fx, type="p", pch=16) detach(Soccer) ################################################################################ # Example 10.13 # Step 2 attach(Grades) mu <- mean(sat) sig <- sd(sat) # stdev(sat) for S-PLUS c(mu,sig) bin <- seq(mu-3*sig,mu+3*sig,sig) bin table(cut(sat,breaks=bin)) # OB <- hist(sat,breaks=bin,plot=F)$counts PR <- c(pnorm(-2), pnorm(-1:2)- pnorm(-2:1),1-pnorm(2)) EX <- 200*PR ans <- cbind(PR,EX,OB) ans # Step 3. chi.obs <- sum((OB-EX)^2/EX) chi.obs # Step 4. p.val <- 1-pchisq(chi.obs,6-2-1) p.val # chisq.test(x=OB,p=PR) # Figure 10.12 hist(sat,breaks=bin,col=13,ylab="",freq=TRUE,main="") x <- bin[2:7]-sig/2 fx <- PR*200 lines(x,fx,type="h") lines(x,fx,type="p",pch=16) ################################################################################ # Example 10.14 options(digits=5) x <- 5:9 mu <- 6.5 sig <- sqrt(2) x <- sort(x) n <- length(x) FoX <- pnorm(x,mean=mu,sd=sig) FoX # FnX <- seq(1:n)/n Fn1X <- (seq(1:n)-1)/n DP <- (FnX - FoX) DM <- FoX - Fn1X Dp <- abs(DP) Dm <- abs(DM) EXP <- cbind(x,FnX,Fn1X,FoX,Dp,Dm) Mi <-apply(EXP[,c(5,6)],1,max) TOT <- cbind(EXP,Mi) TOT Dn <- max(Mi) Dn # ks.test(x,y="pnorm",mean=mu,sd=sig) # # Figure like 10.13 ksdist(n=5, sims=10000, alpha=0.05) # # Figure like 10.14 ksLdist(sims=10000,n=10) ################################################################################ # Example 10.15 attach(Phone) library(nortest) lillie.test(call.time) # DWA <- function(Dn = .3,n =10) { p.value<- exp(-7.01256*Dn^2*(n + 2.78019) + 2.99587*Dn*(n + 2.78019)^.5-0.122119 + .974598/n^.5+1.67997/n) round(p.value,4) } DWA(Dn=0.1910237,n=23) ################################################################################ # Example 10.16 x <- c(47,50,57,54,52,54,53,65,62,67,69,74,51,57,57,59) shapiro.test(x) ################################################################################ # Scenario One # Step 3. options(digits=7) HA <- c(110,277,50,163,302,63) HAT <- matrix(data=HA,nrow=2,byrow=TRUE) dimnames(HAT) <- list(Gender=c("Male","Female"), Category=c("Very Happy","Pretty Happy", "Not To Happy")) HAT E <- outer(rowSums(HAT), colSums(HAT), "*")/sum(HAT) E # chi.obs <- sum((HAT-E)^2/E ) chi.obs # Step 4. p.val <- 1-pchisq(chi.obs,2) p.val # chisq.test(HAT) # # Scenario Two DT <- c(67,76,57,48,73,79) DTT <- matrix(data=DT,nrow=2,byrow=TRUE) dimnames(DTT) <- list(Treatment=c("Drug","Placebo"), Category=c("Improve","No Change", "Worse")) DTT E <- outer(rowSums(DTT), colSums(DTT), "*")/sum(DTT) E # chi.obs <- sum((DTT-E)^2/E ) chi.obs # p.val <- 1-pchisq(chi.obs,2) p.val # chisq.test(DTT) ################################################################################ # Example 10.17 attach(SDS4) Times # Figure 10.17 options(digits=5) TT <- max(cumsum(Times)) # Time period (seconds) n <- length(lag(Times)) # Number of Cars Elamb <- n/TT # Cars/Time period EMeanExp <- 1/Elamb # Time period/Cars ans <- c(TT, n, Elamb, EMeanExp) names(ans) <- c("Total Time", "# of Cars", "Est. lambda", "Est. Mean") ans hist(Times, prob=TRUE) curve(dexp(x, Elamb), 0, 35, add=TRUE) # Only R c(mean(Times), sd(Times)) # Distribution Check # part b. library(boot) Times.mean <- function(data,i) { d <- data[i] M <- mean(d) M } # set.seed(10) B <- 9999 b.obj <- boot(Times,Times.mean,R=B) b.obj # plot(b.obj) # Histogram and Q-Q plot of t* values # boot.ci(b.obj,type=c("norm","basic","perc","bca")) # T0 <- b.obj$t0 Tstar <- b.obj$t BIAS <- mean(Tstar)-T0 BIAS conf.level <- .95 alpha <- 1-conf.level # Normal CI using 10.38 c( (T0-BIAS)-qnorm(1-alpha/2)*sd(Tstar), (T0-BIAS)+qnorm(1-alpha/2)*sd(Tstar) ) # Basic CI using 10.42 bt <- sort(Tstar) c(2*T0 - bt[(B+1)*(1-alpha/2)], 2*T0 - bt[(B+1)*(alpha/2)]) # Percentile CI using 10.43 c(bt[(B+1)*(alpha/2)],bt[(B+1)*(1-alpha/2)]) # BCa CI z <- qnorm(sum(Tstar < T0)/B) # Using (10.44) z n <- length(Times) u <- rep(0, n) for (i in 1:n) { u[i] <- mean(Times[-i]) } ubar <- mean(u) numa <- sum((ubar-u)^3) dena <- 6*sum((ubar-u)^2)^(3/2) a <- numa/dena a a1 <- pnorm( z + (z-qnorm(1-alpha/2))/(1-a*(z-qnorm(1-alpha/2))) ) a2 <- pnorm( z + (z+qnorm(1-alpha/2))/(1-a*(z+qnorm(1-alpha/2))) ) # kl <- floor((B+1)*a1) ll <- bt[kl] + (qnorm(a1)-qnorm(kl/(B+1)))/(qnorm((kl+1)/(B+1)) -qnorm(kl/(B+1)))*(bt[kl+1]-bt[kl]) ku <- floor((B+1)*a2) ul <- bt[ku] + (qnorm(a2)-qnorm(ku/(B+1)))/(qnorm((ku+1)/(B+1)) -qnorm(ku/(B+1)))*(bt[ku+1]-bt[ku]) c(ll,ul) ################ Times.sd <- function(data,i) { d <- data[i] SD <- sqrt(var(d)) SD } set.seed(13) B <- 9999 b.obj <- boot(Times,Times.sd,R=B) b.obj # plot(b.obj) # Histogram and Q-Q plot of t* values # boot.ci(b.obj,type=c("norm","basic","perc","bca")) detach(SDS4) ################################################################################ # Example 10.18 library(boot) B <- 999 attach(Ratbp) Ratbp Blood <- c(Treat,Cont) # pdT6 <- SRS(1:12,6) # OR t(Combinations(12,6)) Comb <- matrix(rep(c(Treat,Cont),924),ncol=12,byrow=TRUE) Theta <- array(0,924) for(i in 1:924) { Theta[i] <- mean(Comb[i,pdT6[i,]])-mean(Comb[i,-pdT6[i,]]) } Theta.obs <- mean(Treat) - mean(Cont) pval <- sum(Theta >= Theta.obs)/choose(12,6) pval ## library(coin) GR <- as.factor(c(rep("Treat",6),rep("Cont",6))) oneway_test(Blood~GR,distribution="exact",alternative="less") ## Estimated Permutation Method blood.fun <- function(data,i) { d <- data[i] MD <-mean(d[1:6]) - mean(d[7:12]) MD } set.seed(13) B <- 499 boot.blood <- boot(Blood, blood.fun, R=B, sim="permutation") plot(boot.blood) pval.boot <- (sum(boot.blood$t >= boot.blood$t0)+1)/(B+1) pval.boot ## Estimated Bootstrap Method blood.fun <- function(data,i) { d <- data[i] MD <-mean(d[1:6]) - mean(d[7:12]) MD } set.seed(13) B <- 499 boot.blood.b <- boot(Blood, blood.fun, R=B, sim="ordinary") plot(boot.blood.b) pval.boot.b <- (sum(boot.blood.b$t >= boot.blood.b$t0)+1)/(B+1) pval.boot.b ################################################################################