# Chapter 2 Script # November 16, 2007 # Alan T. Arnholt library(PASWR) options(width=65) # # Example 2.1 Grades <- c("A","D","C","D","C","C","C","C","F","B") Grades table(Grades) table(Grades)/10 # Relative frequency table # # Example 2.2 library(MASS) attach(quine) table(Age) # # Example 2.3 par(mfrow=c(2,2)) barplot(table(Grades),col=3,xlab="Grades",ylab="Frequency") barplot(table(Grades)/length(Grades),col=3,xlab="Grades",ylab="Proportion") barplot(table(Age),col=7,xlab="Age",ylab="Frequency") barplot(table(Age)/length(Age),col=7,xlab="Age",ylab="Proportion") par(mfrow=c(1,1)) # # Example 2.4 par(mfrow=c(1,2)) dotchart(table(Grades)) dotchart(table(Age)) par(mfrow=c(1,1)) # # Example 2.5 par(mfrow=c(1,2)) pie(table(Grades),radius=2.5, col=gray(c(.1,.4,.7,.8,.95))) title("Grades") pie(table(Age),radius=2.5, col=gray(c(.1,.4,.7,.8,.95))) title("Age") par(mfrow=c(1,1)) detach(quine) # # Example 2.6 attach(Baberuth) # Assumes package PASWR is loaded NYYHR <- HR[Team=='NY-A'] NYYHR stem(NYYHR) detach(Baberuth) # # Example 2.7 # part a. attach(Baberuth) Baberuth[1:5,] # or head(Baberuth, n=5) NYYHR <- HR[7:21] # Extracts the 7th through 21st season HR values. # Figure 2.5 stripchart(NYYHR,xlab="Home runs per season",pch=1,method="stack", main="Dotplot of home runs while a New York Yankee") # part b. # Figure 2.6 par(mfrow=c(1,2),pty="s") stripchart(HR~Team,pch=1,method="stack", main="Dotplot of home runs \n by team", xlab="Home runs per season") par(las=1) # Makes labels horizontal stripchart(HR~Team,pch=19,col=c("red","green","blue"), method="stack",main="Color dotplot of home runs \n by team", xlab="Home runs per season") par(mfrow=c(1,1),las=0,pty="m") detach(Baberuth) # # Example 2.8 attach(Baberuth) par(mfrow=c(1,2)) bin <- seq(20,70,10) # Creating bins 20-70 by 10 hist(HR[7:21], breaks=bin, xlab="Home Runs", col="pink", main="Histogram using bins of the form (]") hist(HR[7:21], breaks=bin, right=FALSE, xlab="Home Runs", col="pink", main="Histogram using bins of the form [)") # R detach(Baberuth) # # Example 2.9 par(mfrow=c(1,2)) # Make device region 1 by 2 attach(geyser) hist(waiting,prob=TRUE,col="cyan") lines(density(waiting),col="red",lwd=2) # Add density to Histogram plot(density(waiting),col="red",lwd=2) # Create density by itself par(mfrow=c(1,1)) detach(geyser) # # Example 2.10 attach(Baberuth) NYYHR <- HR[7:21] NYYHR SNYYHR <- sort(NYYHR) SNYYHR p.05 <- floor(.05*15) p.10 <- floor(.10*15) p.15 <- floor(.15*15) p.50 <- floor(.50*15) num.to.delete <-c(p.05,p.10,p.15,p.50) num.to.delete m.05 <- mean(SNYYHR[(1+p.05):(15-p.05)]) m.10 <- mean(SNYYHR[(1+p.10):(15-p.10)]) m.15 <- mean(SNYYHR[(1+p.15):(15-p.15)]) m.50 <- mean(SNYYHR[(1+p.50):(15-p.50)]) t.m <- c(m.05, m.10, m.15, m.50) names(t.m) <- c("5%tmean","10%tmean","15%tmean","50%tmean") t.m tm.05 <- mean(NYYHR,trim=.05) tm.10 <- mean(NYYHR,trim=.10) tm.15 <- mean(NYYHR,trim=.15) tm.50 <- mean(NYYHR,trim=.50) tms <- c(tm.05,tm.10,tm.15,tm.50) names(tms) <- c("5%tmean","10%tmean","15%tmean","50%tmean") tms detach(Baberuth) # # Example 2.11 Student1 <- c(73,75,74,74) Student2 <- c(95,94,12,95) Student3 <- c(66,67,63,100) median(Student1) median(Student2) median(Student3) SM <- rbind(Student1,Student2,Student3) # combine rows colnames(SM) <- c("Test1","Test2","Test3", "Test4") SM means <- apply(SM,1,mean) # mean of rows medians <- apply(SM,1,median) # median of rows TOT <- cbind(SM,means,medians) # combine columns TOT # # Example 2.12 x <- c(1,4,7,9,10,14,15,16,20,21) p <- c(.25,.5,.75) # desired quantiles n <- length(x) # number of values, n order.stat <- p*(n-1)+1 # computing order statistics order.stat # order statistics Q1 <- x[3]+.25*(x[4]-x[3]) # linear interpolation Q2 <- x[5]+.50*(x[6]-x[5]) # linear interpolation Q3 <- x[7]+.75*(x[8]-x[7]) # linear interpolation QU <- c(Q1,Q2,Q3) names(QU) <- c("Q1","Q2","Q3") QU # quartiles quantile(x,probs=c(.25,.5,.75)) # the easy way! # # Example 2.13 attach(Baberuth) # Assumes package PASWR is loaded NYYRBI <- RBI[7:21] # Extract RBIs only while a NYY SNYYRBI <- sort(NYYRBI) p <- c(.25,.50,.75) n <- length(NYYRBI) n order.stat <- p*(n-1)+1 order.stat Q1 <- SNYYRBI[4]+.5*(SNYYRBI[5]-SNYYRBI[4]) Q2 <- SNYYRBI[8] Q3 <- SNYYRBI[11]+.5*(SNYYRBI[12]-SNYYRBI[11]) QU <- c(Q1,Q2,Q3) names(QU) <- c("Q1","Q2","Q3") QU quantile(NYYRBI,probs=c(.25,.50,.75)) j <- (floor((n+1)/2)+1)/2 # Number to count in j lower.hinge <- SNYYRBI[4]+.5*(SNYYRBI[5]-SNYYRBI[4]) upper.hinge <- SNYYRBI[11]+.5*(SNYYRBI[12]-SNYYRBI[11]) small <- min(NYYRBI) large <- max(NYYRBI) five.numbers <- c(small,lower.hinge,Q2,upper.hinge,large) five.numbers fivenum(NYYRBI) # Only works in R detach(Baberuth) # # Example 2.14 attach(Cars93) boxplot(Min.Price,ylab="Minimum Price (in $1,000) for basic version",col="gray") f <- fivenum(Min.Price) text(rep(1.25,5), f, labels=c("Min", expression(H[L]), expression(Q[2]) ,expression(H[U]), "Max"), pos=4) detach(Cars93) # Clean up # # For S-PLUS # library(MASS) # attach(Cars93) # n <- length(Min.Price) # smp <- sort(Min.Price) # count <- (floor((n+1)/2)+1)/2 # Using Equation 2.4 # count # lower.hinge <- smp[count] # upper.hinge <- smp[(n-count+1)] # five.num <- c(min(smp),lower.hinge,median(smp),upper.hinge,max(smp)) # boxplot(Min.Price,ylab="Minimum Price (in $1,000) for basic version") # text(rep(85,5),five.num,labels=c("Minimum", "Lower Hinge", "Median", # "Upper Hinge", "Maximum")) # detach(Cars93) # Clean up # # Section 2.6 range(1:10) diff(range(1:10)) quantile(1:10) IQR(1:10) # Section 2.6.3 x <- 1:5 x n <- length(x) mean.x <- mean(x) mean.x x-mean.x (x-mean.x)^2 NUM <- sum((x-mean.x)^2) # numerator of s^2 hard way NUM DEN <- n-1 # denominator of s^2 DEN VAR <- NUM/DEN # variance hard way VAR var(x) # variance easy way SD <- sqrt(VAR) # standard deviation hard way SD sd(x) # standard deviation with R # stdev(x) is standard deviation with S-PLUS summary(x) # # Example 2.15 head(EPIDURAL, n=5) # Shows first five rows of EPIDURAL attach(EPIDURAL) table(Doctor,Ease) # Levels of Ease not in increasing order Teasy <- factor(Ease,levels=c("Easy","Difficult","Impossible")) table(Doctor, Teasy) # Levels of Ease in increasing order # ftable(Doctor, Treatment, Teasy) detach(EPIDURAL) # # Example 2.16 attach(EPIDURAL) Teasy <- factor(Ease,levels=c("Easy","Difficult","Impossible")) X <- table(Doctor, Teasy) X t(X) # Transpose X # Figure 2.12 par(mfrow=c(2,2)) barplot(X,main="Barplot where Doctor is Stacked \n within Levels of Palpitation") barplot(t(X),main="Barplot where Levels of Palpitation \n is Stacked within Doctor") barplot(X,beside=TRUE,main="Barplot where Doctor is Grouped \n within Levels of Palpitation") barplot(t(X),beside=TRUE,main="Barplot where Levels of Palpitation \n is Grouped within Doctor") par(mfrow=c(1,1)) detach(EPIDURAL) # # Example 2.17 attach(EPIDURAL) table(Treatment,OC) addmargins(table(Treatment,OC)) # addmargins is an R command X <-prop.table(table(Treatment,OC),1) # Percents by rows X par(mfrow=c(2,1)) barplot(X,beside=TRUE,legend=TRUE) barplot(t(X),beside=TRUE,legend=TRUE) par(mfrow=c(1,1)) detach(EPIDURAL) # # Example 2.18 attach(EPIDURAL) BWI <- kg/(cm/100)^2 Control <- BWI[Treatment=="Traditional Sitting"] Treated <- BWI[Treatment=="Hamstring Stretch"] par(mfrow=c(2,2)) # 2*2 plotting region hist(Control) hist(Control,xlim=c(20,60),ylim=c(0,17)) hist(Treated) hist(Treated,xlim=c(20,60),ylim=c(0,17)) par(mfrow=c(1,1)) # 1*1 plotting region detach(EPIDURAL) # # Example 2.19 attach(EPIDURAL) par(pty="s") # Make plotting region square BWI <- kg/(cm/100)^2 # Define body weight index Control <- BWI[Treatment=="Traditional Sitting"] Treated <- BWI[Treatment=="Hamstring Stretch"] # Figure 2.15 boxplot(Control,Treated,horizontal=TRUE,col=c(13,4), names=c("Traditional Sitting","Hamstring Stretch"),las=1) # Figure 2.16 plot(density(Control),xlim=c(20,60),col=13,lwd=2,main="",xlab="") lines(density(Treated),lty=2,col=4,lwd=2) detach(EPIDURAL) # # Example 2.20 attach(EPIDURAL) # Figure 2.17 par(pty="s") # Make plotting region square qqplot(Control,Treated,xlim=c(20,60),ylim=c(20,60)) abline(a=0,b=1) # Line y=0+1*x par(pty="m") # Maximize plotting region detach(EPIDURAL) # # Example 2.21 attach(Animals) range(body) range(brain) range(log(body)) range(log(brain)) par(pty="s") # Figure 2.18 (almost) plot(log(body),log(brain)) identify(log(body),log(brain),labels=row.names(Animals)) detach(Animals) # # Example 2.22 attach(Animals) options(digits=3) # Three digits for output logbody <- log(body) logbrain <- log(brain) Zbody <- (logbody - mean(logbody))/sd(logbody) Zbrain <- (logbrain - mean(logbrain))/sd(logbrain) Anim <- cbind(Animals,logbody,logbrain,Zbody,Zbrain) n <- length(logbody) r <- (1/(n-1))*sum(Zbody*Zbrain) # Definition of r r cor(logbody,logbrain) Anim[1:6,] # Show first 6 rows of Anim ZBO <- scale(logbody) # Z score of logbody ZBR <- scale(logbrain) # Z score of logbrain SAME <- cbind(Zbody,ZBO,Zbrain,ZBR) SAME[1:5,] # Show first five rows of data frame options(digits=7) # Default detach(Animals) # # Section 2.7.6 x <- c(1,1,1,3,3,3,2,2,2) y <- c(3,2,3,6,2,6,10,4,4) z <- c(7,4,2,9,6,4,5,3,1) DF <- data.frame(x,y,z) rm(x,y,z) attach(DF) t(DF) t(DF[order(x,y,z),]) detach(DF) # # Example 2.23 attach(Animals) cor(log(body),log(brain)) SA <- Animals[order(body),] # Sorted by body weight detach(Animals) tail(SA, n=4) # Equivalently SA[25:28,], shows four heaviest animals NoDINO <- SA[-(28:26),] # Remove rows 26-28 of SA attach(NoDINO) # NoDINO contains 25 rows NoDINO[22:25,] # Show four heaviest animals cor(log(body),log(brain)) # Correlation without dinosaurs detach(NoDINO) # # Example 2.24 attach(Animals) Y <- log(brain) X <- log(body) plot(X,Y,xlab="log(body)",ylab="log(brain)") b1 <- sum((X-mean(X))*(Y-mean(Y)))/sum((X-mean(X))^2) b0 <- mean(Y) - b1*mean(X) estimates <- c(b0,b1) estimates modDINO <- lm(Y~X) modDINO abline(modDINO,col="pink",lwd=2) SA <- Animals[order(body),] # Sorted by body weight NoDINO <- SA[-(28:26),] # Remove rows 26-28 (dinosuars) detach(Animals) attach(NoDINO) # NoDINO contains 25 rows Y <- log(brain) X <- log(body) b1 <- sum((X-mean(X))*(Y-mean(Y)))/sum((X-mean(X))^2) b0 <- mean(Y) - b1*mean(X) estimates <- c(b0,b1) estimates modNODINO <- lm(Y~X) modNODINO abline(modNODINO,col="blue",lwd=2,lty=2) leglabels <- c("OLS with Dinosaurs", "OLS without Dinosaurs") leglty <- c(1,2) legcol=c("pink","blue") legend("topleft",legend=leglabels,lty=leglty,col=legcol,lwd=2) detach(NoDINO) # # Example 2.25 attach(Animals) plot(log(body),log(brain),col="blue") f <- log(brain)~log(body) modelLM <- lm(f) modelLM abline(modelLM,col="pink",lwd=2) modelLQS <- lqs(f) modelLQS abline(modelLQS,lty=2,col="red",lwd=2) modelRLM <- rlm(f,method="MM") modelRLM abline(modelRLM,lty=3,col="black",lwd=2) leglabels <- c("Least Squares Line","Least Trimmed Squares", "Robust line: M-estimator ") leglty <- c(1,2,3) legend("topleft",legend=leglabels,lty=leglty, col=c("pink","red","black"), lwd=2, cex=0.85) detach(Animals) # # Example 2.26 # Figure 2.22 attach(EPIDURAL) BWI <- kg/(cm/100)^2 library(lattice) # only for R histogram(~BWI|Treatment,layout=c(1,2)) detach(EPIDURAL) # # Example 2.27 # Figure 2.23 attach(EPIDURAL) BWI <- kg/(cm/100)^2 library(lattice) bwplot(Treatment~BWI|Doctor,as.table=TRUE) # Order: as one reads # Figure 2.24 stripplot(Treatment~BWI|Doctor,jitter=TRUE,as.table=TRUE) detach(EPIDURAL) # # Example 2.28 attach(EPIDURAL) library(lattice) # Figure 2.25 graph1 <- bwplot(~BWI|Doctor,as.table=TRUE) graph2 <- xyplot(cm~kg|Doctor,as.table=TRUE) graph3 <- histogram(~BWI) graph4 <- densityplot(~BWI|Treatment) print(graph1,split=c(1,2,2,2),more=TRUE) # Lower left print(graph2,split=c(2,2,2,2),more=TRUE) # Lower right print(graph3,split=c(1,1,2,2),more=TRUE) # Upper left print(graph4,split=c(2,1,2,2),more=FALSE) # Upper right # Using literal position of graph print(graph1,position=c(0,0,.5,.5),more=TRUE) # Lower left print(graph2,position=c(.5,0,1,.5),more=TRUE) # Lower right print(graph3,position=c(0,.5,.5,1),more=TRUE) # Upper left print(graph4,position=c(.5,.5,1,1),more=FALSE) # Upper right detach(EPIDURAL) # # Example 2.29 library(lattice) # Only for R attach(EPIDURAL) xyplot(cm~kg|Doctor,as.table=TRUE, panel=function(x,y) { panel.xyplot(x,y) # x-y plot panel.abline(lm(y~x)) # Least sq line panel.abline(lqs(y~x),col=3,lty=2,lwd=2) # Least trim sq line } ) # Second approach panel.scatreg <- function(x,y) # name function { panel.xyplot(x,y) # make x-y plot panel.abline(lm(y~x),lwd=2) # regression line panel.abline(lqs(y~x),col=3,lty=2,lwd=2) # least trim sq line } xyplot(cm~kg|Doctor,as.table=TRUE, panel=panel.scatreg) ################################################################################