################################################################################ # R Code to Supplement # Design of Experiments with MINITAB # by Paul Mathews # published by ASQ Quality Press (2004) ################################################################################ #This file contains R code that reproduces the examples from Design of Experiments #with MINITAB by Mathews. In some cases, alternative and extra methods are given where #R has special capabilities. #The code contained here was run on R Version 2.0.1 and produces output comparable to #that from MINITAB V14. There are probably more accurate, efficient, or better ways of #producing the same or similar output in R than the methods shown here, but the methods #shown work. #If you find any mistakes or have recommendations on how to improve this document #please communicate them to me by e-mail at the address below. #Paul Mathews and MM&B Inc. take no responsibility for the accuracy, stability, use, #or misuse of any of the R code contained here. #Copyright © 2005 Paul Mathews #Paul Mathews, President #Mathews Malnar and Bailey, Inc. #217 Third Street, Fairport Harbor, OH 44077 #Phone: 440-350-0911 #Fax: 440-350-7210 #E-mail: paul@mmbstatistical.com #Web: www.mmbstatistical.com #Rev. 4/20/05 (PGM) ################################################################################## # R Conventions ################################################################################## #Command prompt: R commands are submitted by typing them at the command prompt ">". #Two or more commands separated by semicolons may appear on the same line. If an R #command is too long for one line, hit and R will allow you to continue the #command on the next line at the "+" prompt. For example: # > help( # + lm) #Find help for the lm function. #Several commands that need to be considered together may be collected within braces #{} which may be separated over several lines. #Objects: Data in R and the output of R analyses are all "objects" that have properties #defined within R. For example, the R command: # > Y.lm = lm(Y~X) #fits a linear model for Y as a function of X. The output of the lm function is assigned #to the new object Y.lm, but the lm function by itself doesn't create any visible output. #There are other functions which extract information/output from Y.lm: summary(), anova(), #coefficients(), residuals(), plot(), etc. The results from each of these functions are #themselves objects. #Object names: R commands and object names are case sensitive. For example, the command #"anova" is different from "Anova". Object names must start with a letter and can contain #numbers and limited special characters. Periods are commonly used as delimitors in object #names, e.g. y.lm.residuals. #Object class: Objects have different classes that have different properties. When objects #are passed to a function, they must be compatible with the expected class required by the #function. Many errors in R commands are caused by mismatched object classes. #Assignment operations: Objects are assigned values using the R assignment operators "=" #or "<-". For example, y.lm = lm(y~x) assigns the output from the lm function to the object #y.lm. The class of y.lm will be determined by the class of lm's output. #Boolean operations: Boolean operations are used to control branching in if statements. #They must appear inside of parentheses. The results from Boolean operations are either #TRUE or FALSE. The Boolean "equals" operator is two equals signs: "==". For example, the #Boolean operation (x1==5) is TRUE if x1 equals 5 and FALSE if it's not. The Boolean "not #equals" operator is "!=", as in (x1!=5). The Boolean "or" operator is "||", as in (A || B). #Missing values: Missing values in data sets are indicated with "NA". For example: # > x1 = c(1,3,2,3,1,1,NA,2,2) #Comments: Anything typed after a pound (#) symbol on the command line is interpreted as #a comment by R. ################################################################################## # R Utility Functions ################################################################################## #The following utility functions are basic to operations in R. Knowledge of their usage #is assumed throughout the material that follows. This list does not include any #analysis functions. #The functions are given in alphabetical order. Use help() and help.search() to find #more details about a function or topic. #Function #Description ------------------------------------------------------------------------------------------- #apropos("for") #Returns all objects that contain the indicated string. #attach(Y.data) #Puts the data in object Y.data on R's search list. #x1=c(1,1,1,2,2,2,3,NA,3) #c() is the concatenate function - assigns x1 the #values 1,...,3. NA is a missing value. #Y.des.mat=cbind(des.mat,Y) #Appends columns of two objects into a new object #class(y.lm) #Returns the class of the object y.lm. #data.entry(x=c(NA)) #Opens a spreadsheet environment for data entry for #object x. #y.data.frame=data.frame(y,x1,x2,x3) #Creates a data frame y.data.frame of y, ... #detach(y.data.frame) #Remove y.data.frame from the search path. #ID=factor(ID) #Changes ID into a factor, e.g. for a predictor in #ANOVA. #for (i in 1:5) {} #Loop from i=1 to 5, do the operation(s) in {} each #time through the loop. #x1=gl(3,2,24) #Generate a list of integers 1 to 3, repeated twice #each, for a total of 24 values. #help(lm) #Find help files for the lm function. In some cases, #the argument must appear in quotes, e.g. help("if"). #help.search("residuals") #Search the help files for the word "residuals". #if (x==1) {y=5} else {y=0} #An if/else statement. This statement can be split on #several lines, but "} else {" must appear in the same #line. #length(x) #Returns the length of the vector x, i.e. its number #of observations. #letters[1:5] #Create a list of lower case letters "a", ..., "e". #LETTERS[1:5] #Upper case letters "A", ..., "E". #library(multcomp) #Load the package multcomp. #ls() #List the current objects. #load("c:/R/my.Rdata") #Loads the data in the indicated file. See save(). #names(Y.data.frame) #Prints the names of the variables in the data frame. #Yby3 = Y[order(Y[,3]),] #Orders the data frame Y by its third column. #par(mfrow=c(2,2)) #par sets graphics parameters, mfrow makes figures #arranged in rows (2) and columns (2). #print(y) #Print the object y, equivalent to just typing "y" at #the command prompt. #quit() #Quit the R program, equivalent to File> Exit. #Y.DOE=rbind(replicate1,replicate2) #Appends rows from one object onto another. #read.table("c:/R/junk.dat",header=TRUE,sep="") #Reads white-space separated data from #file junk.dat using a one-line header. #x.rep=rep(x,4) #Repeat the contents of x four times, store result in x.rep. #rm(x,y) #Remove (delete) objects x and y from the R session. #save(x, y, file = "my.Rdata") #Save objects x and y in file my.Rdata. See load(). #save.image() #Save objects, packages, etc., i.e. the whole R environment. #x=seq(10,30,2) #Generate a sequence of numbers from 10 to 30 in steps of size 2 #source("c:/R/mycode.R") #Runs the R code in the indicated file. ################################################################################# # CHAPTER 1: Graphical Presentation of Data ################################################################################# ### Example 1.1 (p. 2) Bargraph of defects data. defect.freq=c(450,150,50,50,300) defect.names=c("Scratches","Pits","Burrs","Inclusions","Other") barplot(defect.freq,names.arg=defect.names,xlab="Defect Category",ylab="Number of Defects") ### Example 1.2 (p. 3) Histogram of example data with forced categories. hist.data=c(52,88,56,79,72,91,85,88,68,63,76,73,86,95,12,69) hist(hist.data,breaks=c(10,20,30,40,50,60,70,80,90,100)) ### Extra: Density plot. plot(density(hist.data)) ### Example 1.3 (p. 4) Dotplot (or stripchart in R). stripchart(hist.data);title("Stripchart or dotplot of histogram data.") #Add the title to the dotplot ### Alternative dotplot using lattice package: library(lattice) dotplot(hist.data) ### Example 1.4 (p. 4) Stem and leaf plot. stem(hist.data) ### Example 1.5 (p. 6) Boxplot. boxplot(hist.data,horizontal=TRUE) ### Example 1.6 (p. 7) Scatter plot. Quiz=c(12,14,13,15,15,16,16) Exam=c(55,60,70,75,90,90,100) plot(Quiz,Exam) ### Example 1.7 (p. 8) Multi-vari chart. Quiz.Student=c(1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5) #Alternatively: Quiz.Student=gl(5,1,30) Quiz.Class=c(1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3) #Quiz.Class=gl(3,10,30) Quiz.Which=c(1,1,1,1,1,2,2,2,2,2,1,1,1,1,1,2,2,2,2,2,1,1,1,1,1,2,2,2,2,2) #Quiz.Which=gl(2,5,30) Quiz.Score=c(87,82,78,85,73,86,92,82,85,97,81,85,85,80,96,97,93,92,88,88,84, 96,86,92,83,100,91,102,99,101) Quiz=data.frame(Quiz.Student,Quiz.Class,Quiz.Which,Quiz.Score) #Makes the data frame plot(Quiz.Score~Quiz.Class,pch=Quiz.Which) ### After the plot is created, add the legend to it with the following command: legend(2.5,80,legend=c(1,2),pch=c(1,2)) #Adds legend at position(2.5,80) ### Alternative multi-vari chart using lattice package: library(lattice) xyplot(Quiz.Score~Quiz.Class|Quiz.Which) #Plots Score vs. Class in two panels defined by Which ### Example 1.9 (p. 16) Script that creates and plots random normal data. random.normal=rnorm(40,300,20) #(sample size, mean, standard deviation) par(mfrow=c(1,3)) #Three graphs on one page, 1 row by 3 columns hist(random.normal) stripchart(random.normal,vertical=TRUE) boxplot(random.normal) par(mfrow=c(1,1) #Reset the graphics display to 1 row and 1 column ################################################################################# # CHAPTER 2: Descriptive Statistics ################################################################################# ### Example 2.15 (p. 34) Calculating statistics from sample data. Example.Data=c(16,14,12,18,9,15) length(Example.Data) #Sample size mean(Example.Data) sd(Example.Data) range(Example.Data) #Reports min and max values diff(range(Example.Data)) #Sample range summary(Example.Data) #Common summary statistics ### Example 2.16 (p. 35) Calculating and plotting the normal probability density function. x=seq(320,480,1) #The array of x values pdf=dnorm(x,400,20) #The corresponding pdf y.max=1.1*max(pdf) #Upper limit for y axis plot(x,pdf,type="l") #Create the plot, type is "l"ine ### Now add the requested reference lines xref=c(370,370) yref=c(0,y.max) lines(xref,yref) #Reference line at x=370 xref=c(400,400) lines(xref,yref) #Reference line at x=400 xref=c(410,410) lines(xref,yref) #Reference line at x=410 xref=range(x) yref=c(0,0) #Reference line at y=0 lines(xref,yref) ################################################################################## # CHAPTER 3: Inferential Statistics ################################################################################## #Many of the problems in this chapter make use of summarized data. For example, the #sample mean, standard deviation, and sample size are given instead of the raw data #for problems in calculating confidence intervals and performing hypothesis tests. #Normally one would use R to perform all of these operations. To fill these data gaps #and demonstrate the use of R, the affected examples shown here use data sets from #other examples in the book. ### Example 3.2 (p. 42) Confidence interval for the population mean (sigma known). ### Example: For the data from Example 3.24, find the 95% confidence interval for the population #mean assuming that the population standard deviation is known to be sigma = 5. one.sample.z.ci=function(x,sigma,conf=0.95) #Function to find the one-sample two-sided z CI { zhalfalpha=-qnorm((1-conf)/2);SE=sigma/sqrt(length(x)) mean(x)+c(-zhalfalpha*SE,zhalfalpha*SE) #Here's the CI calculation } Y=c(22,25,32,18,23,15,30,27,19,23) #Data from Example 3.24 one.sample.z.ci(Y,5) #Call the function with sigma=5, 95% default confidence ### Example 3.6 (p. 47) Hypothesis test for one sample location (sigma known). ### Example: Test the data from Example 3.24 to see if the population mean is different from #mu = 20 assuming that the population standard deviation is known to be sigma = 5. one.sample.z.test=function(x,sigma,mu0) #Function for the one-sample two-sided z test { z=(mean(x)-mu0)/(sigma/sqrt(length(x))) 2*pnorm(-abs(z)) } Y=c(22,25,32,18,23,15,30,27,19,23) #Data from Example 3.24 one.sample.z.test(Y,5,20) #Function reports the two-sided p value ### Example 3.10 (p. 54) Hypothesis test for one sample location (sigma unknown). ### Example: Test the data from Example 3.24 to see if the population mean is different from mu = 20. Y=c(22,25,32,18,23,15,30,27,19,23) #Data from Example 3.24 t.test(Y,mu=20) #Reports the p value and CI ### Example 3.11 (p. 55) Confidence interval for the population mean (sigma unknown). ### Example: For the data from Example 3.24, determine the 95% confidence interval for the population mean. Y=c(22,25,32,18,23,15,30,27,19,23) #Data from Example 3.24 t.test(Y) #Reports the p value for mu0=0 and the CI ### Example 3.12 (p. 57) Hypothesis test for two samples location (sigmas unknown but equal). ### Example: Test the data from Example 3.20 for a difference between the population means #assuming that the population variances are equal. Mfg=gl(2,10,20) Gain=c(44,41,48,33,39,51,42,36,48,47,51,54,46,53,56,43,47,50,56,53) t.test(Gain~Mfg,var.equal=TRUE) #Equal variance assumption should be tested t.test(Gain~Mfg) #Welch's method is preferred ### Example 3.13 (p. 60) Paired sample t test. x1=c(44,62,59,29,78,79,92,38) x2=c(46,58,56,26,72,80,90,35) x=c(x1,x2) ID=gl(2,8,16) t.test(x~ID,paired=TRUE) ### Note: the p-value reported in the book is incorrect. The correct p-value is ### given by: P(-2.443 < t < 2.443; df=7) = 0.04456 ### Example 3.14 (p. 64) Chi-square test for one population variance. ### Example: Test the data from Example 3.24 to determine if the population variance ### is larger than sigma = 3. chisq.p=function(x,sigma0,alt) { df=length(x)-1 chisq.statistic=df*var(x)/sigma0^2 if (alt==1) { #Right tail test for Ho: sigma > sigma0 pchisq(chisq.statistic,df,lower.tail=FALSE) } else { #Left tail test for Ha: sigma < sigma0 pchisq(chisq.statistic,df,lower.tail=TRUE)} } Y=c(22,25,32,18,23,15,30,27,19,23) chisq.p(Y,3,1) ### Example 3.20 (p. 70) Tukey's quick test. Mfg=c("A","A","A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B","B","B") Gain=c(44,41,48,33,39,51,42,36,48,47,51,54,46,53,56,43,47,50,56,53) Mfg.Gain=data.frame(Mfg,Gain) plot(Mfg.Gain) lines(c(1,2),rep(max(subset(Mfg.Gain$Gain,Mfg=="A")),2)) lines(c(1,2),rep(min(subset(Mfg.Gain$Gain,Mfg=="B")),2)) ### Example 3.24 (p. 77) Normal probability plot. Y=c(22,25,32,18,23,15,30,27,19,23) qqnorm(Y);qqline(Y) #Creates the normal plot and a line through Q1 and Q3 shapiro.test(Y) #Shapiro-Wilk quantitative test for normality ################################################################################## # CHAPTER 4: DOE Language and Concepts ################################################################################## ### Example 4.9 (p. 111) Golf ball flight distance as a function of temperature. Distance=c(31.5,32.7,33.98,32.1,32.78,34.65,32.18,33.53,34.98,32.63,33.98,35.3,32.7,34.64,36.53,32.0,34.5,38.2) Temperature=rep(c(66,-12,23),6) plot(Distance~Temperature) ### Example 4.12 (p. 4.12) Analysis of saw blade cuts versus lubricant. Blade=c(5,1,4,9,3,2,3,10,2,6,7,9,8,6,5,8,1,7,4,10) Lube=c(2,2,1,1,2,1,1,2,2,2,1,2,1,1,1,2,1,2,2,1) Cuts=c(162,145,117,135,145,124,131,147,146,134,130,142,134,138,123,161,139,139,149,139) Cuts.data = data.frame(Blade,Lube,Cuts) library(lattice) xyplot(Cuts~Lube,groups=Blade,type = "b",main="Cuts versus Lube by Blade",data=Cuts.data) Cuts.data.1 = subset(Cuts.data,Lube==1) #Subset of the first lube (LAU-003) Cuts.data.2 = subset(Cuts.data,Lube==2) #Subset of the second lube (LAU-016) Cuts.data.1=Cuts.data.1[order(Cuts.data.1[,1]),] #Ordered by blade Cuts.data.2=Cuts.data.2[order(Cuts.data.2[,1]),] t.test(Cuts.data.1$Cuts,Cuts.data.2$Cuts,paired=TRUE) #Paired sample t test ################################################################################## # CHAPTER 5: Experiments for One-Way Classifications ################################################################################## ### Example 5.9 (p. 169) ANOVA for a one-way classification with nine treatments. Y=c(80.9,78.3,77.8,76.6,82.2,74.5,80.5,77,82.5,78.2,81.8,83.5,84.2,75.7,81.4, 78,81.9,83.2,76.2,78.7,79.5,75.3,82.2,78.7,74.2,84.1,83.7,80.6,84.3,80.5, 77.2,82.6,79.1,83.7,81.9,77.9,78.3,83.1,78.9,83.4,81,77.8,77.4,78.4,78.1, 74.5,79,79.7,83.1,76.4,80.2,80.9,81.2,84,83.7,80.3,80.8,83.6,83.4,78.6, 82.8,73.6,82.7,86.6,83.6,85.6,83.9,86,77,80,84.2,76.7,83.1,77.9,77.9,79.9, 83.7,80.5,81.4,74.8,86.1) X=gl(9,9,81) boxplot(Y~X) #Boxplots Y.aov=aov(Y~X) #Perform the ANOVA summary(Y.aov) #Report the ANOVA table aggregate(Y,list(X),FUN=mean) #Report the Y means by X TukeyHSD(Y.aov) #Report the Tukey HSD CIs plot(TukeyHSD(Y.aov)) #Plot the CIs dotplot(residuals(Y.aov)~X) #Dotplot of residuals qqnorm(residuals(Y.aov)); qqline(residuals(Y.aov)) #Normalplot of the residuals ### Example 5.11 (p. 180) ANOVA of log-transformed data. Y=c(31,36,11,24,37,16,18,20,18,20,13,23,6,9,11,9,6,8,11,5,12,4,9,6,10, 15,21,9,29,32,28,27,16,16,20,32,35,19,17,24,18,20,33,24,40,24,10,14,45,36, 49,32,47,89,47,27,58,73,66,77) X=gl(5,12,60) boxplot(Y~X) #Check the boxplots - trouble! Y.aov=aov(Y~X) #Do the ANOVA anyway qqnorm(residuals(Y.aov)) #Check the ANOVA residuals - trouble! qqline(residuals(Y.aov)) #Add the line for reference boxplot(log10(Y)~X,ylab="log(Y)") #Boxplot of log transformed Y values - looks better! logY.aov=aov(log10(Y)~X) #Do the ANOVA qqnorm(residuals(logY.aov)) #Normal plot of residuals - looks better! qqline(residuals(logY.aov)) #Add the reference line summary(logY.aov) #Reports the ANOVA of the log-transformed data ### Example 5.12 (p. 183) Plots of original and transformed Poisson random samples with different means. Random.Poisson = c(rpois(20,3),rpois(20,9),rpois(20,27),rpois(20,81),rpois(20,243)) #Random Poisson samples Treatment=gl(5,20,100) #Treatment codes 1:5 plot(Random.Poisson~Treatment) #Plot of raw counts - trouble! ftc.Random.Poisson=(sqrt(Random.Poisson)+sqrt(Random.Poisson+1))/2 #Transform the counts plot(ftc.Random.Poisson~Treatment) #Plot of the transformed counts ftc.resid=residuals(aov(ftc.Random.Poisson~Treatment)) #ANOVA residuals after transform qqnorm(ftc.resid);qqline(ftc.resid) #Normal plot of residuals – looks great! ### Example 5.14 (p. 188) Sample-size and power calculations for one-way ANOVA. #This function (power.anova.test) specifies the sample size problem in terms of the between-treatment #and within-treatment variances, which have to be calculated first. Biases=c(-5,5,0,0,0) #Treatment biases relative to grand mean BTV=var(Biases) #Between treatment variance #BTV=2*5^2/(5-1) #Alternative BTV calculation WTV=4.2^2 #Within treatment variance power.anova.test(groups=5,between.var=BTV,within.var=WTV,power=0.90) #Find the sample size (Answer is n=7) power.anova.test(groups=5,n=7,between.var=BTV,within.var=WTV) #Check the exact power for n=7 ################################################################################## # CHAPTER 6: Experiments for Multi-Way Classifications ################################################################################## ########################## WARNING!!! ############################################ ### The analyses shown here using the aov() function are only valid for balanced ### experiment designs. If you have an unbalanced design or a balanced design with ### missing values, you MUST use the Anova() function in the car package with ### Type II sums of squares. (What R and SAS call Type II sums of squares are called ### Type III sums of squares in MINITAB and some other packages.) ################################################################################## ### Example 6.2 (p. 195) Review of one-way ANOVA Y=c(14,17,13,12,20,21,16,15,25,29,24,22) Tr=gl(3,4,12) Y.aov=aov(Y~Tr) summary(Y.aov) par(mfrow=c(2,2)) plot(Y.aov) #Residuals diagnostic plots ### Example 6.12 (p. 216) Two-way ANOVA. Y=c(18,16,11,42,40,35,34,30,29,46,42,41) A=gl(4,3,12) B=gl(3,1,12) Y.aov=aov(Y~A+B) summary(Y.aov) par(mfrow=c(2,2)) plot(Y.aov) ### Example 6.12 (p. 216) Two-way ANOVA with interaction. Y=c(29,33,24,22,46,48,48,44,36,32,26,22) A=gl(3,4,12) B=gl(2,2,12) Y.aov=aov(Y~A*B) summary(Y.aov) ### The interaction term is not significant, so drop it from the model. Y.aov=aov(Y~A+B) summary(Y.aov) par(mfrow=c(2,2)) plot(Y.aov) ### Example 6.13 (p. 217) Analysis of a rotator cuff repair anchor. Anchor=c(1,3,2,3,2,1,2,2,3,1,3,1,2,3,2,3,1,1,1,2,2,3,3,1,2,2,1,3,1,3,2,1,3,1,2,3,3,3,2,1,1,2,1,2,2,3,1,3) Foam=c(1,1,2,2,1,2,1,2,2,1,1,2,2,2,1,1,2,1,2,1,2,2,1,1,1,2,2,1,1,2,1,2,1,1,2,2,2,1,1,2,1,2,2,1,2,1,1,2) Force=c(191,194,75,146,171,79,188,76,136,195,207,86,71,145,184,195,81,198,98,178,77,138,202,193,169,63,90, 194,191,132,172,86,203,196,64,130,132,209,180,85,182,67,88,191,70,197,205,143) par(mfrow=c(1,2)) plot(Force~Foam,pch=Anchor) plot(Force~Anchor,pch=Foam) Anchor=factor(Anchor) Foam=factor(Foam) Force.aov=aov(Force~Anchor*Foam) summary(Force.aov) par(mfrow=c(2,2)) plot(Force.aov) #Residuals diagnostic plots TukeyHSD(Force.aov) par(mfrow=c(2,2)) plot(TukeyHSD(Force.aov)) ### Example 6.14 (p. 224) Commands to create the 3x8x5 factorial design matrix with two replicates. ### Variables are going to be named A, B, C, and Rep: A=gl(3,80,240) B=gl(8,10,240) C=gl(5,2,24) Rep=gl(2,1,120) expt.design = data.frame(A,B,C) expt.design ### Example 6.15 (p. 226) Analysis of a two-way design with blocking on replicates. Y=c(57,75,46,78,69,68,97,83,81,64,83,60) A=c(1,2,1,2,2,1,2,2,1,1,2,1) B=c(1,1,3,3,2,2,1,3,1,3,2,2) Block=gl(2,6,12) A=factor(A) B=factor(B) Block=factor(Block) Y.aov=aov(Y~Block+A*B) summary(Y.aov) par(mfrow=c(2,2)) plot(Y.aov) ################################################################################## # CHAPTER 7: Advanced ANOVA Topics ################################################################################## ### Example 7.1 (p. 233) Analysis of a three-variable Latin Square design. A=c(1,1,1,2,2,2,3,3,3,1,1,1,2,2,2,3,3,3) #equivalent to A=gl(3,3,18) B=c(1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1,2,3) #equivalent to B=gl(3,1,18) C=c(1,2,3,2,3,1,3,1,2,1,2,3,2,3,1,3,1,2) Y=c(63,73,78,66,63,92,59,49,99,52,67,82,60,62,73,46,73,79) A=factor(A) B=factor(B) C=factor(C) Y.lm = lm(Y~A+B+C) anova(Y.lm) #Report the ANOVA table ... summary(Y.lm) #And summary statistics including coefficients. TukeyHSD(aov(Y~A+B+C),which="B") #Reports Tukey HSD CIs for differences between the means of B. plot(TukeyHSD(aov(Y~A+B+C),which="B")) #Plots the CIS for B ### Example 7.5 (p. 242) GR&R study analysis using random effects model. ### Warning: The R code required to perform this variance components analysis is cryptic! ### For help on the methods from Chapter 7, see Pinheiro and Bates, Mixed-Effects Models in S and S-Plus, Springer, 2000. Y=c(65,68,60,63,44,45,75,76,63,66,59,60,81,83,42,42,62,63,56,56,38,43,68,71,57,57,55,53,79,77,32, 37,64,65,60,61,46,46,73,71,63,62,57,60,78,82,44,42,71,69,65,66,50,47,78,78,65,68,65,62,90,85,39,41) Part=gl(8,2,64) #Integers 1 to 8, two times in succession, 64 values total Op=gl(4,16,64) Y.aov=aov(Y~1+Error(Part*Op)) #Part and Op are crossed random factors summary(Y.aov) ### Now use lme() to extract the variance components: OpPart = 10*as.numeric(Op)+as.numeric(Part) #Create a code for the Op*Part interaction Block=rep(1,64) #All of the observations come from a single block Part=factor(Part) #Change variables from quantitative to qualitative Op=factor(Op) OpPart = factor(OpPart) grr.dataframe=data.frame(Y,Part,Op,OpPart,Block) library(nlme) #non-linear mixed effects package grr.groupedData = groupedData(Y~1|Block,data=grr.dataframe) #This object wraps the dataframe and its equation grr.lme=lme(Y~1,data=grr.groupedData,random=pdBlocked(list(pdIdent(~Part-1),pdIdent(~Op-1),pdIdent(~OpPart-1)))) VarCorr(grr.lme) #Calculate the variance components intervals(grr.lme) #Calculates confidence intervals for the variance components plot(grr.lme) #The default plot is residuals vs. predicted values. plot.design(grr.groupedData) #Main effects plot for the groupedData object plot(grr.lme,form=resid(.)~fitted(.)|Op,abline=0) #Plots residuals vs. fitted values by Op with 0 reference line. plot(grr.lme,form=resid(.)~fitted(.)|Part,abline=0) #Plots residuals vs. fitted values by Part ... interaction.plot(Op,Part,Y) #Interaction plot qqnorm(resid(grr.lme)); qqline(resid(grr.lme)) #Residuals normal plot ### Example 7.7 (p. 249) Analysis of a nested design. Batch=c(1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3) ### or use Batch=gl(3,8,48) Tote=c(1,1,2,2,3,3,4,4,1,1,2,2,3,3,4,4,1,1,2,2,3,3,4,4,1,1,2,2,3,3,4,4,1,1,2,2,3,3,4,4,1,1,2,2,3,3,4,4) #or use Tote=gl(4,2,48) Cup=c(1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2) ### or use Cup=gl(2,1,48) Y=c(12.8,12.8,13,12.9,13.5,13.5,12.9,13.2,11.2,11.2,12.3,12.3,10.7,10.4,11.8,12.1,11.5,11.5,11.3,11.3, 11.6,11.4,11.2,11.1,12.5,12.2,13,12.8,13.5,13.4,12.5,12.7,11.3,11,12.4,12,10.5,10.9,11.5,11.8,11.7,11.3, 11.5,11.4,11.7,11.2,11,11.2) Batch=factor(Batch) Tote = factor(Tote) Cup=factor(Cup) Y.aov=aov(Y~1+Error(Batch/Tote/Cup)) #Fully nested random factors summary(Y.aov) ### Now use lme() to extract the variance components: Block=rep(1,48) #All of the observations are in one block for lme() Homogeneity.dataframe=data.frame(Batch,Tote,Cup,Y,Block) library(nlme) Homogeneity.groupedData=groupedData(Y~1|Block,Homogeneity.dataframe) Homogeneity.lme=lme(Y~1,data=Homogeneity.groupedData,random=~1|Batch/Tote/Cup) VarCorr(Homogeneity.lme) ### Example 7.8 (p. 254) Power calculation for a 3x5 fixed-effects factorial design. Falpha = qf(0.95,2,30) Power = pf(Falpha,2,30,ncp=20.8) Power #Display the result ### Alternative solution combines two steps. Power = 1-pf(qf(0.95,2,30),2,30,ncp=20.8) Power ### Example 7.9 (p. 255) Another power calculation for the 3x5 factorial design. Power = 1-pf(qf(0.95,4,30),4,30,ncp=12.5) Power ### Example 7.10 (p. 256) Power calculation for a random effect. Falpha = qf(0.95,7,32) E.FA = 1+5*(30/50)^2 Power = pf(E.FA/Falpha,32,7) Power ################################################################################# # CHAPTER 8: Linear Regression ################################################################################# ### Example 8.14 (p. 299) Linear regression analysis. x=c(1,2,6,8,8) y=c(3,7,14,18,23) y.x=lm(y~x) #Fits y as a linear function of x plot(x,y);abline(y.x) #Creates the scatter plot with fitted line summary(y.x) #Complete summary of the model anova(y.x) #ANOVA table ### The following functions provide access to other output from the model: fitted.values(y.x) #Vector of fitted values (y-hat) residuals(y.x) #Vector of residuals coefficients(y.x) #Vector of regression coefficients ### Resdiduals diagnostic plots: par(mfrow=c(2,2)) plot.lm(y.x) #Creates four diagnostic plots par(mfrow=c(1,1)) hist(residuals(y.x)) #Creates histogram of the residuals plot(fitted.values(y.x),residuals(y.x)) #Plot of residuals (y-axis) vs. fitted values (x axis) qqnorm(residuals(y.x)); qqline(residuals(y.x)) #Residuals normal plot plot.ts(residuals(y.x)) #Residuals run chart (i.e. time series) lag.plot(residuals(y.x),lags=1,do.lines=F,labels=F) #Lag-1 residuals plot ### Examples 8.7 (p. 289) and 8.10 (p. 292) Adding confidence and prediction intervals to the graph. newx=data.frame(x=seq(min(x),max(x),diff(range(x))/100)) #Vector of new x values for plotting newy.PL=predict(y.x,newdata=newx,interval="predict") #Find prediction limits for the new x values newy.CL=predict(y.x,newdata=newx,interval="confidence") #Find confidence limits for the new x values matplot(newx$x,cbind(newy.PL,newy.CL[,-1]),lty=c(1,2,2,3,3),type="l", xlab="x",ylab="y") #Matrix plot title("Polynomial fit with 95% confidence and prediction intervals.") ### Example 8.21 (p. 307) Fitting a third-order polynomial. x=c(8.9,8.7,0.1,5.4,4.3,2.4,3.4,6.8,2.9,5.6,8.4,0.7,3.8,9.5,0.7) y=c(126,143,58,50,40,38,41,66,47,65,138,49,56,163,45) y.x=lm(y~x+I(x^2)+I(x^3)) #The I() notation is required to identify the powers of x plot(x,y) #Scatter plot summary(y.x) #Complete summary of the model anova(y.x) #ANOVA table of the model ### Example 8.21, Figure 8.14 (p. 308) Create the scatter plot with the superimposed fitted function. ### First (brute force) method: x.forplot=seq(min(x),max(x),diff(range(x))/100) #Vector of x for plotting b=coefficients(y.x) #Cubic equation coefficients y.forplot=b[1]+b[2]*x.forplot+b[3]*x.forplot^2+b[4]*x.forplot^3 #Vector of fitted y for plotting plot(x,y,pch=1) #Make the scatter plot lines(x.forplot,y.forplot,type="l") #Add the fitted curve title("Third-order polynomial fit to example data.") #Add the title ### Example 8.21, Figure 8.14 (p. 308) Create the scatter plot with the superimposed fitted function. ### Second method using predict(): newx=data.frame(x=seq(min(x),max(x),diff(range(x))/100)) #Vector of new x values for plotting newy=predict(y.x,newdata=newx) #Find y-hat for the new x values plot(x,y);lines(newx$x,newy);title("Third-order polynomial fit to example data.") ### Example 8.21, Extra: Polynomial fit with 95% prediction interval. newy=predict(y.x,newdata=newx,interval="predict") #Find the fit and prediction limits matplot(newx$x,newy,lty=c(1,2,2),type="l", xlab = "x", ylab="y") title("Polynomial fit with 95% prediction interval.") ### Example 8.27, Figure 8.26 (p. 323) Matrix plot of response and uncoded predictors. x1=c(10,10,10,10,100,100,100,100) x2=c(40,40,50,50,40,40,50,50) y=c(286,1,114,91,803,749,591,598) x12=x1*x2 y.x1x2=data.frame(y,x1,x2,x12) library(lattice) pairs(y.x1x2) ### Example 8.27, Figure 8.27 (p. 324) Multiple regression of y=f(x1,x2,x12) using uncoded variables. MR.Uncoded=lm(y~x1+x2+x12) summary(MR.Uncoded) anova(MR.Uncoded) ### Example 8.27, Figure 8.28 (p. 325) Matrix plot of response and coded predictors. cx1=(x1-55)/45 #Code the levels of x1 and x2 cx2=(x2-45)/5 cx12=cx1*cx2 y.cx1cx2=data.frame(y,cx1,cx2,cx12) pairs(y.cx1cx2) #Matrix plot of y, cx1, cx2, and cx12 ### Example 8.27, Figure 8.29 (p. 326) Multiple regression of y=f(cx1,cx2,cx12) using coded variables. MR.Coded=lm(y~cx1+cx2+cx12) summary(MR.Coded) anova(MR.Coded) ### Example 8.29, Figure 8.30 (p. 328) One-way ANOVA of Life=f(Dopant). Life=c(316,330,311,286,258,309,291,363,341,369,354,364,400,381,330,243,298,322,317,273) Dopant=c("A","A","A","A","A","B","B","B","B","B","C","C","C","C","C","D","D","D","D","D") Dopant=factor(Dopant) plot(Life~Dopant) Life.Dopant=lm(Life~Dopant) summary(Life.Dopant) #Note: R uses the first treatment group as the reference level, so its coefficient is zero by definition. anova(Life.Dopant) par(mfrow=c(2,2)) plot(Life.Dopant) #Residuals diagnostic plots ### Example 8.30 (p. 331) Torque = f(Lube,Unit(Lube),Angle) by general linear model. Unit=gl(6,4,72) Angle=rep(c(180,270,360,450),18,each=1) Lube=rep(c("LAU","MIS","SWW"),each=24) Torque=c(72.1,103.6,129.9,173.9,77.2,122.6,162.9,210.1,61.1,88.9,130.3,157.8,75.8,116.4,153, 198.4,67.3,105.4,154.1,222.5,70.6,107.6,144.9,197.3,70.6,102.1,145.7,193.3,65.2,91.9,123.7, 162.9,58.9,83.5,117.9,156.3,71.4,101.4,158.5,204.2,73.6,111.6,165.1,198.4,63.3,92.6,130.7, 168.7,53.4,80.2,112.4,138,49.4,70.6,100.7,135.4,53.4,80.2,122.2,153.7,50.5,72.8,106.1,129.6, 57.8,85.3,120.4,154.8,51.6,71.7,108.3,147.9) Unit=factor(Unit) Lube=factor(Lube) lnTorque=log(Torque) library(lattice) xyplot(lnTorque~Angle|Lube) ### DO NOT USE THE aov() SOLUTION. IT USES SEQUENTIAL INSTEAD OF ADJUSTED SUMS OF SQUARES!!! ### lnTorque.aov=aov(lnTorque~Lube*Angle+I(Angle^2)+Error(Lube/Unit)) ### summary(lnTorque.aov) lnTorque.lme=lme(fixed=lnTorque~Lube+Angle+Lube:Angle+I(Angle^2),random=~1|Lube/Unit) summary(lnTorque.lme) par(mfrow=c(2,2)) plot(resid(lnTorque.lme)~fitted(lnTorque.lme)) plot(resid(lnTorque.lme)~Lube) plot(resid(lnTorque.lme)~Angle) qqnorm(resid(lnTorque.lme)); qqline(resid(lnTorque.lme)) ####################################################################################################### # CHAPTER 9: Two-Level Factorial Experiments ####################################################################################################### ### Example 9.2 (p. 351) Analysis of a 2^1 experiment. x1=c(-1,-1,1,1) y=c(47,51,21,17) y.fit=lm(y~x1) plot(y~x1);abline(y.fit) summary(y.fit) anova(y.fit) ### Example 9.6 (p. 362) Analysis of a 2^2 experiment with two replicates. x1=c(-1,-1,1,1,-1,-1,1,1) x2=c(-1,-1,-1,-1,1,1,1,1) x12=x1*x2 Y=c(61,63,76,72,41,35,68,64) Y.data=data.frame(Y,x1,x2) par(mfrow=c(1,3)) #Plot the data plot(Y~x1,pch=x2); abline(lm(Y~x1)) plot(Y~x2,pch=x1); abline(lm(Y~x2)) plot(Y~x12); abline(lm(Y~x12)) Y.fit=lm(Y~x1*x2,data=Y.data) #linear model with main effects and interaction summary(Y.fit) #Table of regression coeficients anova(Y.fit) #ANOVA table Y.diagnostics=data.frame(Y,x1,x2,residuals(Y.fit),predict(Y.fit)) #Collect up the data, residuals, and fits Y.diagnostics par(mfrow=c(2,2)) #Prep for 2x2 matrix of diagnostic plots plot(Y.fit) #Default diagnostic plots ### Example 9.7 (p. 367) Refining the model for a 2^3 design. A=c(1,1,-1,1,1,-1,-1,-1) B=c(-1,1,-1,-1,1,-1,1,1) C=c(-1,1,-1,1,-1,1,-1,1) Y=c(91,123,68,131,85,87,64,57) par(mfrow=c(2,3)) #Graphs: two rows, three columns AB=A*B;AC=A*C;BC=B*C #Interactions plot(Y~A);abline(lm(Y~A));plot(Y~B);abline(lm(Y~B));plot(Y~C);abline(lm(Y~C)) #Plot the main effects plot(Y~AB);abline(lm(Y~AB));plot(Y~AC);abline(lm(Y~AC));plot(Y~BC);abline(lm(Y~BC)) #... and the interactions Y.fit.0=lm(Y~A*B*C) #Fits the full model summary(Y.fit.0) anova(Y.fit.0) Y.fit.1=lm(Y~A*B*C-A:B:C) #Drops the ABC interaction Y.fit.2=lm(Y~A*B*C-A:B:C-A:B) Y.fit.3=lm(Y~A*B*C-A:B:C-A:B-B:C) #or equivalently: Y.fit.3=lm(Y~A+B+C+A:C) Y.fit.4=lm(Y~A+C+A:C) Y.fit.5=lm(Y~A+C) Y.fit.6=lm(Y~A) Y.fit.7=lm(Y~1) #Model constant (mean) only anova(Y.fit.0,Y.fit.1,Y.fit.2,Y.fit.3,Y.fit.4,Y.fit.5,Y.fit.6,Y.fit.7) #Compare all eight models ### Example 9.10 (p. 379) Analysis of a 2^5 design. Y=c(226,150,284,190,287,149,53,232,221,-30,76,270,59,-32,142,121,-43,200,123,137,1, -51,187,265,233,217,71,187,207,40,179,266) A=c(1,-1,-1,1,1,-1,1,1,-1,1,1,1,1,1,-1,1,-1,1,1,-1,-1,-1,1,-1,-1,-1,1,-1,1,-1,-1,-1) B=c(1,1,1,1,1,-1,-1,1,1,-1,-1,1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,1,1,1,1,-1,1,1,-1,-1,1) C=c(-1,1,1,1,1,-1,1,-1,-1,1,-1,1,1,1,-1,-1,1,1,-1,-1,1,1,-1,1,-1,-1,-1,1,-1,1,-1,-1) D=c(-1,-1,1,-1,-1,-1,1,1,1,1,1,1,-1,-1,1,-1,1,1,1,1,-1,-1,-1,-1,1,-1,-1,1,1,1,-1,-1) E=c(-1,1,-1,1,-1,1,-1,1,-1,1,1,-1,-1,1,-1,-1,1,1,-1,1,-1,1,1,-1,1,1,1,1,-1,-1,-1,-1) Y.fit=lm(Y~A+B+C+D+E+A:B+A:C+A:D+A:E+B:C+B:D+B:E+C:D+C:E+D:E) summary(Y.fit) anova(Y.fit) par(mfrow=c(2,2)) plot(Y.fit) #Default residuals plots par(mfrow=c(1,1)) coeff=coefficients(Y.fit)[2:16] #The coefficients without the constant qqnorm(coeff,main="Coefficients Normal Plot"); qqline(coeff) #Normal plot the coefficients resid=residuals(Y.fit) plot(resid,type="b");xref=c(1,length(resid));yref=c(0,0);lines(xref,yref) #Residuals run chart old.par = par(no.readonly = TRUE) par(mfrow=c(1,5),bty="n",yaxt="n") #Five plots in one row plot(resid~A);lines(c(-1,1),c(0,0)) #Residuals versus A plot(resid~B,ylab="");lines(c(-1,1),c(0,0)) plot(resid~C,ylab="");lines(c(-1,1),c(0,0)) plot(resid~D,ylab="");lines(c(-1,1),c(0,0)) plot(resid~E,ylab="");lines(c(-1,1),c(0,0)) par(old.par) par(mfrow=c(1,5),xaxt="n",bty="n") #Five plots in one row plot(aggregate(Y,list(A),mean),type="b",xlab="A",ylab="Y",ylim=c(min(Y),max(Y))) #Main effects plots plot(aggregate(Y,list(B),mean),type="b",xlab="B",ylab="",ylim=c(min(Y),max(Y))) plot(aggregate(Y,list(C),mean),type="b",xlab="C",ylab="",ylim=c(min(Y),max(Y))) plot(aggregate(Y,list(D),mean),type="b",xlab="D",ylab="",ylim=c(min(Y),max(Y))) plot(aggregate(Y,list(E),mean),type="b",xlab="E",ylab="",ylim=c(min(Y),max(Y))) par(old.par) #Restore old graphics parameters ### Example 9.12 (p. 393) Power calculation for a 2^k design. power=function(k,r,delta,sigma) #Find the power for a 2^k design with r replicates { N=r*2^k #Total number of runs lambda=N/2/2*(delta/sigma)^2 #Noncentrality parameter dfmodel=k+k*(k-1)/2 #Main effects and two factor interactions only dferror=N-1-dfmodel #Errror degrees of freedom Falpha=qf(0.95,1,dferror) #F(alpha=0.05) assumed pf(Falpha,1,dferror,lambda,lower.tail=FALSE) #The power to detect effect delta } power(4,6,400,800) #Gives the answer to the example ### Example 9.13 (p. 394) Sample size calculation for a 2^k design. r=c(1:15) #Guess that the required r is in this range P=power(4,r,400,800) #Find the powers associated with r plot(P~r);lines(c(1,15),c(0.90,0.90)) #Use plot to find min r that gives power > 0.90 power(4,11,400,800) #Exact power for r=11 replicates ### Example 9.17 (p. 397) Find the number of replicates to quantify a coefficient. replicates=function(k,delta,sigma) #k is the number of variables in the 2^k experiment { dfmodel=k+k*(k-1)/2 #Main effects and two-factor interactions RHS=(1.96*sigma/delta)^2/2^k #For priming the loop, will always give low r r=trunc(RHS) #Conservative integer starting point while (r7) print("Error: k out of range.");return N=2^k #Number of runs: N = 2^k A=rep(c(-1,1),1,each=N/2) #N/2 -1's followed by N/2 1's B=rep(c(-1,1),2,each=N/4) AB=A*B #AB interaction design.matrix=data.frame(A,B,AB) #Combine in a data.frame if (k>2) #Then add third variable (C) { C=rep(c(-1,1),4,each=N/8) AC=A*C; BC=B*C design.matrix=data.frame(A,B,C,AB,AC,BC) } if (k>3) #Then add fourth variable (D) { D=rep(c(-1,1),8,each=N/16) AD=A*D; BD=B*D; CD=C*D design.matrix=data.frame(A,B,C,D,AB,AC,AD,BC,BD,CD) } if (k>4) #Then add fifth variable (E) { E=rep(c(-1,1),16,each=N/32) AE=A*E; BE=B*E; CE=C*E; DE=D*E design.matrix=data.frame(A,B,C,D,E,AB,AC,AD,AE,BC,BD,BE,CD,CE,DE) } if (k>5) #Then add sixth variable (F) { F=rep(c(-1,1),32,each=N/64) AF=A*F;BF=B*F;CF=C*F;DF=D*F;EF=E*F design.matrix=data.frame(A,B,C,D,E,F,AB,AC,AD,AE,AF,BC,BD,BE,BF,CD,CE,CF,DE,DF,EF) } if (k>6) #Then add seventh variable (G) { G=rep(c(-1,1),64,each=N/128) AG=A*G;BG=B*G;CG=C*G;DG=D*G;EG=E*G;FG=F*G design.matrix=data.frame(A,B,C,D,E,F,G,AB,AC,AD,AE,AF,AG,BC,BD,BE,BF,BG,CD,CE,CF,CG,DE,DF,DG,EF,EG,FG) } design.matrix } #End function ############################################################################################################ ############################################################################################################ # CHAPTER 10: Fractional-Factorial Designs ############################################################################################################ ### Example 10.5 (p. 419) Analysis of a 2^(5-1) half-fractional factorial experiment. ### Start with the data from Example 9.10: Y=c(226,150,284,190,287,149,53,232,221,-30,76,270,59,-32,142,121,-43,200,123,137,1, -51,187,265,233,217,71,187,207,40,179,266) A=c(1,-1,-1,1,1,-1,1,1,-1,1,1,1,1,1,-1,1,-1,1,1,-1,-1,-1,1,-1,-1,-1,1,-1,1,-1,-1,-1) B=c(1,1,1,1,1,-1,-1,1,1,-1,-1,1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,1,1,1,1,-1,1,1,-1,-1,1) C=c(-1,1,1,1,1,-1,1,-1,-1,1,-1,1,1,1,-1,-1,1,1,-1,-1,1,1,-1,1,-1,-1,-1,1,-1,1,-1,-1) D=c(-1,-1,1,-1,-1,-1,1,1,1,1,1,1,-1,-1,1,-1,1,1,1,1,-1,-1,-1,-1,1,-1,-1,1,1,1,-1,-1) E=c(-1,1,-1,1,-1,1,-1,1,-1,1,1,-1,-1,1,-1,-1,1,1,-1,1,-1,1,1,-1,1,1,1,1,-1,-1,-1,-1) Y.full.factorial=data.frame(Y,A,B,C,D,E) #Catch all of the data rm(Y,A,B,C,D,E) #Clean up Y.half.fraction=subset(Y.full.factorial,(A*B*C*D==E)) #Create the subset attach(Y.half.fraction) AB=A*B;AC=A*C;AD=A*D;AE=A*E;BC=B*C;BD=B*D;BE=B*E;CD=C*D;CE=C*E;DE=D*E #Make the interactions Terms=data.frame(A,B,C,D,E,AB,AC,AD,AE,BC,BD,BE,CD,CE,DE) cor(Terms) Y.fit=lm(Y~A+B+C+D+E+AB+AC+AD+AE+BC+BD+BE+CD+CE+DE) summary(Y.fit) anova(Y.fit) coeff=coefficients(Y.fit)[2:16] #The coefficients without the constant qqnorm(coeff);qqline(coeff) #Normal plot the coefficients Y.fit=lm(Y~A+B+C+D+E+AC+BC+CD+CE) summary(Y.fit) anova(Y.fit) par(mfrow=c(2,2)) plot(Y.fit) ### Example 10.8 (p. 424) Analysis of NIST sonoluminescence screening experiment in seven variables. x1=c(-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1) x2=c(-1,-1,1,1,-1,-1,1,1,-1,-1,1,1,-1,-1,1,1) x3=c(-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1,1,1,1,1) x4=c(-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1) x5=c(-1,-1,1,1,1,1,-1,-1,1,1,-1,-1,-1,-1,1,1) x6=c(-1,1,-1,1,1,-1,1,-1,1,-1,1,-1,-1,1,-1,1) x7=c(-1,1,1,-1,1,-1,-1,1,-1,1,1,-1,1,-1,-1,1) Y=c(80.6,66.1,59.1,68.9,75.1,373.8,66.8,79.6,114.3,84.1,68.4,88.1,78.1,327.2,77.6,61.9) x12=x1*x2;x13=x1*x3;x14=x1*x4;x15=x1*x5;x16=x1*x6;x17=x1*x7 #Create the interactions x23=x2*x3;x24=x2*x4;x25=x2*x5;x26=x2*x6;x27=x2*x7 x34=x3*x4;x35=x3*x5;x36=x3*x6;x37=x3*x7 x45=x4*x5;x46=x4*x6;x47=x4*x7 x56=x5*x6;x57=x5*x7 x67=x6*x7 X=data.frame(x1,x2,x3,x4,x5,x6,x7,x12,x13,x14,x15,x16,x17,x23,x24,x25,x26,x27,x34,x35,x36,x37,x45,x46,x47,x56,x57,x67) cor(X) #Correlation matrix Y.fit=lm(Y~x1+x2+x3+x4+x5+x6+x7+x12+x13+x14+x15+x16+x17+x24) #All other terms are confounded summary(Y.fit) anova(Y.fit) Y.fit=lm(Y~x1+x2+x3+x7+x12+x13+x17) #These are, or are almost, significant summary(Y.fit) anova(Y.fit) plot(Y.fit) ### Example 10.10 (p. 430) Creation of a resolution IV design by folding. A=c(-1,-1,-1,-1,1,1,1,1) #Base design: 2^3 in A, B, C B=c(-1,-1,1,1,-1,-1,1,1) C=c(-1,1,-1,1,-1,1,-1,1) D=A*B;E=A*C;F=B*C;G=A*B*C #Apply the generators A=c(A,-A);B=c(B,-B);C=c(C,-C);D=c(D,-D);E=c(E,-E);F=c(F,-F);G=c(G,-G) #Create the fold-over design AB=A*B;AC=A*C;AD=A*D;AE=A*E;AF=A*F;AG=A*G #Create the interactions BC=B*C;BD=B*D;BE=B*E;BF=B*F;BG=B*G CD=C*D;CE=C*E;CF=C*F;CG=C*G DE=D*E;DF=D*F;DG=D*G EF=E*F;EG=E*G FG=F*G Terms=data.frame(A,B,C,D,E,F,G,AB,AC,AD,AE,AF,AG,BC,BD,BE,BF,BG,CD,CE,CF,CG,DE,DF,DG,EF,EG,FG) cor(Terms) ### Inspection of the correlation matrix shows that the fold-over design is resolution IV. ### Example 10.11 (p. 433) Power calculation for a fractional factorial design with blocking. ### Start from the power function created in Example 9.12 and make appropriate modifications: power=function(k,p,r,dfmodel,delta,sigma) #Find the power for a 2^(k-p) design with r replicates in blocks { N=r*2^(k-p) #Total number of runs lambda=N/2/2*(delta/sigma)^2 #Noncentrality parameter dferror=N-1-dfmodel #Errror degrees of freedom Falpha=qf(0.95,1,dferror) #F(alpha=0.05) assumed pf(Falpha,1,dferror,lambda,lower.tail=FALSE) #The power to detect effect delta } power(5,2,4,10,100,80) #dfmodel = 5 + 2 + 3 # (main effects) + (interactions) + (blocks) ### Example: Use the TLFF() function from Chapter 9 to create and analyze an experiment using two replicates of a 2^(7-4) ### sixteenth-fractional factorial design. des.mat=TLFF(7) #Create the 2^7 full-factorial design des.mat=subset(des.mat,(D==A*B & E==A*C & F==B*C & G==A*B*C)) #Use the generators to isolate the sixteenth fraction cor(des.mat) #Check the correlation matrix des.mat=rbind(des.mat,des.mat) #Two replicates Block=gl(2,8,16) #Block identifier Y=rnorm(16) #Create a column of response data Y.des.mat=cbind(des.mat,Block,Y) #Bind the design matrix, block identifier, and response Y.fit=lm(Y~Block+A+B+C+D+E+F+G,data=Y.des.mat) #Create the model summary(Y.fit) ############################################################################################################ # CHAPTER 11: Response Surface Designs ############################################################################################################ ### Example 11.9 (p. 460) Optimization of lamp lumens as a function of three geometry variables. Lumens=c(4010,5135,5879,6073,3841,4933,5569,5239,5017,5243,6412,6210,5805,5624,5843,4746,6052,6105,6232,4549, 4080,5006,5438,4903,6129,6234,6860,6794,5780,6053) A=c(-1,-1,1,1,-1,-1,1,1,0,0,0,0,0,0,0,-1,-1,1,1,-1,-1,1,1,0,0,0,0,0,0,0) B=c(-1,1,-1,1,0,0,0,0,-1,-1,1,1,0,0,0,-1,1,-1,1,0,0,0,0,-1,-1,1,1,0,0,0) C=c(0,0,0,0,-1,1,-1,1,-1,1,-1,1,0,0,0,0,0,0,0,-1,1,-1,1,-1,1,-1,1,0,0,0) Block=gl(2,15,30) Run=c(9,8,10,12,3,15,6,14,13,5,1,11,4,7,2,26,16,27,19,23,30,22,18,17,25,20,29,21,24,28) AB=A*B;AC=A*C;BC=B*C;AA=A*A;BB=B*B;CC=C*C Terms=data.frame(A,B,C,AB,AC,BC,AA,BB,CC) cor(Terms) Lumens.fit=lm(Lumens~Block+A+B+C+AB+AC+BC+AA+BB+CC) summary(Lumens.fit) anova(Lumens.fit) coeff=coefficients(Lumens.fit)[2:11] qqnorm(coeff);qqline(coeff) par(mfrow=c(2,2)) plot(Lumens.fit) resid=residuals(Lumens.fit) par(mfrow=c(1,3)) plot(resid~A);plot(resid~B);plot(resid~C) ############################################################################################################ # Appendices: Statistical Tables ############################################################################################################# ### Appendix A.2 (p. 478) Normal distribution. pnorm(-1.96) #cdf: P(-inf < z < -1.96) = 0.02499790 qnorm(0.025) #inverse cdf: P(-inf < z < -1.959964) = 0.025 ### Appendix A.3 (p. 480) Student's t distribution. pt(-2.5,23) #cdf: P(-inf < t < -2.5;df = 23) = 0.01 qt(0.025,12) #inverse cdf: P(-inf < t < -2.179;df = 12) = 0.025 ### Appendix A.4 (p. 481) Chi-square distribution. pchisq(8.0,4) #cdf: P(0 < X2 < 8.0;df=4) = 0.9084 qchisq(0.975,10) #inverse cdf: P(0 < X2 < 20.48;df = 10) = 0.975 ### Appendix A.5 (p. 482) F distribution. pf(4.0,4,15) #cdf: P(0 < F < 4.0;dfnum=4,dfdenom=15) = 0.9790 qf(0.95,4,15) #inverse cdf: P(0 < F < 3.056) = 0.95 ### Appendix A.6 (p. 484) Duncan's multiple range test. ### Not available? ### Appendix A.7 (p. 485) Studentized range distribution. ptukey(4.020,4,17) #Inverse cdf: P(0 < Q < 4.020;k=4,df=17) = 0.95 qtukey(0.95,4,17) #SRD cdf: P(0 < Q < 4.020;k=4,df=17) = 0.95 ### Appendix A.9 (p. 487) Fisher's Z transform. FishersZ=function(r) log((1+r)/(1-r))/2 #Returns Z for a given r FishersZ(0.98) #Fisher’s Z: Z(r = 0.98) = 2.29756 invFishersZ=function(thisZ) #Returns r for a given Z { r=-9999:9999 r=r/10000 Z=FishersZ(r) thisr=approx(Z,r,xout=thisZ) thisr$y } invFishersZ(2.29756) #Inverse Fisher’s Z: r(Z=2.29756) = 0.98