## Chapter 7 ## Sec 7.1: Levels of a Factor -- Using Indicator Variables ## ss 7.1.1: Example -- sugar weight ## Display model matrix: uses data frame sugar (DAAG) sugar.aov <- aov(weight ~ trt, data=sugar) model.matrix(sugar.aov) summary.lm(sugar.aov) # NB: summary.lm(), sem <- summary.lm(sugar.aov)$sigma/sqrt(3) # 4 results/trt # Alternatively, sem <- 6.33/sqrt(2) qtukey(p=.95, nmeans=4, df=66) * sem ## ss 7.1.2: Different choices for the model matrix when there are factors oldoptions <- options(contrasts=c("contr.sum", "contr.poly")) # The mean over all treatment levels is now the baseline. # (The second setting ("contr.poly") is for ordered factors.) sugar.aov <- aov(formula = weight ~ trt, data = sugar) summary.lm(sugar.aov) options(oldoptions) # Restore default treatment contrasts ## Sec 7.2: Block Designs and Balanced Incomplete Block Designs ## ss 7.2.1: Analysis of the rice data, allowing for block effects ricebl.aov <- aov(ShootDryMass ~ Block + variety * fert, data=rice) summary(ricebl.aov) summary.lm(ricebl.aov) ## Footnote code rice.aov <- aov(ShootDryMass ~ variety * fert, data=rice) summary.lm(rice.aov)$sigma ## ss 7.2.2: A balanced incomplete block design table(appletaste$product, appletaste$panelist) sapply(appletaste, is.factor) # panelist & product are factors appletaste.aov <- aov(aftertaste ~ panelist + product, data=appletaste) summary(appletaste.aov) ## Sec 7.3: Fitting Multiple Lines\label{sec:xlines} ## Fit various models to columns of data frame leaftemp (DAAG) leaf.lm1 <- lm(tempDiff ~ 1 , data = leaftemp) leaf.lm2 <- lm(tempDiff ~ vapPress, data = leaftemp) leaf.lm3 <- lm(tempDiff ~ CO2level + vapPress, data = leaftemp) leaf.lm4 <- lm(tempDiff ~ CO2level + vapPress + vapPress:CO2level, data = leaftemp) anova(leaf.lm1, leaf.lm2, leaf.lm3, leaf.lm4) summary(leaf.lm2) ## Sec 7.4: Polynomial Regression ## Fit quadratic curve: data frame seedrates (DAAG) seedrates.lm2 <- lm(grain ~ rate + I(rate^2), data = seedrates) # The wrapper function I() ensures that the result from # calculating rate^2 is treated as a variable in its own right. plot(grain ~ rate, data = seedrates, pch = 16, xlim = c(50, 160), cex=1.4) new.df <- data.frame(rate = (1:14) * 12.5) # for plotting the fitted curve hat2 <- predict(seedrates.lm2, newdata = new.df, interval="predict", coverage = 0.95) lines(new.df$rate, hat2[, "fit"], lty = 2, lwd=2) summary(seedrates.lm2, corr=TRUE) ## ss 7.4.1: Issues in the choice of model ## Footnote code ## Code to fit the line, the curve, determine the pointwise coverage ## bounds and create the plots CIcurves <- function(form=grain~rate, data=seedrates, lty=1, col=3, newdata=data.frame(rate=seq(from=50, to=175, by=25))){ seedrates.lm <- lm(form, data=data) x <- newdata[, all.vars(form)[2]] hat <- predict(seedrates.lm, newdata=newdata, interval="confidence") lines(spline(x, hat[, "fit"])) lines(spline(x, hat[, "lwr"]), lty=lty, col=col) lines(spline(x, hat[, "upr"]), lty=lty, col=col) } plot(grain ~ rate, data=seedrates, xlim=c(50,175), ylim=c(15.5,22)) CIcurves() CIcurves(form=grain~rate+I(rate^2), lty=2) ## Sec 7.5: \kern-4pt$^{*}$ Methods for Passing Smooth Curves through Data ## ss 7.5.1: Scatterplot smoothing -- regression splines ## Footnote code ## Fit various models to columns of data frame fruitohms (DAAG) library(splines) attach(fruitohms) ## Code to obtain Panel A plot(ohms ~ juice, cex=0.8, xlab="Apparent juice content (%)", ylab="Resistance (ohms)", data=fruitohms) CIcurves(form=ohms ~ ns(juice, 3), data=fruitohms, newdata=data.frame(juice=pretty(fruitohms$juice,20))) ## For panels B, C, D replace: form = ohms ~ ns(juice,3) ## form = ohms ~ ns(juice,4) # panel B: nspline, df = 4 ## ohms ~ poly(juice,3) # panel C: polynomial, df = 3 ## ohms ~ poly(juice,4) # panel D: polynomial, df = 4 # For more information on poly(), see help(poly) and Exercise 15. ## Footnote code par(mfrow = c(2,2)) fruit.lmb4 <- lm(ohms ~ ns(juice,4)) # panel B: nspline, df=4 plot(fruit.lmb4) par(mfrow = c(1,1)) fruit.lmb4 <- lm(ohms ~ ns(juice,4)) summary(fruit.lmb4) ## Footnote code ## Code to obtain the curve shown in panel A plot(juice, model.matrix(fruit.lmb4)[, 2], ylab="Column 2 of model matrix", xlab="Apparent Juice Content (%)", type="n") ord <- order(juice) lines(juice[ord], model.matrix(fruit.lmb4)[ord, 2]) ## For panels B, C and D, take columns 3, 4 and 5 respectively of ## the model matrix. ## ss 7.5.2: *Penalized splines and generalized additive models library(mgcv) attach(fruitohms) fruit.ps <- gam(ohms ~ s(juice, bs="cs")) # cs: a cubic spline plot(fruitohms) lines(juice, predict(fruit.ps)) fruit.bs <- gam(ohms ~ s(juice, bs="cr", fx=TRUE)) # bs="cr" causes uses of a cubic regression spline for the smooth ## compare with non-penalized spline lines(juice, predict(fruit.bs), col=2) detach(fruitohms) ## ss 7.5.3: Other smoothing methods ## *Monotone curves library(monoProc) fit.mono <- monoproc(loess(ohms~juice, data=fruitohms), bandwidth=0.1, mono1="decreasing", gridsize=30) plot(ohms ~ juice, data=fruitohms, xlab="Apparent juice content (%)", ylab="Resistance (ohms)") lines(fit.mono) ## Sec 7.6: Smoothing Terms in Additive Models ## Footnote code ## Regression of dewpt vs maxtemp: data frame dewpoint (DAAG) library(splines) attach(dewpoint) ds.lm <- lm(dewpt ~ bs(maxtemp,5) + bs(mintemp,5), data=dewpoint) ds.fit <- predict(ds.lm, type="terms", interval="confidence") oldpar <- par(mfrow = c(1,2), pty="s") plot(maxtemp, ds.fit$fit[, 1], xlab="Maximum temperature", ylab="Change from dewpoint mean", type="l") lines(maxtemp, ds.fit$lwr[, 1], lty=2) lines(maxtemp, ds.fit$upr[, 1], lty=2) ord <- order(mintemp) plot(mintemp[ord], ds.fit$fit[ord, 2], xlab="Minimum temperature", ylab="Change from dewpoint mean", type="l") lines(mintemp[ord], ds.fit$lwr[ord, 2], lty=2) lines(mintemp[ord], ds.fit$upr[ord, 2], lty=2) par(oldpar) detach(dewpoint) ## Footnote code library(lattice) mintempRange <- equal.count(dewpoint$mintemp, number=3) xyplot(residuals(ds.lm) ~ maxtemp | mintempRange, data=dewpoint, aspect=1, layout=c(3,1), type=c("p","smooth"), xlab="Maximum temperature", ylab="Residual") ## ss 7.6.1: *The fitting of penalized spline terms ## Sec 7.7: Further Reading roller.lm <- lm(depression~weight, data=roller) roller.lm2 <- lm(depression~weight+I(weight^2), data=roller) seedrates.lm <- lm(grain ~ rate + I(rate^2), data=seedrates) seedrates.pol <- lm(grain ~ poly(rate,2), data=seedrates)