## Chapter ## Sec 10.1: A One-Way Random Effects Model ## ss 10.1.1: Analysis with \texttt{aov()} library(DAAG) ant111b.aov <- aov(harvwt ~ Error(site), data=ant111b) summary(ant111b.aov) ## Interpreting the mean squares ## Details of the calculations ## Practical use of the analysis of variance results ## Random effects vs. fixed effects ## Nested factors -- a variety of applications ## ss 10.1.2: A More Formal Approach ## Relations between variance components and mean squares ## Interpretation of variance components ## Intra-class correlation ## ss 10.1.3: Analysis using \texttt{lmer()} library(lme4) ant111b.lmer <- lmer(harvwt ~ 1 + (1 | site), data=ant111b) options(digits=4) ## Output is from version 0.995-2 of lmer() ## Note that there is no degrees of freedom information. ant111b.lmer ## Fitted values and residuals in \texttt{lmer()} s2W <- 0.578; s2L <- 2.37; n <- 4 sitemeans <- with(ant111b, sapply(split(harvwt, site), mean)) grandmean <- mean(sitemeans) shrinkage <- (n*s2L)/(n*s2L+s2W) grandmean + shrinkage*(sitemeans - grandmean) ## ## More directly, use fitted() with the lmer object unique(fitted(ant111b.lmer)) ## ## Compare with site means sitemeans ## *Uncertainty in the parameter estimates set.seed(41) ant111b.samp <- mcmcsamp(ant111b.lmer, n=1000) ## Now check the column names names(ant111b.samp) CI95 <- apply(ant111b.samp,2,function(x)quantile(x, prob=c(.025,.975))) # 95% limits for the overall mean CI95[,1] # 95% limits for the residual variance (sigma^2) exp(CI95[,2]) # 95% limits for the between site variance exp(CI95[,3]) ## Handling more than two levels of random variation ## Sec 10.2: Survey Data, with Clustering ## Footnote code ## Means of like (data frame science: DAAG), by class classmeans <- with(science, aggregate(like, by=list(PrivPub, Class), mean) ) # NB: Class identifies classes independently of schools # class identifies classes within schools names(classmeans) <- c("PrivPub", "Class", "avlike") attach(classmeans) ## Boxplots: class means by Private or Public school boxplot(split(avlike, PrivPub), horizontal=TRUE, las=2, xlab = "Class average of score", boxwex = 0.4) rug(avlike[PrivPub == "private"], side = 1) rug(avlike[PrivPub == "public"], side = 3) detach(classmeans) ## ss 10.2.1: Alternative models science.lmer <- lmer(like ~ sex + PrivPub + (1 | school) + (1 | school:class), data = science, na.action=na.omit) summary(science.lmer) ## Footnote code ## The variances are included in the output from VarCorr() VarCorr(science.lmer) # Displayed output differs slightly # The between students (\texttt{Residual}) component of variance is # attr(VarCorr(science.lmer),"sc")^2 science1.lmer <- lmer(like ~ sex + PrivPub + (1 | school:class), data = science, na.action=na.omit) summary(science1.lmer) science1.samp <- mcmcsamp(science1.lmer, n=1000) CI <- exp(apply(science1.samp[, 4:5], 2, function(x)quantile(x, c(.025, .975)))) colnames(CI) <- c("sigma^2", "sigma_Class^2") round(CI, 2) class.df <- ranef(science1.lmer)[["school:class"]] names(class.df) <- "classEff" Class <- with(science, paste(school, class, sep=":")) class.df$var <- with(science, sapply(split(like, Class), var)) class.df$num <- with(science, sapply(split(like, Class), length)) class.df$PrivPub <- with(science, sapply(split(PrivPub, Class), function(x)x[1])) par(mfrow=c(2,2)) plot(classEff ~ num, data=class.df, pch=c(1,3)[unclass(PrivPub)]) plot(var ~ num, data=class.df, pch=c(1,3)[unclass(PrivPub)]) qqnorm(class.df$classEff) qqnorm(residuals(science1.lmer)) par(mfrow=c(1,1)) ## ss 10.2.2: Instructive, though faulty, analyses ## Ignoring class as the random effect science2.lmer <- lmer(like ~ sex + PrivPub + (1 | school), data = science, na.action=na.exclude) science2.lmer ## Footnote code ## The numerical values were extracted from VarCorr(science2.lmer) # The between schools component of variance # is the square of the scale parameter ## Ignoring the random structure in the data ## Faulty analysis, using lm science.lm <- lm(like ~ sex + PrivPub, data=science) summary(science.lm)$coef ## ss 10.2.3: Predictive accuracy ## Sec 10.3: A Multi-level Experimental Design ## ss 10.3.1: The anova table ## Analysis of variance: data frame kiwishade (DAAG) kiwishade.aov <- aov(yield ~ shade + Error(block/shade), data=kiwishade) summary(kiwishade.aov) ## ss 10.3.2: Expected values of mean squares model.tables(kiwishade.aov, type="means") ## Footnote code with(kiwishade, sapply(split(yield, shade), mean)) ## ss 10.3.3: {\kern-4pt}$^*$ The sums of squares breakdown ## Footnote code vine <- paste("vine", rep(1:4, 12), sep="") vine1rows <- seq(from=1, to=45, by=4) kiwivines <- unstack(kiwishade, yield ~ vine) kiwimeans <- apply(kiwivines, 1, mean) kiwivines <- cbind(kiwishade[vine1rows, c("block","shade")], Mean=kiwimeans, kiwivines-kiwimeans) kiwivines <- with(kiwivines, kiwivines[order(block, shade), ]) mean(kiwimeans) # the grand mean ## ss 10.3.4: The variance components ## ss 10.3.5: The mixed model analysis kiwishade.lmer <- lmer(yield ~ shade + (1|block) + (1|block:plt), data=kiwishade) # block:shade is an alternative to block:plt kiwishade.lmer ## Plots of residuals ## Footnote code ## Simplified version of plot library(lattice) xyplot(residuals(kiwishade.lmer) ~ fitted(kiwishade.lmer)|block, groups=shade, pch=1:4, layout=c(3,1), data=kiwishade, xlab="Level 2 fitted values (Treatment + block + plot effects)", ylab="Residuals (level 2)", aspect=1, key=list( points=list(pch=1:4, col=trellis.par.get("superpose.symbol")$col[1:4]), text=list(levels(kiwishade$shade)), space="top", columns=4)) ## Footnote code ## Simplified version of graph that shows the plot effects kiwimeans <- with(kiwishade, aggregate(yield, by=list(block=block, shade=shade), mean)) names(kiwimeans)[3] <- "avyield" kiwimeans.lmer <- lmer(avyield ~ shade + (1 | block), data=kiwimeans) ploteffects <- residuals(kiwimeans.lmer) plothat <- fitted(kiwimeans.lmer) xyplot(ploteffects ~ plothat|block, layout=c(3,1), groups=shade, data=kiwimeans, aspect=1, pch=1:4, key=list( points=list(pch=1:4, col=trellis.par.get("superpose.symbol")$col[1:4]), text=list(levels(kiwishade$shade)), space="top", columns=4)) ## ss 10.3.6: Predictive accuracy ## ss 10.3.7: Different sources of variance -- complication or focus of interest? ## Sec 10.4: Within and Between Subject Effects ## Model fitting criteria ## ss 10.4.1: Model selection ## Change initial letters of levels of tinting$agegp to upper case levels(tinting$agegp) <- toupper.initial(levels(tinting$agegp)) ## Fit all interactions: data frame tinting (DAAG) it3.lmer <- lmer(log(it) ~ tint*target*agegp*sex + (1 | id), data=tinting, method="ML") it2.lmer <- lmer(log(it) ~ (tint+target+agegp+sex)^2 + (1 | id), data=tinting, method="ML") it1.lmer <- lmer(log(it)~(tint+target+agegp+sex) + (1 | id), data=tinting, method= "ML") anova(it1.lmer, it2.lmer, it3.lmer) ## Footnote code ## Code that gives the first four such plots, for the observed data interaction.plot(tinting$tint, tinting$agegp, log(tinting$it)) interaction.plot(tinting$target, tinting$sex, log(tinting$it)) interaction.plot(tinting$tint, tinting$target, log(tinting$it)) interaction.plot(tinting$tint, tinting$sex, log(tinting$it)) ## ss 10.4.2: Estimates of model parameters it2.reml <- update(it2.lmer, method="REML") summary(it2.reml) # NB: The final column, giving degrees of freedom, is not in the # summary output for version 0.995-2 of lme4. It is our addition. uid <- unique(tinting$id) subs <- match(uid, tinting$id) with(tinting, table(sex[subs], agegp[subs])) ## Sec 10.5: Repeated Measures in Time ## The theory of repeated measures modeling ## *Correlation structure ## Different approaches to repeated measures analysis ## ss 10.5.1: Example -- random variation between profiles ## Footnote code ## Plot points and fitted lines (panel A) library(lattice) xyplot(o2 ~ wattsPerKg, groups=id, data=humanpower1, panel=function(x,y,subscripts,groups,...){ yhat <- fitted(lm(y ~ groups*x)) panel.superpose(x, y, subscripts, groups, pch=1:5 ) panel.superpose(x, yhat, subscripts, groups, type="l") }, xlab="Watts per kilogram", ylab=expression("Oxygen intake ("*ml.min^{-1}*.kg^{-1}*")")) ## Separate lines for different athletes ## Calculate intercepts and slopes; plot Slopes vs Intercepts ## Uses the function lmList() from the lme4 package library(lme4) hp.lmList <- lmList(o2 ~ wattsPerKg | id, data=humanpower1) coefs <- coef(hp.lmList) names(coefs) <- c("Intercept", "Slope") plot(Slope ~ Intercept, data=coefs) abline(lm(Slope~Intercept, data=coefs)) ## A random coefficients model hp.lmer <- lmer(o2 ~ wattsPerKg + (wattsPerKg | id), data=humanpower1) summary(hp.lmer) ## Footnote code ## Alternatively, plot different lines in different panels panelfun2 <- function(x, y, subscripts, ...){ llines(x, hat[subscripts], type="l", lty=2) llines(x, lmhat[subscripts], type="l", lty=1) } xyplot(o2 ~ wattsPerKg | id, data=humanpower1, panel=panelfun2, xlab="Watts", ylab=expression("Oxygen intake ("*ml.min^{-1}*"."*kg^{-1}*")")) hat<-fitted(hp.lmer) lmhat<-with(humanpower1, fitted(lm(o2 ~ id*wattsPerKg))) panelfun <- function(x, y, subscripts, groups, ...){ panel.superpose(x, hat, subscripts, groups, type="l",lty=2) panel.superpose(x, lmhat, subscripts, groups, type="l",lty=1) } xyplot(o2 ~ wattsPerKg, groups=id, data=humanpower1, panel=panelfun, xlab="Watts", ylab=expression("Oxygen intake ("*ml.min^{-1}*"."*kg^{-1}*")")) ## Footnote code xyplot(resid(hp.lmer) ~ wattsPerKg, groups=id, type="b", data=humanpower1) ## Footnote code ## Derive the sd from the data frame coefs that was calculated above sd(coefs$Slope) ## ss 10.5.2: Orthodontic measurements on chlldren ## Preliminary data exploration ## Footnote code library(MEMSS) xyplot(distance ~ age | Subject, groups=Sex, data=Orthodont, scale=list(y=list(log=2)), type=c("p","r"), layout=c(11,3)) ## Footnote code ## Use lmList() to find the slopes ab <- coef(lmList(distance ~ age | Subject, Orthodont)) names(ab) <- c("a", "b") ## Obtain the intercept at x=mean(x) ## (For each subject, this is independent of the slope) ab$ybar <- ab$a + ab$b*11 # mean age is 11, for each subject. sex <- substring(rownames(ab), 1 ,1) plot(ab[, 3], ab[, 2], col=c(F="red", M="blue")[sex], pch=c(F=1, M=3)[sex], xlab="Intercept", ylab="Slope") extremes <- ab$ybar %in% range(ab$ybar) | ab$b %in% range(ab$b[sex=="M"]) | ab$b %in% range(ab$b[sex=="F"]) text(ab[extremes, 3], ab[extremes, 2], rownames(ab)[extremes], pos=4, xpd=TRUE) ## The following makes clear M13's difference from other points qqnorm(ab$b) Orthodont$logdist <- log(Orthodont$distance) ## Now repeat, with logdist replacing distance ## Footnote code Orthodont$logdist <- log(Orthodont$distance) ablog <- coef(lmList(logdist ~ age | Subject, Orthodont)) names(ablog) <- c("a", "b") ## Obtain the intercept at mean age (= 11), for each subject ## (For each subject, this is independent of the slope) ablog$ybar <- with(ablog, a + b*11) extreme.males <- rownames(ablog) %in% c("M04","M13") sex <- substring(rownames(ab), 1, 1) with(ablog, t.test(b[sex=="F"], b[sex=="M" & !extreme.males], var.equal=TRUE)) # Specify var.equal=TRUE, to allow comparison with anova output ## A random coefficients model keep <- !(Orthodont$Subject%in%c("M04","M13")) orthdiff.lmer <- lmer(logdist ~ Sex * I(age-11) + (I(age-11) | Subject), data=Orthodont, subset=keep, method="ML") orthdiff.lmer orthsame.lmer <- lmer(logdist ~ Sex + I(age - 11) + (I(age - 11) | Subject), data=Orthodont, method="ML", subset=keep) anova(orthsame.lmer, orthdiff.lmer) orthdiffr.lmer <- update(orthdiff.lmer, method="REML") summary(orthdiffr.lmer) orth.mcmc <- mcmcsamp(orthdiffr.lmer, n=1000) sum(orth.mcmc[, "SexMale:I(age - 11)"] < 0)/1000 ## Correlation between successive times res <- resid(orthdiffr.lmer) Subject <- factor(Orthodont$Subject[keep]) orth.acf <- t(sapply(split(res, Subject), function(x)acf(x, lag=4, plot=FALSE)$acf)) ## Calculate respective proportions of Subjects for which ## autocorrelations at lags 1, 2 and 3 are greater than zero. apply(orth.acf[,-1], 2, function(x)sum(x>0)/length(x)) ## *The variance for the difference in slopes ## Sec 10.6: Error structure considerations ## ss 10.6.1: Predictions from models with a complex error structure ## Consequences from assuming an overly simplistic error structure ## ss 10.6.2: Error structure in explanatory variables ## Sec 10.7: Further Notes on Multi-level and Other Models with CorrelatedErrors ## ss 10.7.1: An historical perspective on multi-level models ## ss 10.7.2: Meta-analysis ## ss 10.7.3: Functional data analysis ## Sec 10.8: Recap ## Sec 10.9: Further Reading ## Sec 10.10: References ## References for further reading ## Analysis of variance with multiple error terms ## Multi-level models and repeated measures ## Meta-analysis ## ss 10.9.1: References for further reading n.omit <- 2 take <- rep(TRUE, 48) take[sample(1:48,2)] <- FALSE kiwishade.lmer <- lmer(yield ~ shade + (1|block) + (1|block:plot), data = kiwishade,subset=take) vcov <- show(VarCorr(kiwishade.lmer)) gps <- vcov[, "Groups"] print(vcov[gps=="block:plot", "Variance"]) print(vcov[gps=="Residual", "Variance"]) cult.lmer <- lmer(ct ~ Cultivar + Dose + factor(year) + (-1 + Dose | gp), data = sorption, method = "ML") cultdose.lmer <- lmer(ct ~ Cultivar/Dose + factor(year) + (-1 + Dose | gp), data = sorption, method = "ML")