## Introduction
## Commentary on R
## General
## Getting help
## Use of an editor as a run-time environment
## The development model, and development strategies
## Unifying ideas
## Retrospect, prospect and alternatives to R
## Data set size, and databases
## The statistics of data collection
## Documentation

## Chapter 1: Preliminaries

## Sec 1.1: Installation of R and of R Packages
## ss 1.1.1: Installation of packages from the command line
install.packages("Rcmdr", dependencies = TRUE)

## Sec 1.2: The R Commander GUI
library(Rcmdr)

## Chapter 2: An Overview of R

## Sec 2.1: Use of the console (i.e., command line) window
2 + 2
## q()   # Quits the R session. Left commented out so that running this
##       # script does not terminate R before the later sections execute;
##       # type q() at the console to end the session.

## Practice with R commands
1:5                       # The numbers 1, 2, 3, 4, 5
mean(1:5)
sum(1:5)                  # Apply the sum function to the vector
                          # of numbers 1, 2, 3, 4, 5
(1:5) > 2                 # Returns FALSE FALSE TRUE TRUE TRUE
# Other relational operators are: >=, <, <=, ==, !=
(2:5)^10                  # 2 to the power of 10, 3 to the power of 10, ...
log2(c(0.5, 1, 2, 4, 8))  # Values that differ by a factor of 2
                          # are, on this scale, one unit apart.
help()                    # help on use of the help function
help(plot)                # the help page for the plot function
example(plot)             # Runs the examples from the help page for plot()
par(ask = FALSE)          # Do not now ask, before displaying a new plot.
## Sec 2.2: A Short R Session

## Entry of vector elements from the command line
volume <- c(351, 955, 662, 1203, 557, 460)
weight <- c(250, 840, 550, 1360, 640, 420)
description <- c(
  "Aird's Guide to Sydney", "Moon's Australia handbook",
  "Explore Australia Road Atlas", "Australian Motoring Guide",
  "Penguin Touring Atlas", "Canberra - The Guide"
)

## Operations with vectors
volume
# Final element of volume
volume[6]
## Ratio of weight to volume, i.e., density
round(weight / volume, 2)

## A simple plot
## Code
plot(weight ~ volume, pch = 16, cex = 1.5)
# pch=16: use solid blob as plot symbol
# cex=1.5: point size is 1.5 times default
## Alternative
plot(volume, weight, pch = 16, cex = 1.5)

plot(weight ~ volume, pch = 16, cex = 1.5,
     xlab = "Volume (cubic mm)", ylab = "Weight (g)")
# identify() labels points that are clicked on; it only does anything on an
# interactive screen device.
identify(weight ~ volume, labels = description)

## Formatting and layout of plots

## Sec 2.3: Data frames -- Grouping together columns of data
## NB, the row names will now be shortened
travelbooks <- data.frame(
  thickness = c(1.3, 3.9, 1.2, 2, 0.6, 1.5),
  width = c(11.3, 13.1, 20, 21.1, 25.8, 13.1),
  height = c(23.9, 18.7, 27.6, 28.5, 36, 23.4),
  weight = weight,  # Include values of weight, entered earlier
  volume = volume,  # Include values of volume, entered earlier
  type = c("Guide", "Guide", "Roadmaps", "Roadmaps", "Roadmaps", "Guide"),
  row.names = description
)
## Remove objects that are not now needed.
rm(volume, weight, description)

## Accessing the columns of data frames
travelbooks[, 4]
travelbooks[, "weight"]
travelbooks$weight
travelbooks[["weight"]]  # This treats the data frame as a list.
## 1: Use the data parameter in the function call
plot(weight ~ volume, data = travelbooks)
#
## 2: Use with(); take columns from the specified data frame
with(travelbooks, plot(weight ~ volume))
#
## 3: Use attach() to include the column names in the search list
attach(travelbooks)
plot(weight ~ volume)
detach(travelbooks)  # Detach when no longer required

## Sec 2.4: Input of Data from a File
## Place the file in the working directory
library(DAAGxtras)            # DAAGxtras has the needed function
datafile("travelbooks")       # Place file in directory
dir()                         # List contents of the working directory
file.show("travelbooks.txt")  # Display travelbooks.txt
## Now input the file, to the data frame travelbooks
travelbooks <- read.table("travelbooks.txt", header = TRUE, row.names = 1)
# Row 1 of the file gives column names. Column 1 gives row names
travelbooks <- read.table("travelbooks.txt")
# Less explicit alternative; requires file to have a suitable format

## Sec 2.5: Summary

## Sec 2.6: Exercise

## Chapter 3: The Working Environment of an R Session

## Sec 3.1: The Working Directory and the Workspace
## Listing Workspace Contents
ls()
ls(pattern = "^w")
## Setting the Working Directory

## Sec 3.2: Saving and retrieving R objects
save.image(file = "archive.RData")
# volume and weight were removed from the workspace in Sec 2.3; recover them
# from the data frame so that the save() call below has objects to save.
# NOTE(review): assumes travelbooks.txt supplies volume and weight columns.
volume <- travelbooks$volume
weight <- travelbooks$weight
save(volume, weight, file = "books.RData")
# Can save many objects in the same file
load("books.RData")  # Recover the saved objects
## Writing data frames to text files

## Sec 3.3: Installations, packages and sessions
## ss 3.3.1: The architecture of an R installation -- Packages
sessionInfo()
## Installation of R packages
install.packages(pkgs = "D:/DAAG_0.97.zip", repos = NULL)
## ss 3.3.2: The search path: library() and attach()
search()
## Attachment of R packages
## Attachment of image files
attach("books.RData")
detach("file:books.RData")

## Sec 3.4: Demonstrations, & Help Examples
demo()          # List available demonstrations
demo(graphics)  # Demonstration of R's graphics abilities
## example(plot) # Run examples from help page for plot()
demo()
demo(image)
demo(graphics)
demo(persp)
demo(plotmath)  # Mathematical symbols can be visually interesting
library(lattice)
demo(lattice)   # Demonstrates lattice graphics
demo(package = .packages(all.available = TRUE))
library(vcd)    # The vcd package must of course be installed.
demo(mosaic)
## Examples that are included on help pages
## Access to help resources from a browser screen
## Searching for key words or strings
help.search("bar")
help.search("str", package = "base")
help.search("char", package = "base")

## Sec 3.5: Summary

## Sec 3.6: Exercises
summary(worldRecords)

## The following is for pages beyond page 26, for anyone who
## wants to explore a bit further.

## Chapter 4: Worked Examples

## Sec 4.1: World record times for track and field events
## Data exploration
# Attach the package before the chapter's first use of worldRecords.
library(DAAGxtras)  # Version 0.6-6 or later of DAAGxtras
str(worldRecords)
## Plot with untransformed scales
plot(Time ~ Distance, data = worldRecords)
## Now use log scales
plot(Time ~ Distance, data = worldRecords, log = "xy")
## Attach lattice package
library(lattice)
## Left panel
xyplot(Time ~ Distance, groups = roadORtrack, data = worldRecords,
       scales = list(log = 10), auto.key = list(columns = 2))
## Right panel
xyplot(log(Time) ~ log(Distance), groups = roadORtrack, data = worldRecords,
       auto.key = list(columns = 2))
## Fitting a regression line
lm(log(Time) ~ log(Distance), data = worldRecords)

## ss 4.1.1: Summary information from model objects
## Store the result in worldrec.lm
worldrec.lm <- lm(log(Time) ~ log(Distance), data = worldRecords)
plot(log(Time) ~ log(Distance), data = worldRecords)
abline(worldrec.lm)
print(worldrec.lm)  # Equivalent to typing worldrec.lm at the command line
summary(worldrec.lm)
## Diagnostic plots
par(mfrow = c(1, 2))  # Subsequent plots appear in a 1 x 2 layout
plot(worldrec.lm, which = 1:2)
par(mfrow = c(1, 1))  # Reset to 1 plot per page, for any later plots

## ss 4.1.2: The model object
worldrec.lm$call
coef(worldrec.lm)

## Sec 4.2: Time series -- Australian annual climate data
## Code for Panel A
library(splines)
plot(mdRain ~ Year, data = bomregions)
## Calculate and plot curve showing long-term trend
hat <- predict(lm(mdRain ~ bs(Year, 5), data = bomregions))
lines(hat ~ Year, data = bomregions)
## Calculate and plot curve showing short-term trends
hat2 <- predict(lm(mdRain ~ bs(Year, 36), data = bomregions))
lines(hat2 ~ Year, data = bomregions)
## Panel B: Replace mdRain by mdAVt
## Panel C: Replace mdRain by SOI

## Sec 4.3: Regression with two explanatory variables
## Data exploration
str(nihills)
## Create scatterplot matrix
library(lattice)
splom(~nihills)
## Correlation matrix
round(cor(nihills), 2)
## Create a data frame that holds the logged data
lognihills <- log(nihills)
names(lognihills) <- c("ldist", "lclimb", "ltime", "ltimef")
## Scatterplot matrix; logarithmic scales
# Plot the logged data frame created above, so that the panels carry the
# "l"-prefixed variable names.
library(lattice)
splom(~ lognihills)
## Correlation matrix
round(cor(lognihills), 2)

## ss 4.3.1: The regression fit
lognihills <- log(nihills)
names(lognihills) <- paste("l", names(nihills), sep = "")
lognihills.lm <- lm(ltime ~ ldist + lclimb, data = lognihills)
round(coef(lognihills.lm), 3)
# Add gradient = climb/dist, then rebuild the logged data frame so that it
# includes the new column as lgradient.
nihills$gradient <- with(nihills, climb / dist)
lognihills <- log(nihills)
names(lognihills) <- paste("l", names(nihills), sep = "")
lognigrad.lm <- lm(ltime ~ ldist + lgradient, data = lognihills)
round(coef(lognigrad.lm), 3)
## Plot the terms in the model
# partial.resid is spelled out in full rather than relying on partial
# matching of the argument name.
termplot(lognigrad.lm, col.term = "gray", partial.resid = TRUE,
         col.res = "black", smooth = panel.smooth)

## Sec 4.4: Exercises
huron <- data.frame(year = as(time(LakeHuron), "vector"),
                    mean.height = LakeHuron)
lag.plot(huron$mean.height)