ESCI 340 Mixed Nut problem data analysis (data from several groups) 2 March 2012 ______________________________________________________________________________________ DATA ____ height <- c(0.5, 0.7, 0.6, 4.5, 4.8, 8.5, 0.4, 6.0, 2.5, 1.0, 0.0, 2.0, 0.7, 3.7, 0.6, 0.5, 1.3, 3.4, 3.3, 2.0, 1.2, 6.0, 4.5, 5.1, 3.25, 5.1, 3.5, 1.25, 2, 3.8, 3.5, 2.5, 6.5, 6.25, 5.0, 7.5, 8.0, 6.5, 8.75, 9.0, 8.5, 7.0, 7.75, 10.0, 2.0, 1.0, 3.5, 3.3, 5.5, 6.0, 8.5, 4.0, 7.0, 6.5, 3.5, 4.5, 6.0, 1.5, 3.75, 12.0) length <- c(rep(1.5, 15), rep(1.4, 15), rep(1.8, 5), rep(2.7, 5), rep(2.5, 4), rep(2.4, 15), 5.7) width <- c(rep(1.0, 15), rep(1.1, 15), rep(1.5, 5), rep(1.8, 5), rep(2.3, 4), rep(0.9, 15), 5.7) mass <- c(rep(1.2, 15), rep(1.0, 15), rep(2.6, 5), rep(5.1, 5), rep(10.2, 4), rep(1.9, 15), 142.1) volume <- c(rep(0.7, 15), rep(0.8, 15), rep(1.9, 5), rep(3.7, 5), rep(9.3, 4), rep(1.5, 15), 103.0) density <- c(rep(1.7, 15), rep(1.3, 15), rep(1.4, 5), rep(1.4, 5), rep(1.1, 4), rep(1.3, 15), 1.4) R version 2.11.1 (2010-05-31) Copyright (C) 2010 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > height <- c(0.5, 0.7, 0.6, 4.5, 4.8, 8.5, 0.4, 6.0, 2.5, 1.0, 0.0, 2.0, 0.7, 3.7, 0.6, + 0.5, 1.3, 3.4, 3.3, 2.0, 1.2, 6.0, 4.5, 5.1, 3.25, 5.1, 3.5, 1.25, 2, 3.8, + 3.5, 2.5, 6.5, 6.25, 5.0, + 7.5, 8.0, 6.5, 8.75, 9.0, + 8.5, 7.0, 7.75, 10.0, + 2.0, 1.0, 3.5, 3.3, 5.5, 6.0, 8.5, 4.0, 7.0, 6.5, 3.5, 4.5, 6.0, 1.5, 3.75, + 12.0) > length(height) [1] 60 > length <- c(rep(1.5, 15), rep(1.4, 15), rep(1.8, 5), rep(2.7, 5), rep(2.5, 4), rep(2.4, 15), 5.7) > length(length) [1] 60 > len <- length > length(len) [1] 60 > width <- c(rep(1.0, 15), rep(1.1, 15), rep(1.5, 5), rep(1.8, 5), rep(2.3, 4), rep(0.9, 15), 5.7) > length(width) [1] 60 > width [1] 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.1 1.1 1.1 1.1 [20] 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.5 1.5 1.5 1.5 1.5 1.8 1.8 1.8 [39] 1.8 1.8 2.3 2.3 2.3 2.3 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 0.9 [58] 0.9 0.9 5.7 > mass <- c(rep(1.2, 15), rep(1.0, 15), rep(2.6, 5), rep(5.1, 5), rep(10.2, 4), rep(1.9, 15), 142.1) > length(mass) [1] 60 > volume <- c(rep(0.7, 15), rep(0.8, 15), rep(1.9, 5), rep(3.7, 5), rep(9.3, 4), rep(1.5, 15), 103.0) > density <- c(rep(1.7, 15), rep(1.3, 15), rep(1.4, 5), rep(1.4, 5), rep(1.1, 4), rep(1.3, 15), 1.4) > length(volume) [1] 60 > length(density) [1] 60 > # Fit linear regression models > reg.len <- lm(height ~ len) > anova(reg.len) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) len 1 193.43 193.430 39.287 4.909e-08 *** Residuals 58 285.56 4.924 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > reg.vol <- lm(height ~ volume) > anova(reg.vol) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) volume 1 93.50 93.500 14.068 0.0004093 *** Residuals 58 385.49 6.646 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > reg.mass <- lm(height ~ mass) > anova(reg.mass) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) mass 1 87.71 87.712 13.002 0.0006482 *** Residuals 58 391.28 6.746 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > 59*var(height) # Null model RSS (= total SS) [1] 478.995 > reg.den <- lm(height ~ density) > anova(reg.den) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) density 1 66.88 66.880 9.4125 0.003273 ** Residuals 58 412.12 7.105 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > torpedo <- len/width > reg.torp <- lm(height ~ torpedo) > anova(reg.torp) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) torpedo 1 1.02 1.0210 0.1239 0.7261 Residuals 58 477.97 8.2409 > reg.torp2 <- lm(height ~ len + mass) > anova(reg.torp2) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) len 1 193.430 193.430 39.0099 5.674e-08 *** mass 1 2.932 2.932 0.5913 0.4451 Residuals 57 282.633 4.958 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > # Calculate AIC_c scores > # create vector of RSS values for each model > reg.rss <- c(478.995, 285.56, 385.49, 391.28, 412.12, 477.97, 282.633) > # Number of parameters in each model > k <- c(2, rep(3, 5), 4) > k [1] 2 3 3 3 3 3 4 > # Use AIC_c because n/k < 40 (n=60) > aicc <- 60*log(reg.rss/60) + 2*k + (2*k*(k+1)/(60-k-1)) > aicc [1] 128.8513 100.0350 118.0388 118.9333 122.0468 130.9408 101.7156 > delta <- aicc - min(aicc) > delta [1] 28.816235 0.000000 18.003785 18.898275 22.011744 30.905748 1.680526 > mod.lik <- exp(-0.5*delta) > mod.lik [1] 5.528842e-07 1.000000e+00 1.231765e-04 7.875746e-05 1.660391e-05 [6] 1.944921e-07 4.315971e-01 > # Calcualte Akaike weights, w_i > wi <- mod.lik / sum(mod.lik) > wi [1] 3.861418e-07 6.984136e-01 8.602811e-05 5.500528e-05 1.159640e-05 [6] 1.358359e-07 3.014333e-01 > # Model confidence set: 3 ways to determine > # Same result for all 3 methods: models 2, 7 [(length), (length, mass)] > log(0.1) [1] -2.302585 > log(0.1)/-0.5 # Delta value for likelihood ratio of 1/10 [1] 4.60517 > # How well does *best* model fit? r^2 = 0.404 > summary(reg.len) Call: lm(formula = height ~ len) Residuals: Min 1Q Median 3Q Max -4.4359 -1.9359 0.3461 1.6658 5.3964 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -0.7837 0.8602 -0.911 0.366 len 2.5915 0.4135 6.268 4.91e-08 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 2.219 on 58 degrees of freedom Multiple R-squared: 0.4038, Adjusted R-squared: 0.3935 F-statistic: 39.29 on 1 and 58 DF, p-value: 4.909e-08 > # Plot 5 univariate models > par(mfrow=c(2,3)) > max(height) [1] 12 > plot(len, height, xlab="Nut length (cm)", ylab="Height (cm)") > abline(reg.len) > plot(volume, height, xlab="Nut volume (cm)", ylab="Height (cm)") > abline(reg.vol) > plot(mass, height, xlab="Nut mass (g)", ylab="Height (cm)") > abline(reg.mass) > plot(density, height, xlab="Nut density (g/cm^3)", ylab="Height (cm)") > abline(reg.den) > plot(torpedo, height, xlab="Nut shape", ylab="Height (cm)") > abline(reg.torp)