nut_aic.txt JFM 3/7/2014 rev 2/25/2016 Model selection with AIC, for "Mixed nut problem." Models fit using linear models ["lm()"]. AIC_c calculated using residual sum of squares (RSS); assumes normal distribution of residuals. Use AIC_c for because n/K < 40 (n = 96) # Hypotheses: mean, length, volume, mass, density, # fusiform1 (len/width), fusiform2 (length + mass) # DATA height <- c(13.5, 6.0, 7.5, 3.8, 3.6, 5.6, 6.7, 9.0, 3.8, 3.1, 2.9, 5.0, 11.0, 2.8, 6.0, 2.5, 3.7, 6.2, 4.0, 2.2, 2.5, 1.5, 3.7, 2.9, 5.5, 4.0, 1.2, 2.2, 1.4, 3.0, 9.0, 1.9, 4.0, 1.0, 5.5, 2.6, 8.5, 6.0, 6.5, 2.5, 7.8, 6.1, 9.0, 3.0, 7.5, 2.5, 8.0, 4.0, 3.4, 8.5, 6.0, 5.0, 2.5, 4.0, 8.0, 6.1, 0.5, 3.0, 0.8, 6.0, 1.0, 4.5, 6.7, 5.2, 1.8, 3.5, 2.8, 2.0, 1.9, 4.5, 6.0, 4.3, 3.0, 0.6, 1.7, 2.7, 2.1, 4.5, 0.0, 1.0, 3.5, 2.0, 1.2, 2.9, 1.6, 4.7, 2.4, 4.3, 2.3, 3.5, 4.0, 1.0, 2.0, 0.0, 0.5, 5.3) # jawbreaker(1), choc malt ball(5), yogurt almonds(5), choc espresso bean(5), # hot tamale(20), yogurt raisin(30), jelly belly(30) length <- c(5.7, rep(2.5, 5), rep(2.7, 5), rep(1.8, 5), rep(2.4, 20), rep(1.4, 30), rep(1.5, 30)) width <- c(5.7, rep(2.3, 5), rep(1.8, 5), rep(1.5, 5), rep(0.9, 20), rep(1.1, 30), rep(1.0, 30)) mass <- c(141.2, rep(10.2, 5), rep(5.1, 5), rep(2.6, 5), rep(1.9, 20), rep(1.0, 30), rep(1.2, 30)) volume <- c(103.0, rep(9.3, 5), rep(3.7, 5), rep(1.9, 5), rep(1.5, 20), rep(0.8, 30), rep(0.7, 30)) density <- c(1.4, rep(1.1, 5), rep(1.4, 5), rep(1.4, 5), rep(1.3, 20), rep(1.3, 30), rep(1.7, 30)) shape <- length / width length(height) # Fit models using lm() function: m.length <- lm(height ~ length) m.width <- lm(height ~ width) m.mass <- lm(height ~ mass) m.volume <- lm(height ~ volume) m.density <- lm(height ~ density) m.shape <- lm(height ~ shape) m.shape2 <- lm(height ~ length + mass) anova(m.length) anova(m.width) anova(m.mass) anova(m.volume) anova(m.density) anova(m.shape) anova(m.shape2) m.rss <- c(620.68, 575.28, 520.78, 518.01, 548.13, 588.59, 520.52) m.k <- c(2, 3, 3, 3, 3, 3, 4) m.aic <- 96*log(m.rss/96) + 2*m.k + 2*m.k*(m.k +1) / (96 - m.k -1) m.delta <- m.aic - min(m.aic) m.delta [1] 15.2269887 10.0667838 0.5119815 0.0000000 5.4257220 12.2625887 2.6427323 m.weight <- exp(-0.5*m.delta) / sum(exp(-0.5*m.delta)) m.weight [1] 0.0002332885 0.0030790555 0.3657770798 0.4724891696 0.0313481045 [6] 0.0010270807 0.1260462215 sum(m.weight[3:4]) sum(m.weight[3:4],m.weight[7]) # Model confidence set # Method 1, 95%: Models [3, 5, 4] # Method 2, Delta <= 2: Model [3] # Method 3, likelihood ratio =5: Model[3] # Method 3, likelihood ratio =20: Model[3, 5, 4] # Model-averaged forecast: length=2.5 width=0.9 mass=2.6 volume=3.7 # Predict height for null model: use mean height mean(height) # Predict height for Length model: # calculate from model parameters (intercept and slope): m.length p.length <- 1.985 + 1.118*2.5 p.length # Predict height for Length model: use predict() function in R: predict(m.length, list(length=c(2.5))) # Combine all individual model predictions into single variable: # Start w/ place-holding zero values: p.nut <- rep(0,7) # Replace zeros with individual model predictions: p.nut[1] <- mean(height) p.nut[2] <- predict(m.length, list(length=c(2.5))) p.nut[3] <- predict(m.width, list(width=c(0.9))) p.nut[4] <- predict(m.mass, list(mass=c(2.6))) p.nut[5] <- predict(m.volume, list(volume=c(3.7))) p.sh <- 2.5 / 0.9 p.nut[6] <- predict(m.shape, list(shape=c(p.sh))) p.nut[7] <- predict(m.shape2, list(length=c(2.5), mass=c(2.6))) # Show predictions for each model: p.nut # Model averaged prediction = SUM (predictions * weights) p.average <- sum(p.nut * m.weight) p.average # Model-averaged predicted height = 3.5 cm # (for nut 2.5cm long, 0.9cm wide, 2.6g mass, 3.7ml volume) # ********************************************************************** R version 2.11.1 (2010-05-31) Copyright (C) 2010 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > height <- c(13.5, 6.0, 7.5, 3.8, 3.6, 5.6, + 6.7, 9.0, 3.8, 3.1, 2.9, + 5.0, 11.0, 2.8, 6.0, 2.5, + 3.7, 6.2, 4.0, 2.2, 2.5, 1.5, 3.7, 2.9, 5.5, 4.0, + 1.2, 2.2, 1.4, 3.0, 9.0, 1.9, 4.0, 1.0, 5.5, 2.6, + 8.5, 6.0, 6.5, 2.5, 7.8, 6.1, 9.0, 3.0, 7.5, 2.5, + 8.0, 4.0, 3.4, 8.5, 6.0, 5.0, 2.5, 4.0, 8.0, 6.1, + 0.5, 3.0, 0.8, 6.0, 1.0, 4.5, 6.7, 5.2, 1.8, 3.5, + 2.8, 2.0, 1.9, 4.5, 6.0, 4.3, 3.0, 0.6, 1.7, 2.7, + 2.1, 4.5, 0.0, 1.0, 3.5, 2.0, 1.2, 2.9, 1.6, 4.7, + 2.4, 4.3, 2.3, 3.5, 4.0, 1.0, 2.0, 0.0, 0.5, 5.3) > > > > # jawbreaker, choc malt ball, yogurt almonds, choc espresso bean, > # hot tamale, yogurt raisin, jelly belly > > > length <- c(5.7, rep(2.5, 5), rep(2.7, 5), rep(1.8, 5), rep(2.4, 20), rep(1.4, 30), rep(1.5, 30)) > width <- c(5.7, rep(2.3, 5), rep(1.8, 5), rep(1.5, 5), rep(0.9, 20), rep(1.1, 30), rep(1.0, 30)) > mass <- c(141.2, rep(10.2, 5), rep(5.1, 5), rep(2.6, 5), rep(1.9, 20), rep(1.0, 30), rep(1.2, 30)) > volume <- c(103.0, rep(9.3, 5), rep(3.7, 5), rep(1.9, 5), rep(1.5, 20), rep(0.8, 30), rep(0.7, 30)) > density <- c(1.4, rep(1.1, 5), rep(1.4, 5), rep(1.4, 5), rep(1.3, 20), rep(1.3, 30), rep(1.7, 30)) > length(height) [1] 96 > length(volume) [1] 96 > # Look at data: > # 5-panel figure: > par(mfrow=c(3,2)) > plot(length, height) > plot(width, height) > plot(mass, height) > plot(volume, height) > plot(density, height) > m.length <- lm(height ~ length) > m.width <- lm(height ~ width) > m.mass <- lm(height ~ mass) > m.volume <- lm(height ~ volume) > m.density <- lm(height ~ density) > shape <- length / width > m.shape <- lm(height ~ shape) > m.shape2 <- lm(height ~ length + mass) > > anova(m.length) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) length 1 45.40 45.402 7.4186 0.007696 ** Residuals 94 575.28 6.120 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > anova(m.width) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) width 1 130.86 130.856 25.112 2.534e-06 *** Residuals 94 489.83 5.211 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > anova(m.mass) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) mass 1 99.90 99.905 18.033 5.101e-05 *** Residuals 94 520.78 5.540 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > anova(m.volume) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) volume 1 102.67 102.673 18.631 3.922e-05 *** Residuals 94 518.01 5.511 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > anova(m.density) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) density 1 72.56 72.559 12.443 0.0006509 *** Residuals 94 548.13 5.831 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > anova(m.shape) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) shape 1 32.09 32.093 5.1254 0.02588 * Residuals 94 588.59 6.262 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > anova(m.shape2) Analysis of Variance Table Response: height Df Sum Sq Mean Sq F value Pr(>F) length 1 45.40 45.402 8.1119 0.005413 ** mass 1 54.76 54.764 9.7846 0.002350 ** Residuals 93 520.52 5.597 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > 45.40 + 575.28 [1] 620.68 > m.rss <- c(620.68, 575.28, 489.83, 520.78, 518.01, 588.59, 520.52) > m.rss [1] 620.68 575.28 489.83 520.78 518.01 588.59 520.52 > m.k <- c(2, 3, 3, 3, 3, 3, 4) > m.aic <- 96*log(m.rss/96) + 2*m.k > m.aic [1] 183.1809 177.8888 162.4522 168.3340 167.8221 180.0846 170.2861 > m7.aic <- 96*log(m.rss[7]/96) + 2*m.k[7] + + (2*m.k[7]*(m.k[7]+1)) / (96 - m.k[7] -1) > m7.aic [1] 170.7257 > m.aic[7] <- m7.aic > m.aic [1] 183.1809 177.8888 162.4522 168.3340 167.8221 180.0846 170.7257 > m.delta <- m.aic - min(m.aic) > m.delta [1] 20.728697 15.436655 0.000000 5.881852 5.369871 17.632460 8.273473 > m.weight <- exp(-0.5*m.delta) / sum(exp(-0.5*m.delta)) > m.weight [1] 0.0000277214 0.0003908117 0.8790113514 0.0464265561 0.0599710756 0.0001303631 0.0140421208 > # Model confidence set > # Method 1, 95%: Models [3, 5, 4] > # Method 2, Delta <= 2: Model [3] > # Method 3, likelihood ratio =5: Model[3] > # Method 3, likelihood ratio =20: Model[3, 5, 4] > # Method 3, using likelihood ratio =15; determine limiting value for Delta > -2*log(1/15) [1] 5.4161 > # Model-averaged forecast: length=2.5 width=0.9 mass=2.6 volume=3.7 > # Predict height for null model: use mean height > mean(height) [1] 4.03125 > # Predict height for Length model: calculate from model parameters (intercept and slope): > m.length Call: lm(formula = height ~ length) Coefficients: (Intercept) length 1.985 1.118 > p.length <- 1.985 + 1.118*2.5 > p.length [1] 4.78 > # Predict height for Length model: use predict() function in R: > predict(m.length, list(length=c(2.5))) [1] 4.780094 > # Combine all individual model predictions into single variable: > # Start w/ place-holding zero values: > p.nut <- rep(0,7) > # Replace zeros with individual model predictions: > p.nut[1] <- mean(height) > p.nut[2] <- predict(m.length, list(length=c(2.5))) > p.nut[3] <- predict(m.width, list(width=c(0.9))) > p.nut[4] <- predict(m.mass, list(mass=c(2.6))) > p.nut[5] <- predict(m.volume, list(volume=c(3.7))) > p.sh <- 2.5 / 0.9 > p.nut[6] <- predict(m.shape, list(shape=c(p.sh))) > p.nut[7] <- predict(m.shape2, list(length=c(2.5), mass=c(2.6))) > # Show predictions for each model: > p.nut [1] 4.031250 4.780094 3.433698 3.967962 4.136822 2.817397 3.883899 > # Model averaged prediction = SUM (predictions * weights) > p.average <- sum(p.nut * m.weight) > p.average [1] 3.507454 > # Model-averaged predicted height = 3.5 cm > # (for nut 2.5cm long, 0.9cm wide, 2.6g mass, 3.7ml volume)