Riverbird Habitat Use Data Analysis: Example using Canada Goose data John McLaughlin 29 February 2020 Analysis conducted using the R software environment (version 3.6.1). Upper part of this document contains raw R commands, suitable for copying and pasting directly into R. The lower part is a transcript of the analysis, including both R commands and results. ______________________________________________________________________________ # This is a comment. R ignores anything that follows the pound sign (#). # To learn more about R, including how to download your own copy of R, go to # http://cran.R-project.org # Below are tallies of Canada Goose (CAGO) detections in each riverbank habitat type, # followed by tallies of habitat types recorded at 5-minute intervals along river. # Grande Ronde River 5/7-10/2019 # CAGO data # bare 15 # herb 54 # shrub 3 # forest 0 # LWD 0 # total 72 # Assign above tallies to objects for each habitat used bu <- 15 hu <- 54 su <- 3 fu <- 0 lu <- 0 # Random sample of habitats # bare 27 # herb 36 # shrub 61 # forest 73 # LWD 2 # total 199 # Assign above tallies to objects for each habitat "available" ba <- 27 ha <- 36 sa <- 61 fa <- 73 la <- 2 # A total of 271 (=72+199) bank habitat locations were sampled, 72 with geese present # and 199 without geese sampled at 5-minute intervals along the river. # Now construct response (CAGO use) and predictor (habitat) variables for all 271 locations. # For CAGO use, a value of 1 indicates CAGO detected there; 0 indicates non-detection. use.cago <- c(rep(1,bu), rep(0,ba), rep(1,hu), rep(0,ha), rep(1,su), rep(0,sa), rep(1,fu), rep(0,fa), rep(1,lu), rep(0,la)) # For habitat variables, 1 indicates location was that habitat type; 0 indicates a different habitat. # For "bare" habitat, CAGO detected at 15 locations w/ bare habitat, 27 locations in habitat sample were bare. # Each of those bare habitat locations are assigned value "1" for the "bare" category. # The remaining 229 locations were not "bare" and are assigned "0" for the "bare" variable. # Assign 1 and 0 to remaining habitat categories in similar way. # Variable assignments below include 10 CAGO-habitat combinations, to facilitate error-checking. # Use R's repeat function, rep(), to simplify data entry. bare <- c(rep(1,bu), rep(1,ba), rep(0,hu), rep(0,ha), rep(0,su), rep(0,sa), rep(0,fu), rep(0,fa), rep(0,lu), rep(0,la)) herb <- c(rep(0,bu), rep(0,ba), rep(1,hu), rep(1,ha), rep(0,su), rep(0,sa), rep(0,fu), rep(0,fa), rep(0,lu), rep(0,la)) shrub <- c(rep(0,bu), rep(0,ba), rep(0,hu), rep(0,ha), rep(1,su), rep(1,sa), rep(0,fu), rep(0,fa), rep(0,lu), rep(0,la)) forest <- c(rep(0,bu), rep(0,ba), rep(0,hu), rep(0,ha), rep(0,su), rep(0,sa), rep(1,fu), rep(1,fa), rep(0,lu), rep(0,la)) lwd <- c(rep(0,bu), rep(0,ba), rep(0,hu), rep(0,ha), rep(0,su), rep(0,sa), rep(0,fu), rep(0,fa), rep(1,lu), rep(1,la)) # Check that CAGO and all habitat variables contain the same number of values (here n=271). length(use.cago) length(bare) length(herb) length(shrub) length(forest) length(lwd) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Fit Generalized Linear Model, glm(), w/ categorical habitat variable cago.bare <- glm(use.cago ~ bare, family=binomial) summary(cago.bare) cago.herb <- glm(use.cago ~ herb, family=binomial) summary(cago.herb) cago.shrub <- glm(use.cago ~ shrub, family=binomial) summary(cago.shrub) cago.for <- glm(use.cago ~ forest, family=binomial) summary(cago.for) cago.lwd <- glm(use.cago ~ lwd, family=binomial) summary(cago.lwd) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Calculate resource selection function (RSF) values for each habitat # RSF values for each habitat are glm coefficients, exponentiated (= e^coef) cago.coef <- c(cago.bare$coef[2], cago.herb$coef[2], cago.shrub$coef[2], cago.for$coef[2], cago.lwd$coef[2]) # RSF std errors cago.err <- c(0.3565, 0.3286, 0.6094, 0, 0) # No detections in forest, LWD # So set std.error forest & LWD to zero to suppress unrealistic error bars # Function to plot RSF values and standard errors for each habitat type. # Function adapted from Crawley MJ (2007) The R Book. Wiley. p.56. error.bar5 <- function(yv, z, nn, yx){ # yv=bar heights, z=error bar heights, nn=labels for each bar, yx=y axis label xv <- barplot(exp(yv), ylim=c(0,(max(exp(yv+z)))),names=nn, ylab=yx) g=(max(xv)-min(xv))/50 for (i in 1:length(xv)) { lines(c(xv[i], xv[i]), c(exp(yv[i]+z[i]),exp(yv[i]-z[i]))) lines(c(xv[i]-g, xv[i]+g), c(exp(yv[i]+z[i]),exp(yv[i]+z[i]))) lines(c(xv[i]-g, xv[i]+g), c(exp(yv[i]-z[i]),exp(yv[i]-z[i]))) } } cago.habitat <- c("Bare", "Herbaceous", "Shrub", "Forest", "LWD") # x-axis labels cago.label <- "Canada Goose RSF" # y-axis label # Plot RSF values and SE # Generates Figure 1 in "Data Analysis Instructions" document. error.bar5(cago.coef, cago.err, cago.habitat, cago.label) # Add dashed horizontal line at y=1, or no selection; use in same proportion as availability lines(c(0,18), c(1,1), lty=2) # ******************************************************************** # ******************************************************************** R version 3.6.1 (2019-07-05) -- "Action of the Toes" Copyright (C) 2019 The R Foundation for Statistical Computing > # This is a comment. R ignores anything that follows the pound sign (#). > # To learn more about R, including how to download your own copy of R, go to > # http://cran.R-project.org > # Below are tallies of Canada Goose detections in each riverbank habitat type, > # followed by tallies of habitat types recorded at 5-minute intervals along river. > # Grande Ronde River 5/7-10/2019 > # CAGO data > # bare 15 > # herb 54 > # shrub 3 > # forest 0 > # LWD 0 > # total 72 > # Assign above tallies to objects for each habitat used > bu <- 15 > hu <- 54 > su <- 3 > fu <- 0 > lu <- 0 > # Random sample > # bare 27 > # herb 36 > # shrub 61 > # forest 73 > # LWD 2 > # total 199 > # Assign above tallies to objects for each habitat "available" > ba <- 27 > ha <- 36 > sa <- 61 > fa <- 73 > la <- 2 > # A total of 271 (=72+199) bank habitat locations were sampled, 72 with geese present > # and 199 without geese sampled at 5-minute intervals along the river. > # Now construct response (CAGO use) and predictor (habitat) variables for all 271 locations. > # For CAGO use, a value of 1 indicates CAGO detected there; 0 indicates non-detection. > use.cago <- c(rep(1,bu), rep(0,ba), rep(1,hu), rep(0,ha), rep(1,su), rep(0,sa), rep(1,fu), rep(0,fa), rep(1,lu), rep(0,la)) > # For habitat variables, 1 indicates location was that habitat type; 0 indicates a different habitat. > # For "bare" habitat, CAGO detected at 15 locations w/ bare habitat, 27 locations in habitat sample were bare. > # Each of those bare habitat locations are assigned value "1" for the "bare" category. > # The remaining 229 locations were not "bare" and are assigned "0" for the "bare" variable. > # Assign 1 and 0 to remaining habitat categories in similar way. > # Variable assignments below include 10 CAGO-habitat combinations, to facilitate error-checking. > # Use R's repeat function, rep(), to simplify data entry. > bare <- c(rep(1,bu), rep(1,ba), rep(0,hu), rep(0,ha), rep(0,su), rep(0,sa), rep(0,fu), rep(0,fa), rep(0,lu), rep(0,la)) > herb <- c(rep(0,bu), rep(0,ba), rep(1,hu), rep(1,ha), rep(0,su), rep(0,sa), rep(0,fu), rep(0,fa), rep(0,lu), rep(0,la)) > shrub <- c(rep(0,bu), rep(0,ba), rep(0,hu), rep(0,ha), rep(1,su), rep(1,sa), rep(0,fu), rep(0,fa), rep(0,lu), rep(0,la)) > forest <- c(rep(0,bu), rep(0,ba), rep(0,hu), rep(0,ha), rep(0,su), rep(0,sa), rep(1,fu), rep(1,fa), rep(0,lu), rep(0,la)) > lwd <- c(rep(0,bu), rep(0,ba), rep(0,hu), rep(0,ha), rep(0,su), rep(0,sa), rep(0,fu), rep(0,fa), rep(1,lu), rep(1,la)) > # Check that CAGO and all habitat variables contain the same number of values (here n=271). > length(use.cago) [1] 271 > length(bare) [1] 271 > length(herb) [1] 271 > length(shrub) [1] 271 > length(forest) [1] 271 > length(lwd) [1] 271 > > # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > # Fit Generalized Linear Model, glm(), w/ categorical habitat variable > > cago.bare <- glm(use.cago ~ bare, family=binomial) > summary(cago.bare) Call: glm(formula = use.cago ~ bare, family = binomial) Deviance Residuals: Min 1Q Median 3Q Max -0.9400 -0.7566 -0.7566 1.4350 1.6677 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -1.1044 0.1528 -7.226 4.96e-13 *** bare 0.5167 0.3565 1.449 0.147 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 313.77 on 270 degrees of freedom Residual deviance: 311.75 on 269 degrees of freedom AIC: 315.75 Number of Fisher Scoring iterations: 4 > > cago.herb <- glm(use.cago ~ herb, family=binomial) > summary(cago.herb) Call: glm(formula = use.cago ~ herb, family = binomial) Deviance Residuals: Min 1Q Median 3Q Max -1.3537 -0.4577 -0.4577 1.0108 2.1486 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -2.2034 0.2484 -8.872 < 2e-16 *** herb 2.6088 0.3286 7.939 2.03e-15 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 313.77 on 270 degrees of freedom Residual deviance: 238.38 on 269 degrees of freedom AIC: 242.38 Number of Fisher Scoring iterations: 4 > > cago.shrub <- glm(use.cago ~ shrub, family=binomial) > summary(cago.shrub) Call: glm(formula = use.cago ~ shrub, family = binomial) Deviance Residuals: Min 1Q Median 3Q Max -0.9005 -0.9005 -0.9005 1.4823 2.4740 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -0.6931 0.1474 -4.701 2.59e-06 *** shrub -2.3191 0.6094 -3.805 0.000142 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 313.77 on 270 degrees of freedom Residual deviance: 287.74 on 269 degrees of freedom AIC: 291.74 Number of Fisher Scoring iterations: 5 > > cago.for <- glm(use.cago ~ forest, family=binomial) > summary(cago.for) Call: glm(formula = use.cago ~ forest, family = binomial) Deviance Residuals: Min 1Q Median 3Q Max -0.95077 -0.95077 -0.00013 1.42239 1.42239 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -0.5596 0.1477 -3.788 0.000152 *** forest -18.0065 763.4171 -0.024 0.981182 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 313.77 on 270 degrees of freedom Residual deviance: 259.57 on 269 degrees of freedom AIC: 263.57 Number of Fisher Scoring iterations: 17 > > cago.lwd <- glm(use.cago ~ lwd, family=binomial) > summary(cago.lwd) Call: glm(formula = use.cago ~ lwd, family = binomial) Deviance Residuals: Min 1Q Median 3Q Max -0.7893 -0.7893 -0.7893 1.6236 1.6236 Coefficients: Estimate Std. Error z value Pr(>|z|) (Intercept) -1.0065 0.1377 -7.309 2.69e-13 *** lwd -14.5595 1029.1215 -0.014 0.989 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Dispersion parameter for binomial family taken to be 1) Null deviance: 313.77 on 270 degrees of freedom Residual deviance: 312.53 on 269 degrees of freedom AIC: 316.53 Number of Fisher Scoring iterations: 14 > # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > # Calculate resource selection function (RSF) values for each habitat > # RSF values for each habitat are glm coefficients, exponentiated (= e^coef) > cago.coef <- c(cago.bare$coef[2], cago.herb$coef[2], cago.shrub$coef[2], + cago.for$coef[2], cago.lwd$coef[2]) > # RSF std errors > cago.err <- c(0.3565, 0.3286, 0.6094, 0, 0) # No detections in forest, LWD > # So set std.error forest & LWD to zero to suppress unrealistic error bars > # Function to plot RSF values and standard errors for each habitat type. > # Function adapted from Crawley MJ (2007) The R Book. Wiley. p.56. > error.bar5 <- function(yv, z, nn, yx){ + # yv=bar heights, z=error bar heights, nn=labels for each bar, yx=y axis label + xv <- + barplot(exp(yv), ylim=c(0,(max(exp(yv+z)))),names=nn, ylab=yx) + g=(max(xv)-min(xv))/50 + for (i in 1:length(xv)) { + lines(c(xv[i], xv[i]), c(exp(yv[i]+z[i]),exp(yv[i]-z[i]))) + lines(c(xv[i]-g, xv[i]+g), c(exp(yv[i]+z[i]),exp(yv[i]+z[i]))) + lines(c(xv[i]-g, xv[i]+g), c(exp(yv[i]-z[i]),exp(yv[i]-z[i]))) + } + } > cago.habitat <- c("Bare", "Herbaceous", "Shrub", "Forest", "LWD") # x-axis labels > cago.label <- "Canada Goose RSF" # y-axis label > # Plot RSF values and SE > # Generates Figure 1 in "Data Analysis Instructions" document. > error.bar5(cago.coef, cago.err, cago.habitat, cago.label) > # Add dashed horizontal line at y=1, or no selection; use in same proportion as availability > lines(c(0,18), c(1,1), lty=2)