ESCI 340 computer lab Friday 15 January 2016
____________________________________________________________________

# Douglas fir dbh data, from 6 January 2016
# Two samples of 10 trees each; later sections of this file refer to them as
# "edge" and "interior" data and test hypotheses stated in cm.
# NOTE(review): "dbh" presumably means diameter at breast height -- confirm.

# Edge sample (n = 10)
dbh.edge <- c(93.3, 107.4, 95, 
106.2, 97.2, 94.3, 116.1, 115.3, 
87.6, 80)

# Sample mean and sample standard deviation of the edge data
> mean(dbh.edge)
[1] 99.24
> sd(dbh.edge)
[1] 11.76012

# Interior sample (n = 10)
dbh.int <- c(61.5, 63.4, 191, 58.7, 54.5, 
146, 71.6, 37.2, 48.1, 75.1)

# Sample mean and sample standard deviation of the interior data
> mean(dbh.int)
[1] 80.71
> sd(dbh.int)
[1] 48.69147

# One sample t-test using dbh edge data; Ho: mu=110cm
# t = (sample mean - mu) / (sd / sqrt(n)), with n = 10 trees
> t.edge <- (mean(dbh.edge) - 110) / (sd(dbh.edge) / sqrt(10))
> t.edge
[1] -2.893347

# Determine p-value for this t-value:
# pt() returns the lower-tail probability P(T <= t), so evaluate it at -|t|
# (correct whichever sign t has), with d.f. = n - 1 = 9.
# Multiply by 2 to get 2-tailed probability
> 2*pt(-abs(t.edge), df=9)
[1] 0.0177870


#  t-test function in R produces identical result:
# documentation (usage signature, shown for reference only -- not run):
## Default S3 method:
# t.test(x, y = NULL,
#        alternative = c("two.sided", "less", "greater"),
#        mu = 0, paired = FALSE, var.equal = FALSE,
#        conf.level = 0.95, ...)


> t.test(dbh.edge, mu=110)

        One Sample t-test

data:  dbh.edge
t = -2.8933, df = 9, p-value = 0.01779
alternative hypothesis: true mean is not equal to 110
95 percent confidence interval:
  90.82732 107.65268
sample estimates:
mean of x 
    99.24 

# ********************************************************************

# Two sample t-test using edge and interior dbh data, Ho: mu_edge = mu_int
# t = [mean(edge) - mean(int)] / se
# First, calculate se, from pooled variance:
#   sp^2 = [(n1-1)*var1 + (n2-1)*var2] / (n1 + n2 - 2),  with n1 = n2 = 10
#   (the divisor is the degrees of freedom, 18 -- not the total sample size, 20)
> sp.dbh <- ((9*var(dbh.edge) + 9*var(dbh.int)) / (10+10-2))
> sp.dbh
[1] 1254.58
> se.dbh <- sqrt(sp.dbh/10 + sp.dbh/10)
> se.dbh
[1] 15.84033
# Insert se value into formula for t:
> t.dbh <- (mean(dbh.edge) - mean(dbh.int)) / se.dbh  # d.f. = 10 + 10 - 2 = 18
> t.dbh
[1] 1.169799
> # determine p-value, 2-tailed (t is positive here, so use the upper tail):
> 2*(1-pt(t.dbh, df=18))
[1] 0.2573387

#  2-sample t-test function in R produces similar result:
> t.test(dbh.edge, dbh.int)

        Welch Two Sample t-test

data:  dbh.edge and dbh.int
t = 1.1698, df = 10.046, p-value = 0.2691
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -16.74234  53.80234
sample estimates:
mean of x mean of y 
    99.24     80.71 

# t-value is identical, because the two samples are the same size (n = 10 each);
#    the p-value differs because t.test applies Welch's approximation by default
#    (var.equal = FALSE), which lowers d.f. from 18 to 10.046 to allow for
#    unequal variances.
#    t.test(dbh.edge, dbh.int, var.equal=TRUE) runs the pooled-variance test instead.

# ************************************************************************
# One-tailed t-test:   Ha: mu_edge > mu_int;  Ho: mu_edge <= mu_int

> t.dbh <- (mean(dbh.edge) - mean(dbh.int)) / se.dbh  # d.f. = 10 + 10 - 2 = 18
> # determine p-value, 1-tailed (upper tail only, matching Ha: mu_edge > mu_int)
> 1-pt(t.dbh, df=18)
[1] 0.1286694


# 2-sample, one-tailed t-test function in R:
> t.test(dbh.edge, dbh.int, alt="greater")

        Welch Two Sample t-test

data:  dbh.edge and dbh.int
t = 1.1698, df = 10.046, p-value = 0.1345
alternative hypothesis: true difference in means is greater than 0
95 percent confidence interval:
 -10.16651       Inf
sample estimates:
mean of x mean of y 
    99.24     80.71 

# ******************************************************************************
#
# Plot maple seed "dispersal" data from a prior year
# NOTE: for ESCI 340 winter 2016 assignment 2, must use data listed on that assignment.
#
# Maple seed dispersal data (64 distances, in cm):
dist.cm <- c(335, 323, 400, 314, 48, 461, 132, 125, 146, 202,
             160, 156, 177, 118, 151, 170, 268, 233, 202, 220,
             238, 75, 76, 66, 122, 71, 112, 99, 111, 110,
             100, 55, 116, 160, 90, 13, 161, 168, 141, 129,
             160, 178, 150, 272, 324, 156, 287, 235, 212, 155,
             114, 187, 380, 250, 352, 279, 290, 82, 117, 200,
             239, 290, 253, 208)

par(mfrow = c(1,1))  # single plot panel
hist(dist.cm, xlim=c(0,1000), xlab="Distance (cm)")  # Plot histogram

# Superimpose normal curve on histogram, fit to the data
x.cm <- seq(0, 1000, by=10)  # Create sequence of distance values

# Use dnorm function to plot normal probability densities
# syntax: dnorm(x-values, mean, standard deviation)
# multiply dnorm densities by sample size, n=64, and histogram interval width = 50cm
# so that the curve is on the same scale as the histogram counts
# Note: sample size in winter 2016, n=60
lines(x.cm, 64*50*dnorm(x.cm, 186.3, 93.9))

# alternatively, could compute the sample size, mean and sd from the data:
#   lines(x.cm, length(dist.cm)*50*dnorm(x.cm, mean=mean(dist.cm), sd=sd(dist.cm)))

# Superimpose Uniform distribution on histogram, over range [0, 600]
# Use dunif function to plot uniform distribution densities
# syntax: dunif(x-values, lower limit, upper limit)
# As above, multiply dunif densities by sample size, n=64, and histogram interval width = 50cm
# Plot as dashed line ("lty=2")
lines(x.cm, 64*50*dunif(x.cm, 0, 600), lty=2)