Last compiled: 2022-04-22

1 Purpose

Try out different ways of doing random effects from these sources:

https://fromthebottomoftheheap.net/2021/02/02/random-effects-in-gams/
Pedersen, Eric J., David L. Miller, Gavin L. Simpson, and Noam Ross. “Hierarchical Generalized Additive Models in Ecology: An Introduction with Mgcv.” PeerJ 7 (May 27, 2019). https://doi.org/10.7717/peerj.6876.

2 Load Data

withr::with_dir(here(), tar_load(data_full))

df <- data_full %>%
  filter(sdlg_prev == FALSE, habitat == "1-ha") %>%
  filter(!is.na(log_size_prev)) %>% 
  select(-L, -spei_history)
df

## # A tibble: 8,729 × 23
##    habitat ranch plot  bdffp_reserve_no ha_id_number  year year_fct  shts    ht
##    <fct>   <fct> <fct> <chr>            <fct>        <dbl> <fct>    <dbl> <dbl>
##  1 1-ha    DIM   2107  2107             1             1999 1999         1    22
##  2 1-ha    DIM   2107  2107             1             2000 2000         2    24
##  3 1-ha    DIM   2107  2107             1             2001 2001         2    28
##  4 1-ha    DIM   2107  2107             1             2002 2002         2    23
##  5 1-ha    DIM   2107  2107             1             2003 2003         2    23
##  6 1-ha    DIM   2107  2107             1             2004 2004         2    23
##  7 1-ha    DIM   2107  2107             1             2005 2005         3    27
##  8 1-ha    DIM   2107  2107             1             2006 2006         3    23
##  9 1-ha    DIM   2107  2107             1             2007 2007         1    17
## 10 1-ha    DIM   2107  2107             1             2008 2008         1    16
## # … with 8,719 more rows, and 14 more variables: log_size <dbl>,
## #   log_size_prev <dbl>, infl <dbl>, flwr <lgl>, sdlg <lgl>, sdlg_prev <lgl>,
## #   surv <lgl>, code <chr>, notes <chr>, date <date>, month <dbl>,
## #   precip <dbl>, eto <dbl>, spei <dbl>

3 Fit GAMs

With no random effect

tic()
m0 <- bam(
  surv ~ s(log_size_prev, bs = "cr", k = 20),
  family = binomial,
    data = df,
    method = "fREML"
)
toc()

## 0.454 sec elapsed

draw(m0)

df <- df %>% mutate(fitted0 = predict(m0, type = "response"))

With random intercepts for each year

tic()
m1 <- bam(
  surv ~ s(log_size_prev, bs = "cr", k = 20) +
    s(year_fct, bs = "re"),
  family = binomial,
    data = df,
    method = "fREML"
)
toc()

## 0.402 sec elapsed

draw(m1)

df <- df %>% mutate(fitted1 = predict(m1, type = "response"))

With random intercept and “slope” for each year. Not clear to me what slope means exactly in this context.

tic()
m2 <- bam(
  surv ~ s(log_size_prev, bs = "cr", k = 20) +
    s(year_fct, bs = "re") +
    s(year_fct, log_size_prev, bs = "re"),
  family = binomial,
    data = df,
    method = "fREML"
)
toc()

## 0.796 sec elapsed

draw(m2)

df <- df %>% mutate(fitted2 = predict(m2, type = "response"))

With a global smoother and a different shapes for each year (but not different wiggliness).

tic()
m3 <- bam(
  surv ~ s(log_size_prev, bs = "cr", k = 20, m = 2) +
    s(log_size_prev, year_fct, bs = "fs",  m = 2),
  family = binomial,
    data = df,
    method = "fREML"
)

## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated 1-
## d smooths of same variable.

toc()

## 4.983 sec elapsed

draw(m3)

df <- df %>% mutate(fitted3 = predict(m3, type = "response"))

AICtab(m0, m1, m2, m3)

##    dAIC df  
## m3  0.0 25.7
## m2  6.8 14.3
## m1 17.9 12.6
## m0 36.5 4.7

The last one fits the data best, but also takes the longest to fit. m2 is not far off with a dAIC of 6.8.

This is what the fitted values look like

p0 <- ggplot(df, aes(x = log_size_prev)) +
  geom_line(aes(y = fitted0)) +
  labs(title = "no random effect")
p1 <- ggplot(df, aes(x = log_size_prev)) +
  geom_line(aes(y = fitted1, color = year_fct)) +
  labs(title = "random intercept")
p2 <- ggplot(df, aes(x = log_size_prev)) +
  geom_line(aes(y = fitted2, color = year_fct)) +
  labs(title = "random intercept and slope")
p3 <- ggplot(df, aes(x = log_size_prev)) +
  geom_line(aes(y = fitted3, color = year_fct)) +
  labs(title = "random shape")

library(patchwork)
p0 + p1 + p2 + p3 &
  labs(y = ".fitted") &
  theme(legend.position = "none") &
  plot_annotation(title = "survival probability") &
  ylim(0,1)

With newdata to better reflect what the IPM is doing under the hood:

newdata <- expand.grid(
  log_size_prev = seq(min(df$log_size_prev), max(df$log_size_prev), length.out = 50),
  year_fct = unique(df$year_fct)
)

fitted_newdf<- list(m0 = m0, m1 = m1, m2 = m2, m3 = m3) %>% 
  map_dfc(~predict(.x, newdata = newdata, type = "response")) %>% 
  bind_cols(newdata)

p0 <- ggplot(fitted_newdf, aes(x = log_size_prev)) +
  geom_line(aes(y = m0)) +
  labs(title = "no random effect")
p1 <- ggplot(fitted_newdf, aes(x = log_size_prev)) +
  geom_line(aes(y = m1, color = year_fct)) +
  labs(title = "random intercept")
p2 <- ggplot(fitted_newdf, aes(x = log_size_prev)) +
  geom_line(aes(y = m2, color = year_fct)) +
  labs(title = "random intercept and slope")
p3 <- ggplot(fitted_newdf, aes(x = log_size_prev)) +
  geom_line(aes(y = m3, color = year_fct)) +
  labs(title = "random shape")

p0 + p1 + p2 + p3 &
  labs(y = ".fitted") &
  theme(legend.position = "none") &
  plot_annotation(title = "survival probability") & 
  ylim(0,1)

Modeling Environmental Stochasticity

Eric R. Scott

2022-04-21

1 Purpose

2 Load Data

3 Fit GAMs