Last compiled: 2022-04-22
Try out different ways of doing random effects from these sources:
https://fromthebottomoftheheap.net/2021/02/02/random-effects-in-gams/
Pedersen, Eric J., David L. Miller, Gavin L. Simpson, and Noam Ross. “Hierarchical Generalized Additive Models in Ecology: An Introduction with Mgcv.” PeerJ 7 (May 27, 2019). https://doi.org/10.7717/peerj.6876.
withr::with_dir(here(), tar_load(data_full))df <- data_full %>%
filter(sdlg_prev == FALSE, habitat == "1-ha") %>%
filter(!is.na(log_size_prev)) %>%
select(-L, -spei_history)
df## # A tibble: 8,729 × 23
## habitat ranch plot bdffp_reserve_no ha_id_number year year_fct shts ht
## <fct> <fct> <fct> <chr> <fct> <dbl> <fct> <dbl> <dbl>
## 1 1-ha DIM 2107 2107 1 1999 1999 1 22
## 2 1-ha DIM 2107 2107 1 2000 2000 2 24
## 3 1-ha DIM 2107 2107 1 2001 2001 2 28
## 4 1-ha DIM 2107 2107 1 2002 2002 2 23
## 5 1-ha DIM 2107 2107 1 2003 2003 2 23
## 6 1-ha DIM 2107 2107 1 2004 2004 2 23
## 7 1-ha DIM 2107 2107 1 2005 2005 3 27
## 8 1-ha DIM 2107 2107 1 2006 2006 3 23
## 9 1-ha DIM 2107 2107 1 2007 2007 1 17
## 10 1-ha DIM 2107 2107 1 2008 2008 1 16
## # … with 8,719 more rows, and 14 more variables: log_size <dbl>,
## # log_size_prev <dbl>, infl <dbl>, flwr <lgl>, sdlg <lgl>, sdlg_prev <lgl>,
## # surv <lgl>, code <chr>, notes <chr>, date <date>, month <dbl>,
## # precip <dbl>, eto <dbl>, spei <dbl>
With no random effect
tic()
m0 <- bam(
surv ~ s(log_size_prev, bs = "cr", k = 20),
family = binomial,
data = df,
method = "fREML"
)
toc()## 0.454 sec elapsed
draw(m0)df <- df %>% mutate(fitted0 = predict(m0, type = "response"))With random intercepts for each year
tic()
m1 <- bam(
surv ~ s(log_size_prev, bs = "cr", k = 20) +
s(year_fct, bs = "re"),
family = binomial,
data = df,
method = "fREML"
)
toc()## 0.402 sec elapsed
draw(m1)df <- df %>% mutate(fitted1 = predict(m1, type = "response"))With random intercept and “slope” for each year. Not clear to me what slope means exactly in this context.
tic()
m2 <- bam(
surv ~ s(log_size_prev, bs = "cr", k = 20) +
s(year_fct, bs = "re") +
s(year_fct, log_size_prev, bs = "re"),
family = binomial,
data = df,
method = "fREML"
)
toc()## 0.796 sec elapsed
draw(m2)df <- df %>% mutate(fitted2 = predict(m2, type = "response"))With a global smoother and a different shapes for each year (but not different wiggliness).
tic()
m3 <- bam(
surv ~ s(log_size_prev, bs = "cr", k = 20, m = 2) +
s(log_size_prev, year_fct, bs = "fs", m = 2),
family = binomial,
data = df,
method = "fREML"
)## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated 1-
## d smooths of same variable.
toc()## 4.983 sec elapsed
draw(m3)df <- df %>% mutate(fitted3 = predict(m3, type = "response"))AICtab(m0, m1, m2, m3)## dAIC df
## m3 0.0 25.7
## m2 6.8 14.3
## m1 17.9 12.6
## m0 36.5 4.7
The last one fits the data best, but also takes the longest to fit. m2 is not far off with a dAIC of 6.8.
This is what the fitted values look like
p0 <- ggplot(df, aes(x = log_size_prev)) +
geom_line(aes(y = fitted0)) +
labs(title = "no random effect")
p1 <- ggplot(df, aes(x = log_size_prev)) +
geom_line(aes(y = fitted1, color = year_fct)) +
labs(title = "random intercept")
p2 <- ggplot(df, aes(x = log_size_prev)) +
geom_line(aes(y = fitted2, color = year_fct)) +
labs(title = "random intercept and slope")
p3 <- ggplot(df, aes(x = log_size_prev)) +
geom_line(aes(y = fitted3, color = year_fct)) +
labs(title = "random shape")
library(patchwork)
p0 + p1 + p2 + p3 &
labs(y = ".fitted") &
theme(legend.position = "none") &
plot_annotation(title = "survival probability") &
ylim(0,1)With newdata to better reflect what the IPM is doing under the hood:
newdata <- expand.grid(
log_size_prev = seq(min(df$log_size_prev), max(df$log_size_prev), length.out = 50),
year_fct = unique(df$year_fct)
)
fitted_newdf<- list(m0 = m0, m1 = m1, m2 = m2, m3 = m3) %>%
map_dfc(~predict(.x, newdata = newdata, type = "response")) %>%
bind_cols(newdata)
p0 <- ggplot(fitted_newdf, aes(x = log_size_prev)) +
geom_line(aes(y = m0)) +
labs(title = "no random effect")
p1 <- ggplot(fitted_newdf, aes(x = log_size_prev)) +
geom_line(aes(y = m1, color = year_fct)) +
labs(title = "random intercept")
p2 <- ggplot(fitted_newdf, aes(x = log_size_prev)) +
geom_line(aes(y = m2, color = year_fct)) +
labs(title = "random intercept and slope")
p3 <- ggplot(fitted_newdf, aes(x = log_size_prev)) +
geom_line(aes(y = m3, color = year_fct)) +
labs(title = "random shape")p0 + p1 + p2 + p3 &
labs(y = ".fitted") &
theme(legend.position = "none") &
plot_annotation(title = "survival probability") &
ylim(0,1)