Last compiled: 2022-04-22
Try out different ways of doing random effects from these sources:
https://fromthebottomoftheheap.net/2021/02/02/random-effects-in-gams/
Pedersen, Eric J., David L. Miller, Gavin L. Simpson, and Noam Ross. “Hierarchical Generalized Additive Models in Ecology: An Introduction with Mgcv.” PeerJ 7 (May 27, 2019). https://doi.org/10.7717/peerj.6876.
::with_dir(here(), tar_load(data_full)) withr
<- data_full %>%
df filter(sdlg_prev == FALSE, habitat == "1-ha") %>%
filter(!is.na(log_size_prev)) %>%
select(-L, -spei_history)
df
## # A tibble: 8,729 × 23
## habitat ranch plot bdffp_reserve_no ha_id_number year year_fct shts ht
## <fct> <fct> <fct> <chr> <fct> <dbl> <fct> <dbl> <dbl>
## 1 1-ha DIM 2107 2107 1 1999 1999 1 22
## 2 1-ha DIM 2107 2107 1 2000 2000 2 24
## 3 1-ha DIM 2107 2107 1 2001 2001 2 28
## 4 1-ha DIM 2107 2107 1 2002 2002 2 23
## 5 1-ha DIM 2107 2107 1 2003 2003 2 23
## 6 1-ha DIM 2107 2107 1 2004 2004 2 23
## 7 1-ha DIM 2107 2107 1 2005 2005 3 27
## 8 1-ha DIM 2107 2107 1 2006 2006 3 23
## 9 1-ha DIM 2107 2107 1 2007 2007 1 17
## 10 1-ha DIM 2107 2107 1 2008 2008 1 16
## # … with 8,719 more rows, and 14 more variables: log_size <dbl>,
## # log_size_prev <dbl>, infl <dbl>, flwr <lgl>, sdlg <lgl>, sdlg_prev <lgl>,
## # surv <lgl>, code <chr>, notes <chr>, date <date>, month <dbl>,
## # precip <dbl>, eto <dbl>, spei <dbl>
With no random effect
tic()
<- bam(
m0 ~ s(log_size_prev, bs = "cr", k = 20),
surv family = binomial,
data = df,
method = "fREML"
)toc()
## 0.454 sec elapsed
draw(m0)
<- df %>% mutate(fitted0 = predict(m0, type = "response")) df
With random intercepts for each year
tic()
<- bam(
m1 ~ s(log_size_prev, bs = "cr", k = 20) +
surv s(year_fct, bs = "re"),
family = binomial,
data = df,
method = "fREML"
)toc()
## 0.402 sec elapsed
draw(m1)
<- df %>% mutate(fitted1 = predict(m1, type = "response")) df
With random intercept and “slope” for each year. Not clear to me what slope means exactly in this context.
tic()
<- bam(
m2 ~ s(log_size_prev, bs = "cr", k = 20) +
surv s(year_fct, bs = "re") +
s(year_fct, log_size_prev, bs = "re"),
family = binomial,
data = df,
method = "fREML"
)toc()
## 0.796 sec elapsed
draw(m2)
<- df %>% mutate(fitted2 = predict(m2, type = "response")) df
With a global smoother and a different shapes for each year (but not different wiggliness).
tic()
<- bam(
m3 ~ s(log_size_prev, bs = "cr", k = 20, m = 2) +
surv s(log_size_prev, year_fct, bs = "fs", m = 2),
family = binomial,
data = df,
method = "fREML"
)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated 1-
## d smooths of same variable.
toc()
## 4.983 sec elapsed
draw(m3)
<- df %>% mutate(fitted3 = predict(m3, type = "response")) df
AICtab(m0, m1, m2, m3)
## dAIC df
## m3 0.0 25.7
## m2 6.8 14.3
## m1 17.9 12.6
## m0 36.5 4.7
The last one fits the data best, but also takes the longest to fit. m2 is not far off with a dAIC of 6.8.
This is what the fitted values look like
<- ggplot(df, aes(x = log_size_prev)) +
p0 geom_line(aes(y = fitted0)) +
labs(title = "no random effect")
<- ggplot(df, aes(x = log_size_prev)) +
p1 geom_line(aes(y = fitted1, color = year_fct)) +
labs(title = "random intercept")
<- ggplot(df, aes(x = log_size_prev)) +
p2 geom_line(aes(y = fitted2, color = year_fct)) +
labs(title = "random intercept and slope")
<- ggplot(df, aes(x = log_size_prev)) +
p3 geom_line(aes(y = fitted3, color = year_fct)) +
labs(title = "random shape")
library(patchwork)
+ p1 + p2 + p3 &
p0 labs(y = ".fitted") &
theme(legend.position = "none") &
plot_annotation(title = "survival probability") &
ylim(0,1)
With newdata to better reflect what the IPM is doing under the hood:
<- expand.grid(
newdata log_size_prev = seq(min(df$log_size_prev), max(df$log_size_prev), length.out = 50),
year_fct = unique(df$year_fct)
)
<- list(m0 = m0, m1 = m1, m2 = m2, m3 = m3) %>%
fitted_newdfmap_dfc(~predict(.x, newdata = newdata, type = "response")) %>%
bind_cols(newdata)
<- ggplot(fitted_newdf, aes(x = log_size_prev)) +
p0 geom_line(aes(y = m0)) +
labs(title = "no random effect")
<- ggplot(fitted_newdf, aes(x = log_size_prev)) +
p1 geom_line(aes(y = m1, color = year_fct)) +
labs(title = "random intercept")
<- ggplot(fitted_newdf, aes(x = log_size_prev)) +
p2 geom_line(aes(y = m2, color = year_fct)) +
labs(title = "random intercept and slope")
<- ggplot(fitted_newdf, aes(x = log_size_prev)) +
p3 geom_line(aes(y = m3, color = year_fct)) +
labs(title = "random shape")
+ p1 + p2 + p3 &
p0 labs(y = ".fitted") &
theme(legend.position = "none") &
plot_annotation(title = "survival probability") &
ylim(0,1)