Setup
In addition to the landmaRk package, we will also use
tidyverse.
set.seed(123)
library(landmaRk)
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.2.1 ✔ readr 2.2.0
#> ✔ forcats 1.0.1 ✔ stringr 1.6.0
#> ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
#> ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
#> ✔ purrr 1.2.1
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(survival)
library(prodlim)Example: aids data
As in the first vignette, we use the epileptic dataset to perform landmarking analysis of time-to-event data with time-varying covariates. Here is the structure of the dataset.
library(JMbayes2)
#> Loading required package: nlme
#>
#> Attaching package: 'nlme'
#> The following object is masked from 'package:dplyr':
#>
#> collapse
#> Loading required package: GLMMadaptive
#> Loading required package: splines
#>
#> Attaching package: 'JMbayes2'
#> The following object is masked from 'package:GLMMadaptive':
#>
#> mixed_model
#> The following object is masked from 'package:nlme':
#>
#> lme
#> The following object is masked from 'package:survival':
#>
#> coxph
data("aids")
aids$patient <- as.numeric(aids$patient)
str(aids)
#> 'data.frame': 1405 obs. of 12 variables:
#> $ patient: num 1 1 1 2 2 2 2 3 3 3 ...
#> $ Time : num 17 17 17 19 19 ...
#> $ death : int 0 0 0 0 0 0 0 1 1 1 ...
#> $ CD4 : num 10.68 8.43 9.43 6.32 8.12 ...
#> $ obstime: int 0 6 12 0 6 12 18 0 2 6 ...
#> $ drug : Factor w/ 2 levels "ddC","ddI": 1 1 1 2 2 2 2 2 2 2 ...
#> $ gender : Factor w/ 2 levels "female","male": 2 2 2 2 2 2 2 1 1 1 ...
#> $ prevOI : Factor w/ 2 levels "noAIDS","AIDS": 2 2 2 1 1 1 1 2 2 2 ...
#> $ AZT : Factor w/ 2 levels "intolerance",..: 1 1 1 1 1 1 1 1 1 1 ...
#> $ start : int 0 6 12 0 6 12 18 0 2 6 ...
#> $ stop : num 6 12 17 6 12 ...
#> $ event : num 0 0 0 0 0 0 0 0 0 1 ...Initialising the landmarking analysis for cross-validation
As in the introductory vignette, the dataset epileptic
comes in wide format. We split it into two dataframes, one for static
covariates and one for dynamic covariates.
# DF with Static covariates
aids_dfs <- split_wide_df(
aids,
ids = "patient",
times = "obstime",
static = c("Time", "death", "drug", "gender", "prevOI"),
dynamic = c("CD4"),
measurement_name = "value"
)
static <- aids_dfs$df_static
head(static)
#> patient Time death drug gender prevOI
#> 1 1 16.97 0 ddC male AIDS
#> 4 2 19.00 0 ddI male noAIDS
#> 8 3 18.53 1 ddI female AIDS
#> 11 4 12.70 0 ddC male AIDS
#> 15 5 15.13 0 ddI male AIDS
#> 19 6 1.90 1 ddC female AIDS
# DF with Dynamic covariates
dynamic <- aids_dfs$df_dynamic
head(dynamic[["CD4"]])
#> patient obstime value
#> 1 1 0 10.677078
#> 2 1 6 8.426150
#> 3 1 12 9.433981
#> 4 2 0 6.324555
#> 5 2 6 8.124038
#> 6 2 12 4.582576As in the introductory vignette, we create an object of class
LandmarkAnalysis, using the helper function of the same
name. We now use the optional argument K to specify the
number of cross-validations folds. In this example, we request five
cross validation folds.
We then calculate the risk sets using
compute_risk_sets().
landmarking_object <- LandmarkAnalysis(
data_static = static,
data_dynamic = dynamic,
event_indicator = "death",
ids = "patient",
event_time = "Time",
times = "obstime",
measurements = "value",
K = 5
)
landmarking_object <- landmarking_object |>
compute_risk_sets(landmarks = c(6, 8))Performance evaluation in hold-out dataset
Now that we have split the dataset into K=5 parts for
cross-validation, we can use one of the five parts as test set and the
remaining four parts as the training set. To do so, use the argument
validation_fold to indicate the that you want to use as
test set when calling fit_longitudinal(),
predict_longitudinal(), fit_survival() and
predict_survival().
landmarking_object <- landmarking_object |>
fit_longitudinal(
landmarks = c(6, 8),
method = "lme4",
formula = value ~ prevOI + obstime + (obstime | patient),
dynamic_covariates = c("CD4"),
validation_fold = 5
) |>
predict_longitudinal(
landmarks = c(6, 8),
method = "lme4",
dynamic_covariates = c("CD4"),
validation_fold = 5,
) |>
fit_survival(
formula = Surv(event_time, event_status) ~ drug,
landmarks = c(6, 8),
horizons = 12 + c(6, 8),
method = "coxph",
dynamic_covariates = c("CD4"),
validation_fold = 5
) |>
predict_survival(
landmarks = c(6, 8),
horizons = 12 + c(6, 8),
method = "coxph",
type = "lp",
validation_fold = 5
)
#> Warning: the 'findbars' function has moved to the reformulas package. Please update your imports, or ask an upstream package maintainer to do so.
#> This warning is displayed once per session.We can also use summary() to print parameter estimates.
Note that this time the sample size is smaller, because we have held out
part of the original dataset for validation.
summary(landmarking_object,
type = "longitudinal",
landmark = 6,
dynamic_covariate = "CD4")
#> Linear mixed model fit by REML ['lmerMod']
#> Formula: value ~ prevOI + obstime + (obstime | patient)
#> Data: dataframe
#> REML criterion at convergence: 4263.201
#> Random effects:
#> Groups Name Std.Dev. Corr
#> patient (Intercept) 4.1584
#> obstime 0.2392 -0.09
#> Residual 1.6223
#> Number of obs: 853, groups: patient, 321
#> Fixed Effects:
#> (Intercept) prevOIAIDS obstime
#> 10.3201 -4.3287 -0.1757
summary(landmarking_object, type = "survival", landmark = 6, horizon = 18)
#> Call:
#> survival::coxph(formula = formula, data = x@survival_datasets[[paste0(landmarks,
#> "-", horizons)]], model = TRUE, x = TRUE)
#>
#> coef exp(coef) se(coef) z p
#> drugddI 0.2081 1.2313 0.2007 1.037 0.3
#>
#> Likelihood ratio test=1.08 on 1 df, p=0.299
#> n= 321, number of events= 100Here are the in-sample performance metrics:
performance_metrics(
landmarking_object,
landmarks = c(6, 8),
horizons = c(18, 20),
auc_t = TRUE, c_index = FALSE,
h_times = c(3, 6, 12)
)
#> landmark horizon Brier(9) Brier(12) Brier(18) AUC(3) AUC(6)
#> 6-18 6 18 0.07967156 0.1630371 0.2313983 0.5194303 0.5381293
#> 8-20 8 20 0.10531988 0.1699368 0.2449021 0.5118598 0.5353575
#> AUC(12)
#> 6-18 0.4853891
#> 8-20 0.4644064Out-of-sample performance metrics can be obtained by specifying
train = FALSE:
performance_metrics(
landmarking_object,
landmarks = c(6, 8),
horizons = c(18, 20),
auc_t = TRUE, c_index = FALSE,
h_times = c(3, 6, 12),
train = FALSE
)
#> landmark horizon Brier(9) Brier(12) Brier(18) AUC(3) AUC(6)
#> 6-18 6 18 0.07655582 0.1744548 0.2385842 0.6400376 0.6126645
#> 8-20 8 20 0.08189975 0.1732424 0.2365391 0.6458753 0.6614730
#> AUC(12)
#> 6-18 0.5646015
#> 8-20 0.4347708Cross-validation
Now, we can embed the above pipeline in a for loop to perform cross-validation:
landmarking_object <- LandmarkAnalysis(
data_static = static,
data_dynamic = dynamic,
event_indicator = "death",
ids = "patient",
event_time = "Time",
times = "obstime",
measurements = "value",
K = 5
)
landmarking_object <- landmarking_object |>
compute_risk_sets(landmarks = c(6, 8))
metrics <- list()
for (k in 1:5) {
metrics[[k]] <- landmarking_object |>
fit_longitudinal(
landmarks = c(6, 8),
method = "lme4",
formula = value ~ prevOI + obstime + (obstime | patient),
dynamic_covariates = c("CD4"),
validation_fold = k
) |>
predict_longitudinal(
landmarks = c(6, 8),
method = "lme4",
dynamic_covariates = c("CD4"),
validation_fold = k,
allow.new.levels = TRUE
) |>
fit_survival(
formula = Surv(event_time, event_status) ~ drug,
landmarks = c(6, 8),
horizons = 12 + c(6, 8),
method = "coxph",
dynamic_covariates = c("CD4"),
validation_fold = k
) |>
predict_survival(
landmarks = c(6, 8),
horizons = 12 + c(6, 8),
method = "coxph",
type = "lp",
validation_fold = k
) |>
performance_metrics(
landmarks = c(6, 8),
horizons = c(18, 20),
auc_t = TRUE, brier = TRUE, c_index = FALSE,
h_times = c(3, 6, 12)
)
}
metrics
#> [[1]]
#> landmark horizon Brier(9) Brier(12) Brier(18) AUC(3) AUC(6)
#> 6-18 6 18 0.07430174 0.1628066 0.2331222 0.5165677 0.5332049
#> 8-20 8 20 0.10693845 0.1686616 0.2472218 0.5129526 0.5464116
#> AUC(12)
#> 6-18 0.4909934
#> 8-20 0.4711122
#>
#> [[2]]
#> landmark horizon Brier(9) Brier(12) Brier(18) AUC(3) AUC(6)
#> 6-18 6 18 0.07878887 0.1649606 0.2357098 0.5245006 0.5430452
#> 8-20 8 20 0.10599282 0.1717609 0.2397350 0.5231481 0.5536476
#> AUC(12)
#> 6-18 0.5008851
#> 8-20 0.4908798
#>
#> [[3]]
#> landmark horizon Brier(9) Brier(12) Brier(18) AUC(3) AUC(6)
#> 6-18 6 18 0.07071204 0.1598846 0.2288635 0.5448391 0.5714684
#> 8-20 8 20 0.09567283 0.1690807 0.2444550 0.5446251 0.5772778
#> AUC(12)
#> 6-18 0.5021795
#> 8-20 0.4210204
#>
#> [[4]]
#> landmark horizon Brier(9) Brier(12) Brier(18) AUC(3) AUC(6)
#> 6-18 6 18 0.08955414 0.1686272 0.2363873 0.5502151 0.5302593
#> 8-20 8 20 0.09355146 0.1656261 0.2322180 0.5037594 0.5340163
#> AUC(12)
#> 6-18 0.4867656
#> 8-20 0.4807579
#>
#> [[5]]
#> landmark horizon Brier(9) Brier(12) Brier(18) AUC(3) AUC(6)
#> 6-18 6 18 0.08130580 0.1681432 0.2359809 0.5782629 0.5907356
#> 8-20 8 20 0.09911599 0.1739465 0.2599817 0.5934627 0.5997129
#> AUC(12)
#> 6-18 0.5300017
#> 8-20 0.4345596