set.seed(123)
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.4 ✔ readr 2.1.5
#> ✔ forcats 1.0.0 ✔ stringr 1.5.1
#> ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
#> ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
#> ✔ purrr 1.0.4
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(landmaRk)
Setup
In this vignette, we use the dataset epileptic to perform landmarking analysis of time-to-event data with time-varying covariates. Here is the structure of the dataset.
data("epileptic")
str(epileptic)
#> 'data.frame': 2797 obs. of 9 variables:
#> $ id : int 1 1 1 1 1 1 1 1 1 1 ...
#> $ time : int 86 119 268 451 535 770 1515 1829 2022 2194 ...
#> $ with.time : int 2400 2400 2400 2400 2400 2400 2400 2400 2400 2400 ...
#> $ with.status: int 0 0 0 0 0 0 0 0 0 0 ...
#> $ dose : num 2 2 2 2.67 2.67 2.67 2.67 2.67 3.33 2.67 ...
#> $ treat : Factor w/ 2 levels "CBZ","LTG": 1 1 1 1 1 1 1 1 1 1 ...
#> $ age : num 75.7 75.7 75.7 75.7 75.7 ...
#> $ gender : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
#> $ learn.dis : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
The dataset contains the following variables:
id: a unique patient identifier
time: time when time-varying covariate “dose” was recorded
with.time: time when the first of event or censoring happened
with.status: indicates whether event (1) or censoring (0) occurred
dose: a time-varying covariate
treat, age, gender, learn.dis: static (baseline) covariates
The first step is to split the dataset into two, one containing static covariates, event time and indicator of event/censoring, and another one containing dynamic covariates. To that end, we use the function split_wide_df. That function returns a named list with the following elements: - Under the name df_static, a dataframe containing static covariates, event time and indicator of event/censoring. - Under the name df_dynamic, a named list of dataframes, associating dynamic covariates to dataframes in long format containing longitudinal measurement of the relevant dynamic covariate.
# DF with Static covariates
epileptic_dfs <- split_wide_df(
epileptic,
ids = "id",
times = "time",
static = c("with.time", "with.status", "treat", "age", "gender", "learn.dis"),
dynamic = c("dose"),
measurement_name = "value"
)
static <- epileptic_dfs$df_static
head(static)
#> id with.time with.status treat age gender learn.dis
#> 1 1 2400 0 CBZ 75.67 M No
#> 12 2 2324 0 LTG 32.96 M No
#> 25 3 802 0 LTG 29.31 M No
#> 29 4 2364 0 CBZ 44.59 M No
#> 42 5 821 1 LTG 40.61 F No
#> 45 6 2237 0 LTG 28.06 M Yes
# DF with Dynamic covariates
dynamic <- epileptic_dfs$df_dynamic
head(dynamic[["dose"]])
#> id time value
#> 1 1 86 2.00
#> 2 1 119 2.00
#> 3 1 268 2.00
#> 4 1 451 2.67
#> 5 1 535 2.67
#> 6 1 770 2.67
Create Landmarking object
Next step is to create an object of class Landmarking, using the helper function of the same name.
landmarking_object <- Landmarking(
data_static = static,
data_dynamic = dynamic,
event_indicator = "with.status",
ids = "id",
event_time = "with.time",
times = "time",
measurements = "value"
)
Arguments to the helper function are the following:
data_static and data_dynamic: the two datasets that were just created.
event_indicator: name of the column that indicates the censoring indicator in the static dataset.
dynamic_covariates: array column names in the dynamic dataset indicating time-varying covariates.
ids: name of the column that identifies patients in both datasets.
event_time: name of the column that identifies time of event/censoring in the static dataset.
Baseline survival analysis (without time-varying covariates)
First, we perform a survival without time-varying covariates. We can use this as a baseline to evaluate the performance of a subsequent landmark analysis with such covariates. First step is to establish the landmark times, and to work out the risk sets at each of those landmark times.
landmarking_object <- landmarking_object |>
compute_risk_sets(landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25))
landmarking_object
#> Summary of Landmarking Object:
#> Landmarks: 365.25 730.5 1095.75 1461 1826.25
#> Number of observations: 605
#> Event indicator: with.status
#> Event time: with.time
#> Risk sets:
#> Landmark 365.25: 430 subjects
#> Landmark 730.5: 270 subjects
#> Landmark 1095.75: 168 subjects
#> Landmark 1461: 111 subjects
#> Landmark 1826.25: 47 subjects
Now we call the function fit_survival to fit the survival model.
landmarking_object <- landmarking_object |>
fit_survival(
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
formula = Surv(event_time, event_status) ~ treat + age + gender + learn.dis,
windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
method = "coxph",
dynamic_covariates = c()
) |> predict_survival(
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
method = "coxph",
type = "survival"
)
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
performance_metrics(
landmarking_object,
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25)
)
#> landmark window cindex brier
#> 365.25-365.25 365.25 365.25 0.1298831 0.6611129
#> 365.25-730.5 730.50 365.25 0.1209897 0.5571556
#> 730.5-365.25 1095.75 365.25 0.3015631 0.7660603
#> 730.5-730.5 1461.00 365.25 0.1944797 0.6626675
#> 1095.75-365.25 1826.25 365.25 0.2382749 0.7905402
#> 1095.75-730.5 365.25 730.50 0.1903475 0.6748802
#> 1461-365.25 730.50 730.50 0.3918367 0.7898793
#> 1461-730.5 1095.75 730.50 0.3975000 0.7563479
#> 1826.25-365.25 1461.00 730.50 0.9384615 0.9321694
#> 1826.25-730.5 1826.25 730.50 0.9384615 0.9321694
Landmarking analysis with lme4 + coxph
Now we use the package lme4 to fit a linear mixed model of the time-varying covariate, dose. This first step is followed by fitting a Cox PH sub-model using the longitudinal predictions as covariates.
landmarking_object <- Landmarking(
data_static = static,
data_dynamic = dynamic,
event_indicator = "with.status",
ids = "id",
event_time = "with.time",
times = "time",
measurements = "value"
)
landmarking_object <- landmarking_object |>
compute_risk_sets(
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25)
)
landmarking_object <- landmarking_object |>
fit_longitudinal(
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
method = "lme4",
formula = value ~ treat + age + gender + learn.dis + (time | id),
dynamic_covariates = c("dose")
) |>
predict_longitudinal(
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
method = "lme4",
allow.new.levels = TRUE,
dynamic_covariates = c("dose")
) |>
fit_survival(
formula = Surv(event_time, event_status) ~
treat + age + gender + learn.dis + dose,
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
method = "coxph",
dynamic_covariates = c("dose")
) |>
predict_survival(
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
method = "coxph",
type = "survival"
)
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
performance_metrics(
landmarking_object,
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25)
)
#> landmark window cindex brier
#> 365.25-365.25 365.25 365.25 0.2241851 0.6672443
#> 365.25-730.5 730.50 365.25 0.2078055 0.5685027
#> 730.5-365.25 1095.75 365.25 0.4266108 0.7758282
#> 730.5-730.5 1461.00 365.25 0.3200958 0.6761504
#> 1095.75-365.25 1826.25 365.25 0.4021563 0.7967850
#> 1095.75-730.5 365.25 730.50 0.2915058 0.6831398
#> 1461-365.25 730.50 730.50 0.3959184 0.7907983
#> 1461-730.5 1095.75 730.50 0.4112500 0.7590662
#> 1826.25-365.25 1461.00 730.50 0.9384615 0.9445921
#> 1826.25-730.5 1826.25 730.50 0.9384615 0.9445921
Landmarking analysis with lcmm + coxph
Now we use the lcmm package to fit a latent class mixed model of the time-varying covariate, dose. This first step is followed by fitting a Cox PH sub-model using the longitudinal predictions as covariates.
landmarking_object <- Landmarking(
data_static = static,
data_dynamic = dynamic,
event_indicator = "with.status",
ids = "id",
event_time = "with.time",
times = "time",
measurements = "value"
)
landmarking_object <- landmarking_object |>
compute_risk_sets(seq(from = 365.25, to = 5 * 365.25, by = 365.25)) |>
fit_longitudinal(
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
method = "lcmm",
formula = value ~ treat + age + gender + learn.dis,
mixture = ~ treat + age + gender + learn.dis,
subject = "id",
ng = 2,
dynamic_covariates = c("dose")
) |>
predict_longitudinal(
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
method = "lcmm",
subject = "id",
avg = FALSE,
dynamic_covariates = c("dose")
) |>
fit_survival(
formula = Surv(event_time, event_status) ~
treat + age + gender + learn.dis + dose,
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
method = "coxph",
dynamic_covariates = c("dose")
) |>
predict_survival(
landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
method = "coxph",
type = "survival"
)
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge