Skip to contents
set.seed(123)
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#>  dplyr     1.1.4      readr     2.1.5
#>  forcats   1.0.0      stringr   1.5.1
#>  ggplot2   3.5.2      tibble    3.3.0
#>  lubridate 1.9.4      tidyr     1.3.1
#>  purrr     1.0.4     
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#>  dplyr::filter() masks stats::filter()
#>  dplyr::lag()    masks stats::lag()
#>  Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(landmaRk)

Setup

In this vignette, we use the dataset epileptic to perform landmarking analysis of time-to-event data with time-varying covariates. Here is the structure of the dataset.

data("epileptic")
str(epileptic)
#> 'data.frame':    2797 obs. of  9 variables:
#>  $ id         : int  1 1 1 1 1 1 1 1 1 1 ...
#>  $ time       : int  86 119 268 451 535 770 1515 1829 2022 2194 ...
#>  $ with.time  : int  2400 2400 2400 2400 2400 2400 2400 2400 2400 2400 ...
#>  $ with.status: int  0 0 0 0 0 0 0 0 0 0 ...
#>  $ dose       : num  2 2 2 2.67 2.67 2.67 2.67 2.67 3.33 2.67 ...
#>  $ treat      : Factor w/ 2 levels "CBZ","LTG": 1 1 1 1 1 1 1 1 1 1 ...
#>  $ age        : num  75.7 75.7 75.7 75.7 75.7 ...
#>  $ gender     : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
#>  $ learn.dis  : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...

The dataset contains the following variables:

  • id: a unique patient identifier

  • time: time when time-varying covariate “dose” was recorded

  • with.time: time when the first of event or censoring happened

  • with.status: indicates whether event (1) or censoring (0) occurred

  • dose: a time-varying covariate

  • treat, age, gender, learn.dis: static (baseline) covariates

The first step is to split the dataset into two, one containing static covariates, event time and indicator of event/censoring, and another one containing dynamic covariates. To that end, we use the function split_wide_df. That function returns a named list with the following elements: - Under the name df_static, a dataframe containing static covariates, event time and indicator of event/censoring. - Under the name df_dynamic, a named list of dataframes, associating dynamic covariates to dataframes in long format containing longitudinal measurement of the relevant dynamic covariate.

# DF with Static covariates
epileptic_dfs <- split_wide_df(
  epileptic,
  ids = "id",
  times = "time",
  static = c("with.time", "with.status", "treat", "age", "gender", "learn.dis"),
  dynamic = c("dose"),
  measurement_name = "value"
)
static <- epileptic_dfs$df_static
head(static)
#>    id with.time with.status treat   age gender learn.dis
#> 1   1      2400           0   CBZ 75.67      M        No
#> 12  2      2324           0   LTG 32.96      M        No
#> 25  3       802           0   LTG 29.31      M        No
#> 29  4      2364           0   CBZ 44.59      M        No
#> 42  5       821           1   LTG 40.61      F        No
#> 45  6      2237           0   LTG 28.06      M       Yes
# DF with Dynamic covariates
dynamic <- epileptic_dfs$df_dynamic
head(dynamic[["dose"]])
#>   id time value
#> 1  1   86  2.00
#> 2  1  119  2.00
#> 3  1  268  2.00
#> 4  1  451  2.67
#> 5  1  535  2.67
#> 6  1  770  2.67

Create Landmarking object

Next step is to create an object of class Landmarking, using the helper function of the same name.

landmarking_object <- Landmarking(
  data_static = static,
  data_dynamic = dynamic,
  event_indicator = "with.status",
  ids = "id",
  event_time = "with.time",
  times = "time",
  measurements = "value"
)

Arguments to the helper function are the following:

  • data_static and data_dynamic: the two datasets that were just created.

  • event_indicator: name of the column that indicates the censoring indicator in the static dataset.

  • dynamic_covariates: array column names in the dynamic dataset indicating time-varying covariates.

  • ids: name of the column that identifies patients in both datasets.

  • event_time: name of the column that identifies time of event/censoring in the static dataset.

Baseline survival analysis (without time-varying covariates)

First, we perform a survival without time-varying covariates. We can use this as a baseline to evaluate the performance of a subsequent landmark analysis with such covariates. First step is to establish the landmark times, and to work out the risk sets at each of those landmark times.

landmarking_object <- landmarking_object |>
  compute_risk_sets(landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25))

landmarking_object
#> Summary of Landmarking Object:
#>   Landmarks: 365.25 730.5 1095.75 1461 1826.25 
#>   Number of observations: 605 
#>   Event indicator: with.status 
#>   Event time: with.time 
#>   Risk sets: 
#>     Landmark 365.25: 430 subjects
#>     Landmark 730.5: 270 subjects
#>     Landmark 1095.75: 168 subjects
#>     Landmark 1461: 111 subjects
#>     Landmark 1826.25: 47 subjects

Now we call the function fit_survival to fit the survival model.

landmarking_object <- landmarking_object |>
  fit_survival(
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    formula = Surv(event_time, event_status) ~ treat + age + gender + learn.dis,
    windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
    method = "coxph",
    dynamic_covariates = c()
  ) |> predict_survival(
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
    method = "coxph",
    type = "survival"
  )
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge

performance_metrics(
  landmarking_object,
  landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
  windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25)
)
#>                landmark window    cindex     brier
#> 365.25-365.25    365.25 365.25 0.1298831 0.6611129
#> 365.25-730.5     730.50 365.25 0.1209897 0.5571556
#> 730.5-365.25    1095.75 365.25 0.3015631 0.7660603
#> 730.5-730.5     1461.00 365.25 0.1944797 0.6626675
#> 1095.75-365.25  1826.25 365.25 0.2382749 0.7905402
#> 1095.75-730.5    365.25 730.50 0.1903475 0.6748802
#> 1461-365.25      730.50 730.50 0.3918367 0.7898793
#> 1461-730.5      1095.75 730.50 0.3975000 0.7563479
#> 1826.25-365.25  1461.00 730.50 0.9384615 0.9321694
#> 1826.25-730.5   1826.25 730.50 0.9384615 0.9321694

Landmarking analysis with lme4 + coxph

Now we use the package lme4 to fit a linear mixed model of the time-varying covariate, dose. This first step is followed by fitting a Cox PH sub-model using the longitudinal predictions as covariates.

landmarking_object <- Landmarking(
  data_static = static,
  data_dynamic = dynamic,
  event_indicator = "with.status",
  ids = "id",
  event_time = "with.time",
  times = "time",
  measurements = "value"
)

landmarking_object <- landmarking_object |>
  compute_risk_sets(
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25)
  )
landmarking_object <- landmarking_object |>
  fit_longitudinal(
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    method = "lme4",
    formula = value ~ treat + age + gender + learn.dis + (time | id),
    dynamic_covariates = c("dose")
  ) |>
  predict_longitudinal(
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    method = "lme4",
    allow.new.levels = TRUE,
    dynamic_covariates = c("dose")
  ) |>
  fit_survival(
    formula = Surv(event_time, event_status) ~
      treat + age + gender + learn.dis + dose,
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
    method = "coxph",
    dynamic_covariates = c("dose")
  ) |>
  predict_survival(
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
    method = "coxph", 
    type = "survival"
  )
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge

performance_metrics(
  landmarking_object,
  landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
  windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25)
)
#>                landmark window    cindex     brier
#> 365.25-365.25    365.25 365.25 0.2241851 0.6672443
#> 365.25-730.5     730.50 365.25 0.2078055 0.5685027
#> 730.5-365.25    1095.75 365.25 0.4266108 0.7758282
#> 730.5-730.5     1461.00 365.25 0.3200958 0.6761504
#> 1095.75-365.25  1826.25 365.25 0.4021563 0.7967850
#> 1095.75-730.5    365.25 730.50 0.2915058 0.6831398
#> 1461-365.25      730.50 730.50 0.3959184 0.7907983
#> 1461-730.5      1095.75 730.50 0.4112500 0.7590662
#> 1826.25-365.25  1461.00 730.50 0.9384615 0.9445921
#> 1826.25-730.5   1826.25 730.50 0.9384615 0.9445921

Landmarking analysis with lcmm + coxph

Now we use the lcmm package to fit a latent class mixed model of the time-varying covariate, dose. This first step is followed by fitting a Cox PH sub-model using the longitudinal predictions as covariates.

landmarking_object <- Landmarking(
  data_static = static,
  data_dynamic = dynamic,
  event_indicator = "with.status",
  ids = "id",
  event_time = "with.time",
  times = "time",
  measurements = "value"
)
landmarking_object <- landmarking_object |>
  compute_risk_sets(seq(from = 365.25, to = 5 * 365.25, by = 365.25)) |>
  fit_longitudinal(
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    method = "lcmm",
    formula = value ~ treat + age + gender + learn.dis,
    mixture = ~ treat + age + gender + learn.dis,
    subject = "id",
    ng = 2,
    dynamic_covariates = c("dose")
  ) |>
  predict_longitudinal(
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    method = "lcmm",
    subject = "id",
    avg = FALSE,
    dynamic_covariates = c("dose")
  ) |>
  fit_survival(
    formula = Surv(event_time, event_status) ~
      treat + age + gender + learn.dis + dose,
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
    method = "coxph",
    dynamic_covariates = c("dose")
  ) |> 
  predict_survival(
    landmarks = seq(from = 365.25, to = 5 * 365.25, by = 365.25),
    windows = seq(from = 1 * 365.25, to = 2 * 365.25, by = 365.25),
    method = "coxph", 
    type = "survival"
  )
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge
#> Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
#> Ran out of iterations and did not converge