Comparison of FC and CRP clustering

Published

November 27, 2024

Code
suppressPackageStartupMessages(library(libdr))
suppressPackageStartupMessages(library(ComplexHeatmap))
suppressPackageStartupMessages(library(tidyverse))
library(ggalluvial)
library(lcmm)
library(patchwork)
library(gameR)
library(pander)
library(libdr)
set.seed(123)
prefix <- "/Volumes/igmm/cvallejo-predicct/libdr/"

########################
### Load FC models ###
########################
# set the number of groups
G.fcal <- numeric()
models.fcal <- list()
G.cands <- seq(2, 10)
for (G.cand in G.cands) {
  file.name <- paste0(prefix, "cache/fcal/ncs/fcal-", G.cand, ".RDS")
  if (file.exists(file.name)) {
    G.fcal <- c(G.fcal, G.cand)
    models.fcal[[G.cand]] <- readRDS(file.name)
  }
}
rm(G.cand, G.cands)

dict <- readRDS(paste0(prefix, "processed/dict.RDS"))

####################################################################
### Load CRP moving average (no autocorrelation structure) models ###
####################################################################
G.crp <- numeric()
models.crp <- list()
G.cands <- seq(2, 10)
for (G.cand in G.cands) {
  file.name <- paste0(prefix, "cache/crp-ma/crp-", G.cand, ".RDS")
  if (file.exists(file.name)) {
    G.crp <- c(G.crp, G.cand)
    models.crp[[G.cand]] <- readRDS(file.name)
  }
}
rm(G.cand, G.cands)
Code
fc.remapping <- function(x) {
  plyr::mapvalues(x,
                  from = seq(1, 8),
                  to = c(7, 6, 4, 8, 1, 5, 2, 3))
}

crp.remapping <- function(x) {
  plyr::mapvalues(x,
                  from = seq(1, 8),
                  to = c(2, 3, 1, 4, 5, 7, 6, 8))
}


col.vec <- c(
  viridis::viridis(8),
  viridis::inferno(8)
)

fcal.pprob <- models.fcal[[8]]$pprob
crp.pprob <- models.crp[[8]]$pprob

fcal.ids <- fcal.pprob$ids
crp.ids <- crp.pprob$ids

# Only IDs in both FCAL and CRP models
ids.comb <- fcal.ids[fcal.ids %in% crp.ids]
ids.comb <- ids.comb[order(ids.comb)]



fc.classes <- data.frame(fcal.pprob[, c(1, 2)], type = "FC")
fc.classes$class <- fc.remapping(fc.classes$class)
fc.classes$class <- as.factor(paste0("FC", fc.classes$class))


crp.classes <- data.frame(crp.pprob[, c(1, 2)], type = "CRP")
crp.classes$class <- crp.remapping(crp.classes$class)
crp.classes$class <- as.factor(paste0("CRP", crp.classes$class))

fc.classes$overlap <- ifelse(fc.classes$ids %in% ids.comb, TRUE, FALSE)
crp.classes$overlap <- ifelse(crp.classes$ids %in% ids.comb, TRUE, FALSE)


p_overlap_fc <- fc.classes %>%
  mutate(overlap = factor(
    overlap, levels = c(TRUE, FALSE),
    labels = c("In the overlap cohort", "Not in the overlap cohort"))) %>%
  plotCat("overlap", class = "class")

p_overlap_crp <- crp.classes %>%
  mutate(overlap = factor(
    overlap, levels = c(TRUE, FALSE),
    labels = c("In the overlap cohort", "Not in the overlap cohort"))) %>%
  plotCat("overlap", class = "class")

p <- wrap_elements(p_overlap_fc) + 
  wrap_elements(p_overlap_crp) + 
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(size = 20, face = "bold"))
ggsave("plots/overlap.pdf", p, width = 14, height = 10)
ggsave("plots/overlap.png", p, width = 14, height = 10)
p

Code
aux <- table(crp.classes$class, crp.classes$overlap)

# Numbers for the text
aux[8,2] / (aux[8,1] + aux[8,2])
[1] 0.2732558
Code
sum(aux[1:7,2]) / sum(aux[1:7,1] + aux[1:7,2])
[1] 0.4567827
Code
classes <- rbind(fc.classes, crp.classes)

classes$class <- as.factor(classes$class)
classes$type <- as.factor(classes$type)
classes$type <- relevel(classes$type, "FC")


classes <- merge(classes,
                 dict[, c("ids", "diagnosis")],
                 by = "ids", all.x = TRUE,
                 all.y = FALSE)

p1 <- classes %>%
  subset(overlap == TRUE) %>%
  ggplot(aes(
  x = type,
  stratum = class,
  alluvium = ids,
  fill = class,
  label = class
)) +
  geom_flow() +
  geom_stratum(alpha = 1) +
  geom_text(stat = "stratum", size = 3) +
  theme_minimal() +
  theme(axis.line = element_line(colour = "gray")) +
  xlab("Biomarker") +
  scale_fill_manual(values = col.vec)

p2 <- classes %>%
  subset(overlap == TRUE) %>%
  filter(diagnosis == "Crohn's Disease") %>%
  ggplot(aes(
  x = type,
  stratum = class,
  alluvium = ids,
  fill = class,
  label = class
)) +
  geom_flow() +
  geom_stratum(alpha = 1) +
  geom_text(stat = "stratum", size = 3) +
  theme_minimal() +
  theme(axis.line = element_line(colour = "gray")) +
  xlab("Biomarker") +
  scale_fill_manual(values = col.vec)


p3 <- classes %>%
  subset(overlap == TRUE) %>%
  filter(diagnosis == "Ulcerative Colitis") %>%
  ggplot(aes(
  x = type,
  stratum = class,
  alluvium = ids,
  fill = class,
  label = class
)) +
  geom_flow() +
  geom_stratum(alpha = 1) +
  geom_text(stat = "stratum", size = 3) +
  theme_minimal() +
  theme(axis.line = element_line(colour = "gray")) +
  xlab("Biomarker") +
  scale_fill_manual(values = col.vec)


p <- p1 + (p2 / p3) +
  plot_annotation(tag_levels = "A") +
  plot_layout(guides = "collect") &
  theme(legend.position = "none",
        plot.tag = element_text(size = 20, face = "bold")) &
  guides(fill = guide_legend(nrow = 1))

p

Code
ggsave("paper/big-comp.png", p, width = 12, height = 11)
ggsave("paper/big-comp.pdf", p, width = 12, height = 11)

Calculate some numbers in the overlap

Code
tab1 <- classes.wide <- classes %>%
  subset(overlap == TRUE) %>%
  pivot_wider(names_from = type, values_from = class)

tab2 <- classes.wide %>%
  subset(FC == "FC1") %>%
  count(CRP) %>%
  mutate(percentage = (n/ sum(n) * 100)) 

tab3 <- classes.wide %>%
  subset(CRP %in% c("CRP1", "CRP2")) %>%
  count(FC) %>%
  mutate(percentage = (n/ sum(n) * 100)) 

tab4 <- classes.wide %>%
  subset(CRP %in% c("CRP8")) %>%
  count(FC) %>%
  mutate(percentage = (n/ sum(n) * 100)) 

tab5 <- classes.wide %>%
  subset(diagnosis == "Ulcerative Colitis") %>%
  subset(FC %in% c("FC7")) %>%
  count(CRP) %>%
  mutate(percentage = (n/ sum(n) * 100)) 

Out of subjects paired in FC, how many are also paired in CRP?

Code
paired <- 0
doublePaired <- 0

for (i in fc.df$ids) {
  for (j in fc.df$ids) {
    if (i != j) {
      if (subset(fc.df, ids == i)$FCcluster == subset(fc.df, ids == j)$FCcluster){
        paired <- paired + 1
        if (subset(crp.df, ids == i)$CRPcluster == subset(crp.df, ids == j)$CRPcluster) {
          doublePaired <- doublePaired + 1
        }
      }
    }
  }
}

doublePaired / paired
[1] 0.2593087

Out of subjects paired in CRP, how many are also paired in FC?

Code
paired <- 0
doublePaired <- 0

for (i in crp.df$ids) {
  for (j in crp.df$ids) {
    if (i != j) {
      if (subset(crp.df, ids == i)$CRPcluster == subset(crp.df, ids == j)$CRPcluster){
        paired <- paired + 1
        if (subset(fc.df, ids == i)$FCcluster == subset(fc.df, ids == j)$FCcluster) {
          doublePaired <- doublePaired + 1
        }
      }
    }
  }
}

doublePaired / paired
[1] 0.1585911

Reuse

Licensed by CC BY unless otherwise stated.

MRC Human Genetics Unit logoCentre for Genomic & Experimental Medicine logo


Session info

Code

R version 4.4.2 (2024-10-31)

Platform: aarch64-apple-darwin20

locale: en_GB.UTF-8||en_GB.UTF-8||en_GB.UTF-8||C||en_GB.UTF-8||en_GB.UTF-8

attached base packages: grid, splines, stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: pander(v.0.6.5), gameR(v.0.0.7), lcmm(v.2.1.0), ggalluvial(v.0.12.5), lubridate(v.1.9.3), forcats(v.1.0.0), stringr(v.1.5.1), purrr(v.1.0.2), readr(v.2.1.5), tidyr(v.1.3.1), tibble(v.3.2.1), ggplot2(v.3.5.1), tidyverse(v.2.0.0), ComplexHeatmap(v.2.22.0), libdr(v.1.0.0), dplyr(v.1.1.4), magrittr(v.2.0.3) and patchwork(v.1.3.0)

loaded via a namespace (and not attached): tidyselect(v.1.2.1), viridisLite(v.0.4.2), farver(v.2.1.2), viridis(v.0.6.5), fastmap(v.1.2.0), digest(v.0.6.37), timechange(v.0.3.0), lifecycle(v.1.0.4), cluster(v.2.1.6), survival(v.3.7-0), compiler(v.4.4.2), rlang(v.1.1.4), tools(v.4.4.2), utf8(v.1.2.4), yaml(v.2.3.10), knitr(v.1.49), labeling(v.0.4.3), htmlwidgets(v.1.6.4), plyr(v.1.8.9), RColorBrewer(v.1.1-3), withr(v.3.0.2), numDeriv(v.2016.8-1.1), BiocGenerics(v.0.52.0), stats4(v.4.4.2), fansi(v.1.0.6), colorspace(v.2.1-1), scales(v.1.3.0), iterators(v.1.0.14), cli(v.3.6.3), mvtnorm(v.1.3-2), rmarkdown(v.2.29), crayon(v.1.5.3), marqLevAlg(v.2.0.8), ragg(v.1.3.3), generics(v.0.1.3), tzdb(v.0.4.0), rjson(v.0.2.23), parallel(v.4.4.2), matrixStats(v.1.4.1), vctrs(v.0.6.5), Matrix(v.1.7-1), jsonlite(v.1.8.9), IRanges(v.2.40.0), hms(v.1.1.3), GetoptLong(v.1.0.5), S4Vectors(v.0.44.0), clue(v.0.3-66), systemfonts(v.1.1.0), foreach(v.1.5.2), glue(v.1.8.0), codetools(v.0.2-20), rngWELL(v.0.10-10), stringi(v.1.8.4), shape(v.1.4.6.1), gtable(v.0.3.6), randtoolbox(v.2.0.5), munsell(v.0.5.1), pillar(v.1.9.0), htmltools(v.0.5.8.1), circlize(v.0.4.16), R6(v.2.5.1), textshaping(v.0.4.0), doParallel(v.1.0.17), evaluate(v.1.0.1), lattice(v.0.22-6), png(v.0.1-8), Rcpp(v.1.0.13-1), gridExtra(v.2.3), nlme(v.3.1-166), xfun(v.0.49), pkgconfig(v.2.0.3) and GlobalOptions(v.0.1.2)