Using the Yale dataset as validation cohort, compare cytokine profiles between deceased patients and those with good outcome.
Yale severity scores: 0 - Uninfected, 1 - hospitalization, 2 - low O2 supplement, 3 - Moderate O2 supplement, 4 - ICU, 5 - MV, 6 - Death LatestOutcome: 0 - still admitted, 1 - discharged, 2 - deceased, 3 - CMO/hospice
# yale
cytokine.res.yale <- read.csv(file.path(yale.data.folder,"yale.data.csv"),na="NA")
cytokine.res.yale$severity.outcome <- paste0(cytokine.res.yale$Clinical.score,"-",cytokine.res.yale$LatestOutcome)
cytokine.res.yale$subject_id <- sapply(cytokine.res.yale$ID,function(x){unlist(strsplit(x,".",fixed = T))[1]})
patient.max.severity <- aggregate(Clinical.score ~ subject_id,cytokine.res.yale,max)
cat("Max subject severity score:\n")
## Max subject severity score:
table(patient.max.severity$Clinical.score)
##
## 0 1 2 3 4 5
## 105 29 9 35 9 16
cytokines <- colnames(cytokine.res.yale)[88:158]
cat("Cytokines measured in the Yale dataset:\n")
## Cytokines measured in the Yale dataset:
print(cytokines)
## [1] "sCD40L" "EGF" "Eotaxin" "FGF2"
## [5] "FLT3L" "Fractalkine" "GCSF" "GMCSF"
## [9] "CXCL1orGROa" "IFNa2" "IFNy" "IL1a"
## [13] "IL1b" "IL1RA" "IL2" "IL3"
## [17] "IL4" "IL5" "IL6" "IL7"
## [21] "IL8orCXCL8" "IL9" "IL10" "IL12p40"
## [25] "IL12p70" "IL13" "IL15" "IL17A"
## [29] "IL17E.IL25" "IL17F" "IL18" "IL22"
## [33] "IL27" "CXCL10orIP10" "CCL2orMCP1" "CCL7orMCP3"
## [37] "MCSF" "CCL22orMDC" "CXCL9orMIG" "CCL3orMIP1a"
## [41] "CCL4orMIP1b" "PDGFAA" "PDGFABAndPDGFBB" "CCL5orRANTES"
## [45] "TGFa" "TNFa" "TNFb" "VEGFA"
## [49] "Eotaxin2" "CCL8OrMCP2" "CXCL13OrBCA1" "CCL13orMCP4"
## [53] "CCL1orI309" "IL16" "CCL17orTARC" "CCL21Or6CKine"
## [57] "Eotaxin3" "LIF" "TPO" "SCF"
## [61] "TSLP" "IL33" "IL20" "IL21"
## [65] "IL23" "TRAIL" "CCL27orCTACK" "SDF1aAndSDF1B"
## [69] "CXCL5orENA78" "CCL15orMIP1d" "IFNL2orIL28A"
Identify all deceased patients (including those in hospice), and match them with critical but recovered patients by age and gender.
# patients that have died or in hospice
deceased.patients <- unique(subset(cytokine.res.yale,LatestOutcome >= 2)$subject_id)
recovered.patients <- subset(patient.max.severity,Clinical.score >= 4)$subject_id
recovered.patients <- setdiff(recovered.patients,deceased.patients)
patient.demo <- unique(cytokine.res.yale[,c("subject_id","Age","sex","LatestOutcome")])
patient.demo <- subset(patient.demo,!is.na(Age) & Age != "")
patient.demo$age.numeric <- as.numeric(patient.demo$Age)
patient.demo[patient.demo$Age == "? 90","age.numeric"] <- 95
rownames(patient.demo) <- patient.demo$subject_id
age.buffer <- 3
matched.recovered.patients <- list()
for (i in deceased.patients) {
#cat("Target:\n")
#print(patient.demo[i,])
#cat("Matches:\n")
tmp <- subset(patient.demo,subject_id %in% recovered.patients & sex == patient.demo[i,"sex"] &
age.numeric >= patient.demo[i,"age.numeric"] - age.buffer & age.numeric <= patient.demo[i,"age.numeric"] + age.buffer)# &
#subject_id %in% subset(cytokine.res.yale,DFSO <= 23)$subject_id)
#print(tmp)
matched.recovered.patients[[i]] <- tmp$subject_id
}
deceased.v.recovered.juncture <- subset(cytokine.res.yale,subject_id %in% c(deceased.patients,unlist(matched.recovered.patients)))# &
# Age != "? 90") # excluding age 90+ patients as they don't have matching recovered counterparts
deceased.v.recovered.juncture$outcome <- "Alive"
deceased.v.recovered.juncture$age.numeric <- as.numeric(deceased.v.recovered.juncture$Age)
deceased.v.recovered.juncture[deceased.v.recovered.juncture$Age == "? 90","age.numeric"] <- 95
deceased.v.recovered.juncture[deceased.v.recovered.juncture$subject_id %in% deceased.patients,"outcome"] <- "Deceased"
# removing samples without any cytokine data
deceased.v.recovered.juncture <- deceased.v.recovered.juncture[rowSums(is.na(deceased.v.recovered.juncture[,cytokines])) < 30,]
cat("Deceased patients sampling timeline:\n")
## Deceased patients sampling timeline:
table(subset(deceased.v.recovered.juncture,outcome=="Deceased")[,c("subject_id","DFSO")])
## DFSO
## subject_id 1 3 5 7 9 12 13 14 15 16 17 20 21 22 23 30 32 35 41
## Pt026 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## Pt027 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0
## Pt029 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0
## Pt047 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0
## Pt054 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1
## Pt059 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0
## Pt061 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## Pt071 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Pt082 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## Pt086 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0
## Pt087 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Pt091 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0
## Pt106 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
cat("Alive patients sampling timeline:\n")
## Alive patients sampling timeline:
table(subset(deceased.v.recovered.juncture,outcome=="Alive")[,c("subject_id","DFSO")])
## DFSO
## subject_id 4 5 8 9 11 12 13 14 15 16 18 19 20 22 25 28 32 47
## Pt001 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0
## Pt006 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
## Pt021 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## Pt028 0 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0
## Pt039 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Pt040 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## Pt043 0 0 0 1 0 0 0 1 0 0 0 0 0 1 1 1 1 0
## Pt045 1 0 1 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0
## Pt115 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## Pt129 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
Compare between deceased and recovered patients during critical juncture (between 17 and 23 days TSO) and check whether any of the cytokines had signficant shift entering the juncture
# PCA
cytokine.pca <- deceased.v.recovered.juncture[,cytokines]
rownames(cytokine.pca) <- deceased.v.recovered.juncture$ID
cytokine.pca <- prcomp(cytokine.pca[,colSums(is.na(cytokine.pca)) == 0],center = T,scale. = T)
deceased.v.recovered.juncture <- merge(deceased.v.recovered.juncture,cytokine.pca$x[,1:2],by.x="ID",by.y="row.names")
ggplot(deceased.v.recovered.juncture,aes(PC1,PC2,label=age.numeric,group=subject_id)) + geom_text_repel() + geom_line(color="grey",alpha=0.5) +
geom_point(aes(color = Age == "? 90",fill= as.factor(LatestOutcome)),size=2,alpha=0.75,pch=21) +
theme_bw() + scale_fill_nejm(labels=c("discharged","deceased","hospice"),name="Status") + scale_color_manual(values=c("white","black")) +
labs(title="Yale critical sub-cohort")
kable(patient.demo[unique(deceased.v.recovered.juncture$subject_id),])
subject_id | Age | sex | LatestOutcome | age.numeric | |
---|---|---|---|---|---|
Pt001 | Pt001 | 67 | M | 1 | 67 |
Pt006 | Pt006 | 64 | M | 1 | 64 |
Pt021 | Pt021 | 74 | F | 1 | 74 |
Pt026 | Pt026 | 75 | F | 2 | 75 |
Pt027 | Pt027 | 70 | M | 3 | 70 |
Pt028 | Pt028 | 77 | F | 1 | 77 |
Pt029 | Pt029 | ? 90 | F | 2 | 95 |
Pt039 | Pt039 | 68 | M | 1 | 68 |
Pt040 | Pt040 | 68 | M | 1 | 68 |
Pt043 | Pt043 | 65 | F | 1 | 65 |
Pt045 | Pt045 | 62 | F | 1 | 62 |
Pt047 | Pt047 | 82 | M | 2 | 82 |
Pt054 | Pt054 | 72 | M | 2 | 72 |
Pt059 | Pt059 | 35 | M | 2 | 35 |
Pt061 | Pt061 | 66 | F | 2 | 66 |
Pt071 | Pt071 | ? 90 | F | 2 | 95 |
Pt082 | Pt082 | 62 | M | 2 | 62 |
Pt086 | Pt086 | ? 90 | F | 2 | 95 |
Pt087 | Pt087 | 60 | F | 3 | 60 |
Pt091 | Pt091 | 29 | F | 2 | 29 |
Pt106 | Pt106 | ? 90 | M | 2 | 95 |
Pt115 | Pt115 | 29 | F | 1 | 29 |
Pt129 | Pt129 | 67 | M | 1 | 67 |
Using the model: test_value ~ outcome + age.numeric + sex + DFSO + (1|subject_id), for samples within the juncture to identify cytokines with sig. differences between the two outcome groups
# test which one is diverging between 17-23 days for the two groups
divergence.df <- data.frame()
deceased.v.recovered.juncture.rel <- subset(deceased.v.recovered.juncture,DFSO >= 7 & DFSO <= 30)
form <- test_value ~ outcome + age.numeric + sex + DFSO + (1|subject_id)
for (i in cytokines) {
tmp <- deceased.v.recovered.juncture.rel
tmp$test_value <- scale(tmp[,i])
tmp <- subset(tmp,DFSO >= 17 & DFSO <= 23)
lm.res <- lmer(form,tmp)
lm.anova <- anova(lm.res)
divergence.df <- rbind(divergence.df,data.frame(cytokine=i,coef=t(fixef(lm.res)),severity.outcome.pval = t(lm.anova$`Pr(>F)`),singular=isSingular(lm.res)))
}
divergence.df <- divergence.df[order(divergence.df$coef.outcomeDeceased),]
divergence.df$cytokine <- factor(divergence.df$cytokine,levels=divergence.df$cytokine)
ggplot(divergence.df,aes(cytokine,coef.outcomeDeceased)) + geom_hline(yintercept = 0,linetype="dashed") +
geom_point(aes(size=-log10(severity.outcome.pval.1),fill=severity.outcome.pval.1<=0.05),alpha=0.75,pch=21) +
#geom_point(aes(y=coef.post.20daysTRUE,size=-log10(severity.outcome.pval.2),
# color=singular,fill=severity.outcome.pval.2<=0.05),alpha=0.75,pch=22) +
coord_flip() + ylab("Effect Size for Deceased Group") + xlab("Cytokine") +
labs(title="Deceased vs. Recovered",subtitle = "Differential cytokines between 17 and 23 days post symptom onset") +
scale_color_manual(values=c("black","white")) + scale_fill_manual(name="Sig. P-Value",values=c("grey90","lightblue")) +
scale_size(name="-log10(p-val)") + theme_bw() + theme(text = element_text(size=8))
sig.diff.cytokines <- as.character(subset(divergence.df,severity.outcome.pval.1 <= 0.05)$cytokine)
Compare test results with those from the Brescia cohort
brescia.envir <- new.env()
load(file.path(output.folder,"deceased.v.recovered.cytokine.test.results.RData"),verbose = F,envir = brescia.envir)
brescia.cytokines <- tolower(gsub(".","",brescia.envir$merged.de.df$cytokine,fixed = T))
names(brescia.cytokines) <- brescia.envir$merged.de.df$cytokine
brescia.cytokines[c("IL.18.IL.1F4","IL.8","IFN.gamma","CXCL9.MIG","CX3CL1.Fractalkine","IL.17","SCF.c.kit.Ligand","VEGF","IL.1ra.IL.1F3")] <-
c("il18","cxcl8","ifny","mig","fractalkine","il17a","scf","vegfa","il1ra")
brescia.envir$merged.de.df$cytokine.standardized <- brescia.cytokines
yale.cytokines <- tolower(sapply(as.character(divergence.df$cytokine),function(x){tail(unlist(strsplit(x,"[oO]r")),1)}))
names(yale.cytokines) <- divergence.df$cytokine
divergence.df$cytokine.standardized <- yale.cytokines[divergence.df$cytokine]
# DE
common.divergence.df <- merge(divergence.df,brescia.envir$merged.de.df,by="cytokine.standardized",suffixes = c(".yale",".brescia"))
cor.test(common.divergence.df$coef.outcomeDeceased,common.divergence.df$de.coef.outcomeDeceased,method = "spearman")
##
## Spearman's rank correlation rho
##
## data: common.divergence.df$coef.outcomeDeceased and common.divergence.df$de.coef.outcomeDeceased
## S = 6286, p-value = 0.05683
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3121786
common.divergence.df.sig <- subset(common.divergence.df,de.pval.1 <= 0.05)
cor.test(common.divergence.df.sig$coef.outcomeDeceased,common.divergence.df.sig$de.coef.outcomeDeceased,method = "spearman")
##
## Spearman's rank correlation rho
##
## data: common.divergence.df.sig$coef.outcomeDeceased and common.divergence.df.sig$de.coef.outcomeDeceased
## S = 94, p-value = 0.02044
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6713287
ggplot(common.divergence.df,aes(de.coef.outcomeDeceased,coef.outcomeDeceased,label=cytokine.label.deceased.v.recovered )) +
geom_hline(yintercept = 0) + geom_vline(xintercept = 0) +
geom_point(aes(fill=de.pval.1 <= 0.05),pch=21,alpha=0.75) + geom_text_repel(aes(color=severity.outcome.pval.1 <= 0.05),size=2) +
scale_color_manual(name="Sig. p-value\nin Yale Cohort",values=c("grey","purple")) +
scale_fill_manual(name="Sig. p-value\nin Brescia Cohort",values=c("grey","red")) +
theme_bw() + xlab("Brescia Chohort ES") + ylab("Yale Cohort ES") + ggtitle("Deceased vs. Recovered")
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] lmerTest_3.1-2 lme4_1.1-23 Matrix_1.2-18 tidymv_3.0.0
## [5] mgcv_1.8-31 nlme_3.1-144 ggfortify_0.4.10 ggrepel_0.8.2
## [9] ggpubr_0.4.0 ggsci_2.9 ggplot2_3.3.2 knitr_1.30
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.5 lattice_0.20-38 tidyr_1.1.2
## [4] digest_0.6.25 R6_2.4.1 cellranger_1.1.0
## [7] backports_1.1.10 evaluate_0.14 highr_0.8
## [10] pillar_1.4.6 rlang_0.4.7 curl_4.3
## [13] readxl_1.3.1 minqa_1.2.4 data.table_1.13.0
## [16] car_3.0-10 nloptr_1.2.2.2 rmarkdown_2.4
## [19] labeling_0.3 splines_3.6.3 statmod_1.4.34
## [22] stringr_1.4.0 foreign_0.8-75 munsell_0.5.0
## [25] broom_0.7.0 compiler_3.6.3 numDeriv_2016.8-1.1
## [28] xfun_0.17 pkgconfig_2.0.3 htmltools_0.5.0
## [31] tidyselect_1.1.0 tibble_3.0.3 gridExtra_2.3
## [34] rio_0.5.16 crayon_1.3.4 dplyr_1.0.2
## [37] withr_2.3.0 MASS_7.3-51.5 grid_3.6.3
## [40] gtable_0.3.0 lifecycle_0.2.0 magrittr_1.5
## [43] scales_1.1.1 zip_2.1.1 stringi_1.4.6
## [46] carData_3.0-4 farver_2.0.3 ggsignif_0.6.0
## [49] ellipsis_0.3.1 generics_0.0.2 vctrs_0.3.4
## [52] boot_1.3-24 openxlsx_4.2.2 tools_3.6.3
## [55] forcats_0.5.0 glue_1.4.2 purrr_0.3.4
## [58] hms_0.5.3 abind_1.4-5 yaml_2.2.1
## [61] colorspace_1.4-1 rstatix_0.6.0 haven_2.3.1