Using the Yale dataset as validation cohort, compare cytokine profiles between deceased patients and those with good outcome.

Import Data

Yale severity scores: 0 - Uninfected, 1 - hospitalization, 2 - low O2 supplement, 3 - Moderate O2 supplement, 4 - ICU, 5 - MV, 6 - Death LatestOutcome: 0 - still admitted, 1 - discharged, 2 - deceased, 3 - CMO/hospice

# yale
cytokine.res.yale <- read.csv(file.path(yale.data.folder,"yale.data.csv"),na="NA")
cytokine.res.yale$severity.outcome <- paste0(cytokine.res.yale$Clinical.score,"-",cytokine.res.yale$LatestOutcome)
cytokine.res.yale$subject_id <- sapply(cytokine.res.yale$ID,function(x){unlist(strsplit(x,".",fixed = T))[1]})
patient.max.severity <- aggregate(Clinical.score ~ subject_id,cytokine.res.yale,max)
cat("Max subject severity score:\n")
## Max subject severity score:
table(patient.max.severity$Clinical.score)
## 
##   0   1   2   3   4   5 
## 105  29   9  35   9  16
cytokines <- colnames(cytokine.res.yale)[88:158]
cat("Cytokines measured in the Yale dataset:\n")
## Cytokines measured in the Yale dataset:
print(cytokines)
##  [1] "sCD40L"          "EGF"             "Eotaxin"         "FGF2"           
##  [5] "FLT3L"           "Fractalkine"     "GCSF"            "GMCSF"          
##  [9] "CXCL1orGROa"     "IFNa2"           "IFNy"            "IL1a"           
## [13] "IL1b"            "IL1RA"           "IL2"             "IL3"            
## [17] "IL4"             "IL5"             "IL6"             "IL7"            
## [21] "IL8orCXCL8"      "IL9"             "IL10"            "IL12p40"        
## [25] "IL12p70"         "IL13"            "IL15"            "IL17A"          
## [29] "IL17E.IL25"      "IL17F"           "IL18"            "IL22"           
## [33] "IL27"            "CXCL10orIP10"    "CCL2orMCP1"      "CCL7orMCP3"     
## [37] "MCSF"            "CCL22orMDC"      "CXCL9orMIG"      "CCL3orMIP1a"    
## [41] "CCL4orMIP1b"     "PDGFAA"          "PDGFABAndPDGFBB" "CCL5orRANTES"   
## [45] "TGFa"            "TNFa"            "TNFb"            "VEGFA"          
## [49] "Eotaxin2"        "CCL8OrMCP2"      "CXCL13OrBCA1"    "CCL13orMCP4"    
## [53] "CCL1orI309"      "IL16"            "CCL17orTARC"     "CCL21Or6CKine"  
## [57] "Eotaxin3"        "LIF"             "TPO"             "SCF"            
## [61] "TSLP"            "IL33"            "IL20"            "IL21"           
## [65] "IL23"            "TRAIL"           "CCL27orCTACK"    "SDF1aAndSDF1B"  
## [69] "CXCL5orENA78"    "CCL15orMIP1d"    "IFNL2orIL28A"

Build subcohort

Identify all deceased patients (including those in hospice), and match them with critical but recovered patients by age and gender.

# patients that have died or in hospice
deceased.patients <- unique(subset(cytokine.res.yale,LatestOutcome >= 2)$subject_id)
recovered.patients <- subset(patient.max.severity,Clinical.score >= 4)$subject_id
recovered.patients <- setdiff(recovered.patients,deceased.patients)
patient.demo <- unique(cytokine.res.yale[,c("subject_id","Age","sex","LatestOutcome")])
patient.demo <- subset(patient.demo,!is.na(Age) & Age != "")
patient.demo$age.numeric <- as.numeric(patient.demo$Age)
patient.demo[patient.demo$Age == "? 90","age.numeric"] <- 95
rownames(patient.demo) <- patient.demo$subject_id

age.buffer <- 3
matched.recovered.patients <- list()
for (i in deceased.patients) {
  #cat("Target:\n")
  #print(patient.demo[i,])
  #cat("Matches:\n")
  tmp <- subset(patient.demo,subject_id %in% recovered.patients & sex == patient.demo[i,"sex"] & 
           age.numeric >= patient.demo[i,"age.numeric"] - age.buffer & age.numeric <= patient.demo[i,"age.numeric"] + age.buffer)# &
             #subject_id %in% subset(cytokine.res.yale,DFSO <= 23)$subject_id)
  #print(tmp)
  matched.recovered.patients[[i]] <- tmp$subject_id
}

deceased.v.recovered.juncture <- subset(cytokine.res.yale,subject_id %in% c(deceased.patients,unlist(matched.recovered.patients)))# &
                                         # Age != "? 90") # excluding age 90+ patients as they don't have matching recovered counterparts
deceased.v.recovered.juncture$outcome <- "Alive"
deceased.v.recovered.juncture$age.numeric <- as.numeric(deceased.v.recovered.juncture$Age)
deceased.v.recovered.juncture[deceased.v.recovered.juncture$Age == "? 90","age.numeric"] <- 95
deceased.v.recovered.juncture[deceased.v.recovered.juncture$subject_id %in% deceased.patients,"outcome"] <- "Deceased"
# removing samples without any cytokine data
deceased.v.recovered.juncture <- deceased.v.recovered.juncture[rowSums(is.na(deceased.v.recovered.juncture[,cytokines])) < 30,]

cat("Deceased patients sampling timeline:\n")
## Deceased patients sampling timeline:
table(subset(deceased.v.recovered.juncture,outcome=="Deceased")[,c("subject_id","DFSO")])
##           DFSO
## subject_id 1 3 5 7 9 12 13 14 15 16 17 20 21 22 23 30 32 35 41
##      Pt026 0 0 0 0 0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##      Pt027 0 1 0 0 0  1  0  0  0  0  1  0  1  0  0  0  0  0  0
##      Pt029 0 0 0 0 0  1  0  0  0  1  0  0  1  0  0  0  0  0  0
##      Pt047 0 0 0 0 0  0  0  0  0  0  0  0  0  0  1  1  0  0  0
##      Pt054 0 0 0 0 0  0  0  0  0  0  0  0  0  1  0  0  1  1  1
##      Pt059 0 0 0 0 0  1  0  0  0  1  0  1  0  0  0  0  0  0  0
##      Pt061 0 0 0 0 1  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##      Pt071 0 0 1 0 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##      Pt082 0 0 0 0 0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##      Pt086 0 1 0 0 0  0  0  0  1  0  1  0  0  0  0  0  0  0  0
##      Pt087 1 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##      Pt091 0 0 0 0 1  0  1  0  0  0  1  0  0  0  0  0  0  0  0
##      Pt106 0 0 0 1 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
cat("Alive patients sampling timeline:\n")
## Alive patients sampling timeline:
table(subset(deceased.v.recovered.juncture,outcome=="Alive")[,c("subject_id","DFSO")])
##           DFSO
## subject_id 4 5 8 9 11 12 13 14 15 16 18 19 20 22 25 28 32 47
##      Pt001 0 0 0 0  1  0  0  1  0  0  0  0  0  0  0  0  0  0
##      Pt006 0 0 0 0  0  1  0  0  0  1  0  0  0  0  0  0  0  0
##      Pt021 0 0 0 0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##      Pt028 0 1 0 0  0  0  0  1  0  0  1  0  0  1  0  0  0  0
##      Pt039 0 0 1 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##      Pt040 0 0 0 0  0  0  0  0  0  0  0  0  1  0  0  0  0  0
##      Pt043 0 0 0 1  0  0  0  1  0  0  0  0  0  1  1  1  1  0
##      Pt045 1 0 1 0  0  0  0  0  0  1  0  1  0  1  0  0  0  0
##      Pt115 0 0 0 0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##      Pt129 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0  0  0  1

Juncture Analysis

Compare between deceased and recovered patients during critical juncture (between 17 and 23 days TSO) and check whether any of the cytokines had signficant shift entering the juncture

# PCA
cytokine.pca <- deceased.v.recovered.juncture[,cytokines]
rownames(cytokine.pca) <- deceased.v.recovered.juncture$ID
cytokine.pca <- prcomp(cytokine.pca[,colSums(is.na(cytokine.pca)) == 0],center = T,scale. = T)
deceased.v.recovered.juncture <- merge(deceased.v.recovered.juncture,cytokine.pca$x[,1:2],by.x="ID",by.y="row.names") 
ggplot(deceased.v.recovered.juncture,aes(PC1,PC2,label=age.numeric,group=subject_id)) + geom_text_repel() + geom_line(color="grey",alpha=0.5) +
  geom_point(aes(color = Age == "? 90",fill= as.factor(LatestOutcome)),size=2,alpha=0.75,pch=21) + 
  theme_bw() + scale_fill_nejm(labels=c("discharged","deceased","hospice"),name="Status") + scale_color_manual(values=c("white","black")) +
  labs(title="Yale critical sub-cohort")
<b>Figure 1</b> PCA plot of samples from the deceased and recovered sub-cohort. Patient age shown in labels.

Figure 1 PCA plot of samples from the deceased and recovered sub-cohort. Patient age shown in labels.

kable(patient.demo[unique(deceased.v.recovered.juncture$subject_id),])
subject_id Age sex LatestOutcome age.numeric
Pt001 Pt001 67 M 1 67
Pt006 Pt006 64 M 1 64
Pt021 Pt021 74 F 1 74
Pt026 Pt026 75 F 2 75
Pt027 Pt027 70 M 3 70
Pt028 Pt028 77 F 1 77
Pt029 Pt029 ? 90 F 2 95
Pt039 Pt039 68 M 1 68
Pt040 Pt040 68 M 1 68
Pt043 Pt043 65 F 1 65
Pt045 Pt045 62 F 1 62
Pt047 Pt047 82 M 2 82
Pt054 Pt054 72 M 2 72
Pt059 Pt059 35 M 2 35
Pt061 Pt061 66 F 2 66
Pt071 Pt071 ? 90 F 2 95
Pt082 Pt082 62 M 2 62
Pt086 Pt086 ? 90 F 2 95
Pt087 Pt087 60 F 3 60
Pt091 Pt091 29 F 2 29
Pt106 Pt106 ? 90 M 2 95
Pt115 Pt115 29 F 1 29
Pt129 Pt129 67 M 1 67


Differential level within juncture

Using the model: test_value ~ outcome + age.numeric + sex + DFSO + (1|subject_id), for samples within the juncture to identify cytokines with sig. differences between the two outcome groups

# test which one is diverging between 17-23 days for the two groups
divergence.df <- data.frame()
deceased.v.recovered.juncture.rel <- subset(deceased.v.recovered.juncture,DFSO >= 7 & DFSO <= 30)
form <- test_value ~ outcome + age.numeric + sex + DFSO + (1|subject_id)
for (i in cytokines) {
  tmp <- deceased.v.recovered.juncture.rel
  tmp$test_value <-  scale(tmp[,i])
  tmp <- subset(tmp,DFSO >= 17 & DFSO <= 23)
  lm.res <- lmer(form,tmp)
  lm.anova <- anova(lm.res)
  divergence.df <- rbind(divergence.df,data.frame(cytokine=i,coef=t(fixef(lm.res)),severity.outcome.pval = t(lm.anova$`Pr(>F)`),singular=isSingular(lm.res)))
}
divergence.df <- divergence.df[order(divergence.df$coef.outcomeDeceased),]
divergence.df$cytokine <- factor(divergence.df$cytokine,levels=divergence.df$cytokine)
ggplot(divergence.df,aes(cytokine,coef.outcomeDeceased)) + geom_hline(yintercept = 0,linetype="dashed") +
  geom_point(aes(size=-log10(severity.outcome.pval.1),fill=severity.outcome.pval.1<=0.05),alpha=0.75,pch=21) + 
  #geom_point(aes(y=coef.post.20daysTRUE,size=-log10(severity.outcome.pval.2),
  #               color=singular,fill=severity.outcome.pval.2<=0.05),alpha=0.75,pch=22) + 
  coord_flip() + ylab("Effect Size for Deceased Group") + xlab("Cytokine") + 
  labs(title="Deceased vs. Recovered",subtitle = "Differential cytokines between 17 and 23 days post symptom onset") +
  scale_color_manual(values=c("black","white")) + scale_fill_manual(name="Sig. P-Value",values=c("grey90","lightblue")) +
  scale_size(name="-log10(p-val)") + theme_bw() + theme(text = element_text(size=8))
<b>Figure 2</b> Outcome effect and its significance in mixed-effect models comparing 71 cytokines of eventually deceased patients (n=8) relative to recovered patients (n=4) around critical juncture (17-23 days TSO). Significance is determined by ANOVA unadjusted p <= 0.05

Figure 2 Outcome effect and its significance in mixed-effect models comparing 71 cytokines of eventually deceased patients (n=8) relative to recovered patients (n=4) around critical juncture (17-23 days TSO). Significance is determined by ANOVA unadjusted p <= 0.05

sig.diff.cytokines <- as.character(subset(divergence.df,severity.outcome.pval.1 <= 0.05)$cytokine)


Compare with Brescia cohort

Compare test results with those from the Brescia cohort

brescia.envir <- new.env()
load(file.path(output.folder,"deceased.v.recovered.cytokine.test.results.RData"),verbose = F,envir = brescia.envir)
brescia.cytokines <- tolower(gsub(".","",brescia.envir$merged.de.df$cytokine,fixed = T))
names(brescia.cytokines) <- brescia.envir$merged.de.df$cytokine
brescia.cytokines[c("IL.18.IL.1F4","IL.8","IFN.gamma","CXCL9.MIG","CX3CL1.Fractalkine","IL.17","SCF.c.kit.Ligand","VEGF","IL.1ra.IL.1F3")] <- 
  c("il18","cxcl8","ifny","mig","fractalkine","il17a","scf","vegfa","il1ra")
brescia.envir$merged.de.df$cytokine.standardized <- brescia.cytokines

yale.cytokines <- tolower(sapply(as.character(divergence.df$cytokine),function(x){tail(unlist(strsplit(x,"[oO]r")),1)}))
names(yale.cytokines) <- divergence.df$cytokine
divergence.df$cytokine.standardized <- yale.cytokines[divergence.df$cytokine]

# DE
common.divergence.df <- merge(divergence.df,brescia.envir$merged.de.df,by="cytokine.standardized",suffixes = c(".yale",".brescia"))
cor.test(common.divergence.df$coef.outcomeDeceased,common.divergence.df$de.coef.outcomeDeceased,method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  common.divergence.df$coef.outcomeDeceased and common.divergence.df$de.coef.outcomeDeceased
## S = 6286, p-value = 0.05683
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3121786
common.divergence.df.sig <- subset(common.divergence.df,de.pval.1 <= 0.05)
cor.test(common.divergence.df.sig$coef.outcomeDeceased,common.divergence.df.sig$de.coef.outcomeDeceased,method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  common.divergence.df.sig$coef.outcomeDeceased and common.divergence.df.sig$de.coef.outcomeDeceased
## S = 94, p-value = 0.02044
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6713287
ggplot(common.divergence.df,aes(de.coef.outcomeDeceased,coef.outcomeDeceased,label=cytokine.label.deceased.v.recovered )) + 
  geom_hline(yintercept = 0) + geom_vline(xintercept = 0) +
  geom_point(aes(fill=de.pval.1 <= 0.05),pch=21,alpha=0.75) + geom_text_repel(aes(color=severity.outcome.pval.1 <= 0.05),size=2) + 
  scale_color_manual(name="Sig. p-value\nin Yale Cohort",values=c("grey","purple")) +
    scale_fill_manual(name="Sig. p-value\nin Brescia Cohort",values=c("grey","red")) +
  theme_bw() + xlab("Brescia Chohort ES") + ylab("Yale Cohort ES") + ggtitle("Deceased vs. Recovered")
<b>Figure 3</b> Effects of the deceased group relative to recovered patients in the Brescia cohort on the x-axis and in the Yale cohort on the y-axis for the circulating levels of 38 overlapping cytokines in the critical juncture. Unadjusted ANOVA p values are used.

Figure 3 Effects of the deceased group relative to recovered patients in the Brescia cohort on the x-axis and in the Yale cohort on the y-axis for the circulating levels of 38 overlapping cytokines in the critical juncture. Unadjusted ANOVA p values are used.

Session Info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lmerTest_3.1-2   lme4_1.1-23      Matrix_1.2-18    tidymv_3.0.0    
##  [5] mgcv_1.8-31      nlme_3.1-144     ggfortify_0.4.10 ggrepel_0.8.2   
##  [9] ggpubr_0.4.0     ggsci_2.9        ggplot2_3.3.2    knitr_1.30      
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5          lattice_0.20-38     tidyr_1.1.2        
##  [4] digest_0.6.25       R6_2.4.1            cellranger_1.1.0   
##  [7] backports_1.1.10    evaluate_0.14       highr_0.8          
## [10] pillar_1.4.6        rlang_0.4.7         curl_4.3           
## [13] readxl_1.3.1        minqa_1.2.4         data.table_1.13.0  
## [16] car_3.0-10          nloptr_1.2.2.2      rmarkdown_2.4      
## [19] labeling_0.3        splines_3.6.3       statmod_1.4.34     
## [22] stringr_1.4.0       foreign_0.8-75      munsell_0.5.0      
## [25] broom_0.7.0         compiler_3.6.3      numDeriv_2016.8-1.1
## [28] xfun_0.17           pkgconfig_2.0.3     htmltools_0.5.0    
## [31] tidyselect_1.1.0    tibble_3.0.3        gridExtra_2.3      
## [34] rio_0.5.16          crayon_1.3.4        dplyr_1.0.2        
## [37] withr_2.3.0         MASS_7.3-51.5       grid_3.6.3         
## [40] gtable_0.3.0        lifecycle_0.2.0     magrittr_1.5       
## [43] scales_1.1.1        zip_2.1.1           stringi_1.4.6      
## [46] carData_3.0-4       farver_2.0.3        ggsignif_0.6.0     
## [49] ellipsis_0.3.1      generics_0.0.2      vctrs_0.3.4        
## [52] boot_1.3-24         openxlsx_4.2.2      tools_3.6.3        
## [55] forcats_0.5.0       glue_1.4.2          purrr_0.3.4        
## [58] hms_0.5.3           abind_1.4-5         yaml_2.2.1         
## [61] colorspace_1.4-1    rstatix_0.6.0       haven_2.3.1