Deceased v. Recovered Cytokine Profiles in Yale Dataset

Using the Yale dataset as validation cohort, compare cytokine profiles between deceased patients and those with good outcome.

Import Data

Yale severity scores: 0 - Uninfected, 1 - hospitalization, 2 - low O2 supplement, 3 - Moderate O2 supplement, 4 - ICU, 5 - MV, 6 - Death LatestOutcome: 0 - still admitted, 1 - discharged, 2 - deceased, 3 - CMO/hospice

# yale
cytokine.res.yale <- read.csv(file.path(yale.data.folder,"yale.data.csv"),na="NA")
cytokine.res.yale$severity.outcome <- paste0(cytokine.res.yale$Clinical.score,"-",cytokine.res.yale$LatestOutcome)
cytokine.res.yale$subject_id <- sapply(cytokine.res.yale$ID,function(x){unlist(strsplit(x,".",fixed = T))[1]})
patient.max.severity <- aggregate(Clinical.score ~ subject_id,cytokine.res.yale,max)
cat("Max subject severity score:\n")

## Max subject severity score:

table(patient.max.severity$Clinical.score)

## 
##   0   1   2   3   4   5 
## 105  29   9  35   9  16

cytokines <- colnames(cytokine.res.yale)[88:158]
cat("Cytokines measured in the Yale dataset:\n")

## Cytokines measured in the Yale dataset:

print(cytokines)

##  [1] "sCD40L"          "EGF"             "Eotaxin"         "FGF2"           
##  [5] "FLT3L"           "Fractalkine"     "GCSF"            "GMCSF"          
##  [9] "CXCL1orGROa"     "IFNa2"           "IFNy"            "IL1a"           
## [13] "IL1b"            "IL1RA"           "IL2"             "IL3"            
## [17] "IL4"             "IL5"             "IL6"             "IL7"            
## [21] "IL8orCXCL8"      "IL9"             "IL10"            "IL12p40"        
## [25] "IL12p70"         "IL13"            "IL15"            "IL17A"          
## [29] "IL17E.IL25"      "IL17F"           "IL18"            "IL22"           
## [33] "IL27"            "CXCL10orIP10"    "CCL2orMCP1"      "CCL7orMCP3"     
## [37] "MCSF"            "CCL22orMDC"      "CXCL9orMIG"      "CCL3orMIP1a"    
## [41] "CCL4orMIP1b"     "PDGFAA"          "PDGFABAndPDGFBB" "CCL5orRANTES"   
## [45] "TGFa"            "TNFa"            "TNFb"            "VEGFA"          
## [49] "Eotaxin2"        "CCL8OrMCP2"      "CXCL13OrBCA1"    "CCL13orMCP4"    
## [53] "CCL1orI309"      "IL16"            "CCL17orTARC"     "CCL21Or6CKine"  
## [57] "Eotaxin3"        "LIF"             "TPO"             "SCF"            
## [61] "TSLP"            "IL33"            "IL20"            "IL21"           
## [65] "IL23"            "TRAIL"           "CCL27orCTACK"    "SDF1aAndSDF1B"  
## [69] "CXCL5orENA78"    "CCL15orMIP1d"    "IFNL2orIL28A"

Build subcohort

Identify all deceased patients (including those in hospice), and match them with critical but recovered patients by age and gender.

# patients that have died or in hospice
deceased.patients <- unique(subset(cytokine.res.yale,LatestOutcome >= 2)$subject_id)
recovered.patients <- subset(patient.max.severity,Clinical.score >= 4)$subject_id
recovered.patients <- setdiff(recovered.patients,deceased.patients)
patient.demo <- unique(cytokine.res.yale[,c("subject_id","Age","sex","LatestOutcome")])
patient.demo <- subset(patient.demo,!is.na(Age) & Age != "")
patient.demo$age.numeric <- as.numeric(patient.demo$Age)
patient.demo[patient.demo$Age == "? 90","age.numeric"] <- 95
rownames(patient.demo) <- patient.demo$subject_id

age.buffer <- 3
matched.recovered.patients <- list()
for (i in deceased.patients) {
  #cat("Target:\n")
  #print(patient.demo[i,])
  #cat("Matches:\n")
  tmp <- subset(patient.demo,subject_id %in% recovered.patients & sex == patient.demo[i,"sex"] & 
           age.numeric >= patient.demo[i,"age.numeric"] - age.buffer & age.numeric <= patient.demo[i,"age.numeric"] + age.buffer)# &
             #subject_id %in% subset(cytokine.res.yale,DFSO <= 23)$subject_id)
  #print(tmp)
  matched.recovered.patients[[i]] <- tmp$subject_id
}

deceased.v.recovered.juncture <- subset(cytokine.res.yale,subject_id %in% c(deceased.patients,unlist(matched.recovered.patients)))# &
                                         # Age != "? 90") # excluding age 90+ patients as they don't have matching recovered counterparts
deceased.v.recovered.juncture$outcome <- "Alive"
deceased.v.recovered.juncture$age.numeric <- as.numeric(deceased.v.recovered.juncture$Age)
deceased.v.recovered.juncture[deceased.v.recovered.juncture$Age == "? 90","age.numeric"] <- 95
deceased.v.recovered.juncture[deceased.v.recovered.juncture$subject_id %in% deceased.patients,"outcome"] <- "Deceased"
# removing samples without any cytokine data
deceased.v.recovered.juncture <- deceased.v.recovered.juncture[rowSums(is.na(deceased.v.recovered.juncture[,cytokines])) < 30,]

cat("Deceased patients sampling timeline:\n")

## Deceased patients sampling timeline:

table(subset(deceased.v.recovered.juncture,outcome=="Deceased")[,c("subject_id","DFSO")])

##           DFSO
## subject_id 1 3 5 7 9 12 13 14 15 16 17 20 21 22 23 30 32 35 41
##      Pt026 0 0 0 0 0  0  0  0  1  0  0  0  0  0  0  0  0  0  0
##      Pt027 0 1 0 0 0  1  0  0  0  0  1  0  1  0  0  0  0  0  0
##      Pt029 0 0 0 0 0  1  0  0  0  1  0  0  1  0  0  0  0  0  0
##      Pt047 0 0 0 0 0  0  0  0  0  0  0  0  0  0  1  1  0  0  0
##      Pt054 0 0 0 0 0  0  0  0  0  0  0  0  0  1  0  0  1  1  1
##      Pt059 0 0 0 0 0  1  0  0  0  1  0  1  0  0  0  0  0  0  0
##      Pt061 0 0 0 0 1  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##      Pt071 0 0 1 0 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##      Pt082 0 0 0 0 0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##      Pt086 0 1 0 0 0  0  0  0  1  0  1  0  0  0  0  0  0  0  0
##      Pt087 1 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##      Pt091 0 0 0 0 1  0  1  0  0  0  1  0  0  0  0  0  0  0  0
##      Pt106 0 0 0 1 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0

cat("Alive patients sampling timeline:\n")

## Alive patients sampling timeline:

table(subset(deceased.v.recovered.juncture,outcome=="Alive")[,c("subject_id","DFSO")])

##           DFSO
## subject_id 4 5 8 9 11 12 13 14 15 16 18 19 20 22 25 28 32 47
##      Pt001 0 0 0 0  1  0  0  1  0  0  0  0  0  0  0  0  0  0
##      Pt006 0 0 0 0  0  1  0  0  0  1  0  0  0  0  0  0  0  0
##      Pt021 0 0 0 0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##      Pt028 0 1 0 0  0  0  0  1  0  0  1  0  0  1  0  0  0  0
##      Pt039 0 0 1 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##      Pt040 0 0 0 0  0  0  0  0  0  0  0  0  1  0  0  0  0  0
##      Pt043 0 0 0 1  0  0  0  1  0  0  0  0  0  1  1  1  1  0
##      Pt045 1 0 1 0  0  0  0  0  0  1  0  1  0  1  0  0  0  0
##      Pt115 0 0 0 0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##      Pt129 0 0 0 0  0  0  0  0  0  0  0  0  0  0  0  0  0  1

Juncture Analysis

Compare between deceased and recovered patients during critical juncture (between 17 and 23 days TSO) and check whether any of the cytokines had signficant shift entering the juncture

# PCA
cytokine.pca <- deceased.v.recovered.juncture[,cytokines]
rownames(cytokine.pca) <- deceased.v.recovered.juncture$ID
cytokine.pca <- prcomp(cytokine.pca[,colSums(is.na(cytokine.pca)) == 0],center = T,scale. = T)
deceased.v.recovered.juncture <- merge(deceased.v.recovered.juncture,cytokine.pca$x[,1:2],by.x="ID",by.y="row.names") 
ggplot(deceased.v.recovered.juncture,aes(PC1,PC2,label=age.numeric,group=subject_id)) + geom_text_repel() + geom_line(color="grey",alpha=0.5) +
  geom_point(aes(color = Age == "? 90",fill= as.factor(LatestOutcome)),size=2,alpha=0.75,pch=21) + 
  theme_bw() + scale_fill_nejm(labels=c("discharged","deceased","hospice"),name="Status") + scale_color_manual(values=c("white","black")) +
  labs(title="Yale critical sub-cohort")

<b>Figure 1</b> PCA plot of samples from the deceased and recovered sub-cohort. Patient age shown in labels.

Figure 1 PCA plot of samples from the deceased and recovered sub-cohort. Patient age shown in labels.

kable(patient.demo[unique(deceased.v.recovered.juncture$subject_id),])

	subject_id	Age	sex	LatestOutcome	age.numeric
Pt001	Pt001	67	M	1	67
Pt006	Pt006	64	M	1	64
Pt021	Pt021	74	F	1	74
Pt026	Pt026	75	F	2	75
Pt027	Pt027	70	M	3	70
Pt028	Pt028	77	F	1	77
Pt029	Pt029	? 90	F	2	95
Pt039	Pt039	68	M	1	68
Pt040	Pt040	68	M	1	68
Pt043	Pt043	65	F	1	65
Pt045	Pt045	62	F	1	62
Pt047	Pt047	82	M	2	82
Pt054	Pt054	72	M	2	72
Pt059	Pt059	35	M	2	35
Pt061	Pt061	66	F	2	66
Pt071	Pt071	? 90	F	2	95
Pt082	Pt082	62	M	2	62
Pt086	Pt086	? 90	F	2	95
Pt087	Pt087	60	F	3	60
Pt091	Pt091	29	F	2	29
Pt106	Pt106	? 90	M	2	95
Pt115	Pt115	29	F	1	29
Pt129	Pt129	67	M	1	67

Differential level within juncture

Using the model: test_value ~ outcome + age.numeric + sex + DFSO + (1|subject_id), for samples within the juncture to identify cytokines with sig. differences between the two outcome groups

# test which one is diverging between 17-23 days for the two groups
divergence.df <- data.frame()
deceased.v.recovered.juncture.rel <- subset(deceased.v.recovered.juncture,DFSO >= 7 & DFSO <= 30)
form <- test_value ~ outcome + age.numeric + sex + DFSO + (1|subject_id)
for (i in cytokines) {
  tmp <- deceased.v.recovered.juncture.rel
  tmp$test_value <-  scale(tmp[,i])
  tmp <- subset(tmp,DFSO >= 17 & DFSO <= 23)
  lm.res <- lmer(form,tmp)
  lm.anova <- anova(lm.res)
  divergence.df <- rbind(divergence.df,data.frame(cytokine=i,coef=t(fixef(lm.res)),severity.outcome.pval = t(lm.anova$`Pr(>F)`),singular=isSingular(lm.res)))
}
divergence.df <- divergence.df[order(divergence.df$coef.outcomeDeceased),]
divergence.df$cytokine <- factor(divergence.df$cytokine,levels=divergence.df$cytokine)
ggplot(divergence.df,aes(cytokine,coef.outcomeDeceased)) + geom_hline(yintercept = 0,linetype="dashed") +
  geom_point(aes(size=-log10(severity.outcome.pval.1),fill=severity.outcome.pval.1<=0.05),alpha=0.75,pch=21) + 
  #geom_point(aes(y=coef.post.20daysTRUE,size=-log10(severity.outcome.pval.2),
  #               color=singular,fill=severity.outcome.pval.2<=0.05),alpha=0.75,pch=22) + 
  coord_flip() + ylab("Effect Size for Deceased Group") + xlab("Cytokine") + 
  labs(title="Deceased vs. Recovered",subtitle = "Differential cytokines between 17 and 23 days post symptom onset") +
  scale_color_manual(values=c("black","white")) + scale_fill_manual(name="Sig. P-Value",values=c("grey90","lightblue")) +
  scale_size(name="-log10(p-val)") + theme_bw() + theme(text = element_text(size=8))

<b>Figure 2</b> Outcome effect and its significance in mixed-effect models comparing 71 cytokines of eventually deceased patients (n=8) relative to recovered patients (n=4) around critical juncture (17-23 days TSO). Significance is determined by ANOVA unadjusted p <= 0.05

Figure 2 Outcome effect and its significance in mixed-effect models comparing 71 cytokines of eventually deceased patients (n=8) relative to recovered patients (n=4) around critical juncture (17-23 days TSO). Significance is determined by ANOVA unadjusted p <= 0.05

sig.diff.cytokines <- as.character(subset(divergence.df,severity.outcome.pval.1 <= 0.05)$cytokine)

Compare with Brescia cohort

Compare test results with those from the Brescia cohort

brescia.envir <- new.env()
load(file.path(output.folder,"deceased.v.recovered.cytokine.test.results.RData"),verbose = F,envir = brescia.envir)
brescia.cytokines <- tolower(gsub(".","",brescia.envir$merged.de.df$cytokine,fixed = T))
names(brescia.cytokines) <- brescia.envir$merged.de.df$cytokine
brescia.cytokines[c("IL.18.IL.1F4","IL.8","IFN.gamma","CXCL9.MIG","CX3CL1.Fractalkine","IL.17","SCF.c.kit.Ligand","VEGF","IL.1ra.IL.1F3")] <- 
  c("il18","cxcl8","ifny","mig","fractalkine","il17a","scf","vegfa","il1ra")
brescia.envir$merged.de.df$cytokine.standardized <- brescia.cytokines

yale.cytokines <- tolower(sapply(as.character(divergence.df$cytokine),function(x){tail(unlist(strsplit(x,"[oO]r")),1)}))
names(yale.cytokines) <- divergence.df$cytokine
divergence.df$cytokine.standardized <- yale.cytokines[divergence.df$cytokine]

# DE
common.divergence.df <- merge(divergence.df,brescia.envir$merged.de.df,by="cytokine.standardized",suffixes = c(".yale",".brescia"))
cor.test(common.divergence.df$coef.outcomeDeceased,common.divergence.df$de.coef.outcomeDeceased,method = "spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  common.divergence.df$coef.outcomeDeceased and common.divergence.df$de.coef.outcomeDeceased
## S = 6286, p-value = 0.05683
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.3121786

common.divergence.df.sig <- subset(common.divergence.df,de.pval.1 <= 0.05)
cor.test(common.divergence.df.sig$coef.outcomeDeceased,common.divergence.df.sig$de.coef.outcomeDeceased,method = "spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  common.divergence.df.sig$coef.outcomeDeceased and common.divergence.df.sig$de.coef.outcomeDeceased
## S = 94, p-value = 0.02044
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.6713287

ggplot(common.divergence.df,aes(de.coef.outcomeDeceased,coef.outcomeDeceased,label=cytokine.label.deceased.v.recovered )) + 
  geom_hline(yintercept = 0) + geom_vline(xintercept = 0) +
  geom_point(aes(fill=de.pval.1 <= 0.05),pch=21,alpha=0.75) + geom_text_repel(aes(color=severity.outcome.pval.1 <= 0.05),size=2) + 
  scale_color_manual(name="Sig. p-value\nin Yale Cohort",values=c("grey","purple")) +
    scale_fill_manual(name="Sig. p-value\nin Brescia Cohort",values=c("grey","red")) +
  theme_bw() + xlab("Brescia Chohort ES") + ylab("Yale Cohort ES") + ggtitle("Deceased vs. Recovered")

<b>Figure 3</b> Effects of the deceased group relative to recovered patients in the Brescia cohort on the x-axis and in the Yale cohort on the y-axis for the circulating levels of 38 overlapping cytokines in the critical juncture. Unadjusted ANOVA p values are used.

Figure 3 Effects of the deceased group relative to recovered patients in the Brescia cohort on the x-axis and in the Yale cohort on the y-axis for the circulating levels of 38 overlapping cytokines in the critical juncture. Unadjusted ANOVA p values are used.

Session Info

sessionInfo()

## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lmerTest_3.1-2   lme4_1.1-23      Matrix_1.2-18    tidymv_3.0.0    
##  [5] mgcv_1.8-31      nlme_3.1-144     ggfortify_0.4.10 ggrepel_0.8.2   
##  [9] ggpubr_0.4.0     ggsci_2.9        ggplot2_3.3.2    knitr_1.30      
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5          lattice_0.20-38     tidyr_1.1.2        
##  [4] digest_0.6.25       R6_2.4.1            cellranger_1.1.0   
##  [7] backports_1.1.10    evaluate_0.14       highr_0.8          
## [10] pillar_1.4.6        rlang_0.4.7         curl_4.3           
## [13] readxl_1.3.1        minqa_1.2.4         data.table_1.13.0  
## [16] car_3.0-10          nloptr_1.2.2.2      rmarkdown_2.4      
## [19] labeling_0.3        splines_3.6.3       statmod_1.4.34     
## [22] stringr_1.4.0       foreign_0.8-75      munsell_0.5.0      
## [25] broom_0.7.0         compiler_3.6.3      numDeriv_2016.8-1.1
## [28] xfun_0.17           pkgconfig_2.0.3     htmltools_0.5.0    
## [31] tidyselect_1.1.0    tibble_3.0.3        gridExtra_2.3      
## [34] rio_0.5.16          crayon_1.3.4        dplyr_1.0.2        
## [37] withr_2.3.0         MASS_7.3-51.5       grid_3.6.3         
## [40] gtable_0.3.0        lifecycle_0.2.0     magrittr_1.5       
## [43] scales_1.1.1        zip_2.1.1           stringi_1.4.6      
## [46] carData_3.0-4       farver_2.0.3        ggsignif_0.6.0     
## [49] ellipsis_0.3.1      generics_0.0.2      vctrs_0.3.4        
## [52] boot_1.3-24         openxlsx_4.2.2      tools_3.6.3        
## [55] forcats_0.5.0       glue_1.4.2          purrr_0.3.4        
## [58] hms_0.5.3           abind_1.4-5         yaml_2.2.1         
## [61] colorspace_1.4-1    rstatix_0.6.0       haven_2.3.1