Validate DSM association with disease severity using Brescia cohort not in the citeseq

Earliest cytokine measurements for each patient

# using one of the cytokines to check
cytokine.measurements <- subset(covid19.lab.results,test_name == "TNF-a")
sum(is.na(cytokine.measurements$days_from_admission_to_test))
## [1] 4
sum(is.na(cytokine.measurements$days_from_symptom_onset_to_test))
## [1] 119
cytokine.measurements <- cytokine.measurements[order(cytokine.measurements$days_from_admission_to_test,
                                                     cytokine.measurements$days_from_symptom_onset_to_test,cytokine.measurements$test_time_point.Serum),]
cytokine.measurements.earliest <- cytokine.measurements[!duplicated(cytokine.measurements$subject_id),]
cat("Number of patients with cytokine data:\n")
## Number of patients with cytokine data:
dim(cytokine.measurements.earliest)
## [1] 143  35
#table(cytokine.measurements.earliest$visit)

# patients with symptom onset and admission dates and cytokine data within 30 days of onset
cytokine.measurements.earliest.30days <- subset(cytokine.measurements.earliest,
                                                !is.na(days_from_symptom_onset_to_test) & !is.na(days_from_admission_to_test) & 
                                                  days_from_symptom_onset_to_test < 30)
cat("Patients with symptom onset and admission dates and cytokine data within 30 days of onset:\n")
## Patients with symptom onset and admission dates and cytokine data within 30 days of onset:
dim(cytokine.measurements.earliest.30days)
## [1] 107  35

Earliest SpO2/FiO2 timepoint

o2.measurements <- subset(covid19.lab.results,test_name == "SpO2/FiO2")
sum(is.na(o2.measurements$days_from_admission_to_test))
## [1] 0
sum(is.na(o2.measurements$days_from_symptom_onset_to_test))
## [1] 2
o2.measurements.merged <- merge(o2.measurements,cytokine.measurements,by=c("subject_id","days_from_symptom_onset_to_test"),suffixes = c("",".y"))
o2.measurements.merged <- o2.measurements.merged[order(o2.measurements.merged$days_from_admission_to_test,
                                                     o2.measurements.merged$days_from_symptom_onset_to_test,o2.measurements.merged$test_time_point.Serum),]
o2.measurements.earliest <- o2.measurements.merged[!duplicated(o2.measurements.merged$subject_id),]
cat("Number of patients with o2 data:\n")
## Number of patients with o2 data:
dim(o2.measurements.earliest)
## [1] 32 68
# patients with symptom onset and admission dates and o2 data within 30 days of onset
o2.measurements.earliest.30days <- subset(o2.measurements.earliest,
                                                !is.na(days_from_symptom_onset_to_test) & !is.na(days_from_admission_to_test) & 
                                                  days_from_symptom_onset_to_test < 30)
cat("Patients with symptom onset and admission dates and o2 data within 30 days of onset:\n")
## Patients with symptom onset and admission dates and o2 data within 30 days of onset:
dim(o2.measurements.earliest.30days)
## [1] 30 68

Gather features

cytokine.measurements.earliest.30days.optimized <- cytokine.measurements.earliest.30days
# remove the CITE-seq patients
cytokine.measurements.earliest.30days.optimized <- subset(cytokine.measurements.earliest.30days.optimized,!(subject_id %in% 
                                                            unique(subset(covid19.samples,!is.na(batch))$subject_id)))

paramaters.of.interest <- c("SpO2/FiO2","CRP","IL-6","TNF-a","TNF-b","IL-18/IL-1f4","ANC/ALC Ratio","IP-10","D-Dimer","IFN-Gamma","CXCL9/MIG",
                            "IL-17","IL-4","IL-13","Fibrinogen","Lymphocytes",#"Serology Spike","Serology Nucleocapsid"
                            "Lactate Dehydrogenase (LDH)","Platelets (PLT)")#,"Neutrophils")
setdiff(tolower(paramaters.of.interest),tolower(covid19.lab.results$test_name))
## character(0)
selected.lab.values <- subset(covid19.lab.results,subject_id %in% cytokine.measurements.earliest.30days.optimized$subject_id
  & tolower(test_name) %in% tolower(paramaters.of.interest) & test_id != "Lymphocytes_pct")

# measurements within +/- 2 days of target
selected.lab.values.expanded <- data.frame()
day.margin <- 2
for (i in 1:nrow(cytokine.measurements.earliest.30days.optimized)) {
  subj <- cytokine.measurements.earliest.30days.optimized$subject_id[i]
  #cat(i,"-",subj,"\n")
  days_from_symptom_onset <- cytokine.measurements.earliest.30days.optimized$days_from_symptom_onset_to_test[i]
  tmp <- subset(selected.lab.values,subject_id == subj &
    days_from_symptom_onset_to_test <= (days_from_symptom_onset + day.margin) & 
      days_from_symptom_onset_to_test >= (days_from_symptom_onset - day.margin))
  tmp$target.day.diff <- abs(tmp$days_from_symptom_onset_to_test - days_from_symptom_onset)
  #print(table(tmp$target.day.diff))
  
  tmp <- tmp[order(tmp$target.day.diff,decreasing = F),]
  tmp$test_date_id <- apply(tmp[,c("subject_id","test_id","days_from_symptom_onset_to_test")],1,paste0,collapse=":")
  tmp <- subset(tmp,test_date_id %in% tmp[!duplicated(tmp[,c("subject_id","test_id")]),]$test_date_id) # keep multiple values taken on the same closest day
  #tmp <- tmp[!duplicated(tmp[,c("subject_id","test_id")]),]
  selected.lab.values.expanded <- rbind(selected.lab.values.expanded,tmp)
}
#table(selected.lab.values.expanded[,c("subject_id","target.day.diff")])
selected.lab.values.expanded.avg <- reshape2::dcast(selected.lab.values.expanded,subject_id ~ test_id,
                                               value.var = "test_value",fun.aggregate = function(x){mean(x,na.rm = T)})
rownames(selected.lab.values.expanded.avg) <- selected.lab.values.expanded.avg$subject_id
selected.lab.values.expanded.avg <- selected.lab.values.expanded.avg[,-1]
rowSums(is.na(selected.lab.values.expanded.avg))
## HGR0000002 HGR0000004 HGR0000005 HGR0000006 HGR0000007 HGR0000009 HGR0000011 
##          3          1          9          2          4          2          2 
## HGR0000012 HGR0000014 HGR0000015 HGR0000017 HGR0000020 HGR0000021 HGR0000022 
##          2          3          2          2          3          2          3 
## HGR0000023 HGR0000025 HGR0000026 HGR0000027 HGR0000032 HGR0000033 HGR0000034 
##          2          2          3          2          3          2          9 
## HGR0000036 HGR0000037 HGR0000038 HGR0000039 HGR0000040 HGR0000041 HGR0000043 
##          9          2          9          2          9          2          3 
## HGR0000044 HGR0000046 HGR0000047 HGR0000049 HGR0000050 HGR0000052 HGR0000053 
##          3          2          2          3          1          2          2 
## HGR0000056 HGR0000057 HGR0000058 HGR0000060 HGR0000061 HGR0000063 HGR0000065 
##          2          3          2          2          2          1          4 
## HGR0000066 HGR0000067 HGR0000068 HGR0000072 HGR0000073 HGR0000075 HGR0000077 
##          2          2          2          3          2          3          2 
## HGR0000080 HGR0000081 HGR0000085 HGR0000086 HGR0000088 HGR0000097 HGR0000103 
##          9          1          3          9          1          2          1 
## HGR0000104 HGR0000106 HGR0000107 HGR0000108 HGR0000114 HGR0000122 HGR0000132 
##          2          2          2          2          9          2          2 
## HGR0000139 HGR0000145 HGR0000147 HGR0000149 HGR0000153 HGR0000159 HGR0000160 
##          9          2          2          3          1          2          2 
## HGR0000396 HGR0000400 HGR0000403 HGR0000404 HGR0000405 HGR0000423 HGR0001143 
##          2          2          9          9          9          2          9
table(rowSums(is.na(selected.lab.values.expanded.avg)))
## 
##  1  2  3  4  9 
##  7 41 14  2 13
colSums(is.na(selected.lab.values.expanded.avg))
##          ANC.ALC.Ratio_.ratio                      CRP_mg.L 
##                            13                            14 
##                     CXCL9.MIG                 D.Dimer_ng.mL 
##                             0                            27 
##              Fibrinogen_mg.dL                     IFN.gamma 
##                            14                             0 
##                         IL.13                         IL.17 
##                             0                             0 
##                  IL.18.IL.1F4                          IL.4 
##                             0                             0 
##                          IL.6                     IL.6_ng.L 
##                             0                            71 
##                         IP.10 Lactate.Dehydrogenase.LDH_U.L 
##                             0                            16 
##          Lymphocytes_x10.3.uL        Platelets.PLT_x10.3.uL 
##                            14                            13 
##              SpO2.FiO2_.ratio                         TNF.a 
##                            74                             0 
##                         TNF.b 
##                             0
# remove patients with too many NAs
max.na <- 5
selected.patients <- names(which(rowSums(is.na(selected.lab.values.expanded.avg)) <= max.na))
selected.lab.values.expanded.avg.filtered <- selected.lab.values.expanded.avg[selected.patients,grep("IL.6_ng.L",colnames(selected.lab.values.expanded.avg),invert = T)]
colSums(is.na(selected.lab.values.expanded.avg.filtered))
##          ANC.ALC.Ratio_.ratio                      CRP_mg.L 
##                             0                             1 
##                     CXCL9.MIG                 D.Dimer_ng.mL 
##                             0                            14 
##              Fibrinogen_mg.dL                     IFN.gamma 
##                             1                             0 
##                         IL.13                         IL.17 
##                             0                             0 
##                  IL.18.IL.1F4                          IL.4 
##                             0                             0 
##                          IL.6                         IP.10 
##                             0                             0 
## Lactate.Dehydrogenase.LDH_U.L          Lymphocytes_x10.3.uL 
##                             3                             1 
##        Platelets.PLT_x10.3.uL              SpO2.FiO2_.ratio 
##                             0                            61 
##                         TNF.a                         TNF.b 
##                             0                             0

Data Transformation

# transform to log10
selected.lab.values.expanded.avg.filtered <- log10(selected.lab.values.expanded.avg.filtered+0.01)

covid19.patient.demo$severity.outcome <- paste0(covid19.patient.demo$severity,"-",covid19.patient.demo$outcome)
severity.color <- ggsci::pal_jama()(4)
names(severity.color) <- sort(unique(covid19.patient.demo[selected.patients,]$severity.outcome))
pheatmap::pheatmap(as.matrix(scale(selected.lab.values.expanded.avg.filtered)),
                   annotation_row = covid19.patient.demo[selected.patients,c("severity.outcome"),drop=F],
                   color = viridis::inferno(64),breaks=c(-6,seq(-3,3,by=0.1),6),
                   annotation_colors = list(sex=c("F"="darkred","M"="darkblue"),severity.outcome=severity.color),
                   main="All earliest time-point batch samples (values from time point +/- 2 days)")

# impute remaining missing data
if (as.numeric(paste0(R.version$major,".",floor(as.numeric(R.version$minor)))) >= 3.6)
  RNGkind(sample.kind = "Rounding") # ensure compatibility with versions prior to 3.6
#set.seed(1087)
set.seed(10276)
selected.lab.values.expanded.avg.imputed <- selected.lab.values.expanded.avg.filtered
imputed.obj <- aregImpute(formula = as.formula(paste0("~ ",paste0(colnames(selected.lab.values.expanded.avg.filtered),collapse = " + "))), 
                                                           data =  selected.lab.values.expanded.avg.filtered, n.impute = 5, type = "regression",nk = 0)
## Iteration 1 
Iteration 2 
Iteration 3 
Iteration 4 
Iteration 5 
Iteration 6 
Iteration 7 
Iteration 8 
imputed.values <- impute.transcan(imputed.obj,imputation = 1,data=selected.lab.values.expanded.avg.filtered,list.out=TRUE)
## 
## 
## Imputed missing values with the following frequencies
##  and stored them in variables with their original names:
## 
##                      CRP_mg.L                 D.Dimer_ng.mL 
##                             1                            14 
##              Fibrinogen_mg.dL Lactate.Dehydrogenase.LDH_U.L 
##                             1                             3 
##          Lymphocytes_x10.3.uL              SpO2.FiO2_.ratio 
##                             1                            61
selected.lab.values.expanded.avg.imputed[names(imputed.values)] <- imputed.values

pheatmap::pheatmap(as.matrix(scale(selected.lab.values.expanded.avg.imputed)),
                   annotation_row = covid19.patient.demo[selected.patients,c("severity.outcome"),drop=F],
                   color = viridis::inferno(64),breaks=c(-6,seq(-3,3,by=0.1),6),
                   annotation_colors = list(sex=c("F"="darkred","M"="darkblue"),severity.outcome=severity.color),
                   main="All earliest time-point batch samples (values from time point +/- 2 days or imputed)")

# correct for days from symptom onset or admission 
correct.for <- cytokine.measurements.earliest.30days.optimized[match(rownames(selected.lab.values.expanded.avg.imputed),
                                                           cytokine.measurements.earliest.30days.optimized$subject_id),
                                                     "days_from_admission_to_test"]
selected.lab.values.expanded.avg.imputed.corrected <- t(limma::removeBatchEffect(t(selected.lab.values.expanded.avg.imputed),
                  covariates = correct.for))

DSM calculation

dsm.paramaters <- c("SpO2.FiO2_.ratio","IL.6","IL.13","TNF.b","ANC.ALC.Ratio_.ratio","IP.10","D.Dimer_ng.mL",
                    "Lymphocytes_x10.3.uL","Lactate.Dehydrogenase.LDH_U.L")

lab.pca <- prcomp(selected.lab.values.expanded.avg.imputed.corrected[,dsm.paramaters],scale. = T,center = T)
pca.summary <- summary(lab.pca)
# PCA plot
cytokine.measurements.earliest.30days.pc <- merge(cytokine.measurements.earliest.30days.optimized,lab.pca$x[,1:2],by.x="subject_id",by.y="row.names")
cytokine.measurements.earliest.30days.pc$severity.outcome <- factor(covid19.patient.demo[cytokine.measurements.earliest.30days.pc$subject_id,]$severity.outcome,
                                                                    levels=c("Moderate-Alive","Severe-Alive","Critical-Alive","Critical-Deceased"))

# violin plot by severity group
comparisons <- gtools::combinations(4,2,names(severity.color))
comparisons <- comparisons[comparisons[,1] != comparisons[,2],]
comparisons <- lapply(1:nrow(comparisons),function(x){comparisons[x,]})
# ggviolin(cytokine.measurements.earliest.30days.pc,x="severity.outcome",y="PC1", add = "boxplot",palette = c("#00A1D5FF","#B24745FF","#374E55FF","#DF8F44FF"),
#          fill="severity.outcome",add.params = list(fill="white"),xlab = "Severity-Outcome",ylab="Disease Severity Metric (DSM)") +
#   geom_point(alpha=0.25,size=2) +
#   stat_compare_means(comparisons = comparisons,hide.ns = T,label = "p.signif",tip.length = 0,method = "wilcox.test") +
#   theme(text = element_text(size=8))

variables <- c("subject_id","PC1","severity","outcome")
combined.dsm <- rbind(data.frame(cohort="CITE-Seq",dsm.data[,variables]),
                      data.frame(cohort="Independent",cytokine.measurements.earliest.30days.pc[,variables]))
combined.dsm$severity.outcome <- factor(paste0(combined.dsm$severity,"-",combined.dsm$outcome),
                                        levels=c("Moderate-Alive","Severe-Alive","Critical-Alive","Critical-Deceased"))
combined.dsm <- merge(combined.dsm,as.data.frame(table(combined.dsm[,c("cohort","severity.outcome")])))
combined.dsm$class.label <- paste0(combined.dsm$severity.outcome," (",combined.dsm$Freq,")")
combined.dsm <- combined.dsm[order(combined.dsm$severity.outcome),]
ggplot(combined.dsm,aes(severity.outcome,PC1)) + geom_boxplot(aes(fill=severity.outcome)) + 
  geom_point(alpha=0.5) + facet_grid(~cohort,scales = "free_x") + 
  stat_compare_means(comparisons = comparisons,hide.ns = T,label = "p.signif",tip.length = 0,method = "wilcox.test") + 
  scale_fill_manual(values=severity.color) +
  theme_classic2() + theme(text = element_text(size=8),axis.text.x = element_text(angle=45,hjust = 1)) + 
  ylab("Disease Severity Metric (DSM)") + xlab("") 
Fig 1E

Fig 1E

tmp <- rbind(cbind(type="All",cytokine.measurements.earliest.30days.pc),
             cbind(type="Critical-Alive Only",subset(cytokine.measurements.earliest.30days.pc,severity.outcome=="Critical-Alive")))
ggplot(tmp,aes(ever_admitted_to_icu,PC1)) + geom_boxplot(outlier.shape = NA) + 
  geom_jitter(aes(fill=severity.outcome),pch=21,width=0.2,height=0,size=2,alpha=0.75) + theme_bw() + scale_fill_manual(values=severity.color) + 
  ylab("DSM") + xlab("ICU Admission") + stat_compare_means(size=3) + theme(text=element_text(size=7)) +
  facet_grid(~type)
Fig S1C

Fig S1C

Session Info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
## 
## Matrix products: default
## 
## Random number generation:
##  RNG:     Mersenne-Twister 
##  Normal:  Inversion 
##  Sample:  Rounding 
##  
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] Hmisc_4.4-1     Formula_1.2-3   survival_3.1-8  lattice_0.20-38
## [5] ggpubr_0.4.0    ggsci_2.9       ggplot2_3.3.2   knitr_1.30     
## 
## loaded via a namespace (and not attached):
##  [1] viridis_0.5.1       tidyr_1.1.2         viridisLite_0.3.0  
##  [4] splines_3.6.3       carData_3.0-4       gtools_3.8.2       
##  [7] highr_0.8           latticeExtra_0.6-29 cellranger_1.1.0   
## [10] yaml_2.2.1          pillar_1.4.6        backports_1.1.10   
## [13] glue_1.4.2          limma_3.42.2        digest_0.6.25      
## [16] RColorBrewer_1.1-2  ggsignif_0.6.0      checkmate_2.0.0    
## [19] colorspace_1.4-1    htmltools_0.5.0     Matrix_1.2-18      
## [22] plyr_1.8.6          pkgconfig_2.0.3     pheatmap_1.0.12    
## [25] broom_0.7.0         haven_2.3.1         purrr_0.3.4        
## [28] scales_1.1.1        jpeg_0.1-8.1        openxlsx_4.2.2     
## [31] rio_0.5.16          htmlTable_2.1.0     tibble_3.0.3       
## [34] generics_0.0.2      farver_2.0.3        car_3.0-10         
## [37] ellipsis_0.3.1      withr_2.3.0         nnet_7.3-12        
## [40] magrittr_1.5        crayon_1.3.4        readxl_1.3.1       
## [43] evaluate_0.14       rstatix_0.6.0       forcats_0.5.0      
## [46] foreign_0.8-75      tools_3.6.3         data.table_1.13.0  
## [49] hms_0.5.3           lifecycle_0.2.0     stringr_1.4.0      
## [52] munsell_0.5.0       cluster_2.1.0       zip_2.1.1          
## [55] compiler_3.6.3      rlang_0.4.7         grid_3.6.3         
## [58] rstudioapi_0.11     htmlwidgets_1.5.1   labeling_0.3       
## [61] base64enc_0.1-3     rmarkdown_2.4       gtable_0.3.0       
## [64] abind_1.4-5         curl_4.3            reshape2_1.4.4     
## [67] R6_2.4.1            gridExtra_2.3       dplyr_1.0.2        
## [70] stringi_1.4.6       Rcpp_1.0.5          vctrs_0.3.4        
## [73] rpart_4.1-15        png_0.1-7           tidyselect_1.1.0   
## [76] xfun_0.17