Validate DSM association with disease severity using Brescia cohort not in the citeseq
# using one of the cytokines to check
cytokine.measurements <- subset(covid19.lab.results,test_name == "TNF-a")
sum(is.na(cytokine.measurements$days_from_admission_to_test))
## [1] 4
sum(is.na(cytokine.measurements$days_from_symptom_onset_to_test))
## [1] 119
cytokine.measurements <- cytokine.measurements[order(cytokine.measurements$days_from_admission_to_test,
cytokine.measurements$days_from_symptom_onset_to_test,cytokine.measurements$test_time_point.Serum),]
cytokine.measurements.earliest <- cytokine.measurements[!duplicated(cytokine.measurements$subject_id),]
cat("Number of patients with cytokine data:\n")
## Number of patients with cytokine data:
dim(cytokine.measurements.earliest)
## [1] 143 35
#table(cytokine.measurements.earliest$visit)
# patients with symptom onset and admission dates and cytokine data within 30 days of onset
cytokine.measurements.earliest.30days <- subset(cytokine.measurements.earliest,
!is.na(days_from_symptom_onset_to_test) & !is.na(days_from_admission_to_test) &
days_from_symptom_onset_to_test < 30)
cat("Patients with symptom onset and admission dates and cytokine data within 30 days of onset:\n")
## Patients with symptom onset and admission dates and cytokine data within 30 days of onset:
dim(cytokine.measurements.earliest.30days)
## [1] 107 35
o2.measurements <- subset(covid19.lab.results,test_name == "SpO2/FiO2")
sum(is.na(o2.measurements$days_from_admission_to_test))
## [1] 0
sum(is.na(o2.measurements$days_from_symptom_onset_to_test))
## [1] 2
o2.measurements.merged <- merge(o2.measurements,cytokine.measurements,by=c("subject_id","days_from_symptom_onset_to_test"),suffixes = c("",".y"))
o2.measurements.merged <- o2.measurements.merged[order(o2.measurements.merged$days_from_admission_to_test,
o2.measurements.merged$days_from_symptom_onset_to_test,o2.measurements.merged$test_time_point.Serum),]
o2.measurements.earliest <- o2.measurements.merged[!duplicated(o2.measurements.merged$subject_id),]
cat("Number of patients with o2 data:\n")
## Number of patients with o2 data:
dim(o2.measurements.earliest)
## [1] 32 68
# patients with symptom onset and admission dates and o2 data within 30 days of onset
o2.measurements.earliest.30days <- subset(o2.measurements.earliest,
!is.na(days_from_symptom_onset_to_test) & !is.na(days_from_admission_to_test) &
days_from_symptom_onset_to_test < 30)
cat("Patients with symptom onset and admission dates and o2 data within 30 days of onset:\n")
## Patients with symptom onset and admission dates and o2 data within 30 days of onset:
dim(o2.measurements.earliest.30days)
## [1] 30 68
cytokine.measurements.earliest.30days.optimized <- cytokine.measurements.earliest.30days
# remove the CITE-seq patients
cytokine.measurements.earliest.30days.optimized <- subset(cytokine.measurements.earliest.30days.optimized,!(subject_id %in%
unique(subset(covid19.samples,!is.na(batch))$subject_id)))
paramaters.of.interest <- c("SpO2/FiO2","CRP","IL-6","TNF-a","TNF-b","IL-18/IL-1f4","ANC/ALC Ratio","IP-10","D-Dimer","IFN-Gamma","CXCL9/MIG",
"IL-17","IL-4","IL-13","Fibrinogen","Lymphocytes",#"Serology Spike","Serology Nucleocapsid"
"Lactate Dehydrogenase (LDH)","Platelets (PLT)")#,"Neutrophils")
setdiff(tolower(paramaters.of.interest),tolower(covid19.lab.results$test_name))
## character(0)
selected.lab.values <- subset(covid19.lab.results,subject_id %in% cytokine.measurements.earliest.30days.optimized$subject_id
& tolower(test_name) %in% tolower(paramaters.of.interest) & test_id != "Lymphocytes_pct")
# measurements within +/- 2 days of target
selected.lab.values.expanded <- data.frame()
day.margin <- 2
for (i in 1:nrow(cytokine.measurements.earliest.30days.optimized)) {
subj <- cytokine.measurements.earliest.30days.optimized$subject_id[i]
#cat(i,"-",subj,"\n")
days_from_symptom_onset <- cytokine.measurements.earliest.30days.optimized$days_from_symptom_onset_to_test[i]
tmp <- subset(selected.lab.values,subject_id == subj &
days_from_symptom_onset_to_test <= (days_from_symptom_onset + day.margin) &
days_from_symptom_onset_to_test >= (days_from_symptom_onset - day.margin))
tmp$target.day.diff <- abs(tmp$days_from_symptom_onset_to_test - days_from_symptom_onset)
#print(table(tmp$target.day.diff))
tmp <- tmp[order(tmp$target.day.diff,decreasing = F),]
tmp$test_date_id <- apply(tmp[,c("subject_id","test_id","days_from_symptom_onset_to_test")],1,paste0,collapse=":")
tmp <- subset(tmp,test_date_id %in% tmp[!duplicated(tmp[,c("subject_id","test_id")]),]$test_date_id) # keep multiple values taken on the same closest day
#tmp <- tmp[!duplicated(tmp[,c("subject_id","test_id")]),]
selected.lab.values.expanded <- rbind(selected.lab.values.expanded,tmp)
}
#table(selected.lab.values.expanded[,c("subject_id","target.day.diff")])
selected.lab.values.expanded.avg <- reshape2::dcast(selected.lab.values.expanded,subject_id ~ test_id,
value.var = "test_value",fun.aggregate = function(x){mean(x,na.rm = T)})
rownames(selected.lab.values.expanded.avg) <- selected.lab.values.expanded.avg$subject_id
selected.lab.values.expanded.avg <- selected.lab.values.expanded.avg[,-1]
rowSums(is.na(selected.lab.values.expanded.avg))
## HGR0000002 HGR0000004 HGR0000005 HGR0000006 HGR0000007 HGR0000009 HGR0000011
## 3 1 9 2 4 2 2
## HGR0000012 HGR0000014 HGR0000015 HGR0000017 HGR0000020 HGR0000021 HGR0000022
## 2 3 2 2 3 2 3
## HGR0000023 HGR0000025 HGR0000026 HGR0000027 HGR0000032 HGR0000033 HGR0000034
## 2 2 3 2 3 2 9
## HGR0000036 HGR0000037 HGR0000038 HGR0000039 HGR0000040 HGR0000041 HGR0000043
## 9 2 9 2 9 2 3
## HGR0000044 HGR0000046 HGR0000047 HGR0000049 HGR0000050 HGR0000052 HGR0000053
## 3 2 2 3 1 2 2
## HGR0000056 HGR0000057 HGR0000058 HGR0000060 HGR0000061 HGR0000063 HGR0000065
## 2 3 2 2 2 1 4
## HGR0000066 HGR0000067 HGR0000068 HGR0000072 HGR0000073 HGR0000075 HGR0000077
## 2 2 2 3 2 3 2
## HGR0000080 HGR0000081 HGR0000085 HGR0000086 HGR0000088 HGR0000097 HGR0000103
## 9 1 3 9 1 2 1
## HGR0000104 HGR0000106 HGR0000107 HGR0000108 HGR0000114 HGR0000122 HGR0000132
## 2 2 2 2 9 2 2
## HGR0000139 HGR0000145 HGR0000147 HGR0000149 HGR0000153 HGR0000159 HGR0000160
## 9 2 2 3 1 2 2
## HGR0000396 HGR0000400 HGR0000403 HGR0000404 HGR0000405 HGR0000423 HGR0001143
## 2 2 9 9 9 2 9
table(rowSums(is.na(selected.lab.values.expanded.avg)))
##
## 1 2 3 4 9
## 7 41 14 2 13
colSums(is.na(selected.lab.values.expanded.avg))
## ANC.ALC.Ratio_.ratio CRP_mg.L
## 13 14
## CXCL9.MIG D.Dimer_ng.mL
## 0 27
## Fibrinogen_mg.dL IFN.gamma
## 14 0
## IL.13 IL.17
## 0 0
## IL.18.IL.1F4 IL.4
## 0 0
## IL.6 IL.6_ng.L
## 0 71
## IP.10 Lactate.Dehydrogenase.LDH_U.L
## 0 16
## Lymphocytes_x10.3.uL Platelets.PLT_x10.3.uL
## 14 13
## SpO2.FiO2_.ratio TNF.a
## 74 0
## TNF.b
## 0
# remove patients with too many NAs
max.na <- 5
selected.patients <- names(which(rowSums(is.na(selected.lab.values.expanded.avg)) <= max.na))
selected.lab.values.expanded.avg.filtered <- selected.lab.values.expanded.avg[selected.patients,grep("IL.6_ng.L",colnames(selected.lab.values.expanded.avg),invert = T)]
colSums(is.na(selected.lab.values.expanded.avg.filtered))
## ANC.ALC.Ratio_.ratio CRP_mg.L
## 0 1
## CXCL9.MIG D.Dimer_ng.mL
## 0 14
## Fibrinogen_mg.dL IFN.gamma
## 1 0
## IL.13 IL.17
## 0 0
## IL.18.IL.1F4 IL.4
## 0 0
## IL.6 IP.10
## 0 0
## Lactate.Dehydrogenase.LDH_U.L Lymphocytes_x10.3.uL
## 3 1
## Platelets.PLT_x10.3.uL SpO2.FiO2_.ratio
## 0 61
## TNF.a TNF.b
## 0 0
# transform to log10
selected.lab.values.expanded.avg.filtered <- log10(selected.lab.values.expanded.avg.filtered+0.01)
covid19.patient.demo$severity.outcome <- paste0(covid19.patient.demo$severity,"-",covid19.patient.demo$outcome)
severity.color <- ggsci::pal_jama()(4)
names(severity.color) <- sort(unique(covid19.patient.demo[selected.patients,]$severity.outcome))
pheatmap::pheatmap(as.matrix(scale(selected.lab.values.expanded.avg.filtered)),
annotation_row = covid19.patient.demo[selected.patients,c("severity.outcome"),drop=F],
color = viridis::inferno(64),breaks=c(-6,seq(-3,3,by=0.1),6),
annotation_colors = list(sex=c("F"="darkred","M"="darkblue"),severity.outcome=severity.color),
main="All earliest time-point batch samples (values from time point +/- 2 days)")
# impute remaining missing data
if (as.numeric(paste0(R.version$major,".",floor(as.numeric(R.version$minor)))) >= 3.6)
RNGkind(sample.kind = "Rounding") # ensure compatibility with versions prior to 3.6
#set.seed(1087)
set.seed(10276)
selected.lab.values.expanded.avg.imputed <- selected.lab.values.expanded.avg.filtered
imputed.obj <- aregImpute(formula = as.formula(paste0("~ ",paste0(colnames(selected.lab.values.expanded.avg.filtered),collapse = " + "))),
data = selected.lab.values.expanded.avg.filtered, n.impute = 5, type = "regression",nk = 0)
## Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
imputed.values <- impute.transcan(imputed.obj,imputation = 1,data=selected.lab.values.expanded.avg.filtered,list.out=TRUE)
##
##
## Imputed missing values with the following frequencies
## and stored them in variables with their original names:
##
## CRP_mg.L D.Dimer_ng.mL
## 1 14
## Fibrinogen_mg.dL Lactate.Dehydrogenase.LDH_U.L
## 1 3
## Lymphocytes_x10.3.uL SpO2.FiO2_.ratio
## 1 61
selected.lab.values.expanded.avg.imputed[names(imputed.values)] <- imputed.values
pheatmap::pheatmap(as.matrix(scale(selected.lab.values.expanded.avg.imputed)),
annotation_row = covid19.patient.demo[selected.patients,c("severity.outcome"),drop=F],
color = viridis::inferno(64),breaks=c(-6,seq(-3,3,by=0.1),6),
annotation_colors = list(sex=c("F"="darkred","M"="darkblue"),severity.outcome=severity.color),
main="All earliest time-point batch samples (values from time point +/- 2 days or imputed)")
# correct for days from symptom onset or admission
correct.for <- cytokine.measurements.earliest.30days.optimized[match(rownames(selected.lab.values.expanded.avg.imputed),
cytokine.measurements.earliest.30days.optimized$subject_id),
"days_from_admission_to_test"]
selected.lab.values.expanded.avg.imputed.corrected <- t(limma::removeBatchEffect(t(selected.lab.values.expanded.avg.imputed),
covariates = correct.for))
dsm.paramaters <- c("SpO2.FiO2_.ratio","IL.6","IL.13","TNF.b","ANC.ALC.Ratio_.ratio","IP.10","D.Dimer_ng.mL",
"Lymphocytes_x10.3.uL","Lactate.Dehydrogenase.LDH_U.L")
lab.pca <- prcomp(selected.lab.values.expanded.avg.imputed.corrected[,dsm.paramaters],scale. = T,center = T)
pca.summary <- summary(lab.pca)
# PCA plot
cytokine.measurements.earliest.30days.pc <- merge(cytokine.measurements.earliest.30days.optimized,lab.pca$x[,1:2],by.x="subject_id",by.y="row.names")
cytokine.measurements.earliest.30days.pc$severity.outcome <- factor(covid19.patient.demo[cytokine.measurements.earliest.30days.pc$subject_id,]$severity.outcome,
levels=c("Moderate-Alive","Severe-Alive","Critical-Alive","Critical-Deceased"))
# violin plot by severity group
comparisons <- gtools::combinations(4,2,names(severity.color))
comparisons <- comparisons[comparisons[,1] != comparisons[,2],]
comparisons <- lapply(1:nrow(comparisons),function(x){comparisons[x,]})
# ggviolin(cytokine.measurements.earliest.30days.pc,x="severity.outcome",y="PC1", add = "boxplot",palette = c("#00A1D5FF","#B24745FF","#374E55FF","#DF8F44FF"),
# fill="severity.outcome",add.params = list(fill="white"),xlab = "Severity-Outcome",ylab="Disease Severity Metric (DSM)") +
# geom_point(alpha=0.25,size=2) +
# stat_compare_means(comparisons = comparisons,hide.ns = T,label = "p.signif",tip.length = 0,method = "wilcox.test") +
# theme(text = element_text(size=8))
variables <- c("subject_id","PC1","severity","outcome")
combined.dsm <- rbind(data.frame(cohort="CITE-Seq",dsm.data[,variables]),
data.frame(cohort="Independent",cytokine.measurements.earliest.30days.pc[,variables]))
combined.dsm$severity.outcome <- factor(paste0(combined.dsm$severity,"-",combined.dsm$outcome),
levels=c("Moderate-Alive","Severe-Alive","Critical-Alive","Critical-Deceased"))
combined.dsm <- merge(combined.dsm,as.data.frame(table(combined.dsm[,c("cohort","severity.outcome")])))
combined.dsm$class.label <- paste0(combined.dsm$severity.outcome," (",combined.dsm$Freq,")")
combined.dsm <- combined.dsm[order(combined.dsm$severity.outcome),]
ggplot(combined.dsm,aes(severity.outcome,PC1)) + geom_boxplot(aes(fill=severity.outcome)) +
geom_point(alpha=0.5) + facet_grid(~cohort,scales = "free_x") +
stat_compare_means(comparisons = comparisons,hide.ns = T,label = "p.signif",tip.length = 0,method = "wilcox.test") +
scale_fill_manual(values=severity.color) +
theme_classic2() + theme(text = element_text(size=8),axis.text.x = element_text(angle=45,hjust = 1)) +
ylab("Disease Severity Metric (DSM)") + xlab("")
tmp <- rbind(cbind(type="All",cytokine.measurements.earliest.30days.pc),
cbind(type="Critical-Alive Only",subset(cytokine.measurements.earliest.30days.pc,severity.outcome=="Critical-Alive")))
ggplot(tmp,aes(ever_admitted_to_icu,PC1)) + geom_boxplot(outlier.shape = NA) +
geom_jitter(aes(fill=severity.outcome),pch=21,width=0.2,height=0,size=2,alpha=0.75) + theme_bw() + scale_fill_manual(values=severity.color) +
ylab("DSM") + xlab("ICU Admission") + stat_compare_means(size=3) + theme(text=element_text(size=7)) +
facet_grid(~type)
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
## Random number generation:
## RNG: Mersenne-Twister
## Normal: Inversion
## Sample: Rounding
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] Hmisc_4.4-1 Formula_1.2-3 survival_3.1-8 lattice_0.20-38
## [5] ggpubr_0.4.0 ggsci_2.9 ggplot2_3.3.2 knitr_1.30
##
## loaded via a namespace (and not attached):
## [1] viridis_0.5.1 tidyr_1.1.2 viridisLite_0.3.0
## [4] splines_3.6.3 carData_3.0-4 gtools_3.8.2
## [7] highr_0.8 latticeExtra_0.6-29 cellranger_1.1.0
## [10] yaml_2.2.1 pillar_1.4.6 backports_1.1.10
## [13] glue_1.4.2 limma_3.42.2 digest_0.6.25
## [16] RColorBrewer_1.1-2 ggsignif_0.6.0 checkmate_2.0.0
## [19] colorspace_1.4-1 htmltools_0.5.0 Matrix_1.2-18
## [22] plyr_1.8.6 pkgconfig_2.0.3 pheatmap_1.0.12
## [25] broom_0.7.0 haven_2.3.1 purrr_0.3.4
## [28] scales_1.1.1 jpeg_0.1-8.1 openxlsx_4.2.2
## [31] rio_0.5.16 htmlTable_2.1.0 tibble_3.0.3
## [34] generics_0.0.2 farver_2.0.3 car_3.0-10
## [37] ellipsis_0.3.1 withr_2.3.0 nnet_7.3-12
## [40] magrittr_1.5 crayon_1.3.4 readxl_1.3.1
## [43] evaluate_0.14 rstatix_0.6.0 forcats_0.5.0
## [46] foreign_0.8-75 tools_3.6.3 data.table_1.13.0
## [49] hms_0.5.3 lifecycle_0.2.0 stringr_1.4.0
## [52] munsell_0.5.0 cluster_2.1.0 zip_2.1.1
## [55] compiler_3.6.3 rlang_0.4.7 grid_3.6.3
## [58] rstudioapi_0.11 htmlwidgets_1.5.1 labeling_0.3
## [61] base64enc_0.1-3 rmarkdown_2.4 gtable_0.3.0
## [64] abind_1.4-5 curl_4.3 reshape2_1.4.4
## [67] R6_2.4.1 gridExtra_2.3 dplyr_1.0.2
## [70] stringi_1.4.6 Rcpp_1.0.5 vctrs_0.3.4
## [73] rpart_4.1-15 png_0.1-7 tidyselect_1.1.0
## [76] xfun_0.17