Compare cytokine profiles between deceased patients and those with good outcome. Specific focus on 17-23 days since symptom onset.
Identify all deceased patients and match them with critical but recovered patients by age and gender (excluding those staying hospitalized for more than 50 days). Patients in the citiseq cohort can optionally be excluded.
load(file.path(data.folder,"covid19.metadata.paper1.RData"),verbose=F)
covid19.lab.results$severity.outcome <- paste0(covid19.lab.results$severity,"-",covid19.lab.results$outcome)
severity.color <- c("Critical-Alive"="#374E55FF","Critical-Deceased"="#DF8F44FF","Moderate-Alive"="#00A1D5FF" ,"Severe-Alive"="#B24745FF","HC" = "#79AF97FF")
deceased.patients.cytokines <- subset(covid19.lab.results,outcome == "Deceased" & test_type == "ELISA" & !is.na(days_from_symptom_onset_to_test))
deceased.patients <- unique(deceased.patients.cytokines$subject_id)
cat("Map of sampling TSO time points for deceased patients:\n")
## Map of sampling TSO time points for deceased patients:
deceased.time.points <- as.data.frame.matrix(table(deceased.patients.cytokines[,c("subject_id","days_from_symptom_onset_to_test")]))
colSums(deceased.time.points > 0)
## 3 8 10 11 12 13 14 16 17 18 19 20 21 22 23 24 25 26 28 29 30 32 35 36 39
## 1 1 1 3 1 2 1 2 1 1 2 3 1 4 1 3 3 2 1 2 1 1 2 2 2
## 42 45 47 53
## 1 1 2 1
cat("Age and sex distribution:\n")
## Age and sex distribution:
table(subset(covid19.patient.demo,subject_id %in% deceased.patients)$sex)
##
## F M
## 2 15
summary(subset(covid19.patient.demo,subject_id %in% deceased.patients)$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 46.00 61.00 66.00 66.24 73.00 78.00
# recovered patients with similar age and sex
age.tol <- 3
recovered.patients <- c()
for (i in deceased.patients) {
target.age <- covid19.patient.demo[i,"age"]
target.sex <- covid19.patient.demo[i,"sex"]
cat("Target age:",target.age,"sex:",target.sex,"\n")
tmp <- subset(covid19.lab.results,outcome != "Deceased" & test_type == "ELISA" & !is.na(days_from_symptom_onset_to_test) &
age >= target.age - age.tol & age <= target.age + age.tol & severity == "Critical" & sex == target.sex &
days_from_symptom_onset_to_test > 15 & days_from_symptom_onset_to_test <= 25 &
number_of_days_hospitalized < 50)
matched.subject <- unique(tmp$subject_id)
cat("Matched subject:",matched.subject,"\n")
recovered.patients <- c(recovered.patients,matched.subject)
}
## Target age: 73 sex: M
## Matched subject: HGR0000015 HGR0000088 HGR0000093 HGR0000101
## Target age: 65 sex: M
## Matched subject: HGR0000039 HGR0000077
## Target age: 66 sex: M
## Matched subject: HGR0000063 HGR0000085 HGR0000149
## Target age: 59 sex: M
## Matched subject: HGR0000039 HGR0000049 HGR0000077 HGR0000102 HGR0000108 HGR0000135
## Target age: 46 sex: M
## Matched subject: HGR0000032 HGR0000092 HGR0000142 HGR0000145
## Target age: 66 sex: M
## Matched subject: HGR0000063 HGR0000085 HGR0000149
## Target age: 78 sex: M
## Matched subject: HGR0000088 HGR0000089
## Target age: 61 sex: M
## Matched subject: HGR0000039 HGR0000077 HGR0000102 HGR0000135
## Target age: 68 sex: M
## Matched subject: HGR0000063 HGR0000085 HGR0000093 HGR0000149
## Target age: 71 sex: F
## Matched subject:
## Target age: 66 sex: M
## Matched subject: HGR0000063 HGR0000085 HGR0000149
## Target age: 49 sex: F
## Matched subject: HGR0000044 HGR0000075 HGR0000139
## Target age: 73 sex: M
## Matched subject: HGR0000015 HGR0000088 HGR0000093 HGR0000101
## Target age: 76 sex: M
## Matched subject: HGR0000015 HGR0000088 HGR0000089 HGR0000101
## Target age: 60 sex: M
## Matched subject: HGR0000039 HGR0000049 HGR0000077 HGR0000102 HGR0000135
## Target age: 72 sex: M
## Matched subject: HGR0000015 HGR0000063 HGR0000085 HGR0000093 HGR0000101 HGR0000149
## Target age: 77 sex: M
## Matched subject: HGR0000015 HGR0000088 HGR0000089
recovered.patients <- unique(recovered.patients[!is.na(recovered.patients)])
recovered.patients.cytokines <- subset(covid19.lab.results,test_type == "ELISA" & subject_id %in% recovered.patients)
cat("Map of samping TSO time points for recovered patients:\n")
## Map of samping TSO time points for recovered patients:
recovered.time.points <- as.data.frame.matrix(table(recovered.patients.cytokines[,c("subject_id","days_from_symptom_onset_to_test")]))
colSums(recovered.time.points > 0)
## 3 8 9 10 11 12 13 14 15 16 17 18 19 20 22 23 24 25 26 27 29 30 31 32 33
## 1 1 1 3 2 2 3 3 4 4 2 5 5 6 1 5 2 3 2 3 2 3 2 1 2
## 34 35 37 38 39 40 44 45 48 51 56
## 1 2 1 1 1 1 2 1 1 1 1
cat("Age and sex distribution:\n")
## Age and sex distribution:
table(subset(covid19.patient.demo,subject_id %in% recovered.patients)$sex)
##
## F M
## 3 18
summary(subset(covid19.patient.demo,subject_id %in% recovered.patients)$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 45.00 49.00 59.00 60.33 69.00 78.00
selected.cytokine.profiles <- rbind(deceased.patients.cytokines,recovered.patients.cytokines)
# remove patients in citeseq
#citeseq.patients <- subset(covid19.samples,!is.na(batch))$subject_id
#intersect(citeseq.patients,c(recovered.patients,deceased.patients))
#selected.cytokine.profiles <- subset(selected.cytokine.profiles,!(subject_id %in% citeseq.patients))
parameter.labels <- unique(selected.cytokine.profiles[,c("test_id","test_name")])
rownames(parameter.labels) <- parameter.labels$test_id
parameter.labels$test_name <- gsub("/ ","/",parameter.labels$test_name,fixed = T)
Separate samples into interval groups relative to juncture: early (7-16 days), mid (juncture/17-23 days), and late (24-30 days)
selected.cytokine.profiles$unique.id <- make.unique(apply(selected.cytokine.profiles[,c("subject_id","days_from_admission_to_test","test_id")],1,
paste0,collapse="."),sep = "|")
selected.cytokine.profiles$unique.id <- sapply(selected.cytokine.profiles$unique.id,function(x){unlist(strsplit(x,"|",fixed=T))[2]})
selected.cytokine.profiles.matrix <- reshape2::dcast(selected.cytokine.profiles,
subject_id + age + sex + outcome + severity + days_from_symptom_onset_to_test + unique.id ~ test_id,
value.var = "test_value")
selected.cytokine.profiles.matrix[,colnames(selected.cytokine.profiles.matrix) %in% selected.cytokine.profiles$test_id] <-
apply(selected.cytokine.profiles.matrix[,colnames(selected.cytokine.profiles.matrix) %in% selected.cytokine.profiles$test_id],2,function(x){scale(log10(x+1))})
selected.cytokine.profiles.matrix$stage <- NA
tmp <- which(selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test >= 7 & selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test <= 16)
selected.cytokine.profiles.matrix[tmp,"stage"] <- "early"
tmp <- which(selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test >= 17 & selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test <= 23)
selected.cytokine.profiles.matrix[tmp,"stage"] <- "mid"
tmp <- which(selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test >= 24 & selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test <= 30)
selected.cytokine.profiles.matrix[tmp,"stage"] <- "late"
cat("Sample distribution:\n")
## Sample distribution:
table(selected.cytokine.profiles.matrix[,c("outcome","stage")])
## stage
## outcome early late mid
## Alive 24 16 25
## Deceased 11 12 13
Search for divergent proteins around juncture using the following model:
Differential expression: test_value ~ outcome + age.numeric + sex + DFSO + (1|subject_id), for samples within each stage to identify cytokines with sig. differences between the two outcome groups
Do leave-one-out cross validation by holding out one patient in each round for testing
# leave-one-out modeling
sample.scores <- data.frame()
feature.importance <- data.frame()
selected.cytokine.profiles.matrix$outcome.numerical <- 0
selected.cytokine.profiles.matrix[selected.cytokine.profiles.matrix$outcome == "Deceased",]$outcome.numerical <- 1
for (i in unique(selected.cytokine.profiles.matrix$subject_id)) {
train.data <- subset(selected.cytokine.profiles.matrix,subject_id != i)
test.data <- subset(selected.cytokine.profiles.matrix,subject_id == i)
# feature selection
search.results <- juncture.divergent.proteins(train.data,pval.cutoff = 1)
# modeling
for (s in c("mid","late")) {
pls.train.data <- subset(train.data,stage == s)
pls.test.data <- subset(test.data,stage == s)
sig.did.de.df <- subset(search.results$type1,de.pval.1 <= 0.05 &
did.pval.test.intervalTRUE.outcomeDeceased <= 0.05 & stage == s) # DE & sig. delta
if (nrow(pls.test.data) > 0 & nrow(pls.train.data) > 0) {
modpls2 <-plsRglm(dataY=pls.train.data$outcome.numerical,dataX=pls.train.data[,sig.did.de.df$cytokine],
nt=1,modele="pls-glm-logistic",pvals.expli = T,
dataPredictY = pls.test.data[,sig.did.de.df$cytokine,drop=F],sparse = F,verbose = F)
sample.scores <- rbind(sample.scores,data.frame(stage=s,sample=pls.test.data[,
c("subject_id","days_from_symptom_onset_to_test","stage","outcome")],
predicted=modpls2$ValsPredictY,
observed=pls.test.data[,"outcome.numerical"]))
feature.importance <- dplyr::bind_rows(feature.importance,data.frame(stage=s,t(modpls2$Std.Coeffs)))
}
}
}
# performance
cat("Performance at juncture:\n")
## Performance at juncture:
precrec_obj_mid <- evalmod(scores = subset(sample.scores,stage=="mid")$predicted,labels = subset(sample.scores,stage=="mid")$observed)
precrec_obj_mid
##
## === AUCs ===
##
## Model name Dataset ID Curve type AUC
## 1 m1 1 ROC 0.8338462
## 2 m1 1 PRC 0.7089797
##
##
## === Input data ===
##
## Model name Dataset ID # of negatives # of positives
## 1 m1 1 25 13
cat("Performance post-juncture:\n")
## Performance post-juncture:
precrec_obj_late <- evalmod(scores = subset(sample.scores,stage=="late")$predicted,labels = subset(sample.scores,stage=="late")$observed)
precrec_obj_late
##
## === AUCs ===
##
## Model name Dataset ID Curve type AUC
## 1 m1 1 ROC 0.8281250
## 2 m1 1 PRC 0.8204015
##
##
## === Input data ===
##
## Model name Dataset ID # of negatives # of positives
## 1 m1 1 16 12
#precrec_obj <- evalmod(scores = sample.scores$predicted,labels = sample.scores$observed)
#autoplot(precrec_obj)
# ROC plot
roc.curve.all <- rbind(data.frame(type="mid",x=precrec_obj_mid$rocs[[1]]$x,y=precrec_obj_mid$rocs[[1]]$y),
data.frame(type="late",x=precrec_obj_late$rocs[[1]]$x,y=precrec_obj_late$rocs[[1]]$y))
ggplot(roc.curve.all,aes(x,y,color=type)) + geom_path(size=1.5,alpha=0.95) + theme_bw() +
geom_abline(slope=1,intercept = 0,linetype="dashed",color="grey30") +
xlab("1-Specificity") + ylab("Sensitivity") + scale_color_manual(labels=c("Post-Juncture","Juncture"),values=c("lightblue","#EADCFA"))
Calculate permutation-based p-value of classification performance by permuting the patient labels
# permute labels
num.perm <- 1000
protein.search.res.permuted <- mclapply(which(sapply(permuted.auc,is.null)==T),function(j) {
set.seed(j)
cat("Round",j,"\n")
selected.cytokine.profiles.permute <- selected.cytokine.profiles.matrix
subject.outcome <- unique(selected.cytokine.profiles.permute[,c("subject_id","outcome")])
subject.outcome$permuted.outcome <- sample(subject.outcome$outcome,nrow(subject.outcome))
selected.cytokine.profiles.permute$outcome <- subject.outcome[match(selected.cytokine.profiles.permute$subject_id,subject.outcome$subject_id),
"permuted.outcome"]
#return(selected.cytokine.profiles.permute)
# leave-one-out modeling
sample.scores <- data.frame()
feature.importance <- data.frame()
selected.cytokine.profiles.permute$outcome.numerical <- 0
selected.cytokine.profiles.permute[selected.cytokine.profiles.permute$outcome == "Deceased",]$outcome.numerical <- 1
for (i in unique(selected.cytokine.profiles.matrix$subject_id)) {
cat(i,"\n")
train.data <- subset(selected.cytokine.profiles.permute,subject_id != i)
test.data <- subset(selected.cytokine.profiles.permute,subject_id == i)
# feature selection
search.results <- juncture.divergent.proteins(train.data,pval.cutoff = 1)
# modeling
for (s in c("mid","late")) {
pls.train.data <- subset(train.data,stage == s)
pls.test.data <- subset(test.data,stage == s)
# feature selection
sig.did.de.df <- subset(search.results$type1,de.pval.1 <= 0.05 &
did.pval.test.intervalTRUE.outcomeDeceased <= 0.05 & stage == s) # DE & sig. delta
if (nrow(pls.test.data) > 0 & nrow(pls.train.data) > 0 & nrow(sig.did.de.df) > 0) {
modpls2 <-plsRglm(dataY=pls.train.data$outcome.numerical,dataX=pls.train.data[,sig.did.de.df$cytokine,drop=F],
nt=1,modele="pls-glm-logistic",pvals.expli = T,
dataPredictY = pls.test.data[,sig.did.de.df$cytokine,drop=F],sparse = F,verbose = T)
if (is.numeric(modpls2$ValsPredictY)) {
sample.scores <- rbind(sample.scores,data.frame(round=j,stage=s,sample=pls.test.data[,
c("subject_id","days_from_symptom_onset_to_test","stage","outcome")],
predicted=modpls2$ValsPredictY,
observed=pls.test.data[,"outcome.numerical"]))
feature.importance <- dplyr::bind_rows(feature.importance,data.frame(round=j,stage=s,t(modpls2$Std.Coeffs)))
}
}
}
}
res <- list(scores=sample.scores,features=feature.importance)
return(res)
},mc.cores=num.workers)
permuted.auc <- lapply(1:length(protein.search.res.permuted),function(x){
cat(x,"\n")
if (class(protein.search.res.permuted[[x]]) != "try-error") {
permuted.scores <- protein.search.res.permuted[[x]]$scores
roc.auc <- c("mid"=NA,"late"=NA)
for (s in unique(permuted.scores$stage)) {
true.scores <- subset(sample.scores,stage == s)
round.scores <- subset(permuted.scores,stage == s)
true.scores <- merge(true.scores,round.scores[,c("sample.subject_id","sample.days_from_symptom_onset_to_test","predicted","observed")],
by = c("sample.subject_id","sample.days_from_symptom_onset_to_test"),all.x=T,suffixes=c("",".perm"))
# default to score of 0.5 when no model is created
true.scores[is.na(true.scores$predicted.perm),"predicted.perm"] <- 0.5
auc <- evalmod(scores = true.scores$predicted.perm,labels = true.scores$observed)
roc.auc[s] <- attr(auc$rocs[[1]],"auc")
}
return(roc.auc)
} else {
cat("error!\n")
}
})
permuted.mid.auc <- sapply(permuted.auc,function(x){x["mid"]})
cat("P-value for juncture samples:",sum(permuted.mid.auc > attr(precrec_obj_mid$rocs[[1]],"auc"),na.rm = T)/num.perm)
permuted.late.auc <- sapply(permuted.auc,function(x){x["late"]})
cat("P-value for post-juncture samples:",sum(permuted.late.auc > attr(precrec_obj_late$rocs[[1]],"auc"),na.rm = T)/num.perm)
stopCluster(cl)
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server 7.2 (Maipo)
##
## Matrix products: default
## BLAS/LAPACK: /sysapps/cluster/software/OpenBLAS/0.3.1-GCC-7.3.0-2.30/lib/libopenblas_haswellp-r0.3.1.so
##
## locale:
## [1] en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] foreach_1.4.7 BiocParallel_1.16.6 precrec_0.11.2
## [4] plsRglm_1.2.5 lmerTest_3.1-0 lme4_1.1-21
## [7] Matrix_1.2-17 tidymv_3.0.0 mgcv_1.8-28
## [10] nlme_3.1-140 ggfortify_0.4.11 ggpubr_0.2
## [13] magrittr_1.5 ggsci_2.9 ggplot2_3.2.0
## [16] knitr_1.23
##
## loaded via a namespace (and not attached):
## [1] doParallel_1.0.15 numDeriv_2016.8-1.1 tools_3.5.2
## [4] R6_2.4.0 vegan_2.5-6 lazyeval_0.2.2
## [7] colorspace_1.4-1 permute_0.9-5 withr_2.1.2
## [10] tidyselect_0.2.5 gridExtra_2.3 curl_4.2
## [13] compiler_3.5.2 network_1.15 labeling_0.3
## [16] scales_1.1.0 mvtnorm_1.0-11 stringr_1.4.0
## [19] digest_0.6.19 foreign_0.8-71 minqa_1.2.4
## [22] rmarkdown_1.16 rio_0.5.16 pkgconfig_2.0.2
## [25] htmltools_0.5.0 maps_3.3.0 rlang_0.4.8
## [28] readxl_1.3.1 farver_2.0.3 statnet.common_4.3.0
## [31] dplyr_0.8.3 zip_2.0.4 car_3.0-3
## [34] dotCall64_1.0-0 bipartite_2.13 Rcpp_1.0.3
## [37] munsell_0.5.0 abind_1.4-5 lifecycle_0.1.0
## [40] stringi_1.4.3 yaml_2.2.0 carData_3.0-2
## [43] MASS_7.3-51.4 plyr_1.8.4 grid_3.5.2
## [46] forcats_0.4.0 crayon_1.3.4 lattice_0.20-38
## [49] haven_2.1.1 splines_3.5.2 hms_0.5.1
## [52] sna_2.4 pillar_1.4.2 igraph_1.2.4.1
## [55] boot_1.3-23 reshape2_1.4.3 codetools_0.2-16
## [58] glue_1.3.1 evaluate_0.14 data.table_1.12.2
## [61] vctrs_0.3.4 spam_2.3-0 nloptr_1.2.1
## [64] cellranger_1.1.0 gtable_0.3.0 purrr_0.3.2
## [67] tidyr_1.0.0 assertthat_0.2.1 xfun_0.8
## [70] openxlsx_4.1.0.1 coda_0.19-3 tibble_2.1.3
## [73] iterators_1.0.12 fields_9.8-6 cluster_2.1.0