Classification of Brescia Deceased vs. Recovered

Compare cytokine profiles between deceased patients and those with good outcome. Specific focus on 17-23 days since symptom onset.

Create sub-cohort

Identify all deceased patients and match them with critical but recovered patients by age and gender (excluding those staying hospitalized for more than 50 days). Patients in the citiseq cohort can optionally be excluded.

load(file.path(data.folder,"covid19.metadata.paper1.RData"),verbose=F)
covid19.lab.results$severity.outcome <- paste0(covid19.lab.results$severity,"-",covid19.lab.results$outcome)

severity.color <- c("Critical-Alive"="#374E55FF","Critical-Deceased"="#DF8F44FF","Moderate-Alive"="#00A1D5FF" ,"Severe-Alive"="#B24745FF","HC" = "#79AF97FF")

deceased.patients.cytokines <- subset(covid19.lab.results,outcome == "Deceased" & test_type == "ELISA" & !is.na(days_from_symptom_onset_to_test))
deceased.patients <- unique(deceased.patients.cytokines$subject_id)

cat("Map of sampling TSO time points for deceased patients:\n")

## Map of sampling TSO time points for deceased patients:

deceased.time.points <- as.data.frame.matrix(table(deceased.patients.cytokines[,c("subject_id","days_from_symptom_onset_to_test")])) 
colSums(deceased.time.points > 0)

##  3  8 10 11 12 13 14 16 17 18 19 20 21 22 23 24 25 26 28 29 30 32 35 36 39 
##  1  1  1  3  1  2  1  2  1  1  2  3  1  4  1  3  3  2  1  2  1  1  2  2  2 
## 42 45 47 53 
##  1  1  2  1

cat("Age and sex distribution:\n")

## Age and sex distribution:

table(subset(covid19.patient.demo,subject_id %in% deceased.patients)$sex)

## 
##  F  M 
##  2 15

summary(subset(covid19.patient.demo,subject_id %in% deceased.patients)$age)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   46.00   61.00   66.00   66.24   73.00   78.00

# recovered patients with similar age and sex
age.tol <- 3
recovered.patients <- c()
for (i in deceased.patients) {
  target.age <- covid19.patient.demo[i,"age"]
  target.sex <- covid19.patient.demo[i,"sex"]
  cat("Target age:",target.age,"sex:",target.sex,"\n")
  tmp <- subset(covid19.lab.results,outcome != "Deceased" & test_type == "ELISA" & !is.na(days_from_symptom_onset_to_test) & 
           age >= target.age - age.tol & age <= target.age + age.tol & severity == "Critical" & sex == target.sex & 
             days_from_symptom_onset_to_test > 15 & days_from_symptom_onset_to_test <= 25 &
           number_of_days_hospitalized < 50)

  matched.subject <- unique(tmp$subject_id)
  cat("Matched subject:",matched.subject,"\n")
  recovered.patients <- c(recovered.patients,matched.subject)
}

## Target age: 73 sex: M 
## Matched subject: HGR0000015 HGR0000088 HGR0000093 HGR0000101 
## Target age: 65 sex: M 
## Matched subject: HGR0000039 HGR0000077 
## Target age: 66 sex: M 
## Matched subject: HGR0000063 HGR0000085 HGR0000149 
## Target age: 59 sex: M 
## Matched subject: HGR0000039 HGR0000049 HGR0000077 HGR0000102 HGR0000108 HGR0000135 
## Target age: 46 sex: M 
## Matched subject: HGR0000032 HGR0000092 HGR0000142 HGR0000145 
## Target age: 66 sex: M 
## Matched subject: HGR0000063 HGR0000085 HGR0000149 
## Target age: 78 sex: M 
## Matched subject: HGR0000088 HGR0000089 
## Target age: 61 sex: M 
## Matched subject: HGR0000039 HGR0000077 HGR0000102 HGR0000135 
## Target age: 68 sex: M 
## Matched subject: HGR0000063 HGR0000085 HGR0000093 HGR0000149 
## Target age: 71 sex: F 
## Matched subject:  
## Target age: 66 sex: M 
## Matched subject: HGR0000063 HGR0000085 HGR0000149 
## Target age: 49 sex: F 
## Matched subject: HGR0000044 HGR0000075 HGR0000139 
## Target age: 73 sex: M 
## Matched subject: HGR0000015 HGR0000088 HGR0000093 HGR0000101 
## Target age: 76 sex: M 
## Matched subject: HGR0000015 HGR0000088 HGR0000089 HGR0000101 
## Target age: 60 sex: M 
## Matched subject: HGR0000039 HGR0000049 HGR0000077 HGR0000102 HGR0000135 
## Target age: 72 sex: M 
## Matched subject: HGR0000015 HGR0000063 HGR0000085 HGR0000093 HGR0000101 HGR0000149 
## Target age: 77 sex: M 
## Matched subject: HGR0000015 HGR0000088 HGR0000089

recovered.patients <- unique(recovered.patients[!is.na(recovered.patients)])
recovered.patients.cytokines <- subset(covid19.lab.results,test_type == "ELISA" & subject_id %in% recovered.patients)
cat("Map of samping TSO time points for recovered patients:\n")

## Map of samping TSO time points for recovered patients:

recovered.time.points <- as.data.frame.matrix(table(recovered.patients.cytokines[,c("subject_id","days_from_symptom_onset_to_test")])) 
colSums(recovered.time.points > 0)

##  3  8  9 10 11 12 13 14 15 16 17 18 19 20 22 23 24 25 26 27 29 30 31 32 33 
##  1  1  1  3  2  2  3  3  4  4  2  5  5  6  1  5  2  3  2  3  2  3  2  1  2 
## 34 35 37 38 39 40 44 45 48 51 56 
##  1  2  1  1  1  1  2  1  1  1  1

cat("Age and sex distribution:\n")

## Age and sex distribution:

table(subset(covid19.patient.demo,subject_id %in% recovered.patients)$sex)

## 
##  F  M 
##  3 18

summary(subset(covid19.patient.demo,subject_id %in% recovered.patients)$age)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   45.00   49.00   59.00   60.33   69.00   78.00

selected.cytokine.profiles <- rbind(deceased.patients.cytokines,recovered.patients.cytokines)
# remove patients in citeseq
#citeseq.patients <- subset(covid19.samples,!is.na(batch))$subject_id
#intersect(citeseq.patients,c(recovered.patients,deceased.patients))
#selected.cytokine.profiles <- subset(selected.cytokine.profiles,!(subject_id %in% citeseq.patients)) 
parameter.labels <- unique(selected.cytokine.profiles[,c("test_id","test_name")])
rownames(parameter.labels) <- parameter.labels$test_id
parameter.labels$test_name <- gsub("/ ","/",parameter.labels$test_name,fixed = T)

Grouping of samples

Separate samples into interval groups relative to juncture: early (7-16 days), mid (juncture/17-23 days), and late (24-30 days)

selected.cytokine.profiles$unique.id <- make.unique(apply(selected.cytokine.profiles[,c("subject_id","days_from_admission_to_test","test_id")],1,
                                                          paste0,collapse="."),sep = "|")
selected.cytokine.profiles$unique.id <- sapply(selected.cytokine.profiles$unique.id,function(x){unlist(strsplit(x,"|",fixed=T))[2]})
selected.cytokine.profiles.matrix <- reshape2::dcast(selected.cytokine.profiles,
                                                     subject_id + age + sex + outcome + severity + days_from_symptom_onset_to_test + unique.id ~ test_id,
                                                     value.var = "test_value")
selected.cytokine.profiles.matrix[,colnames(selected.cytokine.profiles.matrix) %in% selected.cytokine.profiles$test_id] <-
  apply(selected.cytokine.profiles.matrix[,colnames(selected.cytokine.profiles.matrix) %in% selected.cytokine.profiles$test_id],2,function(x){scale(log10(x+1))})
selected.cytokine.profiles.matrix$stage <- NA
tmp <- which(selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test >= 7 & selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test <= 16)
selected.cytokine.profiles.matrix[tmp,"stage"] <- "early"
tmp <- which(selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test >= 17 & selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test <= 23)
selected.cytokine.profiles.matrix[tmp,"stage"] <- "mid"
tmp <- which(selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test >= 24 & selected.cytokine.profiles.matrix$days_from_symptom_onset_to_test <= 30)
selected.cytokine.profiles.matrix[tmp,"stage"] <- "late"

cat("Sample distribution:\n")

## Sample distribution:

table(selected.cytokine.profiles.matrix[,c("outcome","stage")])

##           stage
## outcome    early late mid
##   Alive       24   16  25
##   Deceased    11   12  13

Outcome Prediction

Search for divergent proteins around juncture using the following model:

Differential expression: test_value ~ outcome + age.numeric + sex + DFSO + (1|subject_id), for samples within each stage to identify cytokines with sig. differences between the two outcome groups

Do leave-one-out cross validation by holding out one patient in each round for testing

# leave-one-out modeling
sample.scores <- data.frame()
feature.importance <- data.frame()
selected.cytokine.profiles.matrix$outcome.numerical <- 0
selected.cytokine.profiles.matrix[selected.cytokine.profiles.matrix$outcome == "Deceased",]$outcome.numerical <- 1
for (i in unique(selected.cytokine.profiles.matrix$subject_id)) {
  train.data <- subset(selected.cytokine.profiles.matrix,subject_id != i)
  test.data <- subset(selected.cytokine.profiles.matrix,subject_id == i)
  
  # feature selection
  search.results <- juncture.divergent.proteins(train.data,pval.cutoff = 1)

  # modeling
  for (s in c("mid","late")) {
    pls.train.data <- subset(train.data,stage == s)
    pls.test.data <- subset(test.data,stage == s)
    sig.did.de.df <- subset(search.results$type1,de.pval.1 <= 0.05 & 
                                 did.pval.test.intervalTRUE.outcomeDeceased <= 0.05 & stage == s) # DE & sig. delta 
    if (nrow(pls.test.data) > 0 & nrow(pls.train.data) > 0) {
      modpls2 <-plsRglm(dataY=pls.train.data$outcome.numerical,dataX=pls.train.data[,sig.did.de.df$cytokine],
                        nt=1,modele="pls-glm-logistic",pvals.expli = T,
                      dataPredictY = pls.test.data[,sig.did.de.df$cytokine,drop=F],sparse = F,verbose = F)
      sample.scores <- rbind(sample.scores,data.frame(stage=s,sample=pls.test.data[,
                                                    c("subject_id","days_from_symptom_onset_to_test","stage","outcome")],
                                                    predicted=modpls2$ValsPredictY,
                                                    observed=pls.test.data[,"outcome.numerical"]))
      feature.importance <- dplyr::bind_rows(feature.importance,data.frame(stage=s,t(modpls2$Std.Coeffs)))
    }
  }
}
# performance
cat("Performance at juncture:\n")

## Performance at juncture:

precrec_obj_mid <- evalmod(scores = subset(sample.scores,stage=="mid")$predicted,labels = subset(sample.scores,stage=="mid")$observed)
precrec_obj_mid

## 
##     === AUCs ===
## 
##      Model name Dataset ID Curve type       AUC
##    1         m1          1        ROC 0.8338462
##    2         m1          1        PRC 0.7089797
## 
## 
##     === Input data ===
## 
##      Model name Dataset ID # of negatives # of positives
##    1         m1          1             25             13

cat("Performance post-juncture:\n")

## Performance post-juncture:

precrec_obj_late <- evalmod(scores = subset(sample.scores,stage=="late")$predicted,labels = subset(sample.scores,stage=="late")$observed)
precrec_obj_late

## 
##     === AUCs ===
## 
##      Model name Dataset ID Curve type       AUC
##    1         m1          1        ROC 0.8281250
##    2         m1          1        PRC 0.8204015
## 
## 
##     === Input data ===
## 
##      Model name Dataset ID # of negatives # of positives
##    1         m1          1             16             12

#precrec_obj <- evalmod(scores = sample.scores$predicted,labels = sample.scores$observed)
#autoplot(precrec_obj)

# ROC plot
roc.curve.all <- rbind(data.frame(type="mid",x=precrec_obj_mid$rocs[[1]]$x,y=precrec_obj_mid$rocs[[1]]$y),
                       data.frame(type="late",x=precrec_obj_late$rocs[[1]]$x,y=precrec_obj_late$rocs[[1]]$y))
ggplot(roc.curve.all,aes(x,y,color=type)) + geom_path(size=1.5,alpha=0.95) + theme_bw() +
 geom_abline(slope=1,intercept = 0,linetype="dashed",color="grey30") +
 xlab("1-Specificity") + ylab("Sensitivity") + scale_color_manual(labels=c("Post-Juncture","Juncture"),values=c("lightblue","#EADCFA"))

Permutation

Calculate permutation-based p-value of classification performance by permuting the patient labels

# permute labels
num.perm <- 1000
protein.search.res.permuted <- mclapply(which(sapply(permuted.auc,is.null)==T),function(j) {
  set.seed(j)
  cat("Round",j,"\n")
  selected.cytokine.profiles.permute <- selected.cytokine.profiles.matrix
  subject.outcome <- unique(selected.cytokine.profiles.permute[,c("subject_id","outcome")])
  subject.outcome$permuted.outcome <- sample(subject.outcome$outcome,nrow(subject.outcome))
  selected.cytokine.profiles.permute$outcome <- subject.outcome[match(selected.cytokine.profiles.permute$subject_id,subject.outcome$subject_id),
                                                                "permuted.outcome"]
  #return(selected.cytokine.profiles.permute)
  
  # leave-one-out modeling
  sample.scores <- data.frame()
  feature.importance <- data.frame()
  selected.cytokine.profiles.permute$outcome.numerical <- 0
  selected.cytokine.profiles.permute[selected.cytokine.profiles.permute$outcome == "Deceased",]$outcome.numerical <- 1
  for (i in unique(selected.cytokine.profiles.matrix$subject_id)) {
    cat(i,"\n")
    train.data <- subset(selected.cytokine.profiles.permute,subject_id != i)
    test.data <- subset(selected.cytokine.profiles.permute,subject_id == i)
    
    # feature selection
    search.results <- juncture.divergent.proteins(train.data,pval.cutoff = 1)
    
    # modeling
    for (s in c("mid","late")) {
      pls.train.data <- subset(train.data,stage == s)
      pls.test.data <- subset(test.data,stage == s)
      # feature selection
      sig.did.de.df <- subset(search.results$type1,de.pval.1 <= 0.05 & 
                                   did.pval.test.intervalTRUE.outcomeDeceased <= 0.05 & stage == s) # DE & sig. delta 
      if (nrow(pls.test.data) > 0 & nrow(pls.train.data) > 0 & nrow(sig.did.de.df) > 0) {
        modpls2 <-plsRglm(dataY=pls.train.data$outcome.numerical,dataX=pls.train.data[,sig.did.de.df$cytokine,drop=F],
                          nt=1,modele="pls-glm-logistic",pvals.expli = T,
                          dataPredictY = pls.test.data[,sig.did.de.df$cytokine,drop=F],sparse = F,verbose = T)
        if (is.numeric(modpls2$ValsPredictY)) {
          sample.scores <- rbind(sample.scores,data.frame(round=j,stage=s,sample=pls.test.data[,
                                                        c("subject_id","days_from_symptom_onset_to_test","stage","outcome")],
                                                        predicted=modpls2$ValsPredictY,
                                                        observed=pls.test.data[,"outcome.numerical"]))
          feature.importance <- dplyr::bind_rows(feature.importance,data.frame(round=j,stage=s,t(modpls2$Std.Coeffs)))
        }
      }
    }
  }
  res <- list(scores=sample.scores,features=feature.importance)
  return(res)
},mc.cores=num.workers)

permuted.auc <- lapply(1:length(protein.search.res.permuted),function(x){
  cat(x,"\n")
  if (class(protein.search.res.permuted[[x]]) != "try-error") {
    permuted.scores <- protein.search.res.permuted[[x]]$scores
    roc.auc <- c("mid"=NA,"late"=NA)
    for (s in unique(permuted.scores$stage)) {
      true.scores <- subset(sample.scores,stage == s)
      round.scores <- subset(permuted.scores,stage == s)
      true.scores <- merge(true.scores,round.scores[,c("sample.subject_id","sample.days_from_symptom_onset_to_test","predicted","observed")],
                           by = c("sample.subject_id","sample.days_from_symptom_onset_to_test"),all.x=T,suffixes=c("",".perm"))
      # default to score of 0.5 when no model is created
      true.scores[is.na(true.scores$predicted.perm),"predicted.perm"] <- 0.5
      auc <- evalmod(scores = true.scores$predicted.perm,labels = true.scores$observed)
      roc.auc[s] <- attr(auc$rocs[[1]],"auc")
    }
    return(roc.auc)
  } else {
    cat("error!\n")
  }
})

permuted.mid.auc <- sapply(permuted.auc,function(x){x["mid"]})
cat("P-value for juncture samples:",sum(permuted.mid.auc > attr(precrec_obj_mid$rocs[[1]],"auc"),na.rm = T)/num.perm)
permuted.late.auc <- sapply(permuted.auc,function(x){x["late"]})
cat("P-value for post-juncture samples:",sum(permuted.late.auc > attr(precrec_obj_late$rocs[[1]],"auc"),na.rm = T)/num.perm)

Session Info

stopCluster(cl)
sessionInfo()

## R version 3.5.2 (2018-12-20)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server 7.2 (Maipo)
## 
## Matrix products: default
## BLAS/LAPACK: /sysapps/cluster/software/OpenBLAS/0.3.1-GCC-7.3.0-2.30/lib/libopenblas_haswellp-r0.3.1.so
## 
## locale:
## [1] en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] foreach_1.4.7       BiocParallel_1.16.6 precrec_0.11.2     
##  [4] plsRglm_1.2.5       lmerTest_3.1-0      lme4_1.1-21        
##  [7] Matrix_1.2-17       tidymv_3.0.0        mgcv_1.8-28        
## [10] nlme_3.1-140        ggfortify_0.4.11    ggpubr_0.2         
## [13] magrittr_1.5        ggsci_2.9           ggplot2_3.2.0      
## [16] knitr_1.23         
## 
## loaded via a namespace (and not attached):
##  [1] doParallel_1.0.15    numDeriv_2016.8-1.1  tools_3.5.2         
##  [4] R6_2.4.0             vegan_2.5-6          lazyeval_0.2.2      
##  [7] colorspace_1.4-1     permute_0.9-5        withr_2.1.2         
## [10] tidyselect_0.2.5     gridExtra_2.3        curl_4.2            
## [13] compiler_3.5.2       network_1.15         labeling_0.3        
## [16] scales_1.1.0         mvtnorm_1.0-11       stringr_1.4.0       
## [19] digest_0.6.19        foreign_0.8-71       minqa_1.2.4         
## [22] rmarkdown_1.16       rio_0.5.16           pkgconfig_2.0.2     
## [25] htmltools_0.5.0      maps_3.3.0           rlang_0.4.8         
## [28] readxl_1.3.1         farver_2.0.3         statnet.common_4.3.0
## [31] dplyr_0.8.3          zip_2.0.4            car_3.0-3           
## [34] dotCall64_1.0-0      bipartite_2.13       Rcpp_1.0.3          
## [37] munsell_0.5.0        abind_1.4-5          lifecycle_0.1.0     
## [40] stringi_1.4.3        yaml_2.2.0           carData_3.0-2       
## [43] MASS_7.3-51.4        plyr_1.8.4           grid_3.5.2          
## [46] forcats_0.4.0        crayon_1.3.4         lattice_0.20-38     
## [49] haven_2.1.1          splines_3.5.2        hms_0.5.1           
## [52] sna_2.4              pillar_1.4.2         igraph_1.2.4.1      
## [55] boot_1.3-23          reshape2_1.4.3       codetools_0.2-16    
## [58] glue_1.3.1           evaluate_0.14        data.table_1.12.2   
## [61] vctrs_0.3.4          spam_2.3-0           nloptr_1.2.1        
## [64] cellranger_1.1.0     gtable_0.3.0         purrr_0.3.2         
## [67] tidyr_1.0.0          assertthat_0.2.1     xfun_0.8            
## [70] openxlsx_4.1.0.1     coda_0.19-3          tibble_2.1.3        
## [73] iterators_1.0.12     fields_9.8-6         cluster_2.1.0