Compare cytokine profiles of citeseq patients in DSM-high and DSM-low severity groups. Specific focus on 17-23 days since symptom onset.

Read Data

Read patient demo & cytokine data.

citeseq.patient.dsm <- readRDS(file.path(output.folder,"..","DSM","citeseq.patient.end.points.RDS"))
citeseq.patient.dsm$dsm.group <- NA
citeseq.patient.dsm[citeseq.patient.dsm$PC1 < median(citeseq.patient.dsm$PC1),"dsm.group"] <- "DSM-low"
citeseq.patient.dsm[citeseq.patient.dsm$PC1 > median(citeseq.patient.dsm$PC1),"dsm.group"] <- "DSM-high"
table(citeseq.patient.dsm$dsm.group)
## 
## DSM-high  DSM-low 
##       15       15
load(file.path(data.folder,"covid19.metadata.paper1.RData"),verbose=T)
## Loading objects:
##   covid19.patient.demo
##   covid19.samples
##   covid19.treatment
##   covid19.lab.results
covid19.lab.results$severity.outcome <- paste0(covid19.lab.results$severity,"-",covid19.lab.results$outcome)

severity.color <- c("Critical-Alive"="#374E55FF","Critical-Deceased"="#DF8F44FF","Moderate-Alive"="#00A1D5FF" ,"Severe-Alive"="#B24745FF","HC" = "#79AF97FF")

Cytokine time course differences

Compare between dsm-high and dsm-low patients during critical juncture (between 17 and 23 days TSO) and check whether any of the cytokines had signficant shift entering the juncture

selected.cytokine.profiles <- subset(covid19.lab.results,subject_id %in% citeseq.patient.dsm$subject_id & test_type == "ELISA" & 
                                       !is.na(days_from_symptom_onset_to_test))
selected.cytokine.profiles$dsm.group <- citeseq.patient.dsm[selected.cytokine.profiles$subject_id,"dsm.group"]
parameter.labels <- unique(selected.cytokine.profiles[,c("test_id","test_name")])
rownames(parameter.labels) <- parameter.labels$test_id
parameter.labels$test_name <- gsub("/ ","/",parameter.labels$test_name,fixed = T)

# ggplot(subset(selected.cytokine.profiles,test_name=="IL-23"),aes(days_from_symptom_onset_to_test,log10(test_value+1),group=dsm.group)) +
#   geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
#   geom_point(alpha=.5,pch=21,aes(fill=dsm.group),size=1) + scale_fill_manual(name="DSM",values=c("red","deepskyblue1")) +
#   scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) +
#   geom_smooth(se = T,aes(color=dsm.group),alpha=0.15) + facet_wrap(~test_name,scales = "free") + theme_bw() + #xlim(0,60) +
#   xlab("Days from Symptom Onset") + ylab("log10 Cytokine Level") + xlim(0,25)


Differential level within juncture

Using the model: test_value ~ dsm.group + age.numeric + sex + DFSO + (1|subject_id), for samples within the juncture to identify cytokines with sig. differences between the two severity groups

# test which one is diverging between 17-23 days for the two groups
divergence.df <- data.frame()
form <- test_value ~ dsm.group + age + sex + days_from_symptom_onset_to_test + (1|subject_id)
for (i in parameter.labels$test_id) {
  tmp <- subset(selected.cytokine.profiles,test_id == i & days_from_symptom_onset_to_test >= 7 & days_from_symptom_onset_to_test <= 30)
  if (nrow(tmp) > 20) {
    tmp$test_value <- scale(log10(tmp$test_value+1))
    tmp <- subset(tmp, days_from_symptom_onset_to_test >= 17 & days_from_symptom_onset_to_test <= 23)
    lm.res <- lmer(form,tmp)
    lm.anova <- anova(lm.res)
    divergence.df <- rbind(divergence.df,data.frame(cytokine=i,coef=t(fixef(lm.res)),pval = t(lm.anova$`Pr(>F)`),singular=isSingular(lm.res)))
  } else
    warning(paste0(i," only has ",nrow(tmp)," samples."))
}
parameter.labels <- unique(selected.cytokine.profiles[,c("test_id","test_name")])
rownames(parameter.labels) <- parameter.labels$test_id
parameter.labels$test_name <- gsub("/ ","/",parameter.labels$test_name,fixed = T)
divergence.df$cytokine.label <- parameter.labels[divergence.df$cytokine,"test_name"]
divergence.df <- divergence.df[order(divergence.df$coef.dsm.groupDSM.low),]
divergence.df$cytokine.label <- factor(divergence.df$cytokine.label,levels=divergence.df$cytokine.label)
ggplot(divergence.df,aes(cytokine.label,coef.dsm.groupDSM.low)) + geom_hline(yintercept = 0,linetype="dashed") +
  geom_point(aes(size=-log10(pval.1),fill=pval.1<=0.05),alpha=0.75,pch=21) + 
  #geom_point(aes(y=coef.post.20daysTRUE,size=-log10(severity.outcome.pval.2),
  #               color=singular,fill=severity.outcome.pval.2<=0.05),alpha=0.75,pch=22) + 
  coord_flip() + ylab("DSM-low Effect Size") + xlab("Cytokine") + labs(title = "DSM-low vs. DSM-high",subtitle = "Cytokine differences between 17 and 23 days TSO") +
  scale_color_manual(values=c("black","white")) + scale_fill_manual(name="Sig. P-Value",values=c("grey90","lightblue")) +
  scale_size(name="-log10(p-val)") + theme_bw() + theme(text = element_text(size=8))
Figure 2 Coefficient and significance of the severity term in mixed-effect models comparing 63 serum protein levels DSM-low (n=15) relative to DSM-low (n=15) patients around critical juncture (17-23 days TSO; only patients with samples around this period are included in this analysis). Significance is determined by ANOVA unadjusted p = 0.05.

Figure 2 Coefficient and significance of the severity term in mixed-effect models comparing 63 serum protein levels DSM-low (n=15) relative to DSM-low (n=15) patients around critical juncture (17-23 days TSO; only patients with samples around this period are included in this analysis). Significance is determined by ANOVA unadjusted p = 0.05.

sig.diff.cytokines <- as.character(subset(divergence.df,pval.1 <= 0.05)$cytokine)


Time course of selected cytokines

selected.cytokine.profiles.avg <- aggregate(test_value ~ subject_id + days_from_symptom_onset_to_test + test_name + test_id + dsm.group,
                                            selected.cytokine.profiles,mean)

ggplot(subset(selected.cytokine.profiles.avg,test_id %in% c(sig.diff.cytokines)),
       aes(days_from_symptom_onset_to_test,log10(test_value+1),group=dsm.group)) +
  #geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
  geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),fill = "#EADCFA", alpha = 0.05) +
  geom_point(alpha=.25,pch=21,aes(fill=dsm.group),size=1) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1")) +
  geom_line(aes(group=subject_id),alpha=0.1) +
  scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) +
  geom_smooth(se = T,aes(color=dsm.group),alpha=0.2) + #geom_smooth(se = T,aes(fill=dsm.group),alpha=0.1) + 
  facet_wrap(~test_name,scales = "free") + theme_bw() + xlim(0,40) +
  xlab("Days from Symptom Onset") + ylab("Cytokine Level (log10 pg/mL)") 
Figure 3 Time course of selected circulating protein levels of by disease severity. Proteins that either show significant differences (p = 0.05) between the two severity groups or their expression patterns change (p = 0.05) around the critical juncture are shown. Longitudinal samples from the same individual are connected by gray lines. Trajectories were fitted to the two DSM patient groups separately and are shown with the shaded areas representing standard error.

Figure 3 Time course of selected circulating protein levels of by disease severity. Proteins that either show significant differences (p = 0.05) between the two severity groups or their expression patterns change (p = 0.05) around the critical juncture are shown. Longitudinal samples from the same individual are connected by gray lines. Trajectories were fitted to the two DSM patient groups separately and are shown with the shaded areas representing standard error.


CBC cell count time course

selected.cbc.profiles <- subset(covid19.lab.results,test_type == "Clinical" & subject_id %in% citeseq.patient.dsm$subject_id)
selected.cbc.profiles <- selected.cbc.profiles[grep("((T|C|s|\\.)_x10.[36].uL$)|[sCT]_pct$",selected.cbc.profiles$test_id),]
selected.cbc.profiles <- subset(selected.cbc.profiles,test_id %in% names(which(table(selected.cbc.profiles$test_id) > 100)))


# reference range
cell.counts.range <- unique(selected.cbc.profiles[,c("test_name","test_id","normal_range")])
cell.counts.range[,c("lower.bound","upper.bound")] <- t(sapply(cell.counts.range$normal_range,function(x){as.numeric(unlist(strsplit(x,"-")))}))
cell.counts.range <- cbind(aggregate(lower.bound ~ test_id + test_name,cell.counts.range,min),
                           upper.bound=aggregate(upper.bound ~ test_id + test_name,cell.counts.range,max)[,"upper.bound"])

# remove possible erroneous values
tolerance <- 10
to.remove.rows <- c()
for (i in unique(selected.cbc.profiles$test_id)) {
  cat(i,"\n")
  test.values <- subset(selected.cbc.profiles,test_id == i)$test_value
  median.value <- median(test.values,na.rm = T)
  range <- quantile(test.values,.75,na.rm = T) - quantile(test.values,.25,na.rm = T)
  to.remove.rows <- c(to.remove.rows,rownames(subset(selected.cbc.profiles,test_id == i & 
                                                       (test_value > (median.value + tolerance*range) | 
                                                       test_value < (median.value - tolerance*range)))))
}
## Basophils_pct 
## Neutrophils_pct 
## Hematocrit.HCT_pct 
## Monocytes_x10.3.uL 
## Lymphocytes_pct 
## Neutrophils_x10.3.uL 
## Eosinophils_x10.3.uL 
## White.Blood.Cells.WBC_x10.3.uL 
## Eosinophils_pct 
## Platelets.PLT_x10.3.uL 
## Red.Blood.Cells.RBC_x10.6.uL 
## Basophils_x10.3.uL 
## Lymphocytes_x10.3.uL 
## Monocytes_pct
#View(selected.cbc.profiles[to.remove.rows,])
selected.cbc.profiles <- subset(selected.cbc.profiles,!(rownames(selected.cbc.profiles) %in% to.remove.rows))

selected.cbc.profiles.avg <- aggregate(test_value ~ subject_id + days_from_symptom_onset_to_test + test_name + test_id + severity.outcome,
                                            selected.cbc.profiles,mean)
selected.cbc.profiles.avg$dsm.group <- citeseq.patient.dsm[selected.cbc.profiles.avg$subject_id,"dsm.group"]
to.display <- selected.cbc.profiles.avg[grep("_pct$",selected.cbc.profiles.avg$test_id),]
ggplot(to.display) + 
geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=lower.bound),alpha=1,color="darkseagreen3",linetype="dashed") +   
  geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=upper.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
  geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),alpha=0.05,fill = "#EADCFA") +
  #geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
  geom_line(aes(x=days_from_symptom_onset_to_test,y=test_value,group=subject_id),alpha=0.1) + 
  geom_point(alpha=.1,pch=21,aes(fill=dsm.group,x=days_from_symptom_onset_to_test,y=test_value)) +
  geom_smooth(se = T,aes(x=days_from_symptom_onset_to_test,y=test_value,group=dsm.group,color=dsm.group),alpha=0.3) + xlim(0,40) +
  scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1")) +
    facet_wrap(~test_name,scales = "free",ncol=5,dir = "v") + theme_bw() + ylab("Cell Counts (% of Parent Pop.)") + xlab("Days from Symptom Onset")

to.display <- selected.cbc.profiles.avg[grep("_pct$",selected.cbc.profiles.avg$test_id,invert = T),]
ggplot(to.display) + 
  geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=lower.bound),alpha=1,color="darkseagreen3",linetype="dashed") +   
  geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=upper.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
  geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),alpha=0.05,fill = "#EADCFA") +
  #geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
  geom_line(aes(x=days_from_symptom_onset_to_test,y=test_value,group=subject_id),alpha=0.1) + 
  geom_point(alpha=.1,pch=21,aes(fill=dsm.group,x=days_from_symptom_onset_to_test,y=test_value)) +
  geom_smooth(se = T,aes(x=days_from_symptom_onset_to_test,y=test_value,group=dsm.group,color=dsm.group),alpha=0.3) + xlim(0,40) +
    scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1")) +
    facet_wrap(~test_id,scales = "free",ncol=5,dir = "v") + theme_bw() + ylab("Cell Counts") + xlab("Days from Symptom Onset")

Session Info

save(divergence.df,file=file.path(output.folder,"dsm.high.v.low.cytokine.test.results.RData"))

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] precrec_0.11.2   plsRglm_1.2.5    lmerTest_3.1-2   lme4_1.1-23     
##  [5] Matrix_1.2-18    tidymv_3.0.0     mgcv_1.8-31      nlme_3.1-144    
##  [9] ggfortify_0.4.10 ggpubr_0.4.0     ggsci_2.9        ggplot2_3.3.2   
## [13] knitr_1.30      
## 
## loaded via a namespace (and not attached):
##  [1] numDeriv_2016.8-1.1  tools_3.6.3          backports_1.1.10    
##  [4] R6_2.4.1             vegan_2.5-6          colorspace_1.4-1    
##  [7] permute_0.9-5        withr_2.3.0          tidyselect_1.1.0    
## [10] gridExtra_2.3        curl_4.3             compiler_3.6.3      
## [13] network_1.16.0       labeling_0.3         scales_1.1.1        
## [16] mvtnorm_1.1-1        stringr_1.4.0        digest_0.6.25       
## [19] foreign_0.8-75       minqa_1.2.4          rmarkdown_2.4       
## [22] rio_0.5.16           pkgconfig_2.0.3      htmltools_0.5.0     
## [25] highr_0.8            maps_3.3.0           rlang_0.4.7         
## [28] readxl_1.3.1         generics_0.0.2       farver_2.0.3        
## [31] statnet.common_4.3.0 dplyr_1.0.2          zip_2.1.1           
## [34] car_3.0-10           magrittr_1.5         dotCall64_1.0-0     
## [37] bipartite_2.15       Rcpp_1.0.5           munsell_0.5.0       
## [40] abind_1.4-5          lifecycle_0.2.0      stringi_1.4.6       
## [43] yaml_2.2.1           carData_3.0-4        MASS_7.3-51.5       
## [46] grid_3.6.3           parallel_3.6.3       forcats_0.5.0       
## [49] crayon_1.3.4         lattice_0.20-38      haven_2.3.1         
## [52] splines_3.6.3        hms_0.5.3            sna_2.5             
## [55] pillar_1.4.6         igraph_1.2.5         boot_1.3-24         
## [58] ggsignif_0.6.0       glue_1.4.2           evaluate_0.14       
## [61] data.table_1.13.0    vctrs_0.3.4          spam_2.5-1          
## [64] nloptr_1.2.2.2       cellranger_1.1.0     gtable_0.3.0        
## [67] purrr_0.3.4          tidyr_1.1.2          xfun_0.17           
## [70] openxlsx_4.2.2       broom_0.7.0          coda_0.19-3         
## [73] rstatix_0.6.0        tibble_3.0.3         fields_11.4         
## [76] cluster_2.1.0        statmod_1.4.34       ellipsis_0.3.1