Compare cytokine profiles of citeseq patients in DSM-high and DSM-low severity groups. Specific focus on 17-23 days since symptom onset.
Read patient demo & cytokine data.
citeseq.patient.dsm <- readRDS(file.path(output.folder,"..","DSM","citeseq.patient.end.points.RDS"))
citeseq.patient.dsm$dsm.group <- NA
citeseq.patient.dsm[citeseq.patient.dsm$PC1 < median(citeseq.patient.dsm$PC1),"dsm.group"] <- "DSM-low"
citeseq.patient.dsm[citeseq.patient.dsm$PC1 > median(citeseq.patient.dsm$PC1),"dsm.group"] <- "DSM-high"
table(citeseq.patient.dsm$dsm.group)
##
## DSM-high DSM-low
## 15 15
load(file.path(data.folder,"covid19.metadata.paper1.RData"),verbose=T)
## Loading objects:
## covid19.patient.demo
## covid19.samples
## covid19.treatment
## covid19.lab.results
covid19.lab.results$severity.outcome <- paste0(covid19.lab.results$severity,"-",covid19.lab.results$outcome)
severity.color <- c("Critical-Alive"="#374E55FF","Critical-Deceased"="#DF8F44FF","Moderate-Alive"="#00A1D5FF" ,"Severe-Alive"="#B24745FF","HC" = "#79AF97FF")
Compare between dsm-high and dsm-low patients during critical juncture (between 17 and 23 days TSO) and check whether any of the cytokines had signficant shift entering the juncture
selected.cytokine.profiles <- subset(covid19.lab.results,subject_id %in% citeseq.patient.dsm$subject_id & test_type == "ELISA" &
!is.na(days_from_symptom_onset_to_test))
selected.cytokine.profiles$dsm.group <- citeseq.patient.dsm[selected.cytokine.profiles$subject_id,"dsm.group"]
parameter.labels <- unique(selected.cytokine.profiles[,c("test_id","test_name")])
rownames(parameter.labels) <- parameter.labels$test_id
parameter.labels$test_name <- gsub("/ ","/",parameter.labels$test_name,fixed = T)
# ggplot(subset(selected.cytokine.profiles,test_name=="IL-23"),aes(days_from_symptom_onset_to_test,log10(test_value+1),group=dsm.group)) +
# geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
# geom_point(alpha=.5,pch=21,aes(fill=dsm.group),size=1) + scale_fill_manual(name="DSM",values=c("red","deepskyblue1")) +
# scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) +
# geom_smooth(se = T,aes(color=dsm.group),alpha=0.15) + facet_wrap(~test_name,scales = "free") + theme_bw() + #xlim(0,60) +
# xlab("Days from Symptom Onset") + ylab("log10 Cytokine Level") + xlim(0,25)
Using the model: test_value ~ dsm.group + age.numeric + sex + DFSO + (1|subject_id), for samples within the juncture to identify cytokines with sig. differences between the two severity groups
# test which one is diverging between 17-23 days for the two groups
divergence.df <- data.frame()
form <- test_value ~ dsm.group + age + sex + days_from_symptom_onset_to_test + (1|subject_id)
for (i in parameter.labels$test_id) {
tmp <- subset(selected.cytokine.profiles,test_id == i & days_from_symptom_onset_to_test >= 7 & days_from_symptom_onset_to_test <= 30)
if (nrow(tmp) > 20) {
tmp$test_value <- scale(log10(tmp$test_value+1))
tmp <- subset(tmp, days_from_symptom_onset_to_test >= 17 & days_from_symptom_onset_to_test <= 23)
lm.res <- lmer(form,tmp)
lm.anova <- anova(lm.res)
divergence.df <- rbind(divergence.df,data.frame(cytokine=i,coef=t(fixef(lm.res)),pval = t(lm.anova$`Pr(>F)`),singular=isSingular(lm.res)))
} else
warning(paste0(i," only has ",nrow(tmp)," samples."))
}
parameter.labels <- unique(selected.cytokine.profiles[,c("test_id","test_name")])
rownames(parameter.labels) <- parameter.labels$test_id
parameter.labels$test_name <- gsub("/ ","/",parameter.labels$test_name,fixed = T)
divergence.df$cytokine.label <- parameter.labels[divergence.df$cytokine,"test_name"]
divergence.df <- divergence.df[order(divergence.df$coef.dsm.groupDSM.low),]
divergence.df$cytokine.label <- factor(divergence.df$cytokine.label,levels=divergence.df$cytokine.label)
ggplot(divergence.df,aes(cytokine.label,coef.dsm.groupDSM.low)) + geom_hline(yintercept = 0,linetype="dashed") +
geom_point(aes(size=-log10(pval.1),fill=pval.1<=0.05),alpha=0.75,pch=21) +
#geom_point(aes(y=coef.post.20daysTRUE,size=-log10(severity.outcome.pval.2),
# color=singular,fill=severity.outcome.pval.2<=0.05),alpha=0.75,pch=22) +
coord_flip() + ylab("DSM-low Effect Size") + xlab("Cytokine") + labs(title = "DSM-low vs. DSM-high",subtitle = "Cytokine differences between 17 and 23 days TSO") +
scale_color_manual(values=c("black","white")) + scale_fill_manual(name="Sig. P-Value",values=c("grey90","lightblue")) +
scale_size(name="-log10(p-val)") + theme_bw() + theme(text = element_text(size=8))
Figure 2 Coefficient and significance of the severity term in mixed-effect models comparing 63 serum protein levels DSM-low (n=15) relative to DSM-low (n=15) patients around critical juncture (17-23 days TSO; only patients with samples around this period are included in this analysis). Significance is determined by ANOVA unadjusted p = 0.05.
sig.diff.cytokines <- as.character(subset(divergence.df,pval.1 <= 0.05)$cytokine)
selected.cytokine.profiles.avg <- aggregate(test_value ~ subject_id + days_from_symptom_onset_to_test + test_name + test_id + dsm.group,
selected.cytokine.profiles,mean)
ggplot(subset(selected.cytokine.profiles.avg,test_id %in% c(sig.diff.cytokines)),
aes(days_from_symptom_onset_to_test,log10(test_value+1),group=dsm.group)) +
#geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),fill = "#EADCFA", alpha = 0.05) +
geom_point(alpha=.25,pch=21,aes(fill=dsm.group),size=1) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1")) +
geom_line(aes(group=subject_id),alpha=0.1) +
scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) +
geom_smooth(se = T,aes(color=dsm.group),alpha=0.2) + #geom_smooth(se = T,aes(fill=dsm.group),alpha=0.1) +
facet_wrap(~test_name,scales = "free") + theme_bw() + xlim(0,40) +
xlab("Days from Symptom Onset") + ylab("Cytokine Level (log10 pg/mL)")
Figure 3 Time course of selected circulating protein levels of by disease severity. Proteins that either show significant differences (p = 0.05) between the two severity groups or their expression patterns change (p = 0.05) around the critical juncture are shown. Longitudinal samples from the same individual are connected by gray lines. Trajectories were fitted to the two DSM patient groups separately and are shown with the shaded areas representing standard error.
selected.cbc.profiles <- subset(covid19.lab.results,test_type == "Clinical" & subject_id %in% citeseq.patient.dsm$subject_id)
selected.cbc.profiles <- selected.cbc.profiles[grep("((T|C|s|\\.)_x10.[36].uL$)|[sCT]_pct$",selected.cbc.profiles$test_id),]
selected.cbc.profiles <- subset(selected.cbc.profiles,test_id %in% names(which(table(selected.cbc.profiles$test_id) > 100)))
# reference range
cell.counts.range <- unique(selected.cbc.profiles[,c("test_name","test_id","normal_range")])
cell.counts.range[,c("lower.bound","upper.bound")] <- t(sapply(cell.counts.range$normal_range,function(x){as.numeric(unlist(strsplit(x,"-")))}))
cell.counts.range <- cbind(aggregate(lower.bound ~ test_id + test_name,cell.counts.range,min),
upper.bound=aggregate(upper.bound ~ test_id + test_name,cell.counts.range,max)[,"upper.bound"])
# remove possible erroneous values
tolerance <- 10
to.remove.rows <- c()
for (i in unique(selected.cbc.profiles$test_id)) {
cat(i,"\n")
test.values <- subset(selected.cbc.profiles,test_id == i)$test_value
median.value <- median(test.values,na.rm = T)
range <- quantile(test.values,.75,na.rm = T) - quantile(test.values,.25,na.rm = T)
to.remove.rows <- c(to.remove.rows,rownames(subset(selected.cbc.profiles,test_id == i &
(test_value > (median.value + tolerance*range) |
test_value < (median.value - tolerance*range)))))
}
## Basophils_pct
## Neutrophils_pct
## Hematocrit.HCT_pct
## Monocytes_x10.3.uL
## Lymphocytes_pct
## Neutrophils_x10.3.uL
## Eosinophils_x10.3.uL
## White.Blood.Cells.WBC_x10.3.uL
## Eosinophils_pct
## Platelets.PLT_x10.3.uL
## Red.Blood.Cells.RBC_x10.6.uL
## Basophils_x10.3.uL
## Lymphocytes_x10.3.uL
## Monocytes_pct
#View(selected.cbc.profiles[to.remove.rows,])
selected.cbc.profiles <- subset(selected.cbc.profiles,!(rownames(selected.cbc.profiles) %in% to.remove.rows))
selected.cbc.profiles.avg <- aggregate(test_value ~ subject_id + days_from_symptom_onset_to_test + test_name + test_id + severity.outcome,
selected.cbc.profiles,mean)
selected.cbc.profiles.avg$dsm.group <- citeseq.patient.dsm[selected.cbc.profiles.avg$subject_id,"dsm.group"]
to.display <- selected.cbc.profiles.avg[grep("_pct$",selected.cbc.profiles.avg$test_id),]
ggplot(to.display) +
geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=lower.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=upper.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),alpha=0.05,fill = "#EADCFA") +
#geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
geom_line(aes(x=days_from_symptom_onset_to_test,y=test_value,group=subject_id),alpha=0.1) +
geom_point(alpha=.1,pch=21,aes(fill=dsm.group,x=days_from_symptom_onset_to_test,y=test_value)) +
geom_smooth(se = T,aes(x=days_from_symptom_onset_to_test,y=test_value,group=dsm.group,color=dsm.group),alpha=0.3) + xlim(0,40) +
scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1")) +
facet_wrap(~test_name,scales = "free",ncol=5,dir = "v") + theme_bw() + ylab("Cell Counts (% of Parent Pop.)") + xlab("Days from Symptom Onset")
to.display <- selected.cbc.profiles.avg[grep("_pct$",selected.cbc.profiles.avg$test_id,invert = T),]
ggplot(to.display) +
geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=lower.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=upper.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),alpha=0.05,fill = "#EADCFA") +
#geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
geom_line(aes(x=days_from_symptom_onset_to_test,y=test_value,group=subject_id),alpha=0.1) +
geom_point(alpha=.1,pch=21,aes(fill=dsm.group,x=days_from_symptom_onset_to_test,y=test_value)) +
geom_smooth(se = T,aes(x=days_from_symptom_onset_to_test,y=test_value,group=dsm.group,color=dsm.group),alpha=0.3) + xlim(0,40) +
scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1")) +
facet_wrap(~test_id,scales = "free",ncol=5,dir = "v") + theme_bw() + ylab("Cell Counts") + xlab("Days from Symptom Onset")
save(divergence.df,file=file.path(output.folder,"dsm.high.v.low.cytokine.test.results.RData"))
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] precrec_0.11.2 plsRglm_1.2.5 lmerTest_3.1-2 lme4_1.1-23
## [5] Matrix_1.2-18 tidymv_3.0.0 mgcv_1.8-31 nlme_3.1-144
## [9] ggfortify_0.4.10 ggpubr_0.4.0 ggsci_2.9 ggplot2_3.3.2
## [13] knitr_1.30
##
## loaded via a namespace (and not attached):
## [1] numDeriv_2016.8-1.1 tools_3.6.3 backports_1.1.10
## [4] R6_2.4.1 vegan_2.5-6 colorspace_1.4-1
## [7] permute_0.9-5 withr_2.3.0 tidyselect_1.1.0
## [10] gridExtra_2.3 curl_4.3 compiler_3.6.3
## [13] network_1.16.0 labeling_0.3 scales_1.1.1
## [16] mvtnorm_1.1-1 stringr_1.4.0 digest_0.6.25
## [19] foreign_0.8-75 minqa_1.2.4 rmarkdown_2.4
## [22] rio_0.5.16 pkgconfig_2.0.3 htmltools_0.5.0
## [25] highr_0.8 maps_3.3.0 rlang_0.4.7
## [28] readxl_1.3.1 generics_0.0.2 farver_2.0.3
## [31] statnet.common_4.3.0 dplyr_1.0.2 zip_2.1.1
## [34] car_3.0-10 magrittr_1.5 dotCall64_1.0-0
## [37] bipartite_2.15 Rcpp_1.0.5 munsell_0.5.0
## [40] abind_1.4-5 lifecycle_0.2.0 stringi_1.4.6
## [43] yaml_2.2.1 carData_3.0-4 MASS_7.3-51.5
## [46] grid_3.6.3 parallel_3.6.3 forcats_0.5.0
## [49] crayon_1.3.4 lattice_0.20-38 haven_2.3.1
## [52] splines_3.6.3 hms_0.5.3 sna_2.5
## [55] pillar_1.4.6 igraph_1.2.5 boot_1.3-24
## [58] ggsignif_0.6.0 glue_1.4.2 evaluate_0.14
## [61] data.table_1.13.0 vctrs_0.3.4 spam_2.5-1
## [64] nloptr_1.2.2.2 cellranger_1.1.0 gtable_0.3.0
## [67] purrr_0.3.4 tidyr_1.1.2 xfun_0.17
## [70] openxlsx_4.2.2 broom_0.7.0 coda_0.19-3
## [73] rstatix_0.6.0 tibble_3.0.3 fields_11.4
## [76] cluster_2.1.0 statmod_1.4.34 ellipsis_0.3.1