Compare cytokine profiles of citeseq patients in DSM-high and DSM-low severity groups. Specific focus on 17-23 days since symptom onset.
Read patient demo & cytokine data.
citeseq.patient.dsm <- readRDS(file.path(output.folder,"..","DSM","citeseq.patient.end.points.RDS"))
citeseq.patient.dsm$dsm.group <- NA
citeseq.patient.dsm[citeseq.patient.dsm$PC1 < median(citeseq.patient.dsm$PC1),"dsm.group"] <- "DSM-low"
citeseq.patient.dsm[citeseq.patient.dsm$PC1 > median(citeseq.patient.dsm$PC1),"dsm.group"] <- "DSM-high"
table(citeseq.patient.dsm$dsm.group)
##
## DSM-high DSM-low
## 15 15
load(file.path(data.folder,"covid19.metadata.paper1.RData"),verbose=T)
## Loading objects:
## covid19.patient.demo
## covid19.samples
## covid19.treatment
## covid19.lab.results
covid19.lab.results$severity.outcome <- paste0(covid19.lab.results$severity,"-",covid19.lab.results$outcome)
severity.color <- c("Critical-Alive"="#374E55FF","Critical-Deceased"="#DF8F44FF","Moderate-Alive"="#00A1D5FF" ,"Severe-Alive"="#B24745FF","HC" = "#79AF97FF")
Compare between dsm-high and dsm-low patients during critical juncture (between 17 and 23 days TSO) and check whether any of the cytokines had signficant shift entering the juncture
selected.cytokine.profiles <- subset(covid19.lab.results,subject_id %in% citeseq.patient.dsm$subject_id & test_type == "ELISA" &
!is.na(days_from_symptom_onset_to_test))
selected.cytokine.profiles$dsm.group <- citeseq.patient.dsm[selected.cytokine.profiles$subject_id,"dsm.group"]
parameter.labels <- unique(selected.cytokine.profiles[,c("test_id","test_name")])
rownames(parameter.labels) <- parameter.labels$test_id
parameter.labels$test_name <- gsub("/ ","/",parameter.labels$test_name,fixed = T)
# ggplot(subset(selected.cytokine.profiles,test_name=="IL-23"),aes(days_from_symptom_onset_to_test,log10(test_value+1),group=dsm.group)) +
# geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
# geom_point(alpha=.5,pch=21,aes(fill=dsm.group),size=1) + scale_fill_manual(name="DSM",values=c("red","deepskyblue1")) +
# scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) +
# geom_smooth(se = T,aes(color=dsm.group),alpha=0.15) + facet_wrap(~test_name,scales = "free") + theme_bw() + #xlim(0,60) +
# xlab("Days from Symptom Onset") + ylab("log10 Cytokine Level") + xlim(0,25)
Using the model: test_value ~ dsm.group + age.numeric + sex + DFSO + (1|subject_id), for samples within the juncture to identify cytokines with sig. differences between the two severity groups
# test which one is diverging between 17-23 days for the two groups
divergence.df <- data.frame()
form <- test_value ~ dsm.group + age + sex + days_from_symptom_onset_to_test + (1|subject_id)
for (i in parameter.labels$test_id) {
tmp <- subset(selected.cytokine.profiles,test_id == i & days_from_symptom_onset_to_test >= 7 & days_from_symptom_onset_to_test <= 30)
if (nrow(tmp) > 20) {
tmp$test_value <- scale(log10(tmp$test_value+1))
tmp <- subset(tmp, days_from_symptom_onset_to_test >= 17 & days_from_symptom_onset_to_test <= 23)
lm.res <- lmer(form,tmp)
lm.anova <- anova(lm.res)
divergence.df <- rbind(divergence.df,data.frame(cytokine=i,coef=t(fixef(lm.res)),pval = t(lm.anova$`Pr(>F)`),singular=isSingular(lm.res)))
} else
warning(paste0(i," only has ",nrow(tmp)," samples."))
}
parameter.labels <- unique(selected.cytokine.profiles[,c("test_id","test_name")])
rownames(parameter.labels) <- parameter.labels$test_id
parameter.labels$test_name <- gsub("/ ","/",parameter.labels$test_name,fixed = T)
divergence.df$cytokine.label <- parameter.labels[divergence.df$cytokine,"test_name"]
divergence.df <- divergence.df[order(divergence.df$coef.dsm.groupDSM.low),]
divergence.df$cytokine.label <- factor(divergence.df$cytokine.label,levels=divergence.df$cytokine.label)
ggplot(divergence.df,aes(cytokine.label,coef.dsm.groupDSM.low)) + geom_hline(yintercept = 0,linetype="dashed") +
geom_point(aes(size=-log10(pval.1),fill=pval.1<=0.05),alpha=0.75,pch=21) +
#geom_point(aes(y=coef.post.20daysTRUE,size=-log10(severity.outcome.pval.2),
# color=singular,fill=severity.outcome.pval.2<=0.05),alpha=0.75,pch=22) +
coord_flip() + ylab("DSM-low Effect Size") + xlab("Cytokine") + labs(title = "DSM-low vs. DSM-high",subtitle = "Cytokine differences between 17 and 23 days TSO") +
scale_color_manual(values=c("black","white")) + scale_fill_manual(name="Sig. P-Value",values=c("grey90","lightblue")) +
scale_size(name="-log10(p-val)") + theme_bw() + theme(text = element_text(size=8))
sig.diff.cytokines <- as.character(subset(divergence.df,pval.1 <= 0.05)$cytokine)
selected.cytokine.profiles.avg <- aggregate(test_value ~ subject_id + days_from_symptom_onset_to_test + test_name + test_id + dsm.group,
selected.cytokine.profiles,mean)
ggplot(subset(selected.cytokine.profiles.avg,test_id %in% c(sig.diff.cytokines)),
aes(days_from_symptom_onset_to_test,log10(test_value+1),group=dsm.group)) +
#geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),fill = "#EADCFA", alpha = 0.05) +
geom_point(alpha=.25,pch=21,aes(fill=dsm.group),size=1) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1")) +
geom_line(aes(group=subject_id),alpha=0.1) +
scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) +
geom_smooth(se = T,aes(color=dsm.group),alpha=0.2) + #geom_smooth(se = T,aes(fill=dsm.group),alpha=0.1) +
facet_wrap(~test_name,scales = "free") + theme_bw() + xlim(0,40) +
xlab("Days from Symptom Onset") + ylab("Cytokine Level (log10 pg/mL)")
selected.cbc.profiles <- subset(covid19.lab.results,test_type == "Clinical" & subject_id %in% citeseq.patient.dsm$subject_id)
selected.cbc.profiles <- selected.cbc.profiles[grep("((T|C|s|\\.)_x10.[36].uL$)|[sCT]_pct$",selected.cbc.profiles$test_id),]
selected.cbc.profiles <- subset(selected.cbc.profiles,test_id %in% names(which(table(selected.cbc.profiles$test_id) > 100)))
# reference range
cell.counts.range <- unique(selected.cbc.profiles[,c("test_name","test_id","normal_range")])
cell.counts.range[,c("lower.bound","upper.bound")] <- t(sapply(cell.counts.range$normal_range,function(x){as.numeric(unlist(strsplit(x,"-")))}))
cell.counts.range <- cbind(aggregate(lower.bound ~ test_id + test_name,cell.counts.range,min),
upper.bound=aggregate(upper.bound ~ test_id + test_name,cell.counts.range,max)[,"upper.bound"])
# remove possible erroneous values
tolerance <- 10
to.remove.rows <- c()
for (i in unique(selected.cbc.profiles$test_id)) {
cat(i,"\n")
test.values <- subset(selected.cbc.profiles,test_id == i)$test_value
median.value <- median(test.values,na.rm = T)
range <- quantile(test.values,.75,na.rm = T) - quantile(test.values,.25,na.rm = T)
to.remove.rows <- c(to.remove.rows,rownames(subset(selected.cbc.profiles,test_id == i &
(test_value > (median.value + tolerance*range) |
test_value < (median.value - tolerance*range)))))
}
## Basophils_pct
## Neutrophils_pct
## Hematocrit.HCT_pct
## Monocytes_x10.3.uL
## Lymphocytes_pct
## Neutrophils_x10.3.uL
## Eosinophils_x10.3.uL
## White.Blood.Cells.WBC_x10.3.uL
## Eosinophils_pct
## Platelets.PLT_x10.3.uL
## Red.Blood.Cells.RBC_x10.6.uL
## Basophils_x10.3.uL
## Lymphocytes_x10.3.uL
## Monocytes_pct
#View(selected.cbc.profiles[to.remove.rows,])
selected.cbc.profiles <- subset(selected.cbc.profiles,!(rownames(selected.cbc.profiles) %in% to.remove.rows))
selected.cbc.profiles.avg <- aggregate(test_value ~ subject_id + days_from_symptom_onset_to_test + test_name + test_id + severity.outcome,
selected.cbc.profiles,mean)
selected.cbc.profiles.avg$dsm.group <- citeseq.patient.dsm[selected.cbc.profiles.avg$subject_id,"dsm.group"]
to.display <- selected.cbc.profiles.avg[grep("_pct$",selected.cbc.profiles.avg$test_id),]
ggplot(to.display) +
geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=lower.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=upper.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),alpha=0.05,fill = "#EADCFA") +
#geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
geom_line(aes(x=days_from_symptom_onset_to_test,y=test_value,group=subject_id),alpha=0.1) +
geom_point(alpha=.1,pch=21,aes(fill=dsm.group,x=days_from_symptom_onset_to_test,y=test_value)) +
geom_smooth(se = T,aes(x=days_from_symptom_onset_to_test,y=test_value,group=dsm.group,color=dsm.group),alpha=0.3) + xlim(0,40) +
scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1")) +
facet_wrap(~test_name,scales = "free",ncol=5,dir = "v") + theme_bw() + ylab("Cell Counts (% of Parent Pop.)") + xlab("Days from Symptom Onset")
to.display <- selected.cbc.profiles.avg[grep("_pct$",selected.cbc.profiles.avg$test_id,invert = T),]
ggplot(to.display) +
geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=lower.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
geom_hline(data=subset(cell.counts.range,test_id %in% to.display$test_id),aes(yintercept=upper.bound),alpha=1,color="darkseagreen3",linetype="dashed") +
geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),alpha=0.05,fill = "#EADCFA") +
#geom_vline(xintercept = 20,color="grey60",linetype="dashed") +
geom_line(aes(x=days_from_symptom_onset_to_test,y=test_value,group=subject_id),alpha=0.1) +
geom_point(alpha=.1,pch=21,aes(fill=dsm.group,x=days_from_symptom_onset_to_test,y=test_value)) +
geom_smooth(se = T,aes(x=days_from_symptom_onset_to_test,y=test_value,group=dsm.group,color=dsm.group),alpha=0.3) + xlim(0,40) +
scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1")) +
facet_wrap(~test_id,scales = "free",ncol=5,dir = "v") + theme_bw() + ylab("Cell Counts") + xlab("Days from Symptom Onset")
save(divergence.df,file=file.path(output.folder,"dsm.high.v.low.cytokine.test.results.RData"))
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] precrec_0.11.2 plsRglm_1.2.5 lmerTest_3.1-2 lme4_1.1-23
## [5] Matrix_1.2-18 tidymv_3.0.0 mgcv_1.8-31 nlme_3.1-144
## [9] ggfortify_0.4.10 ggpubr_0.4.0 ggsci_2.9 ggplot2_3.3.2
## [13] knitr_1.30
##
## loaded via a namespace (and not attached):
## [1] numDeriv_2016.8-1.1 tools_3.6.3 backports_1.1.10
## [4] R6_2.4.1 vegan_2.5-6 colorspace_1.4-1
## [7] permute_0.9-5 withr_2.3.0 tidyselect_1.1.0
## [10] gridExtra_2.3 curl_4.3 compiler_3.6.3
## [13] network_1.16.0 labeling_0.3 scales_1.1.1
## [16] mvtnorm_1.1-1 stringr_1.4.0 digest_0.6.25
## [19] foreign_0.8-75 minqa_1.2.4 rmarkdown_2.4
## [22] rio_0.5.16 pkgconfig_2.0.3 htmltools_0.5.0
## [25] highr_0.8 maps_3.3.0 rlang_0.4.7
## [28] readxl_1.3.1 generics_0.0.2 farver_2.0.3
## [31] statnet.common_4.3.0 dplyr_1.0.2 zip_2.1.1
## [34] car_3.0-10 magrittr_1.5 dotCall64_1.0-0
## [37] bipartite_2.15 Rcpp_1.0.5 munsell_0.5.0
## [40] abind_1.4-5 lifecycle_0.2.0 stringi_1.4.6
## [43] yaml_2.2.1 carData_3.0-4 MASS_7.3-51.5
## [46] grid_3.6.3 parallel_3.6.3 forcats_0.5.0
## [49] crayon_1.3.4 lattice_0.20-38 haven_2.3.1
## [52] splines_3.6.3 hms_0.5.3 sna_2.5
## [55] pillar_1.4.6 igraph_1.2.5 boot_1.3-24
## [58] ggsignif_0.6.0 glue_1.4.2 evaluate_0.14
## [61] data.table_1.13.0 vctrs_0.3.4 spam_2.5-1
## [64] nloptr_1.2.2.2 cellranger_1.1.0 gtable_0.3.0
## [67] purrr_0.3.4 tidyr_1.1.2 xfun_0.17
## [70] openxlsx_4.2.2 broom_0.7.0 coda_0.19-3
## [73] rstatix_0.6.0 tibble_3.0.3 fields_11.4
## [76] cluster_2.1.0 statmod_1.4.34 ellipsis_0.3.1