Cluster patients based on the following clinical parameters:
Oxygen: SpO2/FiO2 ratio
Inflammation: CRP, IL-6, TNF, IL-18 Neutrophil/lymphocyte ratio
IFN-I: IP-10
coagulation/endothelial: D-dimer
T-cell related: IFNg, CXCL9 (downstream of IFNg), iL-17, IL-4, IL-13
Lymphopenia: lymphocyte counts (CBC)
Tissue damage: LDH
paramaters.of.interest <- c("SpO2/FiO2","CRP","IL-6","TNF-a","TNF-b","IL-18/IL-1f4","ANC/ALC Ratio","IP-10","D-Dimer","IFN-Gamma","CXCL9/MIG","CXCL9/ MIG",
"IL-17","IL-4","IL-13","Fibrinogen","Lymphocytes",#"Serology Spike","Serology Nucleocapsid"
"Lactate Dehydrogenase (LDH)","Platelets (PLT)")#,"Neutrophils")
batch.sample.lab.values.all <- subset(covid19.lab.results,subject_id %in% subset(covid19.samples,!is.na(batch))$subject_id
& tolower(test_name) %in% tolower(paramaters.of.interest) & test_id != "Lymphocytes_pct")
batch.sample.lab.values <- merge(batch.sample.lab.values.all,subset(covid19.samples,material_type == "PBMC"),#"Serum"),#
by.x=c("subject_id","days_from_admission_to_test"),by.y=c("subject_id","days_from_admission_to_sample_drawn"),
suffixes = c("",".duplicate"))
batch.sample.lab.values <- batch.sample.lab.values[,grep("duplicate$",colnames(batch.sample.lab.values),invert = T)]
batch.sample.lab.values.avg <- reshape2::dcast(batch.sample.lab.values,sample_bsi_id + subject_id + condition + sex + age + severity +
outcome + ever_admitted_to_icu +
days_from_symptom_onset_to_hospitalization + number_of_days_hospitalized + days_from_hospitalization_to_death +
sample_label + visit + days_from_symptom_onset_to_test + days_from_admission_to_test + batch + test_order ~ test_id,
value.var = "test_value",fun.aggregate = function(x){mean(x,na.rm = T)})
batch.sample.lab.values.avg <- batch.sample.lab.values.avg[order(batch.sample.lab.values.avg$test_order),]
# use admission date if missing symptom onset
batch.sample.lab.values.avg[is.na(batch.sample.lab.values.avg$days_from_symptom_onset_to_test),]$days_from_symptom_onset_to_test <-
batch.sample.lab.values.avg[is.na(batch.sample.lab.values.avg$days_from_symptom_onset_to_test),]$days_from_admission_to_test
batch.sample.lab.values.avg.T0 <- batch.sample.lab.values.avg[!duplicated(batch.sample.lab.values.avg$subject_id),]
batch.sample.lab.values.avg.T0$severity.outcome <- paste0(batch.sample.lab.values.avg.T0$severity,"-",batch.sample.lab.values.avg.T0$outcome)
severity.color <- ggsci::pal_jama()(4)
names(severity.color) <- sort(unique(batch.sample.lab.values.avg.T0$severity.outcome))
ggplot(batch.sample.lab.values.avg.T0,aes(visit,days_from_symptom_onset_to_test)) + geom_boxplot(outlier.shape = NA,width=0.5) +
geom_point(pch=21,size=3,alpha=0.75,position = position_jitter(width=0.2,h=0),aes(fill=severity.outcome)) +
theme_bw() + ggsci::scale_fill_jama() + ylab("Days from Symptom Onset")
# remove samples >= 30 days from symptom onset
max.days.since.syptom.onset <- 30
cat("Removing samples >=",max.days.since.syptom.onset,"-",
subset(batch.sample.lab.values.avg.T0,days_from_symptom_onset_to_test >= max.days.since.syptom.onset)$subject_id)
## Removing samples >= 30 - HGR0000079 HGR0000124 HGR0000418
batch.sample.lab.values.avg.T0 <- subset(batch.sample.lab.values.avg.T0,days_from_symptom_onset_to_test < max.days.since.syptom.onset)
ggplot(batch.sample.lab.values.avg.T0,aes(visit,days_from_admission_to_test)) + geom_boxplot(outlier.shape = NA,width=0.5) +
geom_point(pch=21,size=3,alpha=0.75,position = position_jitter(w=0.25,h=0),aes(fill=severity.outcome)) +
theme_bw() + ggsci::scale_fill_jama() + ylab("Days from Hospital Admission")
batch.sample.lab.values.avg.T0.meta <- batch.sample.lab.values.avg.T0[,setdiff(colnames(batch.sample.lab.values.avg.T0),unique(batch.sample.lab.values$test_id))]
rownames(batch.sample.lab.values.avg.T0.meta) <- batch.sample.lab.values.avg.T0.meta$subject_id
batch.sample.lab.values.avg.T0.meta$severity.outcome <- paste0(batch.sample.lab.values.avg.T0.meta$severity,"-",batch.sample.lab.values.avg.T0.meta$outcome)
batch.sample.lab.values.avg.T0 <- batch.sample.lab.values.avg.T0[,setdiff(colnames(batch.sample.lab.values.avg.T0),
colnames(batch.sample.lab.values.avg.T0.meta))]
rownames(batch.sample.lab.values.avg.T0.meta) <- batch.sample.lab.values.avg.T0.meta$subject_id
rownames(batch.sample.lab.values.avg.T0) <- batch.sample.lab.values.avg.T0.meta$subject_id
colnames(batch.sample.lab.values.avg.T0) <- sapply(colnames(batch.sample.lab.values.avg.T0),function(x){unlist(strsplit(x,"_|\\.\\."))[1]})
# transform to log10
batch.sample.lab.values.avg.T0 <- log10(batch.sample.lab.values.avg.T0+0.01)
pheatmap::pheatmap(scale(batch.sample.lab.values.avg.T0),
annotation_row = batch.sample.lab.values.avg.T0.meta[,c("severity.outcome","age","sex",
"days_from_symptom_onset_to_test","days_from_admission_to_test")],
color = viridis::inferno(101),fontsize = 6,
annotation_colors = list(sex=c("F"="darkred","M"="darkblue"),severity.outcome=severity.color),main="All earliest time-point batch samples")
# try to borrow from +/- 2 days
day.margin <- 2
batch.sample.lab.values.expanded <- data.frame()
for (i in 1:nrow(batch.sample.lab.values.avg.T0.meta)) {
subj <- batch.sample.lab.values.avg.T0.meta$subject_id[i]
#cat(i,"-",subj,"\n")
days_from_admission <- batch.sample.lab.values.avg.T0.meta$days_from_admission_to_test[i]
tmp <- subset(batch.sample.lab.values.all,subject_id == subj &
(days_from_admission_to_test == batch.sample.lab.values.avg.T0.meta$days_from_admission_to_test[i] |
(days_from_admission_to_test <= (days_from_admission + day.margin) &
days_from_admission_to_test >= (days_from_admission - day.margin))))
tmp$target.day.diff <- abs(tmp$days_from_admission_to_test - days_from_admission)
#print(table(tmp$target.day.diff))
tmp <- tmp[order(tmp$target.day.diff,decreasing = F),]
tmp$days_from_admission_to_test_id <- apply(tmp[,c("subject_id","test_id","days_from_admission_to_test")],1,paste0,collapse=":")
#tmp <- subset(tmp,days_from_symptom_onset_to_test_id %in% tmp[!duplicated(tmp[,c("subject_id","test_id")]),]$days_from_admission_to_test_id) # keep multiple values taken on the same closest day
tmp <- tmp[!duplicated(tmp[,c("subject_id","test_id")],fromLast = F),]
batch.sample.lab.values.expanded <- rbind(batch.sample.lab.values.expanded,tmp)
}
batch.sample.lab.values.expanded.avg <- reshape2::dcast(batch.sample.lab.values.expanded,subject_id ~ test_id,
value.var = "test_value",fun.aggregate = function(x){mean(x,na.rm = T)})
rownames(batch.sample.lab.values.expanded.avg) <- batch.sample.lab.values.expanded.avg$subject_id
batch.sample.lab.values.expanded.avg <- batch.sample.lab.values.expanded.avg[rownames(batch.sample.lab.values.avg.T0.meta),
intersect(colnames(batch.sample.lab.values.expanded.avg),batch.sample.lab.values.all$test_id)]
# transform to log10
batch.sample.lab.values.expanded.avg <- log10(batch.sample.lab.values.expanded.avg+0.01)
pheatmap::pheatmap(scale(batch.sample.lab.values.expanded.avg),
annotation_row = batch.sample.lab.values.avg.T0.meta[,c("severity.outcome","age","sex",
"days_from_symptom_onset_to_test","days_from_admission_to_test")],
color = viridis::inferno(101),fontsize = 6,
annotation_colors = list(sex=c("F"="darkred","M"="darkblue"),severity.outcome=severity.color),
main="All earliest time-point batch samples (values from time point +/- 2 days)")
# impute remaining missing data
if (as.numeric(paste0(R.version$major,".",floor(as.numeric(R.version$minor)))) >= 3.6)
RNGkind(sample.kind = "Rounding") # ensure compatibility with versions prior to 3.6
#set.seed(10276)
set.seed(1087)
batch.sample.lab.values.expanded.avg.imputed <- batch.sample.lab.values.expanded.avg
imputed.obj <- aregImpute(formula = as.formula(paste0("~ ",paste0(colnames(batch.sample.lab.values.expanded.avg),collapse = " + "))),
data = batch.sample.lab.values.expanded.avg, n.impute = 5, type = "regression",nk = 0)
## Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
imputed.values <- impute.transcan(imputed.obj,imputation = 1,data=batch.sample.lab.values.expanded.avg,list.out=TRUE)
##
##
## Imputed missing values with the following frequencies
## and stored them in variables with their original names:
##
## ANC.ALC.Ratio_.ratio CRP_mg.L
## 4 5
## D.Dimer_ng.mL Fibrinogen_mg.dL
## 6 1
## Lactate.Dehydrogenase.LDH_U.L SpO2.FiO2_.ratio
## 1 5
batch.sample.lab.values.expanded.avg.imputed[names(imputed.values)] <- imputed.values
pheatmap::pheatmap(scale(batch.sample.lab.values.expanded.avg.imputed),
annotation_row = batch.sample.lab.values.avg.T0.meta[,c("severity.outcome","age","sex",
"days_from_symptom_onset_to_test","days_from_admission_to_test")],
color = viridis::inferno(101),fontsize = 6,
annotation_colors = list(sex=c("F"="darkred","M"="darkblue"),severity.outcome=severity.color),
main="All earliest time-point batch samples (values from time point +/- 2 days or imputed)")
# correct for days from symptom onset or admission
correct.for <- "days_from_admission_to_test"
batch.sample.lab.values.expanded.avg.imputed.corrected <- t(limma::removeBatchEffect(t(batch.sample.lab.values.expanded.avg.imputed),
covariates = batch.sample.lab.values.avg.T0.meta[rownames(batch.sample.lab.values.expanded.avg.imputed),correct.for,drop=F]))
parameter.labels <- unique(batch.sample.lab.values[,c("test_id","test_name")])
rownames(parameter.labels) <- parameter.labels$test_id
parameter.labels["ANC.ALC.Ratio_.ratio","test_name"] <- "Neutrophil/lymphocyte ratio (NLR)"
parameter.labels["CRP","test_name"] <- "C-reactive protein (CRP)"
parameter.labels["Lymphocytes_x10.3.uL","test_name"] <- "Lymphocyte count"
parameter.labels["Platelets..PLT._x10.3.uL","test_name"] <- "Platelet count"
parameter.labels$test_name <- gsub("/ ","/",parameter.labels$test_name,fixed = T)
# merge back patients with multiple samples
batch.sample.lab.values.expanded.avg.imputed.corrected <-
aggregate(batch.sample.lab.values.expanded.avg.imputed.corrected,list(batch.sample.lab.values.avg.T0.meta$subject_id),mean)
rownames(batch.sample.lab.values.expanded.avg.imputed.corrected) <- batch.sample.lab.values.expanded.avg.imputed.corrected$Group.1
batch.sample.lab.values.expanded.avg.imputed.corrected <- batch.sample.lab.values.expanded.avg.imputed.corrected[,-1]
batch.sample.lab.values.avg.T0.meta <- batch.sample.lab.values.avg.T0.meta[match(rownames(batch.sample.lab.values.expanded.avg.imputed.corrected),
batch.sample.lab.values.avg.T0.meta$subject_id),]
# which were intubated?
batch.sample.lab.values.avg.T0.meta$intubated <- batch.sample.lab.values.avg.T0.meta$subject_id %in%
covid19.treatment[grep("int",covid19.treatment$treatment_notes,ignore.case = T),]$subject_id
subject.labels <- gsub("HGR0000","P",rownames(batch.sample.lab.values.expanded.avg.imputed.corrected))
subject.labels[batch.sample.lab.values.avg.T0.meta$ever_admitted_to_icu == T] <-
paste0(subject.labels[batch.sample.lab.values.avg.T0.meta$ever_admitted_to_icu == T],"*")
subject.labels[batch.sample.lab.values.avg.T0.meta$intubated == T] <-
paste0(subject.labels[batch.sample.lab.values.avg.T0.meta$intubated == T],"^")
pheatmap::pheatmap(scale(batch.sample.lab.values.expanded.avg.imputed.corrected),
annotation_row = batch.sample.lab.values.avg.T0.meta[,c("severity.outcome"),drop=F],#,"WHO.ordinal.scale.at.time.of.sampling"),drop=F],
#,"age","sex","batch","days_from_symptom_onset_to_test","days_from_admission_to_test")],
#color = colorRampPalette(brewer.pal(8,"YlGnBu"))(34),
color = viridis::cividis(34),
breaks = c(-3,seq(-1.5,1.5,by=0.1),3),angle_col = 45,cutree_rows = 4,
clustering_method = "ward.D",
annotation_colors = list(sex=c("F"="darkred","M"="darkblue"),severity.outcome=severity.color),
#main=paste0("Values from time point +/- 2 days or imputed corrected for ",correct.for),
labels_row = subject.labels,cellwidth = 15,cellheight = 8,fontsize = 8,#file=file.path(data.folder,"plots","test2.pdf"),
labels_col = parameter.labels[colnames(batch.sample.lab.values.expanded.avg.imputed.corrected),"test_name"])
Select features associated with severity and use them to calculate DSM
severity.index <- c("Moderate-Alive"=1,"Severe-Alive"=2,"Critical-Alive"=3,"Critical-Deceased"=4)
batch.sample.lab.values.expanded.avg.corrected <- t(limma::removeBatchEffect(t(batch.sample.lab.values.expanded.avg),
covariates = batch.sample.lab.values.avg.T0.meta[rownames(batch.sample.lab.values.expanded.avg),correct.for,drop=F]))
batch.sample.lab.values.expanded.avg.corrected <- batch.sample.lab.values.expanded.avg.corrected[rownames(batch.sample.lab.values.avg.T0.meta),
-grep("FiO2",colnames(batch.sample.lab.values.expanded.avg.corrected))]
# leave-one-out modeling
feature.importance <- data.frame()
sample.scores <- data.frame()
for (i in 1:nrow(batch.sample.lab.values.expanded.avg.corrected)) {
tryCatch({
modpls2 <- plsRglm(#factor(who.severity.index[batch.sample.lab.values.avg.T0.meta$WHO.severity.outcome[-i]]),
factor(severity.index[batch.sample.lab.values.avg.T0.meta$severity.outcome[-i]]),
batch.sample.lab.values.expanded.avg.corrected[-i,],1,modele="pls-glm-polr",pvals.expli=T,
dataPredictY = batch.sample.lab.values.expanded.avg.corrected[i,,drop=F],verbose=F)
sample.scores <- rbind(sample.scores,data.frame(sample=rownames(batch.sample.lab.values.expanded.avg.corrected)[i],
predicted=as.numeric(modpls2$ValsPredictYCat),
#observed=who.severity.index[batch.sample.lab.values.avg.T0.meta$WHO.severity.outcome[i]]))
observed=severity.index[batch.sample.lab.values.avg.T0.meta$severity.outcome[i]]))
feature.importance <- rbind(feature.importance,as.data.frame(t(modpls2$Std.Coeffs)))
}, error = function(e){warning(paste0("Error in sample",i,"\n"))})
}
cat("Contingency table:\n")
## Contingency table:
table(sample.scores[,c("predicted","observed")])
## observed
## predicted 1 2 3 4
## 1 1 1 1 0
## 2 0 0 1 0
## 3 2 4 14 1
## 4 0 0 2 3
num.wrong <- sum(sample.scores$predicted != sample.scores$observed)
cat("Number of misclassified samples:",num.wrong," (",100*num.wrong/nrow(sample.scores),"%)\n")
## Number of misclassified samples: 12 ( 40 %)
ggplot(reshape::melt(feature.importance[,colnames(batch.sample.lab.values.expanded.avg.corrected)]),aes(variable,value)) +
geom_boxplot() + geom_hline(yintercept = 0,linetype="dashed",color="red") +
theme_bw() + theme(axis.text.x = element_text(angle=90,hjust = 1,vjust = 0.5)) + ggtitle("ordinal logistic PLS leave-one-out models") + ylab("Coefficients")
# permutation-based feature selection
modpls2 <- plsRglm(factor(severity.index[batch.sample.lab.values.avg.T0.meta$severity.outcome]),
batch.sample.lab.values.expanded.avg.corrected,1,modele="pls-glm-polr",pvals.expli=T)
## ____************************************************____
## Only naive DoF can be used with missing data
##
## Model: pls-glm-polr
## Method: logistic
##
## ____There are some NAs in X but not in Y____
## ____Component____ 1 ____
## ____Predicting X with NA in X and not in Y____
## ****________________________________________________****
bootYT2 <- bootplsglm(modpls2, R=100, strata=unclass(factor(severity.index[batch.sample.lab.values.avg.T0.meta$severity.outcome])), sim="permutation")
plots.confints.bootpls(confints.bootpls(bootYT2,typeBCa=FALSE), legendpos = "topright",xaxisticks=FALSE)
boxplots.bootpls(bootYT2,xaxisticks=FALSE,ranget0=TRUE)
#saveRDS(feature.importance,file.path(data.folder,"end.point.PLS.coefficients.RDS"))
feature.median <- sort(abs(apply(feature.importance[,colnames(batch.sample.lab.values.expanded.avg.corrected)],2,median)))
feature.sign <- as.factor(sign(apply(feature.importance[,colnames(batch.sample.lab.values.expanded.avg.corrected)],2,median))[names(feature.median)])
feature.sd <- apply(feature.importance[,colnames(batch.sample.lab.values.expanded.avg.corrected)],2,sd)[names(feature.median)]
feature.median
## IFN.gamma IL.4
## 0.002195008 0.062543014
## Platelets.PLT_x10.3.uL CRP_mg.L
## 0.063908593 0.117287235
## IL.18.IL.1F4 IL.17
## 0.120311420 0.139063413
## TNF.a Fibrinogen_mg.dL
## 0.179835486 0.204579162
## CXCL9.MIG IL.13
## 0.223692239 0.255547240
## TNF.b IL.6
## 0.290761238 0.465825040
## D.Dimer_ng.mL IP.10
## 0.465939065 0.505588311
## Lymphocytes_x10.3.uL Lactate.Dehydrogenase.LDH_U.L
## 0.634993151 0.655605487
## ANC.ALC.Ratio_.ratio
## 0.664137086
names(feature.median) <- parameter.labels[names(feature.median),"test_name"]
ggplot(data.frame(feature=factor(names(feature.median),levels = names(feature.median)),abs.median.coef=feature.median,coef.sd=feature.sd,sign=feature.sign),
aes(feature,-abs.median.coef*as.numeric(as.character(feature.sign)))) + geom_hline(yintercept = 0) + geom_bar(stat = "identity",fill="deepskyblue3") +
geom_errorbar(aes(ymin=-abs.median.coef*as.numeric(as.character(feature.sign))-coef.sd,
ymax=-abs.median.coef*as.numeric(as.character(feature.sign))+coef.sd),width=.2,color="red") +
scale_fill_manual(name="direction",values=c("darkblue","lightblue")) +
theme_bw() + theme(axis.text.x=element_text(angle=45,hjust = 1,size=7)) + xlab("Parameter") + ylab("Median Coefficient") + ggtitle("PLS Model")
# final model
selected.num.features <- 8
# heatmap using important features + oxygen
important.features <- tail(names(feature.sign),selected.num.features)
print(important.features)
## [1] "IL.13" "TNF.b"
## [3] "IL.6" "D.Dimer_ng.mL"
## [5] "IP.10" "Lymphocytes_x10.3.uL"
## [7] "Lactate.Dehydrogenase.LDH_U.L" "ANC.ALC.Ratio_.ratio"
batch.sample.lab.values.avg.T0.meta$ever_admitted_to_icu <- as.character(batch.sample.lab.values.avg.T0.meta$ever_admitted_to_icu)
pheatmap::pheatmap(scale(batch.sample.lab.values.expanded.avg.imputed.corrected[,important.features]),
annotation_row = batch.sample.lab.values.avg.T0.meta[,c("severity.outcome","ever_admitted_to_icu","age","sex","batch",correct.for)],
color = viridis::inferno(34),breaks = c(-3,seq(-1.5,1.5,by=0.1),3),fontsize = 6,
annotation_colors = list(sex=c("F"="darkred","M"="darkblue"),severity.outcome=severity.color,
ever_admitted_to_icu=c("FALSE"="white","TRUE"="black")),
cutree_rows = 4,
main=paste0("All earliest batch samples (values from time point +/- 2 days or imputed corrected for ",correct.for,")"))
# put oxygen back into PCA calculation
important.features <- c(important.features,"SpO2.FiO2_.ratio")
lab.pca <- prcomp(batch.sample.lab.values.expanded.avg.imputed.corrected[,important.features],scale. = T,center = T)
pca.summary <- summary(lab.pca)
row.annotation <- merge(batch.sample.lab.values.avg.T0.meta,lab.pca$x[,1:2],by="row.names")
# flip sign of PC1 to use it as "severity" metric
row.annotation$PC1 <- row.annotation$PC1*-1
# violin plot by severity group
comparisons <- gtools::combinations(4,2,names(severity.color))
comparisons <- comparisons[comparisons[,1] != comparisons[,2],]
comparisons <- lapply(1:nrow(comparisons),function(x){comparisons[x,]})
row.annotation$severity.outcome <- factor(row.annotation$severity.outcome,levels=names(severity.index))
ggviolin(row.annotation,x="severity.outcome",y="PC1", add = "boxplot",palette = c("#00A1D5FF","#B24745FF","#374E55FF","#DF8F44FF"),
fill="severity.outcome",add.params = list(fill="white"),xlab = "Severity",ylab="Disease Severity Metric (DSM)") +
geom_point(alpha=0.25,size=2) + #scale_y_continuous(expand=expansion(add=c(0,1))) +
stat_compare_means(comparisons = comparisons,hide.ns = T,label = "p.signif",tip.length = 0,method = "wilcox.test") +
theme(text = element_text(size=8))
rownames(row.annotation) <- row.annotation$Row.names
# serology at time point
serology.values <- subset(covid19.lab.results,test_type == "Serology")
serology.values$severity.outcome <- paste0(serology.values$severity,"-",serology.values$outcome)
serology.values.wide <- reshape2::dcast(serology.values,subject_id + days_from_symptom_onset_to_test + days_from_admission_to_test ~ test_name,value.var="test_value",
fun.aggregate = mean)
colnames(serology.values.wide) <- gsub(" Antibodies","",colnames(serology.values.wide))
serology.values.wide$Nucleocapsid <- log10(serology.values.wide$Nucleocapsid+0.01)
serology.values.wide$Spike <- log10(serology.values.wide$Spike+0.01)
serology.values <- merge(covid19.patient.demo,serology.values.wide,by="subject_id",all.y=T)
colnames(serology.values) <- gsub("test","serology",colnames(serology.values))
# add individual parameters
patient.data <- row.annotation#cbind(row.annotation,batch.sample.lab.values.expanded.avg.imputed[row.annotation$subject_id,important.features])
batch.serology <- subset(serology.values,subject_id %in% patient.data$subject_id)
# serology closest to PBMC time point
batch.serology$sample.time.point <- patient.data[batch.serology$subject_id,]$days_from_admission_to_test
batch.serology$days_from_sample_to_serology <- batch.serology$days_from_admission_to_serology - batch.serology$sample.time.point
batch.serology <- batch.serology[order(abs(batch.serology$days_from_sample_to_serology)),]
batch.serology.time.point <- batch.serology[!duplicated(batch.serology$subject_id),]
patient.data <- cbind(patient.data,initial=batch.serology.time.point[match(patient.data$subject_id,batch.serology.time.point$subject_id),
c("Nucleocapsid","Spike","days_from_sample_to_serology","days_from_admission_to_serology")])
# serology peak titer
batch.serology <- batch.serology[order(batch.serology$Nucleocapsid,decreasing = T),]
batch.serology$peak.Nucleocapsid <- !duplicated(batch.serology$subject_id)
Nucleocapsid.peak.titer <- batch.serology[!duplicated(batch.serology$subject_id),]
patient.data <- cbind(patient.data,nucleocapsid.peak=Nucleocapsid.peak.titer[match(patient.data$subject_id,Nucleocapsid.peak.titer$subject_id),
c("Nucleocapsid","Spike","days_from_sample_to_serology","days_from_admission_to_serology")])
batch.serology <- batch.serology[order(batch.serology$Spike,decreasing = T),]
batch.serology$peak.Spike <- !duplicated(batch.serology$subject_id)
Spike.peak.titer <- batch.serology[!duplicated(batch.serology$subject_id),]
patient.data <- cbind(patient.data,spike.peak=Spike.peak.titer[match(patient.data$subject_id,Spike.peak.titer$subject_id),
c("Nucleocapsid","Spike","days_from_sample_to_serology","days_from_admission_to_serology")])
# serology closest to discharge or death
batch.serology$days_from_hospitalization_end_to_serology <- as.numeric(batch.serology$days_from_admission_to_serology - batch.serology$number_of_days_hospitalized)
batch.serology[which(is.na(batch.serology$days_from_hospitalization_end_to_serology)),]$days_from_hospitalization_end_to_serology <-
as.numeric(batch.serology$days_from_admission_to_serology -
batch.serology$days_from_hospitalization_to_death)[is.na(batch.serology$days_from_hospitalization_end_to_serology)]
batch.serology <- batch.serology[order(abs(batch.serology$days_from_hospitalization_end_to_serology)),]
end.batch.serology <- batch.serology[!duplicated(batch.serology$subject_id),]
patient.data <- cbind(patient.data,end=end.batch.serology[match(patient.data$subject_id,end.batch.serology$subject_id),
c("Nucleocapsid","Spike","days_from_hospitalization_end_to_serology","days_from_admission_to_serology")])
WHO serverity score over time between DSM-high and DSM-low groups
who.score.df <- read.csv(file.path(data.folder,"pbmc.samples.csv"))
who.score.df <- merge(who.score.df,subset(covid19.samples,material_type == "PBMC"),by = c("subject_id","sample_label","visit"),suffixes = c("",".dup"))
who.score.df$DSM.group <- NA
who.score.df[who.score.df$subject_id %in% subset(patient.data,PC1 < median(patient.data$PC1))$subject_id,"DSM.group"] <- "DSM-low"
who.score.df[who.score.df$subject_id %in% subset(patient.data,PC1 > median(patient.data$PC1))$subject_id,"DSM.group"] <- "DSM-high"
ggplot(subset(who.score.df,!is.na(DSM.group)),aes(days_from_symptom_onset_to_sample_drawn,WHO.ordinal.scale.at.time.of.sampling)) +
geom_rect(aes(xmin=17,xmax=23,ymax=Inf,ymin=-Inf),fill="lightyellow2",alpha=0.25) + geom_point(aes(fill=DSM.group),pch=21,size=3,alpha=0.25) +
geom_line(aes(group=subject_id),alpha=0.1) + theme_bw() +
xlab("TSO") + ylab("WHO Sample Severity Score") + geom_smooth(se = T,aes(color=DSM.group),alpha=0.15,size=1.25) +
scale_color_manual(name="DSM group",values=c("red","deepskyblue1")) + scale_fill_manual(name="DSM group",values=c("red","deepskyblue1"))
# DSM v. WHO Scores
dsm.who.association <- merge(patient.data,who.score.df[,c("sample_label","visit","WHO.ordinal.scale.at.time.of.sampling")],
by=c("sample_label","visit"),all.x=T)
test.res <- clinfun::jonckheere.test(dsm.who.association$PC1,dsm.who.association$WHO.ordinal.scale.at.time.of.sampling,alternative = "increasing")
ggplot(dsm.who.association,aes(as.factor(WHO.ordinal.scale.at.time.of.sampling),PC1)) + geom_boxplot(outlier.shape = NA) +
geom_jitter(alpha=0.75,size=2,pch=21,aes(fill=severity.outcome),width=0.1,height=0) + xlab("WHO Severity Score") + ylab("DSM") +
scale_fill_manual(name="Severity-Outcome",values=severity.color) + theme_bw() + ylim(-3,4.5) +
annotate(geom="text", x=2, y=4, label=sprintf("Jonckheere-Terpstra, p=%.3e",test.res$p.value),color="black",size=3)
dsm.who.association$WHO.severity <- "Mild"
dsm.who.association[dsm.who.association$WHO.ordinal.scale.at.time.of.sampling >= 5,"WHO.severity"] <- "Severe"
ggplot(dsm.who.association,aes(as.factor(WHO.severity),PC1)) + geom_boxplot(outlier.shape = NA) +
geom_jitter(alpha=0.75,size=2,pch=21,aes(fill=severity.outcome),width=0.1,height=0) + xlab("WHO Severity Class") + ylab("DSM") +
stat_compare_means() +
scale_fill_manual(name="Severity-Outcome",values=severity.color) + theme_bw() + ylim(-3,4.5)
patient.data <- patient.data[,-c(1:2,18:20)]
colnames(patient.data) <- gsub("test","sample",colnames(patient.data))
patient.data[is.na(patient.data$date_of_onset_of_symptom),"days_from_symptom_onset_to_sample"] <- NA
patient.data <- patient.data[,grep("date",colnames(patient.data),invert = T)]
saveRDS(patient.data,file.path(output.folder,"citeseq.patient.end.points.RDS"))
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
## Random number generation:
## RNG: Mersenne-Twister
## Normal: Inversion
## Sample: Rounding
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plsRglm_1.2.5 GGally_2.0.0 Hmisc_4.4-1 Formula_1.2-3
## [5] survival_3.1-8 lattice_0.20-38 ggfortify_0.4.10 ggpubr_0.4.0
## [9] ggsci_2.9 ggplot2_3.3.2 knitr_1.30
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-144 RColorBrewer_1.1-2 tools_3.6.3
## [4] backports_1.1.10 R6_2.4.1 vegan_2.5-6
## [7] rpart_4.1-15 mgcv_1.8-31 colorspace_1.4-1
## [10] permute_0.9-5 nnet_7.3-12 withr_2.3.0
## [13] tidyselect_1.1.0 gridExtra_2.3 curl_4.3
## [16] compiler_3.6.3 htmlTable_2.1.0 network_1.16.0
## [19] labeling_0.3 scales_1.1.1 checkmate_2.0.0
## [22] mvtnorm_1.1-1 stringr_1.4.0 digest_0.6.25
## [25] foreign_0.8-75 rmarkdown_2.4 rio_0.5.16
## [28] base64enc_0.1-3 jpeg_0.1-8.1 pkgconfig_2.0.3
## [31] htmltools_0.5.0 limma_3.42.2 maps_3.3.0
## [34] htmlwidgets_1.5.1 rlang_0.4.7 readxl_1.3.1
## [37] rstudioapi_0.11 farver_2.0.3 generics_0.0.2
## [40] gtools_3.8.2 statnet.common_4.3.0 dplyr_1.0.2
## [43] zip_2.1.1 car_3.0-10 magrittr_1.5
## [46] dotCall64_1.0-0 bipartite_2.15 Matrix_1.2-18
## [49] Rcpp_1.0.5 munsell_0.5.0 viridis_0.5.1
## [52] abind_1.4-5 lifecycle_0.2.0 stringi_1.4.6
## [55] yaml_2.2.1 carData_3.0-4 MASS_7.3-51.5
## [58] plyr_1.8.6 grid_3.6.3 clinfun_1.0.15
## [61] parallel_3.6.3 forcats_0.5.0 crayon_1.3.4
## [64] haven_2.3.1 splines_3.6.3 hms_0.5.3
## [67] sna_2.5 pillar_1.4.6 igraph_1.2.5
## [70] boot_1.3-24 ggsignif_0.6.0 reshape2_1.4.4
## [73] glue_1.4.2 evaluate_0.14 latticeExtra_0.6-29
## [76] data.table_1.13.0 png_0.1-7 vctrs_0.3.4
## [79] spam_2.5-1 cellranger_1.1.0 gtable_0.3.0
## [82] purrr_0.3.4 tidyr_1.1.2 reshape_0.8.8
## [85] xfun_0.17 openxlsx_4.2.2 broom_0.7.0
## [88] coda_0.19-3 rstatix_0.6.0 viridisLite_0.3.0
## [91] pheatmap_1.0.12 tibble_3.0.3 fields_11.4
## [94] cluster_2.1.0 ellipsis_0.3.1