library(edgeR)
## Loading required package: limma
library(pheatmap)
library(GSVA)
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
## mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
## setdiff, sort, table, tapply, union, unique, unsplit, which,
## which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.3.0 ──
## ✔ tibble 2.1.3 ✔ dplyr 0.8.5
## ✔ tidyr 1.0.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ✔ purrr 0.3.3
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ purrr::simplify() masks DelayedArray::simplify()
## ✖ dplyr::slice() masks IRanges::slice()
library(ggcorrplot)
#library(corrplot)
library(ggpubr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(readxl)
# This is generated in ../differential_expression/covid_de_pipeline_cell_freq
ESET_TOTAL_IN_PATH <- "data/CITE5p/all_batches/differential_expression_cell_freq/2020_07_24/t0_plus_healthy/subsetted_expressionsets/WCT_course_pcnt_total-eset.rds"
SCORES_IN_PATH <- "input/healthy_vs_covid_COVID-Healthy_module_score_gsva_filtered_samples_genes.rds"
scores <- readRDS(SCORES_IN_PATH)
eset_total <- readRDS(ESET_TOTAL_IN_PATH)
GENESETS_IN_PATH <- "input/genesets/kegg_go_btm_reactome_foointerferon.rds"
#cd8mem, mono classical, nkCD56hi
#To get the interferon scores
load("input/covid19.metadata.paper1.RData")
ifn_dat <- covid19.lab.results %>% filter(test_name == "IFN-alpha2a") %>%
filter(!is.na(subject_id) & !is.na(test_time_point.PBMC)) %>%
filter(test_time_point.PBMC == "T0") %>%
select(subject_id, test_value) %>%
rename(`IFN-a2a (pg/mL)` = test_value)
stopifnot(sum(duplicated(ifn_dat$subject_id)) == 0 )
eset_total <- eset_total[, eset_total$Timepoint %in% c("HC", "T0")]
pdat <- pData(eset_total)
pdat <- left_join(pdat, ifn_dat, by =c("Donor" ="subject_id"))
pdat <- pdat %>% mutate(sample = paste(Donor, Timepoint, Batch, sep = "_"))
#pdat %>% write_tsv("data/ifn/sample_meta_w_ifna2a.tsv")
# The names were previously Batch_Donor_timepoint, but I need Donor_Timepoint_Batch
colnames(eset_total) <- with(pData(eset_total), paste(Donor, Timepoint, Batch, sep = "_"))
pDC_dat <- enframe(exprs(eset_total)["pDC", ], name = "sample", value ="pDC_freq" )
stopifnot(identical(pdat$sample, pDC_dat$sample))
pDC_dat <- pDC_dat %>% mutate(log10_ifn_a2a = log10(as.numeric(pdat$`IFN-a2a (pg/mL)`) + .5))
keep_celltypes <- c("CD8_Mem", "Mono_Classical", "NK_CD16hi", "pDC", "cDC")
keep_celltypes_reduced <- c("CD8_Mem", "Mono_Classical", "NK_CD16hi")
dat <- scores %>% mutate(cell_module = paste(celltype, pathway, sep = "_")) %>%
filter((pathway %in% c("GO_RESPONSE_TO_TYPE_I_INTERFERON", "reactome_Translation") & celltype %in% keep_celltypes )| (pathway == "GO_APOPTOTIC_SIGNALING_PATHWAY" & celltype == "pDC")) %>%
select(cell_module, sample, module.score) %>%
spread(key = cell_module, value = module.score)
dat <- left_join(pDC_dat, dat)
## Joining, by = "sample"
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
dat_covid_only <- dat %>% filter(grepl("HGR", sample))
mat_covid_only <- dat_covid_only %>%
select(-c("pDC_GO_RESPONSE_TO_TYPE_I_INTERFERON", "pDC_reactome_Translation",
"cDC_GO_RESPONSE_TO_TYPE_I_INTERFERON", "cDC_reactome_Translation",)) %>%
`rownames<-`(.$sample) %>%
select(-sample) %>%
as.matrix()
## Warning: Setting row names on a tibble is deprecated.
cormat_covid_only <- cor(mat_covid_only, method = "pearson", use = "pairwise.complete.obs")
pmat <- cormat_covid_only
for(x_name in rownames(pmat)){
for(y_name in colnames(pmat)){
pmat[x_name, y_name] <-
cor.test(mat_covid_only[, x_name], mat_covid_only[, y_name], use = )$p.value
}
}
#Do a hack to show things for significant p values
pmat <- 1 - pmat
#set sig.level to .95
# if you would like to save pdfs
#OUT_PREFIX <- "plots/CITE5p/all_batches/paper_figures/FIG3/2020_11_24/"
#dir.create(OUT_PREFIX, recursive = TRUE)
#
#
#out_path_list <- list(
# EXPLORE = "cor_pDC_IFN_apoptosis.pdf",
# CORMAT_PAPER = "Fig3_cor_pDC_IFN_apoptosis_cormat_for_paper.pdf",
# APOPTOSIS_SCATTER = "Fig3_pDC_apoptosis_scatter_for_paper.pdf",
# OXISTRESS_SCATTER = "Fig3_pDC_apoptosis_scatter_for_paper.pdf",
# IFN_SCATTER = "Fig3_pDC_freq_nk_ifn_scatter.pdf",
# IFN_CD8Mem_SCATTER = "FIG3_cd8mem_GO_IFN_scatter.pdf",
# IFNSERUM_CD8Mem_SCATTER = "FIG3_cd8mem_ifn_serum_scatter.pdf",
# IFNSERUM_PDC_SCATTER = "Fig3_pDC_freq_ifn_serum_scatter.pdf",
# IFNSERUM_NKIFN_SCATTER = "Fig3_nkifn_ifn_serum_scatter.pdf",
# IFN_TRANSLATION = "FIG3_ifn_translation_cor.pdf"
# )
#
#out_path_list <- lapply(out_path_list, function(x){
# file.path(OUT_PREFIX, x)
#})
#pdf(out_path_list$CORMAT_PAPER, height =9, width = 9)
p <- ggcorrplot(cormat_covid_only, hc.order = TRUE, lab = FALSE,
show.diag = FALSE, type = "upper", tl.cex = 9,
p.mat = pmat, sig.level = .9499, pch =8, pch.cex = 2 ) +
#show.diag = TRUE, tl.cex = 9, p.mat = pmat) +
scale_x_discrete(position = "top") +
theme(axis.text.x=element_text(angle = 45, hjust = 0)) +
theme(plot.margin = margin(t = 0, r = 4, l = 0 , b = 0, "cm")) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
print(p)
#dev.off()
plot_dat <- dat_covid_only %>%
mutate(severity.outcome = paste(eset_total$severity, str_to_title(eset_total$outcome), sep = "-")[match(dat_covid_only$sample, colnames(eset_total))]) %>%
mutate(severity.outcome = as.character(severity.outcome)) %>%
mutate(severity.outcome = replace(severity.outcome, is.na(severity.outcome), "na"))
severity.color <- c("Critical-Alive"="#374E55FF","Critical-Deceased"="#DF8F44FF","Moderate-Alive"="#00A1D5FF" ,"Severe-Alive"="#B24745FF","Healthy" = "#79AF97FF", "na" = "grey")
p_apoptosis <-
#ggplot(plot_dat %>% filter(!sample %in% unoverlap), aes(x = pDC_freq, y = pDC_GO_APOPTOTIC_SIGNALING_PATHWAY)) +
ggplot(plot_dat, aes(x = pDC_freq, y = pDC_GO_APOPTOTIC_SIGNALING_PATHWAY)) +
geom_point(aes(color = severity.outcome), size = 6) +
geom_smooth(method = "lm", se = FALSE) +
stat_cor(method = "pearson", vjust = 3) +
#stat_cor(method = "spearman", vjust = 3) +
scale_color_manual(values = severity.color) + theme_bw()
#ggsave(plot = p_apoptosis, filename = out_path_list$APOPTOSIS_SCATTER, width =6, height = 5)
print(p_apoptosis)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing non-finite values (stat_cor).
## Warning: Removed 20 rows containing missing values (geom_point).
p_ifn <-
ggplot(plot_dat, aes(x = pDC_freq, y = NK_CD16hi_GO_RESPONSE_TO_TYPE_I_INTERFERON)) +
geom_point(aes(color = severity.outcome), size = 6) +
geom_smooth(method = "lm", se = FALSE) +
stat_cor(method = "pearson") +
#stat_cor(method = "spearman", vjust = 3) +
scale_color_manual(values = severity.color) + theme_bw()
#ggsave(plot = p_ifn, filename = out_path_list$IFN_SCATTER, height = 5, width = 6)
print(p_ifn)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_cor).
## Warning: Removed 1 rows containing missing values (geom_point).
p_ifn_cd8mem <-
ggplot(plot_dat, aes(x = pDC_freq, y = CD8_Mem_GO_RESPONSE_TO_TYPE_I_INTERFERON)) +
geom_point(aes(color = severity.outcome), size = 6) +
geom_smooth(method = "lm", se = FALSE) +
stat_cor(method = "pearson") +
#stat_cor(method = "spearman", vjust = 3) +
scale_color_manual(values = severity.color) + theme_bw()
#ggsave(plot = p_ifn_cd8mem, filename = out_path_list$IFN_CD8Mem_SCATTER, height = 5, width = 6)
print(p_ifn_cd8mem)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing non-finite values (stat_cor).
## Warning: Removed 4 rows containing missing values (geom_point).
p_ifn_serum_cd8mem <-
ggplot(plot_dat, aes(x = log10_ifn_a2a, y = CD8_Mem_GO_RESPONSE_TO_TYPE_I_INTERFERON)) +
geom_point(aes(color = severity.outcome), size = 6) +
geom_smooth(method = "lm", se = FALSE) +
stat_cor(method = "pearson") +
#stat_cor(method = "spearman", vjust = 3) +
scale_color_manual(values = severity.color) + theme_bw()
#ggsave(plot = p_ifn_serum, filename = out_path_list$IFNSERUM_CD8Mem_SCATTER, height = 5, width = 6)
print(p_ifn_serum_cd8mem)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 11 rows containing non-finite values (stat_cor).
## Warning: Removed 11 rows containing missing values (geom_point).
p_ifn_serum_nk_ifn <-
ggplot(plot_dat, aes(y = NK_CD16hi_GO_RESPONSE_TO_TYPE_I_INTERFERON, x = log10_ifn_a2a)) +
geom_point(aes(color = severity.outcome), size = 6) +
geom_smooth(method = "lm", se = FALSE) +
stat_cor(method = "pearson") +
#stat_cor(method = "spearman", vjust = 3) +
scale_color_manual(values = severity.color) + theme_bw()
#ggsave(plot = p_ifn_serum_nk_ifn, filename = out_path_list$IFNSERUM_NKIFN_SCATTER, height = 5, width = 6)
print(p_ifn_serum_nk_ifn)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 9 rows containing non-finite values (stat_cor).
## Warning: Removed 9 rows containing missing values (geom_point).
#pdf(file = out_path_list$IFN_TRANSLATION, height = 5, width = 6)
for(celltype in keep_celltypes){
x_string <- paste(celltype, "GO_RESPONSE_TO_TYPE_I_INTERFERON", sep = "_")
y_string <- paste(celltype, "reactome_Translation", sep = "_")
p <-
ggplot(plot_dat, aes_string(x = x_string, y = y_string)) +
geom_point(aes(color = severity.outcome), size = 6) +
geom_smooth(method = "lm", se = FALSE) +
stat_cor(method = "pearson") +
#stat_cor(method = "spearman", vjust = 3) +
scale_color_manual(values = severity.color) + theme_bw()
print(p)
}
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing non-finite values (stat_cor).
## Warning: Removed 4 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing non-finite values (stat_cor).
## Warning: Removed 3 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_cor).
## Warning: Removed 1 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 20 rows containing non-finite values (stat_smooth).
## Warning: Removed 20 rows containing non-finite values (stat_cor).
## Warning: Removed 20 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing non-finite values (stat_cor).
## Warning: Removed 19 rows containing missing values (geom_point).
#dev.off()