see ../../validation_schulteschrepping/differential_expression
see ../../validation_schulteschrepping/differential_expression_cohort2
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.0 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 0.8.5
## ✔ tidyr 1.0.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(fgsea)
## Loading required package: Rcpp
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
IN_DIR <- "data/externalData/SchulteSchrepping/"
GENESETS_IN_PATH <- "input/genesets/kegg_go_btm_reactome_foointerferon.rds"
genesets <- readRDS(GENESETS_IN_PATH)
files <- list.files(IN_DIR, recursive = TRUE)
files <- c(grep("fgsea_table", files, value = T), grep("toptables", files, value = T))
files_split <- strsplit(files, "/")
files_dat <- lapply(1:9, function(i){sapply(files_split, `[[`, i)}) %>%
as.data.frame(stringsAsFactors = FALSE)
colnames(files_dat) <- c("cohort", "sg", "samp_group", "rs", "model", "result_type", "coeff",
"annotation", "cell_mod_coeff")
files_dat <- files_dat %>%
mutate(full_path = file.path(IN_DIR, files)) %>%
mutate(cell = sapply(strsplit(cell_mod_coeff, "--"), `[[`, 1)) %>%
filter(cohort %in% c("differential_expression", "differential_expression_cohort2")) %>%
mutate(cohort = ifelse(grepl("cohort2", cohort), "cohort2", "cohort1"))
#keep_cells <- c("CD8+ T cells",
# "CD8_Mem", "12_CD8+ T cells_1",
# "13_CD8+ T cells_2", "14_CD8+ T cells_3")
# Just keep those called CD8_Mem with the transferred labels from our data
keep_cells <- c( "CD8_Mem")
files_dat <- files_dat %>%
filter(coeff %in% c("severe-mild", "COVID-Healthy")) %>%
filter(cell %in% keep_cells)
files_dat %>% select(annotation, cell) %>% distinct() %>% arrange(annotation)
## annotation cell
## 1 transferred_labels CD8_Mem
fgsea_files_dat <- files_dat %>%
filter(result_type == "fgsea_tables")
fgsea_dat_list <- lapply(fgsea_files_dat$full_path, read_tsv)
## Parsed with column specification:
## cols(
## pathway = col_character(),
## pval = col_double(),
## padj = col_double(),
## ES = col_double(),
## NES = col_double(),
## nMoreExtreme = col_double(),
## size = col_double(),
## leadingEdge = col_character()
## )
## Parsed with column specification:
## cols(
## pathway = col_character(),
## pval = col_double(),
## padj = col_double(),
## ES = col_double(),
## NES = col_double(),
## nMoreExtreme = col_double(),
## size = col_double(),
## leadingEdge = col_character()
## )
## Parsed with column specification:
## cols(
## pathway = col_character(),
## pval = col_double(),
## padj = col_double(),
## ES = col_double(),
## NES = col_double(),
## nMoreExtreme = col_double(),
## size = col_double(),
## leadingEdge = col_character()
## )
## Parsed with column specification:
## cols(
## pathway = col_character(),
## pval = col_double(),
## padj = col_double(),
## ES = col_double(),
## NES = col_double(),
## nMoreExtreme = col_double(),
## size = col_double(),
## leadingEdge = col_character()
## )
names(fgsea_dat_list) <- fgsea_files_dat$full_path
fgsea_dat <- bind_rows(fgsea_dat_list, .id = "full_path")
fgsea_dat <- fgsea_dat %>%
filter(grepl("wherry", pathway)) %>%
left_join(fgsea_files_dat)
## Joining, by = "full_path"
toptab_files_dat <- files_dat %>%
filter(result_type == "toptables")
#BUBBLE_OUT_PATH <- "plots/validations/SchulteSchrepping/exhaustion/exhaustion_bubble_schulteSchrepping.pdf"
#dir.create(dirname(BUBBLE_OUT_PATH))
p_bubble <- ggplot(fgsea_dat, aes(y = pathway, x = paste(cohort, annotation, cell, sep = "\n"))) +
geom_point(aes(size = -log10(pval), color = NES, shape = pval < .05)) +
scale_color_viridis_c() +
facet_wrap(~coeff, scales = "free_x") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
xlab("") +
theme(plot.margin= unit(c(t = 3, r = 0, b = 3, l = 0), unit = "in"))
#ggsave(plot = p_bubble, filename = BUBBLE_OUT_PATH, width =10, height = 9)
print(p_bubble)
#LE_CSV_OUT_PATH <- "plots/validations/SchulteSchrepping/exhaustion/cohort2_leading_edge.csv"
#fgsea_dat %>% filter(cohort == "cohort2") %>%
# select(pathway, coeff, annotation, cell, NES, pval, leadingEdge) %>%
# write_csv(LE_CSV_OUT_PATH)
#
plot_list <- list()
keep_pathways <- grep("wherry", names(genesets), value = TRUE)
keep_comparisons <- c("severe-mild", "COVID-Healthy")
#OUT_DIR <- "plots/validations/SchulteSchrepping/exhaustion/2020_11_20_exhaustion_gsea_curves"
#dir.create(OUT_DIR)
for(cohort_select in c("cohort1", "cohort2")){
for(path in keep_pathways){
for(coefficient in keep_comparisons){
cell_dat <- fgsea_dat %>%
filter(cohort == cohort_select) %>%
filter(pathway == path) %>%
filter(coeff == coefficient) %>%
select(cell, annotation) %>% unique()
#print(1)
for(i in seq_len(nrow(cell_dat))){
celltype <- cell_dat[[i, "cell"]]
anno <- cell_dat[[i, "annotation"]]
#print(2)
pval <- fgsea_dat %>%
filter(cohort == cohort_select) %>%
filter(pathway == path) %>%
filter(cell == celltype) %>%
filter(coeff == coefficient) %>%
filter(annotation == anno) %>%
pull(pval) %>%
round(5)
#print(3)
toptab_path <- toptab_files_dat %>%
filter(cohort == cohort_select) %>%
#filter(pathway == path) %>%
filter(cell == celltype) %>%
filter(coeff == coefficient) %>%
filter(annotation == anno) %>%
pull(full_path)
toptab <- read_tsv(toptab_path)
ranks <- toptab %>%
select(gene, t) %>%
deframe()
plot_list[[cohort_select]][[coefficient]][[path]][[paste(anno, celltype)]] <-
plotEnrichment(stats = ranks, pathway = genesets[[path]]) +
ylim(c(-.6, .6)) +
ggtitle(paste(cohort_select, coefficient, anno, celltype, path, sep = "\n")) +
annotate("text", x=Inf, y = Inf, label = paste("p =", pval), vjust=1, hjust=1)
}
plotlist_sub <- plot_list[[cohort_select]][[coefficient]][[path]]
#fig_out_path <- paste(cohort_select, coefficient, anno, celltype, path, sep = "_")
#fig_out_path <- paste0(file.path(OUT_DIR, fig_out_path), ".pdf")
p <- plot_grid(plotlist = plotlist_sub, nrow = 1)
#ggsave(plot = p, filename= fig_out_path, height = 4, width = 4 * length(plotlist_sub))
print(p)
}
}
}
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )