To generate intermediate files see ../differential_expression/covid_de_pipeline_cell_freq
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.0 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 0.8.5
## ✔ tidyr 1.0.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggrepel)
IN_DIR <- "data/CITE5p/all_batches/differential_expression_cell_freq/2020_08_25/"
#dir.create(dirname(FIG_OUT_PATH), recursive = TRUE)
files <- list.files(IN_DIR, full.names = F, recursive = TRUE)
files <- grep("toptab", files, value = TRUE)
#files <- files[sapply(strsplit(files, "\\/"), length) == 7]
files_dat <- data.frame(filename = files,
sample_group = sapply(strsplit(files, split = "\\/"), `[[`, 1),
model = sapply(strsplit(files, split = "\\/"), `[[`, 3),
coefficient = sapply(strsplit(files, split = "\\/"), `[[`, 5),
#cell_anno = sapply(strsplit(files, split = "\\/"), `[[`, 6),
celltype_mod_coef = sapply(strsplit(files, split = "\\/"), `[[`, 6),
stringsAsFactors = FALSE) %>%
mutate(celltype = sapply(strsplit(celltype_mod_coef, split = "--"), `[[`, 1))
keep_celltypes <- "TotalCD4and8_LE_genes_20200824"
keep_coefs <- c("COVID-Healthy", "PC1", "PC1high-low")
files_dat <- files_dat %>%
filter(celltype %in% keep_celltypes & coefficient %in% keep_coefs)
dat_list <- lapply(files_dat$filename, function(path){
path <- file.path(IN_DIR, path)
read_tsv(path)
})
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
## Parsed with column specification:
## cols(
## gene = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double()
## )
names(dat_list) <- files_dat$filename
dat_list <- lapply(dat_list, function(dat){
dat %>% filter(!gene %in% c("TotalCD4", "TotalCD8")) %>%
mutate(celltype2 = ifelse(grepl("CD4", gene), "CD4", "CD8"))
})
#FIG_OUT_PATH <- "plots/CITE5p/all_batches/paper_figures/FIG4/2020_08_31/FIG4_cd4_8_volcano_sep_cd4_8.pdf"
#pdf(FIG_OUT_PATH, height = 1.7, width = 1.7)
#for(i in seq_along(dat_list)){
for(i in c(3)){
toptab <- dat_list[[i]]
ylims <- c(0, max(-log10(toptab$P.Value) + .5))
xlims <- c(-max(abs(toptab$logFC)), max(abs(toptab$logFC)))
for(cell in c("CD4", "CD8")){
#plot_title <- paste(cell, files_dat$sample_group[[i]], files_dat$coefficient[[i]])
plot_title <- paste(cell, files_dat$coefficient[[i]], sep = "\n")
p = ggplot(data = toptab %>% filter(celltype2 == cell), aes(x = logFC, y= -log10(P.Value))) +
geom_point(aes(color=adj.P.Val < .05, alpha = 0.9), show.legend = FALSE) +
#geom_text_repel(data = toptab[rank(toptab$adj.P.Val) < 40, ], aes(label = gene)) +
geom_text_repel(aes(label = gene), size = 1.5) +
#geom_text_repel(aes(label = gene)) +
scale_color_manual(values=c("darkgrey", "firebrick4")) +
geom_hline(yintercept = -log10(.05)) +
geom_vline(xintercept = 0) +
ylim(ylims) +
xlim(xlims) +
theme_classic(base_size = 6) +
#theme(base_size = 2)
ggtitle(plot_title)
print(p)
}
}
#dev.off()