Pseudobulk gene expression heatmaps – Schulte-Schrepping et al

cohort2

Generate Figure S3F; Figure S4D

input data from pseudobulk eset object – output from pseudobulk object generation pipeline

DGE_IN_PATH <- "../input/SchulteSchrepping/"
g_cohort2.idcelltype <- readRDS(paste(DGE_IN_PATH, "cohort2/cluster_labels_res.0.4.rds", sep = ""))

eset_list <- lapply(g_cohort2.idcelltype, function(dge){
  mat <- cpm(dge$counts, log = TRUE)
  meta <- dge$samples
  ExpressionSet(assayData = mat, phenoData = AnnotatedDataFrame(meta))
})

# get only the first timepoint for each subject
eset_list_T0 <- lapply(eset_list, function(dge){
  filter <- colnames(dge)[dge@phenoData@data$Timepoint %in% c("T0", "HC")]
  dge$group <- factor(dge$group, levels = c("control","mild","severe"))
  dge <- dge[,filter]
})

# read in pathway_summary
pathways <- read.xlsx("../input/selected_pathway_summary.xlsx", sheet = "reduced", startRow = 1)
IFNsets <- filter(pathways, Category %in% c("Type I IFN response", 
                                            "Vaccine_infection induced early blood transcriptomic signature"))
Agpresets <- filter(pathways, Primary_gene_sets %in% c("KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION"))
translationsets <- filter(pathways, Category %in% c("Translation/ribosome"))
NFkBsets <- filter(pathways, Primary_gene_sets %in% c("HALLMARK_TNFA_SIGNALING_VIA_NFKB"))
cyclesets <- c("btm_M4.1_cell cycle (I)","btm_M4.0_cell cycle and transcription")
apoptsets <- filter(pathways, Category %in% c("Apoptosis and cell death"))
inflamsets <- filter(pathways, Primary_gene_sets %in% c("HALLMARK_INFLAMMATORY_RESPONSE"))
oxphossets <- filter(pathways, Primary_gene_sets %in% c("KEGG_OXIDATIVE_PHOSPHORYLATION"))
fattysets <- filter(pathways, Primary_gene_sets %in% c("reactome_Fatty acid metabolism"))
chemoksets <- filter(pathways, Primary_gene_sets %in% c("KEGG_CHEMOKINE_SIGNALING_PATHWAY","GO_CELLULAR_RESPONSE_TO_INTERLEUKIN_1"))

pDC

apoptosis/cell death

showing the apoptosis signature (union LE genes of COVID19vsHCs and association with DSM) from our cohort that driving the difference of COVID19 and severity

DGE_IN_PATH2 <- "../input/differential_expression/fgsea_tables/"
pDC <- eset_list_T0$pDC
pDC_PC1 <- read.delim(paste0(DGE_IN_PATH2, "PC1/pDC--model@PC1--coef@PC1--fgsea.tsv"), header = TRUE)
pDC_covid <- read.delim(paste0(DGE_IN_PATH2, "COVID-Healthy/pDC--model@healthy_vs_covid--coef@COVID-Healthy--fgsea.tsv"), header = TRUE)

pDC_PC1_apopt <- pDC_PC1 %>% filter(pDC_PC1$pathway %in% apoptsets$Primary_gene_sets)
pDC_covid_apopt <- pDC_covid %>% filter(pDC_covid$pathway %in% apoptsets$Primary_gene_sets)
pDC_PC1_apopt_LE <- unique(unlist(sapply(pDC_PC1_apopt$leadingEdge, function(x) str_split(x, pattern = " "))))
pDC_covid_apopt_LE <- unique(unlist(sapply(pDC_covid_apopt$leadingEdge, function(x) str_split(x, pattern = " "))))
pDC_apopt_LE <- intersect(pDC_PC1_apopt_LE,pDC_covid_apopt_LE)
pDC_apopt_LE_union <- union(pDC_PC1_apopt_LE,pDC_covid_apopt_LE)

pDC_mtx_union <- exprs(pDC)[rownames(exprs(pDC)) %in% pDC_apopt_LE_union,]
meta <- pData(pDC) %>% dplyr::arrange(group,who_per_sample)
pDC_mtx_union <- pDC_mtx_union[,rownames(meta)]

# customize pDC union LE heatmap
# set marked genesets
PC1_COVID <- rownames(pDC_mtx_union) %in% pDC_apopt_LE
oxi_stress_LE <- filter(pDC_PC1, pathway %in% "reactome_Oxidative Stress Induced Senescence")$leadingEdge
oxi_stress <- rownames(pDC_mtx_union) %in% unlist(str_split(oxi_stress_LE, pattern = " "))
names(oxi_stress) <- rownames(pDC_mtx_union)

label <- which(rownames(pDC_mtx_union) %in% union(pDC_PC1_apopt_LE,pDC_covid_apopt_LE)[1:24])
label_genes <- rownames(pDC_mtx_union)[label]

# set colors
col_fun = colorRamp2(c(-1.5, 0, 1.5), c("#FF00FF", "#000000", "#FFFF00"))
who.color <- colorRamp2(c(0, 7), c("white", "#e97171"))
severity.color <- c("control"="#00BA38", "mild"="#619CFF", "severe"="#F8766D")

# set heatmap annotation
ha = HeatmapAnnotation(
  severity = meta$group,
  who = meta$who_per_sample, 
  col = list(who = who.color,
             severity = severity.color)
)

# pdf("../SchulteSchrepping/output/Subjecthm_pDC_unionLE_apopt_extLE_cohort2.pdf", width = 10, height = 5)
colnames(pDC_mtx_union) <- meta$n_barcodes
p <- Heatmap(pheatmap:::scale_rows(pDC_mtx_union), name = paste("pDC", "apopt", sep = "_"), 
             show_column_names = TRUE,
             top_annotation = ha, cluster_columns = FALSE,
             column_split = meta$group,
             row_split = PC1_COVID,
             col = col_fun,
             # row_names_gp = gpar(col = c("blue", "black"), fontsize = c(10, 10)),
             column_gap = unit(2, "mm"),
             row_gap = unit(2, "mm")
)+
  Heatmap(PC1_COVID + 0, name = "coLE", col = c("0" = "white", "1" = "blue"), 
          show_heatmap_legend = FALSE, width = unit(5, "mm")) +
  Heatmap(oxi_stress + 0, name = "OxiSenes", col = c("0" = "white", "1" = "green"), 
          show_row_names = TRUE, row_names_gp = gpar(fontsize = 8),
          show_heatmap_legend = FALSE, width = unit(5, "mm"))+
  rowAnnotation(foo = anno_mark(at = label,
                                labels = label_genes,
                                labels_gp = gpar(fontsize = 12)))
p

# dev.off()

NK cells

Fatty acid metabolism

NK_CD16hi <- eset_list_T0$`NK cells`
# cohort2 LE
NK_CD16hi_PC1 <- read.delim(paste0(DGE_IN_PATH, "cohort2/fgsea_tables/severe-mild/NK cells--model@severity--coef@severe-mild--fgsea.tsv"), header = TRUE)
NK_CD16hi_covid <- read.delim(paste0(DGE_IN_PATH, "cohort2/fgsea_tables/COVID-Healthy/NK cells--model@healthy_vs_covid--coef@COVID-Healthy--fgsea.tsv"), header = TRUE)

NK_CD16hi_PC1_fatty <- NK_CD16hi_PC1 %>% filter(NK_CD16hi_PC1$pathway %in% fattysets$Primary_gene_sets)
NK_CD16hi_covid_fatty <- NK_CD16hi_covid %>% filter(NK_CD16hi_covid$pathway %in% fattysets$Primary_gene_sets)
NK_CD16hi_PC1_fatty_LE <- unique(unlist(sapply(NK_CD16hi_PC1_fatty$leadingEdge, function(x) str_split(x, pattern = " "))))
NK_CD16hi_covid_fatty_LE <- unique(unlist(sapply(NK_CD16hi_covid_fatty$leadingEdge, function(x) str_split(x, pattern = " "))))
NK_CD16hi_fatty_LE <- intersect(NK_CD16hi_PC1_fatty_LE,NK_CD16hi_covid_fatty_LE)

NK_CD16hi_mtx_PC1 <- exprs(NK_CD16hi)[NK_CD16hi_PC1_fatty_LE, ]
meta <- pData(NK_CD16hi) %>% dplyr::arrange(group,who_per_sample) 
NK_CD16hi_mtx_PC1 <- NK_CD16hi_mtx_PC1[,rownames(meta)]

# get classifications of FA LE genes
catabolicgene <- read.xlsx("../input/fatty acid LE NK gene categories_cohort2.xlsx", sheet = "catabolic", rowNames = TRUE, colNames = TRUE)$Gene.Symbol
biosyngene <- read.xlsx("../input/fatty acid LE NK gene categories_cohort2.xlsx", sheet = "biosynthetic", rowNames = TRUE, colNames = TRUE)$Gene.Symbol
fatty_NK <- vector(length = nrow(NK_CD16hi_mtx_PC1))
fatty_NK <- replace(fatty_NK, which(rownames(NK_CD16hi_mtx_PC1) %in% biosyngene), "biosynthetic")
fatty_NK <- replace(fatty_NK, which(rownames(NK_CD16hi_mtx_PC1) %in% catabolicgene), "catabolic")
fatty_NK <- replace(fatty_NK, which(fatty_NK == "FALSE"), "others")
names(fatty_NK) <- rownames(NK_CD16hi_mtx_PC1)

# pdf("../SchulteSchrepping/output/Subjecthm_NK_PC1LE_fatty_cohort2.pdf", width = 10, height = 6)
subject.hm.PC1(exprs_mtx = NK_CD16hi_mtx_PC1, meta = meta, celltype = "NK", 
               module = "fatty", PC1LEset = NK_CD16hi_PC1_fatty_LE, 
               covidLEset = NK_CD16hi_covid_fatty_LE, 
               rowsplit=2, othergroup=fatty_NK)

# dev.off()
sI <- sessionInfo()
utils:::print.sessionInfo(sI[-c(10,11)])
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server 7.2 (Maipo)
## 
## Matrix products: default
## 
## locale:
## [1] en_US.UTF-8
## 
## attached base packages:
## [1] grid      parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] viridis_0.5.1        viridisLite_0.3.0    circlize_0.4.10     
##  [4] pheatmap_1.0.12      ComplexHeatmap_2.5.4 openxlsx_4.1.5      
##  [7] forcats_0.5.0        stringr_1.4.0        dplyr_1.0.0         
## [10] purrr_0.3.4          readr_1.3.1          tidyr_0.8.3         
## [13] tibble_3.0.3         ggplot2_3.1.1        tidyverse_1.2.1     
## [16] Biobase_2.46.0       BiocGenerics_0.32.0  edgeR_3.28.1        
## [19] limma_3.42.2        
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5          locfit_1.5-9.4      lubridate_1.7.9    
##  [4] lattice_0.20-41     png_0.1-7           assertthat_0.2.1   
##  [7] digest_0.6.25       R6_2.4.1            cellranger_1.1.0   
## [10] plyr_1.8.6          backports_1.1.8     stats4_3.6.1       
## [13] evaluate_0.14       httr_1.4.2          pillar_1.4.6       
## [16] GlobalOptions_0.1.2 rlang_0.4.7         lazyeval_0.2.2     
## [19] readxl_1.3.1        rstudioapi_0.11     S4Vectors_0.24.4   
## [22] GetoptLong_1.0.2    rmarkdown_2.3       munsell_0.5.0      
## [25] broom_0.5.2         compiler_3.6.1      modelr_0.1.4       
## [28] xfun_0.15           pkgconfig_2.0.3     shape_1.4.4        
## [31] htmltools_0.5.0     tidyselect_1.1.0    gridExtra_2.3      
## [34] IRanges_2.20.2      fansi_0.4.1         crayon_1.3.4       
## [37] withr_2.2.0         nlme_3.1-148        jsonlite_1.7.0     
## [40] gtable_0.3.0        lifecycle_0.2.0     magrittr_1.5       
## [43] scales_1.1.1        zip_2.0.4           cli_2.0.2          
## [46] stringi_1.4.6       xml2_1.3.2          ellipsis_0.3.1     
## [49] generics_0.0.2      vctrs_0.3.2         RColorBrewer_1.1-2 
## [52] rjson_0.2.20        tools_3.6.1         glue_1.4.1         
## [55] hms_0.5.3           yaml_2.2.1          clue_0.3-57        
## [58] colorspace_1.4-1    cluster_2.1.0       rvest_0.3.5        
## [61] knitr_1.29          haven_2.1.0