Recluster on RNA within the T cell subsets

Load R packages and data:

#this is tested using R 3.6.1 on a high-performance comupting node with 8 cores and at least 320 gb or ram. 
library("Seurat") #load Seurat 3.1
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("matrixStats")
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
library('tidyverse')
## Registered S3 method overwritten by 'rvest':
##   method            from
##   read_xml.response xml2
## Registered S3 method overwritten by 'cli':
##   method     from    
##   print.boxx spatstat
## ── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.3.2     ✔ readr   1.3.1
## ✔ tibble  2.1.1     ✔ purrr   0.3.2
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ ggplot2 3.3.2     ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ matrixStats::count() masks dplyr::count()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::lag()         masks stats::lag()
library('parallelDist')
library("pheatmap")
library("viridis")
## Loading required package: viridisLite
library("ggridges")
## 
## Attaching package: 'ggridges'
## The following object is masked from 'package:ggplot2':
## 
##     scale_discrete_manual
library("magrittr")
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library("genefilter")
## 
## Attaching package: 'genefilter'
## The following object is masked from 'package:readr':
## 
##     spec
## The following objects are masked from 'package:matrixStats':
## 
##     rowSds, rowVars
library("DescTools")
## Registered S3 method overwritten by 'DescTools':
##   method         from 
##   reorder.factor gdata
## 
## Attaching package: 'DescTools'
## The following object is masked from 'package:genefilter':
## 
##     AUC
library("scico")

#### Recluster on RNA within the T cell subsets ####

## import the seurat object downloaded from GEO
merge.SNG = readRDS("SeuratObjects/AllBatches_SeuratObj.rds")

## import fgsea results tables
t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable = read.table("Metadata/2020_08_09_t0_plus_healthy--healthy_vs_covid--COVID-Healthy.tsv", header = TRUE, sep = "\t")
t0_plus_t0_covid_only.PC1.fgseaatable = read.table("Metadata/2020_08_09_t0_covid_only--PC1--PC1.tsv", header = TRUE, sep = "\t")

Recluster within the CD8 cells, using the geneset enrichment leading edge genes identified in the pseudobulk differential expression analysis

## Warning in dir.create("withinWCTCelltypeRNAclustPathLeadingEdge"):
## 'withinWCTCelltypeRNAclustPathLeadingEdge' already exists
## Regressing out Donor, Batch
## Centering and scaling data matrix
## PC_ 1 
## Positive:  CCNL1, REL, TNFAIP3, B4GALT1, BHLHE40, NR4A3, KLF6, NFAT5, PTPRE, LDLR 
##     NR4A2, FOSB, ADAR, ATP2B1, SLC2A3, ACSL3, TCIRG1, HELLS, NFKB1, NFKB2 
##     TNF, KIF1B, MCL1, PDE4B, TIPARP, SP100, MAP3K8, IRF1, SAT1, NAMPT 
## Negative:  RPL41, RPS12, RPL30, RPL32, RPL10, RPS15A, RPS3, RPL35A, RPL28, RPS27A 
##     RPL34, RPL19, RPS28, RPS23, RPL39, RPS4X, RPL13, RPL18A, RPL29, RPS27 
##     RPL11, RPS3A, RPS18, RPS15, RPL37, RPLP1, RPS25, RPL18, RPS8, RPS13 
## PC_ 2 
## Positive:  CCR7, SELL, IL7R, RPS8, RPL13, RPS12, RPL32, IFNGR2, MYC, RPL22 
##     IL6ST, RPS23, RPL34, THEM4, RPS3A, RPL12, RPS13, RPS2, LDHB, RPL29 
##     RPL11, RPL36, RPS18, RPL30, RPL19, RPS21, RPS27A, SNN, RPL18A, RPLP1 
## Negative:  ZFP36, DUSP1, NR4A2, IER2, NFKBIA, DUSP2, JUN, CD69, FOS, KLF6 
##     PPP1R15A, JUNB, CCL5, HLA-A, BTG2, EIF1, TNFAIP3, BHLHE40, GADD45B, LITAF 
##     FOSB, IRF1, RGS1, GAPDH, ZC3H12A, MCL1, BTG1, MAP3K8, NR4A3, REL 
## PC_ 3 
## Positive:  IL7R, FOS, JUNB, CCR7, FOSB, TNFAIP3, SLC2A3, JUN, DUSP1, CD55 
##     CD69, PPP1R15A, BTG1, SOCS3, BTG2, AREG, GPR183, NR4A2, SGK1, CCNL1 
##     ZFP36, PTGER4, MYC, NFKBIA, IRS2, SELL, NR4A1, IL6ST, BIRC3, IL23A 
## Negative:  CCL5, IFITM2, GAPDH, HLA-A, DBI, LY6E, EMP3, IFITM1, NDUFB7, NDUFB2 
##     SCP2, LITAF, ATP5E, TPI1, PGAM1, IFI6, NDUFB10, ISG15, NDUFA1, ENO1 
##     NDUFS6, BST2, OASL, MAP3K8, CYC1, COX6C, PSMB8, ALDOA, PTPRE, ATP5G3 
## PC_ 4 
## Positive:  SELL, LDHB, CCR7, CD55, STAT1, ENO1, MX1, IL7R, SLC2A3, IFI6 
##     MYC, IL6ST, STMN1, ATP5A1, ISG15, EIF2AK2, TNFAIP8, NDUFV2, BST2, XAF1 
##     TPI1, ATP5G3, THEM4, LY6E, IRF7, OAS3, OAS1, NDUFB9, ATP5F1, GPR183 
## Negative:  CCL5, DUSP2, TNF, JUN, ZFP36, MAP3K8, IER2, RPS27, DUSP1, BHLHE40 
##     RPS29, MAFF, RPS19, RPS26, NFKBIA, RPS27A, RPL34, ZC3H12A, RPS15A, RPL41 
##     RPL37, RPL37A, RPLP2, PTPRE, RPL23A, RPL30, GADD45B, RPL27A, RPL39, RPL38 
## PC_ 5 
## Positive:  STMN1, MCM4, NUSAP1, HELLS, CENPM, DUSP4, DHFR, GMNN, FANCI, TIMELESS 
##     PRC1, NCAPH, GAPDH, ATP5A1, MCM6, NR4A3, FEN1, IRF4, ENO1, FBXO5 
##     ELOVL5, NAMPT, RPL4, CYC1, RGS1, SIK1, ATP2B1, KIF20B, NDC80, ATP5F1 
## Negative:  XAF1, IFI6, IFIT3, MX1, IRF7, ISG15, RSAD2, OAS1, EIF2AK2, OAS3 
##     MX2, IFITM1, IFIT2, OAS2, BST2, OASL, LY6E, STAT1, C19orf66, IFI35 
##     TNF, ISG20, SP100, IFIH1, DDX58, IFITM3, IFITM2, DUSP1, BTG1, FOS
## Computing nearest neighbor graph
## Computing SNN

#Plot clusters for figure 5 Figure 5A:

library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.0.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## ========================================
## 
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:genefilter':
## 
##     dist2
library(viridis)
viridis(9)

### heatmap for the CD8 clusters on leading edge genes of selected pathways
TotalCD8.LG$WCTRNAclusters = paste0("TotalCD8.LG_",TotalCD8.LG$RNA_snn_res.1)
TotalCD8.LG = ScaleData(TotalCD8.LG, assay="RNA", features = unique(c(tO.COVIDvsHealthy.SelectedPath.leadingEdge.LG.CD8,tO.PC1.SelectedPath.leadingEdge.LG.CD8)))
## Centering and scaling data matrix
TotalCD8.selectclust.LG = subset(TotalCD8.LG, Sorted == "N" & RNA_snn_res.1 %in% c(14) & Timepoint == "T0")
# also label genesets
inflamgenes <- rownames(GetAssayData(TotalCD8.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "HALLMARK_INFLAMMATORY_RESPONSE"
        
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD8_Naive",
          "CD8_Mem"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "HALLMARK_INFLAMMATORY_RESPONSE"
          
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD8_Naive",
            "CD8_Mem"
          )  
      ),]$leadingEdge),  " ")))
  )

cellcyclegenes <- rownames(GetAssayData(TotalCD8.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "btm_M4.1_cell cycle (I)"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD8_Naive",
          "CD8_Mem"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "btm_M4.1_cell cycle (I)"
          
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD8_Naive",
            "CD8_Mem"
          )  
      ),]$leadingEdge),  " ")))
  )

T1INFgenes <- rownames(GetAssayData(TotalCD8.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "GO_RESPONSE_TO_TYPE_I_INTERFERON"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD8_Naive",
          "CD8_Mem"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "GO_RESPONSE_TO_TYPE_I_INTERFERON"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD8_Naive",
            "CD8_Mem"
          )  
      ),]$leadingEdge),  " ")))
  )

HALLMARK_TNFA_SIGNALING_VIA_NFKB <- rownames(GetAssayData(TotalCD8.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        "HALLMARK_TNFA_SIGNALING_VIA_NFKB"
        
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD8_Naive",
          "CD8_Mem"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          "HALLMARK_TNFA_SIGNALING_VIA_NFKB"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD8_Naive",
            "CD8_Mem"
          )  
      ),]$leadingEdge),  " ")))
  )

ribosome <- rownames(GetAssayData(TotalCD8.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "KEGG_RIBOSOME"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD8_Naive",
          "CD8_Mem"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "KEGG_RIBOSOME"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD8_Naive",
            "CD8_Mem"
          )  
      ),]$leadingEdge),  " ")))
  )

KEGG_GLYCOLYSIS_GLUCONEOGENESIS <- rownames(GetAssayData(TotalCD8.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "KEGG_GLYCOLYSIS_GLUCONEOGENESIS"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD8_Naive",
          "CD8_Mem"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "KEGG_GLYCOLYSIS_GLUCONEOGENESIS"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD8_Naive",
            "CD8_Mem"
          )  
      ),]$leadingEdge),  " ")))
  )

KEGG_OXIDATIVE_PHOSPHORYLATION <- rownames(GetAssayData(TotalCD8.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "KEGG_OXIDATIVE_PHOSPHORYLATION"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD8_Naive",
          "CD8_Mem"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "KEGG_OXIDATIVE_PHOSPHORYLATION"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD8_Naive",
            "CD8_Mem"
          )  
      ),]$leadingEdge),  " ")))
  )

reactome_Fattyacidmetabolism <- rownames(GetAssayData(TotalCD8.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "reactome_Fatty acid metabolism"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD8_Naive",
          "CD8_Mem"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "reactome_Fatty acid metabolism"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD8_Naive",
            "CD8_Mem"
          )  
      ),]$leadingEdge),  " ")))
  )

ha.CD8 = HeatmapAnnotation(
  #PC1 = TotalCD8.selectclust.LG$PC1,
  #PC1class = TotalCD8.selectclust.LG$PC1_cat,
  #Class = TotalCD8.selectclust.LG$Class,
  Cluster = TotalCD8.selectclust.LG$WCTRNAclusters
)


library("circlize")
## ========================================
## circlize version 0.4.8
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization 
##   in R. Bioinformatics 2014.
## ========================================
col_fun = colorRamp2(c(-1.5, 0, 1.5), c("#FF00FF", "#000000", "#FFFF00"))


Idents(TotalCD8.LG) <- "RNA_snn_res.1"
TotalCD8.T0.LG = subset(TotalCD8.LG, Sorted == "N" & Timepoint == "T0")
TotalCD8.T0.LG = subset(TotalCD8.T0.LG, idents = c("16","17"), invert = TRUE)
TotalCD8.T0.aver.LG = AverageExpression(TotalCD8.T0.LG, return.seurat = TRUE)
## Finished averaging RNA for cluster 0
## Finished averaging RNA for cluster 1
## Finished averaging RNA for cluster 2
## Finished averaging RNA for cluster 3
## Finished averaging RNA for cluster 4
## Finished averaging RNA for cluster 5
## Finished averaging RNA for cluster 6
## Finished averaging RNA for cluster 7
## Finished averaging RNA for cluster 8
## Finished averaging RNA for cluster 9
## Finished averaging RNA for cluster 10
## Finished averaging RNA for cluster 11
## Finished averaging RNA for cluster 12
## Finished averaging RNA for cluster 13
## Finished averaging RNA for cluster 14
## Finished averaging RNA for cluster 15
## Finished averaging CITE for cluster 0
## Finished averaging CITE for cluster 1
## Finished averaging CITE for cluster 2
## Finished averaging CITE for cluster 3
## Finished averaging CITE for cluster 4
## Finished averaging CITE for cluster 5
## Finished averaging CITE for cluster 6
## Finished averaging CITE for cluster 7
## Finished averaging CITE for cluster 8
## Finished averaging CITE for cluster 9
## Finished averaging CITE for cluster 10
## Finished averaging CITE for cluster 11
## Finished averaging CITE for cluster 12
## Finished averaging CITE for cluster 13
## Finished averaging CITE for cluster 14
## Finished averaging CITE for cluster 15
## Finished averaging HTO for cluster 0
## Finished averaging HTO for cluster 1
## Finished averaging HTO for cluster 2
## Finished averaging HTO for cluster 3
## Finished averaging HTO for cluster 4
## Finished averaging HTO for cluster 5
## Finished averaging HTO for cluster 6
## Finished averaging HTO for cluster 7
## Finished averaging HTO for cluster 8
## Finished averaging HTO for cluster 9
## Finished averaging HTO for cluster 10
## Finished averaging HTO for cluster 11
## Finished averaging HTO for cluster 12
## Finished averaging HTO for cluster 13
## Finished averaging HTO for cluster 14
## Finished averaging HTO for cluster 15
## Finished averaging limmaCITE for cluster 0
## Finished averaging limmaCITE for cluster 1
## Finished averaging limmaCITE for cluster 2
## Finished averaging limmaCITE for cluster 3
## Finished averaging limmaCITE for cluster 4
## Finished averaging limmaCITE for cluster 5
## Finished averaging limmaCITE for cluster 6
## Finished averaging limmaCITE for cluster 7
## Finished averaging limmaCITE for cluster 8
## Finished averaging limmaCITE for cluster 9
## Finished averaging limmaCITE for cluster 10
## Finished averaging limmaCITE for cluster 11
## Finished averaging limmaCITE for cluster 12
## Finished averaging limmaCITE for cluster 13
## Finished averaging limmaCITE for cluster 14
## Finished averaging limmaCITE for cluster 15
## Centering and scaling data matrix
TotalCD8.T0.aver.LG = ScaleData(TotalCD8.T0.aver.LG, assay="RNA", features = unique(c(tO.COVIDvsHealthy.SelectedPath.leadingEdge.LG.CD8,tO.PC1.SelectedPath.leadingEdge.LG.CD8)))
## Centering and scaling data matrix
GOI = c("MX1","IFNAR1","IFNGR2","IRF3", "JUNB","PCNA","CEBPB","SOCS3","IL23A","EIF1","HIF1A","ALDOA", "POLA1", "HELLS", "PRC1",
        "STAT2","OAS1","NFKBIA","IRF7","MKI67", "TNFAIP3","TNF","TAP1","CD69","RELA", "SELL" ,"ISG15","AREG","RPL13A","RPL22")
label <- which(rownames(GetAssayData(TotalCD8.T0.aver.LG, assay = "RNA", slot = "scale.data")) %in% GOI)
label_genes <- rownames(GetAssayData(TotalCD8.T0.aver.LG, assay = "RNA", slot = "scale.data"))[label]

Heatmap(as.matrix(GetAssayData(TotalCD8.T0.aver.LG, assay = "RNA", slot = "scale.data")), name = "CD8", show_column_names = TRUE,
        cluster_columns = TRUE, column_title_rot = 90,
        #column_split = TotalCD8.T0.aver.LG$,
        col = col_fun,
        row_km = 8,
        row_names_gp = gpar(fontsize = 10)
) +
  Heatmap(inflamgenes + 0, name = "HALLMARK_INFLAMMATORY_RESPONSE", col = c("0" = "white", "1" = "purple"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(HALLMARK_TNFA_SIGNALING_VIA_NFKB + 0, name = "HALLMARK_TNFA_SIGNALING_VIA_NFKB", col = c("0" = "white", "1" = "red"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(cellcyclegenes + 0, name = "btm_M4.1_cellcycle", col = c("0" = "white", "1" = "blue"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(T1INFgenes + 0, name = "GO_RESPONSE_TO_TYPE_I_INTERFERON", col = c("0" = "white", "1" = "black"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(ribosome + 0, name = "KEGG_RIBOSOME", col = c("0" = "white", "1" = "orange"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(KEGG_GLYCOLYSIS_GLUCONEOGENESIS + 0, name = "KEGG_GLYCOLYSIS_GLUCONEOGENESIS", col = c("0" = "white", "1" = "pink"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(KEGG_OXIDATIVE_PHOSPHORYLATION + 0, name = "KEGG_OXIDATIVE_PHOSPHORYLATION", col = c("0" = "white", "1" = "green"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(reactome_Fattyacidmetabolism + 0, name = "reactome_Fatty acid metabolism", col = c("0" = "white", "1" = "grey"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  rowAnnotation(foo = anno_mark(at = label,
                              labels = label_genes,
                              labels_gp = gpar(fontsize = 8)))

Barplot of fraction clonal cells per cluster for figure 5A:

expandedT = read.table("Metadata/2020_07_28_tcell_expansion_df.tsv", header = TRUE)

TotalCD8.LG$Expanded <- ifelse(colnames(TotalCD8.LG) %in% 
                                    expandedT$barcodeBatch[which(expandedT$expanded == TRUE)], TRUE, FALSE)

TotalCD8.T0.LG$Expanded <- ifelse(colnames(TotalCD8.T0.LG) %in% 
                                    expandedT$barcodeBatch[which(expandedT$expanded == TRUE)], TRUE, FALSE)

TotalCD8.T0.LG.unsort <- subset(TotalCD8.T0.LG, Sorted == "N")

#make barplots of expanded cell percentage per cluster
barplot(prop.table(table(TotalCD8.T0.LG.unsort$RNA_snn_res.1, TotalCD8.T0.LG.unsort$Expanded), 
                   margin = 1)[c(15,12,14,11,1,5,4,2,8,13,7,3,10,16,6,9),2],
        horiz = FALSE,
        ylim = c(0,1)
        )

Protein heatmap for figure 5B

quantile_breaks <- function(xs, n = 100) {
  breaks <- quantile(xs, probs = seq(0.5, 1, length.out = n))
  breaks[!duplicated(breaks)]
}
source("utilityFunctions/AverageExpression_MeanOnly.r")
environment(AverageExpression_MeanOnly) <- asNamespace('Seurat') # need this to allow the function to call hidden seurat package functions
TotalCD8.T0.LGAver = AverageExpression_MeanOnly(TotalCD8.T0.LG, return.seurat=F)
## Finished averaging RNA for cluster 0
## Finished averaging RNA for cluster 1
## Finished averaging RNA for cluster 2
## Finished averaging RNA for cluster 3
## Finished averaging RNA for cluster 4
## Finished averaging RNA for cluster 5
## Finished averaging RNA for cluster 6
## Finished averaging RNA for cluster 7
## Finished averaging RNA for cluster 8
## Finished averaging RNA for cluster 9
## Finished averaging RNA for cluster 10
## Finished averaging RNA for cluster 11
## Finished averaging RNA for cluster 12
## Finished averaging RNA for cluster 13
## Finished averaging RNA for cluster 14
## Finished averaging RNA for cluster 15
## Finished averaging CITE for cluster 0
## Finished averaging CITE for cluster 1
## Finished averaging CITE for cluster 2
## Finished averaging CITE for cluster 3
## Finished averaging CITE for cluster 4
## Finished averaging CITE for cluster 5
## Finished averaging CITE for cluster 6
## Finished averaging CITE for cluster 7
## Finished averaging CITE for cluster 8
## Finished averaging CITE for cluster 9
## Finished averaging CITE for cluster 10
## Finished averaging CITE for cluster 11
## Finished averaging CITE for cluster 12
## Finished averaging CITE for cluster 13
## Finished averaging CITE for cluster 14
## Finished averaging CITE for cluster 15
## Finished averaging HTO for cluster 0
## Finished averaging HTO for cluster 1
## Finished averaging HTO for cluster 2
## Finished averaging HTO for cluster 3
## Finished averaging HTO for cluster 4
## Finished averaging HTO for cluster 5
## Finished averaging HTO for cluster 6
## Finished averaging HTO for cluster 7
## Finished averaging HTO for cluster 8
## Finished averaging HTO for cluster 9
## Finished averaging HTO for cluster 10
## Finished averaging HTO for cluster 11
## Finished averaging HTO for cluster 12
## Finished averaging HTO for cluster 13
## Finished averaging HTO for cluster 14
## Finished averaging HTO for cluster 15
## Finished averaging limmaCITE for cluster 0
## Finished averaging limmaCITE for cluster 1
## Finished averaging limmaCITE for cluster 2
## Finished averaging limmaCITE for cluster 3
## Finished averaging limmaCITE for cluster 4
## Finished averaging limmaCITE for cluster 5
## Finished averaging limmaCITE for cluster 6
## Finished averaging limmaCITE for cluster 7
## Finished averaging limmaCITE for cluster 8
## Finished averaging limmaCITE for cluster 9
## Finished averaging limmaCITE for cluster 10
## Finished averaging limmaCITE for cluster 11
## Finished averaging limmaCITE for cluster 12
## Finished averaging limmaCITE for cluster 13
## Finished averaging limmaCITE for cluster 14
## Finished averaging limmaCITE for cluster 15
mat_breaks.temp <- quantile_breaks(as.matrix(TotalCD8.T0.LGAver$limmaCITE[c("CD38", "HLA-DR","CD278","CD279", "CD86","KLRG1", "CD28","CD45RA","CD45RO"),]), n = 101)
pheatmap(TotalCD8.T0.LGAver$limmaCITE[c("CD38", "HLA-DR","CD278","CD279", "CD86","KLRG1", "CD28","CD45RA","CD45RO"),], scale = "none", border_color=NA, viridis(n=100), breaks = mat_breaks.temp)

Heatmap of overlap of RNA clusters and protein based cluster labels, for Figure 5D

CD8.T0.LG.WCTmergedPerCluster = prop.table(table(Idents(TotalCD8.T0.LG), TotalCD8.T0.LG$WCTmergedcelltype), margin = 1)
pheatmap(CD8.T0.LG.WCTmergedPerCluster[,which(!colnames(CD8.T0.LG.WCTmergedPerCluster) %in% 
                                             c("CD8_Mem_CD158hi","CD8_Naive_CD41hi",
                                               "CD8_Mem_CD41hi","CD8_Mem_CD69pos",
                                               "CD8_Naive_MAIT","CD8_Mem_IgG.IsoBinding",
                                               "CD4_Naive","CD8_Naive_NKT","CD8_Naive_CD101hi",
                                               "CD8_Mem_NKT","CD8_Mem_CD101hi"))],
         border_color = NA)