Recluster on RNA within the T cell subsets

Load R packages and data:

#this is tested using R 3.6.1 on a high-performance comupting node with 8 cores and at least 320 gb or ram. 
library("Seurat") #load Seurat 3.1
library("dplyr")

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library("matrixStats")

## 
## Attaching package: 'matrixStats'

## The following object is masked from 'package:dplyr':
## 
##     count

library('tidyverse')

## Registered S3 method overwritten by 'rvest':
##   method            from
##   read_xml.response xml2

## Registered S3 method overwritten by 'cli':
##   method     from    
##   print.boxx spatstat

## ── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ ggplot2 3.3.2     ✔ readr   1.3.1
## ✔ tibble  2.1.1     ✔ purrr   0.3.2
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ ggplot2 3.3.2     ✔ forcats 0.4.0

## ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ matrixStats::count() masks dplyr::count()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::lag()         masks stats::lag()

library('parallelDist')
library("pheatmap")
library("viridis")

## Loading required package: viridisLite

library("ggridges")

## 
## Attaching package: 'ggridges'

## The following object is masked from 'package:ggplot2':
## 
##     scale_discrete_manual

library("magrittr")

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

library("genefilter")

## 
## Attaching package: 'genefilter'

## The following object is masked from 'package:readr':
## 
##     spec

## The following objects are masked from 'package:matrixStats':
## 
##     rowSds, rowVars

library("DescTools")

## Registered S3 method overwritten by 'DescTools':
##   method         from 
##   reorder.factor gdata

## 
## Attaching package: 'DescTools'

## The following object is masked from 'package:genefilter':
## 
##     AUC

library("scico")

#### Recluster on RNA within the T cell subsets ####

## import the seurat object downloaded from GEO
merge.SNG = readRDS("SeuratObjects/AllBatches_SeuratObj.rds")

## import fgsea results tables
t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable = read.table("Metadata/2020_08_09_t0_plus_healthy--healthy_vs_covid--COVID-Healthy.tsv", header = TRUE, sep = "\t")
t0_plus_t0_covid_only.PC1.fgseaatable = read.table("Metadata/2020_08_09_t0_covid_only--PC1--PC1.tsv", header = TRUE, sep = "\t")

Recluster within the CD4 cells, using the geneset enrichment leading edge genes identified in the pseudobulk differential expression analysis

## Regressing out Donor, Batch

## Centering and scaling data matrix

## PC_ 1 
## Positive:  CCNL1, TNFAIP3, REL, SLC2A3, SAT1, ATP2B1, B4GALT1, KLF6, NR4A3, NFAT5 
##     DUSP4, FOSB, TRAF1, HELLS, NR4A2, PDE4B, AHR, ADAR, LDLR, NAMPT 
##     IL6ST, SMC4, BHLHE40, NFKB2, ATP2C1, NFKB1, MCL1, TIPARP, MAP3K8, BIRC2 
## Negative:  RPS12, RPL41, RPL10, RPL30, RPL32, RPL35A, RPS28, RPS15A, RPL19, RPS27A 
##     RPS4X, RPS23, RPL34, RPS3, RPL11, RPL13, RPL28, RPL29, RPL18A, RPS18 
##     RPS3A, RPS13, RPL18, RPL9, RPS8, RPS15, RPL39, RPS27, RPS25, RPLP1 
## PC_ 2 
## Positive:  IL6ST, SELL, RPS8, RPS3A, RPL32, RPS13, EPHX2, RPS12, RPL22, RPL30 
##     RPL13, RPS23, RPL11, RPL34, IFNGR2, RPL5, RPS27A, RPL19, RPS5, RPL39 
##     RPLP2, RPL10, RPL18, RPL35A, RPL12, RPL7, RPL9, RPS15A, RPS2, RPL29 
## Negative:  NFKBIA, DUSP2, PPP1R15A, HLA-A, TNFAIP3, DUSP1, HLA-B, IER2, JUN, NR4A2 
##     FOS, CD69, JUNB, HLA-C, BHLHE40, KLF6, CDKN1A, EMP3, FOSB, GADD45B 
##     GAPDH, CCL5, ZC3H12A, ID2, KLF10, MCL1, IRF1, EIF1, DUSP4, BTG2 
## PC_ 3 
## Positive:  FOS, JUN, DUSP1, JUNB, FOSB, PPP1R15A, CD69, BTG1, TNFAIP3, BTG2 
##     NR4A2, EIF1, NFKBIA, SLC2A3, IER2, AREG, PNRC1, KLF6, NR4A1, GADD45B 
##     PTGER4, KLF2, GPR183, SOCS3, DUSP2, CD55, SGK1, CCNL1, SIK1, CDKN1A 
## Negative:  LY6E, HLA-A, HLA-C, GAPDH, DBI, HLA-B, COX5A, ATP5J2, LCK, TPI1 
##     IFITM2, ISG15, ENO1, IFI6, HLA-E, DUSP4, EMP3, NDUFB2, ALOX5AP, PGAM1 
##     MKI67, ATP5E, COX8A, NDUFB10, ISG20, STMN1, NDUFS6, CCL5, XAF1, SCP2 
## PC_ 4 
## Positive:  SELL, SLC2A3, IL6ST, SOCS3, CD55, GPR183, LDHB, AREG, JUNB, KLF2 
##     PTGER4, SIK1, MX1, HIF1A, CCNL1, EPHX2, NDUFV2, BIRC2, MYC, PNRC1 
##     KCNA3, IFNGR2, LCK, PTGES3, STAT1, BTG1, STMN1, BIRC3, SOD2, MX2 
## Negative:  CCL5, BHLHE40, CDKN1A, MAP3K8, TNF, NFKBIA, PLEK, ZC3H12A, ID2, KLF10 
##     RPS19, MAFF, IER5, TIMP1, RPL36A, EMP3, DUSP2, OASL, GNA15, IFITM2 
##     PHLDA1, SQSTM1, RPS29, IER3, CD83, JUN, PTPRE, RPL34, GADD45B, GAPDH 
## PC_ 5 
## Positive:  STMN1, MKI67, ATP5J2, COX6B1, NUSAP1, RPL37, CENPF, COX6C, CENPM, COX7B 
##     RPL36A, PCNA, DUSP4, NDUFS5, HELLS, ATP5E, COX5A, RPL39, ATP5J, DUSP2 
##     UQCRQ, UQCR10, ATP5I, COX7C, JUNB, CENPW, PTTG1, NDUFS6, SMC4, COX6A1 
## Negative:  HLA-E, HLA-B, HLA-C, HLA-A, TAPBP, CD48, KCNA3, RPL21, JAK1, CCL5 
##     RPL3, RPS5, IFI6, EIF2AK2, PTGER2, CD44, BTG1, PNRC1, LY6E, RPS3 
##     RPL5, LITAF, RPS9, XAF1, ADAR, KLF9, STAT1, RPS27, BST2, RPL10A

## Computing nearest neighbor graph

## Computing SNN

## 2 singletons identified. 15 final clusters.

#Plot clusters for figure S6 Figure S6A:

library(ComplexHeatmap)

## Loading required package: grid

## ========================================
## ComplexHeatmap version 2.0.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## ========================================

## 
## Attaching package: 'ComplexHeatmap'

## The following object is masked from 'package:genefilter':
## 
##     dist2

library(viridis)
viridis(9)

### heatmap for the CD8 clusters on leading edge genes of selected pathways
TotalCD4.LG$WCTRNAclusters = paste0("TotalCD4.LG_",TotalCD4.LG$RNA_snn_res.1)
TotalCD4.LG = ScaleData(TotalCD4.LG, assay="RNA", features = unique(c(tO.COVIDvsHealthy.SelectedPath.leadingEdge.LG,tO.PC1.SelectedPath.leadingEdge.LG)))

## Centering and scaling data matrix

TotalCD4.selectclust.LG = subset(TotalCD4.LG, Sorted == "N" & RNA_snn_res.1 %in% c(14)& Timepoint == "T0") #

# create vector to label genesets
inflamgenes <- rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "HALLMARK_INFLAMMATORY_RESPONSE"
        
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD4_Naive",
          "CD4_Mem",
          "Treg"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "HALLMARK_INFLAMMATORY_RESPONSE"
          
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD4_Naive",
            "CD4_Mem",
            "Treg"
          )  
      ),]$leadingEdge),  " ")))
    )

cellcyclegenes <- rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "btm_M4.1_cell cycle (I)"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD4_Naive",
          "CD4_Mem",
          "Treg"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
         
          "btm_M4.1_cell cycle (I)"
          
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD4_Naive",
            "CD4_Mem",
            "Treg"
          )  
      ),]$leadingEdge),  " ")))
  )

T1INFgenes <- rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "GO_RESPONSE_TO_TYPE_I_INTERFERON"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD4_Naive",
          "CD4_Mem",
          "Treg"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "GO_RESPONSE_TO_TYPE_I_INTERFERON"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD4_Naive",
            "CD4_Mem",
            "Treg"
          )  
      ),]$leadingEdge),  " ")))
  )

HALLMARK_TNFA_SIGNALING_VIA_NFKB <- rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        "HALLMARK_TNFA_SIGNALING_VIA_NFKB"
        
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD4_Naive",
          "CD4_Mem",
          "Treg"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          "HALLMARK_TNFA_SIGNALING_VIA_NFKB"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD4_Naive",
            "CD4_Mem",
            "Treg"
          )  
      ),]$leadingEdge),  " ")))
  )

ribosome <- rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
       
        "KEGG_RIBOSOME"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD4_Naive",
          "CD4_Mem",
          "Treg"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "KEGG_RIBOSOME"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD4_Naive",
            "CD4_Mem",
            "Treg"
          )  
      ),]$leadingEdge),  " ")))
  )

KEGG_GLYCOLYSIS_GLUCONEOGENESIS <- rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "KEGG_GLYCOLYSIS_GLUCONEOGENESIS"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD4_Naive",
          "CD4_Mem",
          "Treg"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "KEGG_GLYCOLYSIS_GLUCONEOGENESIS"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD4_Naive",
            "CD4_Mem",
            "Treg"
          )  
      ),]$leadingEdge),  " ")))
  )

KEGG_OXIDATIVE_PHOSPHORYLATION <- rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "KEGG_OXIDATIVE_PHOSPHORYLATION"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD4_Naive",
          "CD4_Mem",
          "Treg"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "KEGG_OXIDATIVE_PHOSPHORYLATION"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD4_Naive",
            "CD4_Mem",
            "Treg"
          )  
      ),]$leadingEdge),  " ")))
  )

reactome_Fattyacidmetabolism <- rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% 
  union(unique(unlist(strsplit(as.character(t0_plus_t0_covid_only.PC1.fgseaatable[
    which(
      t0_plus_t0_covid_only.PC1.fgseaatable$pathway %in% c(
        
        "reactome_Fatty acid metabolism"
      ) 
      &
        t0_plus_t0_covid_only.PC1.fgseaatable$celltype %in% c(
          "CD4_Naive",
          "CD4_Mem",
          "Treg"
        )  
    ),]$leadingEdge),  " ")))
    ,
    unique(unlist(strsplit(as.character(t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable[
      which(
        t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$pathway %in% c(
          
          "reactome_Fatty acid metabolism"
        ) 
        &
          t0_plus_healthy.healthy_vs_covid.COVID.Healthy.fgseaatable$celltype %in% c(
            "CD4_Naive",
            "CD4_Mem",
            "Treg"
          )  
      ),]$leadingEdge),  " ")))
  )

ha.CD4 = HeatmapAnnotation(
  # PC1 = TotalCD4.selectclust.LG$PC1,
  # PC1class = TotalCD4.selectclust.LG$PC1_cat,
  # Class = TotalCD4.selectclust.LG$Class,
  Cluster = TotalCD4.selectclust.LG$WCTRNAclusters
)

# list genes to be labeled in text on the plot
GOI = c("MX1","IFNAR1","IFNGR2","IRF3", "JUNB","PCNA","CEBPB","SOCS3","IL23A","EIF1","HIF1A","ALDOA", "POLA1",
        "STAT2","OAS1","NFKBIA","IRF7","MKI67", "TNFAIP3","TNF","TAP1","CD69","RELA", "SELL" ,"ISG15","AREG",
        "RPL13A","RPL22")
label <- which(rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data")) %in% GOI)
label_genes <- rownames(GetAssayData(TotalCD4.selectclust.LG, assay = "RNA", slot = "scale.data"))[label]
  
  
library("circlize")

## ========================================
## circlize version 0.4.8
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization 
##   in R. Bioinformatics 2014.
## ========================================

col_fun = colorRamp2(c(-1.5, 0, 1.5), c("#FF00FF", "#000000", "#FFFF00"))

Idents(TotalCD4.LG) <- "RNA_snn_res.1"
TotalCD4.T0.LG = subset(TotalCD4.LG, Sorted == "N" & Timepoint == "T0")

TotalCD4.T0.aver.LG = AverageExpression(TotalCD4.T0.LG, return.seurat = TRUE)

## Finished averaging RNA for cluster 0

## Finished averaging RNA for cluster 1

## Finished averaging RNA for cluster 10

## Finished averaging RNA for cluster 11

## Finished averaging RNA for cluster 12

## Finished averaging RNA for cluster 13

## Finished averaging RNA for cluster 14

## Finished averaging RNA for cluster 2

## Finished averaging RNA for cluster 3

## Finished averaging RNA for cluster 4

## Finished averaging RNA for cluster 5

## Finished averaging RNA for cluster 6

## Finished averaging RNA for cluster 7

## Finished averaging RNA for cluster 8

## Finished averaging RNA for cluster 9

## Finished averaging CITE for cluster 0

## Finished averaging CITE for cluster 1

## Finished averaging CITE for cluster 10

## Finished averaging CITE for cluster 11

## Finished averaging CITE for cluster 12

## Finished averaging CITE for cluster 13

## Finished averaging CITE for cluster 14

## Finished averaging CITE for cluster 2

## Finished averaging CITE for cluster 3

## Finished averaging CITE for cluster 4

## Finished averaging CITE for cluster 5

## Finished averaging CITE for cluster 6

## Finished averaging CITE for cluster 7

## Finished averaging CITE for cluster 8

## Finished averaging CITE for cluster 9

## Finished averaging HTO for cluster 0

## Finished averaging HTO for cluster 1

## Finished averaging HTO for cluster 10

## Finished averaging HTO for cluster 11

## Finished averaging HTO for cluster 12

## Finished averaging HTO for cluster 13

## Finished averaging HTO for cluster 14

## Finished averaging HTO for cluster 2

## Finished averaging HTO for cluster 3

## Finished averaging HTO for cluster 4

## Finished averaging HTO for cluster 5

## Finished averaging HTO for cluster 6

## Finished averaging HTO for cluster 7

## Finished averaging HTO for cluster 8

## Finished averaging HTO for cluster 9

## Finished averaging limmaCITE for cluster 0

## Finished averaging limmaCITE for cluster 1

## Finished averaging limmaCITE for cluster 10

## Finished averaging limmaCITE for cluster 11

## Finished averaging limmaCITE for cluster 12

## Finished averaging limmaCITE for cluster 13

## Finished averaging limmaCITE for cluster 14

## Finished averaging limmaCITE for cluster 2

## Finished averaging limmaCITE for cluster 3

## Finished averaging limmaCITE for cluster 4

## Finished averaging limmaCITE for cluster 5

## Finished averaging limmaCITE for cluster 6

## Finished averaging limmaCITE for cluster 7

## Finished averaging limmaCITE for cluster 8

## Finished averaging limmaCITE for cluster 9

## Centering and scaling data matrix

TotalCD4.T0.aver.LG = ScaleData(TotalCD4.T0.aver.LG, assay="RNA", features = unique(c(tO.COVIDvsHealthy.SelectedPath.leadingEdge.LG,tO.PC1.SelectedPath.leadingEdge.LG)))

## Centering and scaling data matrix

GOI = c("MX1","IFNAR1","IFNGR2","IRF3", "JUNB","PCNA","CEBPB","SOCS3","IL23A","EIF1","HIF1A","ALDOA", "POLA1",
        "STAT2","OAS1","NFKBIA","IRF7","MKI67", "TNFAIP3","TNF","TAP1","CD69","RELA", "SELL" ,"ISG15","AREG","RPL13A","RPL22")
label <- which(rownames(GetAssayData(TotalCD4.T0.aver.LG, assay = "RNA", slot = "scale.data")) %in% GOI)
label_genes <- rownames(GetAssayData(TotalCD4.T0.aver.LG, assay = "RNA", slot = "scale.data"))[label]

Heatmap(as.matrix(GetAssayData(TotalCD4.T0.aver.LG, assay = "RNA", slot = "scale.data")), name = "CD4", show_column_names = TRUE,
        cluster_columns = TRUE, column_title_rot = 90,
        #column_split = TotalCD8.T0.aver.LG$,
        col = col_fun,
        row_km = 8,
        row_names_gp = gpar(fontsize = 10)
) +
  Heatmap(inflamgenes + 0, name = "HALLMARK_INFLAMMATORY_RESPONSE", col = c("0" = "white", "1" = "purple"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(HALLMARK_TNFA_SIGNALING_VIA_NFKB + 0, name = "HALLMARK_TNFA_SIGNALING_VIA_NFKB", col = c("0" = "white", "1" = "red"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(cellcyclegenes + 0, name = "btm_M4.1_cellcycle", col = c("0" = "white", "1" = "blue"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(T1INFgenes + 0, name = "GO_RESPONSE_TO_TYPE_I_INTERFERON", col = c("0" = "white", "1" = "black"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(ribosome + 0, name = "KEGG_RIBOSOME", col = c("0" = "white", "1" = "orange"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(KEGG_GLYCOLYSIS_GLUCONEOGENESIS + 0, name = "KEGG_GLYCOLYSIS_GLUCONEOGENESIS", col = c("0" = "white", "1" = "pink"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(KEGG_OXIDATIVE_PHOSPHORYLATION + 0, name = "KEGG_OXIDATIVE_PHOSPHORYLATION", col = c("0" = "white", "1" = "green"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  Heatmap(reactome_Fattyacidmetabolism + 0, name = "reactome_Fatty acid metabolism", col = c("0" = "white", "1" = "grey"),
          show_heatmap_legend = FALSE, width = unit(2, "mm"), column_names_gp = gpar(fontsize = 6)) +
  rowAnnotation(foo = anno_mark(at = label,
                              labels = label_genes,
                              labels_gp = gpar(fontsize = 8)))

Barplot of fraction clonal cells per cluster for figure S6A:

expandedT = read.table("Metadata/2020_07_28_tcell_expansion_df.tsv", header = TRUE)

TotalCD4.LG$Expanded <- ifelse(colnames(TotalCD4.LG) %in% 
                                 expandedT$barcodeBatch[which(expandedT$expanded == TRUE)], TRUE, FALSE)

TotalCD4.T0.LG$Expanded <- ifelse(colnames(TotalCD4.T0.LG) %in% 
                                 expandedT$barcodeBatch[which(expandedT$expanded == TRUE)], TRUE, FALSE)

TotalCD4.T0.LG.unsort <- subset(TotalCD4.T0.LG, Sorted == "N")

#make barplots of expanded cell percentage per cluster
barplot(prop.table(table(TotalCD4.T0.LG.unsort$RNA_snn_res.1, TotalCD4.T0.LG.unsort$Expanded), 
                   margin = 1)[c(7,10,12,3,13,8,2,1,11,4,10,5,14,15,6),2],
        horiz = FALSE,
        ylim = c(0,1)
        )

Protein heatmap for figure S6B

quantile_breaks <- function(xs, n = 100) {
  breaks <- quantile(xs, probs = seq(0.5, 1, length.out = n))
  breaks[!duplicated(breaks)]
}
source("utilityFunctions/AverageExpression_MeanOnly.r")
environment(AverageExpression_MeanOnly) <- asNamespace('Seurat') # need this to allow the function to call hidden seurat package functions
TotalCD4.T0.LGAver = AverageExpression_MeanOnly(TotalCD4.T0.LG, return.seurat=F)

## Finished averaging RNA for cluster 0

## Finished averaging RNA for cluster 1

## Finished averaging RNA for cluster 10

## Finished averaging RNA for cluster 11

## Finished averaging RNA for cluster 12

## Finished averaging RNA for cluster 13

## Finished averaging RNA for cluster 14

## Finished averaging RNA for cluster 2

## Finished averaging RNA for cluster 3

## Finished averaging RNA for cluster 4

## Finished averaging RNA for cluster 5

## Finished averaging RNA for cluster 6

## Finished averaging RNA for cluster 7

## Finished averaging RNA for cluster 8

## Finished averaging RNA for cluster 9

## Finished averaging CITE for cluster 0

## Finished averaging CITE for cluster 1

## Finished averaging CITE for cluster 10

## Finished averaging CITE for cluster 11

## Finished averaging CITE for cluster 12

## Finished averaging CITE for cluster 13

## Finished averaging CITE for cluster 14

## Finished averaging CITE for cluster 2

## Finished averaging CITE for cluster 3

## Finished averaging CITE for cluster 4

## Finished averaging CITE for cluster 5

## Finished averaging CITE for cluster 6

## Finished averaging CITE for cluster 7

## Finished averaging CITE for cluster 8

## Finished averaging CITE for cluster 9

## Finished averaging HTO for cluster 0

## Finished averaging HTO for cluster 1

## Finished averaging HTO for cluster 10

## Finished averaging HTO for cluster 11

## Finished averaging HTO for cluster 12

## Finished averaging HTO for cluster 13

## Finished averaging HTO for cluster 14

## Finished averaging HTO for cluster 2

## Finished averaging HTO for cluster 3

## Finished averaging HTO for cluster 4

## Finished averaging HTO for cluster 5

## Finished averaging HTO for cluster 6

## Finished averaging HTO for cluster 7

## Finished averaging HTO for cluster 8

## Finished averaging HTO for cluster 9

## Finished averaging limmaCITE for cluster 0

## Finished averaging limmaCITE for cluster 1

## Finished averaging limmaCITE for cluster 10

## Finished averaging limmaCITE for cluster 11

## Finished averaging limmaCITE for cluster 12

## Finished averaging limmaCITE for cluster 13

## Finished averaging limmaCITE for cluster 14

## Finished averaging limmaCITE for cluster 2

## Finished averaging limmaCITE for cluster 3

## Finished averaging limmaCITE for cluster 4

## Finished averaging limmaCITE for cluster 5

## Finished averaging limmaCITE for cluster 6

## Finished averaging limmaCITE for cluster 7

## Finished averaging limmaCITE for cluster 8

## Finished averaging limmaCITE for cluster 9

mat_breaks.temp <- quantile_breaks(as.matrix(TotalCD4.T0.LGAver$limmaCITE[c("CD38", "HLA-DR","CD278","CD279", "CD71","KLRG1", "CD28","CD45RA","CD45RO"),]), n = 101)
pheatmap(TotalCD4.T0.LGAver$limmaCITE[c("CD38", "HLA-DR","CD278","CD279", "CD71","KLRG1", "CD28","CD45RA","CD45RO"),], scale = "none", border_color=NA, viridis(n=100), breaks = mat_breaks.temp)

Heatmap of overlap of RNA clusters and protein based cluster labels, for Figure S6D

CD4.T0.LG.SpecificGatingPerCluster = prop.table(table(Idents(TotalCD4.T0.LG), TotalCD4.T0.LG$specific_gating), margin = 1)
CD4.T0.LG.GateGroupMergedPerCluster = prop.table(table(Idents(TotalCD4.T0.LG), TotalCD4.T0.LG$gate_group_merged), margin = 1)
CD4.T0.LG.WCTmergedPerCluster = prop.table(table(Idents(TotalCD4.T0.LG), TotalCD4.T0.LG$WCTmergedcelltype), margin = 1)
colnames(CD4.T0.LG.WCTmergedPerCluster)[17] <- "Treg.ActT"
pheatmap(CD4.T0.LG.WCTmergedPerCluster[,which(!colnames(CD4.T0.LG.WCTmergedPerCluster) %in% 
                                                   c("CD4_Mem_CD69pos","CD4_Mem_Activated.HLADRhi",
                                                     "CD4_Mem_CD22hi","CD4_Mem_MAIT",
                                                     "CD4_Naive_MAIT","CD4_Naive_CD38hi",
                                                     "CD4_Mem_integrin.hi","CD4_Mem_CD41hi",
                                                     "CD4_Mem_IgG.IsoBinding","CD4_Mem_CD146hi",
                                                     "CD4_Naive_IgG.IsoBinding"))],
         border_color = NA)

pheatmap(cbind(Tfh = CD4.T0.LG.SpecificGatingPerCluster[,2], GatedTreg = CD4.T0.LG.GateGroupMergedPerCluster[,13]),
         border_color = NA)

4.TcellReclust.CD4

Recluster on RNA within the T cell subsets

Recluster within the CD4 cells, using the geneset enrichment leading edge genes identified in the pseudobulk differential expression analysis