3.UMAPandProteinVis.Figure2

Load seurat object from GEO and make heatmap of average protein expression for Figure 2A:

#this is tested using R 3.6.1 on a high-performance comupting node with 8 cores and at least 320 gb or ram. 
library("Seurat") #load Seurat 3.1
library("dplyr")

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library("matrixStats")

## 
## Attaching package: 'matrixStats'

## The following object is masked from 'package:dplyr':
## 
##     count

library('tidyverse')

## Registered S3 method overwritten by 'rvest':
##   method            from
##   read_xml.response xml2

## Registered S3 method overwritten by 'cli':
##   method     from    
##   print.boxx spatstat

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ ggplot2 3.3.2     ✔ readr   1.3.1
## ✔ tibble  2.1.1     ✔ purrr   0.3.2
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ ggplot2 3.3.2     ✔ forcats 0.4.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ matrixStats::count() masks dplyr::count()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::lag()         masks stats::lag()

library('parallelDist')
library("pheatmap")
library("viridis")

## Loading required package: viridisLite

library("ggridges")

## 
## Attaching package: 'ggridges'

## The following object is masked from 'package:ggplot2':
## 
##     scale_discrete_manual

library("magrittr")

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

library("genefilter")

## 
## Attaching package: 'genefilter'

## The following object is masked from 'package:readr':
## 
##     spec

## The following objects are masked from 'package:matrixStats':
## 
##     rowSds, rowVars

library("DescTools")

## Registered S3 method overwritten by 'DescTools':
##   method         from 
##   reorder.factor gdata

## 
## Attaching package: 'DescTools'

## The following object is masked from 'package:genefilter':
## 
##     AUC

library("scico")
library("grid")

## import the seurat object downloaded from GEO
merge.SNG = readRDS("SeuratObjects/AllBatches_SeuratObj.rds")

##remove the CHI014 technical control sample
merge.SNG <- subset(merge.SNG, Donor == "CHI014", invert = TRUE)

## make a object of the cluster averages for heatmaps 
Idents(merge.SNG) <- "WCTcoursecelltype"
source("utilityFunctions/AverageExpression_MeanOnly.r")
environment(AverageExpression_MeanOnly) <- asNamespace('Seurat') # need this to allow the function to call hidden seurat package functions
merge.SNG.Aver = AverageExpression_MeanOnly(merge.SNG, assays = "limmaCITE")

## Finished averaging limmaCITE for cluster Mono_NonClassical

## Finished averaging limmaCITE for cluster B_Naive

## Finished averaging limmaCITE for cluster Mono_Classical

## Finished averaging limmaCITE for cluster NK_CD16hi

## Finished averaging limmaCITE for cluster B_Mem

## Finished averaging limmaCITE for cluster gammadeltaT

## Finished averaging limmaCITE for cluster NK_CD56loCD16lo

## Finished averaging limmaCITE for cluster PB_Plasmablasts

## Finished averaging limmaCITE for cluster gated_out

## Finished averaging limmaCITE for cluster Treg

## Finished averaging limmaCITE for cluster CD4_Mem

## Finished averaging limmaCITE for cluster CD8_Mem

## Finished averaging limmaCITE for cluster CD8_Naive

## Finished averaging limmaCITE for cluster CD4_Naive

## Finished averaging limmaCITE for cluster RBC

## Finished averaging limmaCITE for cluster MAIT

## Finished averaging limmaCITE for cluster pDC

## Finished averaging limmaCITE for cluster Dblt

## Finished averaging limmaCITE for cluster cDC

## Finished averaging limmaCITE for cluster Unknown

## Finished averaging limmaCITE for cluster TissueResMemT

## Finished averaging limmaCITE for cluster Platelets

## Finished averaging limmaCITE for cluster NK_CD56hiCD16lo

## Finished averaging limmaCITE for cluster DPT

## Finished averaging limmaCITE for cluster DNT

## Finished averaging limmaCITE for cluster Granulocytes

## Finished averaging limmaCITE for cluster Tcell

## Finished averaging limmaCITE for cluster dim

## Finished averaging limmaCITE for cluster TCRVbeta13.1pos

## Finished averaging limmaCITE for cluster Mono_Intermediate

quantile_breaks <- function(xs, n = 100) {
  breaks <- quantile(xs, probs = seq(0.5, 1, length.out = n))
  breaks[!duplicated(breaks)]
}

f2 <- pOverA(0.01, 3)
ffun2 <- filterfun(f2)
merge.SNGADTfilt = genefilter(GetAssayData(merge.SNG[["limmaCITE"]], slot = "data"), ffun2)

mat_breaks <- quantile_breaks(as.matrix(merge.SNG.Aver$limmaCITE[which(merge.SNGADTfilt),]), n = 101)
pheatmap(merge.SNG.Aver$limmaCITE[which(merge.SNGADTfilt),which(!(colnames(merge.SNG.Aver$limmaCITE) %in% c("Dblt","Unknown","gated_out","dim","Tcell","dblt")))], scale = "none", border_color=NA, viridis(n=100), breaks = mat_breaks)

Innate population UMAP for Figure 2B

innate.merge.SNG <- subset(merge.SNG, subset = WCTcoursecelltype %in% 
                             c("Mono_Classical", "Mono_NonClassical", "NK_CD16hi",
                               "NK_CD56loCD16lo", "pDC", "cDC", "Platelets", "NK_CD56hiCD16lo",
                               "Mono_Intermediate", "Granulocytes"))
innate.merge.SNGADTfilt = genefilter(GetAssayData(innate.merge.SNG[["limmaCITE"]], slot = "data"), ffun2)
innate.merge.SNG <- RunUMAP(innate.merge.SNG, assay = "limmaCITE", features = rownames(innate.merge.SNG[["limmaCITE"]][which(innate.merge.SNGADTfilt),]),
                     n.neighbors = 15, min.dist = 0.01, spread =  5)

## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session

## 16:30:24 UMAP embedding parameters a = 0.1502 b = 0.7925

## 16:30:24 Read 125117 rows and found 73 numeric columns

## 16:30:24 Using Annoy for neighbor search, n_neighbors = 15

## 16:30:24 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 16:30:53 Writing NN index file to temp file /tmp/Rtmp9V6aqj/file44aaba2f29b
## 16:30:53 Searching Annoy index using 1 thread, search_k = 1500
## 16:32:06 Annoy recall = 100%
## 16:32:07 Commencing smooth kNN distance calibration using 1 thread
## 16:32:12 Initializing from normalized Laplacian + noise
## 16:32:20 Commencing optimization for 200 epochs, with 3033514 positive edges
## 16:34:35 Optimization finished

innate.p <- DimPlot(innate.merge.SNG, 
                      group.by = "WCTcoursecelltype", label = TRUE) + NoLegend()
innate.p <- AugmentPlot(innate.p, width = 6, height = 6)
innate.p

Adaptive population UMAP for Figure 2B

adaptive.merge.SNG <- subset(merge.SNG, subset = WCTcoursecelltype %in% 
                             c("B_Naive", "B_Mem", "gammadeltaT",
                               "PB_Plasmablasts", "Treg", "CD4_Mem", "CD8_Mem", "CD8_Naive",
                               "CD4_Naive", "MAIT", "TissueResMemT", "DPT", "DNT", "TCRVbeta13.1pos"))
adaptive.merge.SNGADTfilt = genefilter(GetAssayData(adaptive.merge.SNG[["limmaCITE"]], slot = "data"), ffun2)
adaptive.merge.SNG <- RunUMAP(adaptive.merge.SNG, assay = "limmaCITE", features = rownames(adaptive.merge.SNG[["limmaCITE"]][which(adaptive.merge.SNGADTfilt),]),
                            n.neighbors = 15, min.dist = 0.01, spread =  5)

## 16:35:09 UMAP embedding parameters a = 0.1502 b = 0.7925

## 16:35:10 Read 246964 rows and found 72 numeric columns

## 16:35:10 Using Annoy for neighbor search, n_neighbors = 15

## 16:35:10 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 16:36:02 Writing NN index file to temp file /tmp/Rtmp9V6aqj/file44aa2e9df1
## 16:36:02 Searching Annoy index using 1 thread, search_k = 1500
## 16:37:52 Annoy recall = 100%
## 16:37:52 Commencing smooth kNN distance calibration using 1 thread
## 16:38:00 Initializing from normalized Laplacian + noise
## 16:38:25 Commencing optimization for 200 epochs, with 6036274 positive edges
## 16:42:53 Optimization finished

adaptive.p <- DimPlot(adaptive.merge.SNG, 
                      group.by = "WCTcoursecelltype", label = TRUE) + NoLegend()
adaptive.p <- AugmentPlot(adaptive.p, width = 6, height = 6)
adaptive.p

Average protein expression heatmap of selected T cell populations for Figure 2C

Idents(merge.SNG) <- "WCTmergedcelltype"
CD4_Mem.Aver = AverageExpression_MeanOnly(subset(merge.SNG, WCTcoursecelltype == "CD4_Mem"), assay = "limmaCITE")

## Finished averaging limmaCITE for cluster CD4_Mem_TM

## Finished averaging limmaCITE for cluster CD4_Mem_EM.TE

## Finished averaging limmaCITE for cluster CD4_Mem_CM

## Finished averaging limmaCITE for cluster CD4_Mem_TM.integrin.hi

## Finished averaging limmaCITE for cluster CD4_Mem_Activated.CD38.CD278hi

## Finished averaging limmaCITE for cluster CD4_Mem_KLRG1pos

## Finished averaging limmaCITE for cluster CD4_Mem_CD101pos

## Finished averaging limmaCITE for cluster CD4_Mem_IgG.IsoBinding

## Finished averaging limmaCITE for cluster CD4_Mem_CD4.KLRG1pos.NotCD8

## Finished averaging limmaCITE for cluster CD4_Mem_integrin.hi

## Finished averaging limmaCITE for cluster CD4_Mem_Activated.HLADRhi

## Finished averaging limmaCITE for cluster CD4_Mem_CD146hi

## Finished averaging limmaCITE for cluster CD4_Mem_CD41hi

## Finished averaging limmaCITE for cluster CD4_Mem_MAIT

## Finished averaging limmaCITE for cluster CD4_Mem_CD22hi

## Finished averaging limmaCITE for cluster CD4_Mem_CD69pos

f1 <- pOverA(0.01, 2)
ffun <- filterfun(f1)
CD4_Mem.ADTfilt = genefilter(CD4_Mem.Aver$limmaCITE[,c("CD4_Mem_TM","CD4_Mem_EM.TE","CD4_Mem_CM")], ffun)
#mat_breaks <- quantile_breaks(as.matrix(CD4_Mem.Aver$limmaCITE[which(CD4_Mem.ADTfilt),]), n = 101)
mat_breaks <- quantile_breaks(as.matrix(merge.SNG.Aver$limmaCITE[which(merge.SNGADTfilt),]), n = 101)

pheatmap(CD4_Mem.Aver$limmaCITE[which(CD4_Mem.ADTfilt),c("CD4_Mem_TM","CD4_Mem_EM.TE","CD4_Mem_CM")], scale = "none", border_color=NA, viridis(n=100), breaks = mat_breaks)

Mono_Classical population UMAP for Figure 2D, colored by COVID or healthy control

Mono_Classical <- subset(merge.SNG, subset = WCTcoursecelltype %in% c( "Mono_Classical"))
Mono_Classical <- FindVariableFeatures(Mono_Classical, assay = "RNA")
Mono_Classical <- ScaleData(Mono_Classical)

## Centering and scaling data matrix

Mono_Classical <- RunPCA(Mono_Classical)

## PC_ 1 
## Positive:  MT-CYB, MT-ND4L, MTRNR2L12, IL1R2, MT-ATP8, MT-ND6, HBB, MTRNR2L8, AREG, ZBTB16 
##     DAAM2, THBS1, KCNE1, FMN1, GNLY, MIR181A1HG, RALGAPA1, HMGB2, MCTP2, COQ7 
##     CCL5, IL32, HSPA1B, RP11-701P16.2, CST7, TSPYL2, RNF144B, PAG1, SLC25A37, PTPRCAP 
## Negative:  HLA-DRA, CD74, HLA-DRB1, SERPINA1, HLA-DPA1, EEF1A1, HLA-DPB1, B2M, HLA-B, PSAP 
##     HLA-A, HLA-DRB5, ACTG1, CPVL, FCN1, LYZ, RPL26, HLA-DMA, RPL10A, HLA-C 
##     SLC25A6, LGALS3, RPS5, RPS6, EEF2, SLC25A5, RPL4, GAPDH, AP1S2, ANXA2 
## PC_ 2 
## Positive:  S100A9, GAPDH, IL1R2, S100A8, S100A12, AREG, HMGB2, SLC25A6, LGALS1, RPL5 
##     RPLP0, ACTG1, CD163, RPS2, EEF1A1, SLC25A5, LYZ, SAP30, RPS6, CEBPD 
##     RPL10A, RPS5, PLBD1, RPSA, EEF1B2, PKM, CTSD, RNASE2, ADORA3, THBS1 
## Negative:  MTRNR2L12, CD300E, PHACTR1, CD83, MT-ND4L, MT-CYB, IRF1, DUSP2, IL1B, WARS 
##     LINC-PINT, PARP14, INSIG1, BHLHE40, RASGEF1B, MT-ND6, PDE4B, ZNF331, DSE, HLA-DPB1 
##     ATF3, RALGAPA1, STAT1, HLA-DQA1, NR4A1, BTG2, IFRD1, NFKBIZ, FOSB, TSC22D2 
## PC_ 3 
## Positive:  IFITM1, IFI6, ISG15, IFI27, IFITM3, LY6E, PLAC8, IFI44L, MX1, SERPING1 
##     CLU, TNFSF10, MT2A, SIGLEC1, IFIT3, XAF1, OAS3, OAS1, IFITM2, TYMP 
##     VAMP5, RSAD2, IFI35, PSMB9, LGALS3BP, GBP1, EPSTI1, IFIT1, MNDA, TNFSF13B 
## Negative:  IER3, LMNA, FOSB, THBS1, RGCC, IL1B, GABARAPL1, RASGEF1B, GPR183, TIPARP 
##     CDKN1A, NR4A2, PDE4B, NLRP3, PHLDA1, VIM, BTG1, ZNF331, AREG, PPIF 
##     CXCR4, EREG, TNFAIP3, EEF1A1, FCAR, INSIG1, KDM6B, NFKBIA, PNRC1, PNP 
## PC_ 4 
## Positive:  MTSS1, VSIG4, THBS1, CD163, KLF9, ADAMTS2, IL1R2, MIR181A1HG, FKBP5, ADORA3 
##     AREG, MT-ND4L, MT-ATP8, C1QA, PDK4, DAAM2, MTRNR2L8, TXNIP, MT-CYB, DDIT4 
##     MTRNR2L12, IFITM3, TPST1, RGL1, TCN2, RNASE1, ZBTB16, EPSTI1, FMN1, MAFB 
## Negative:  IL8, S100A8, S100A12, G0S2, PLAUR, NFKBIA, S100A9, IL1B, IER2, FOSB 
##     BCL2A1, STXBP2, TREM1, NFKBIZ, TUBA1A, LYZ, RGS2, SOD2, JUN, ANXA1 
##     VIM, MNDA, RETN, PNRC1, PTGS2, PROK2, SLC2A3, PADI4, GAPDH, AP1S2 
## PC_ 5 
## Positive:  ASPH, RGCC, S100A12, IFNGR2, MAFB, S100A9, S100A8, DUSP6, CLU, CD82 
##     SLC25A37, SERPINB2, CCR1, PHLDA1, IL8, TNFRSF12A, IFI6, TRMT6, TPST1, IFITM1 
##     IFI27, OASL, MAP3K8, BCL2A1, CCL2, IER3, AQP9, HIF1A, IFITM2, IFI44L 
## Negative:  HLA-DPB1, HLA-DPA1, HLA-DQA1, HLA-DQB1, HLA-DRB5, CD74, HLA-DRA, HLA-DRB1, CD4, LGALS2 
##     ZDHHC1, CSF1R, EEF1A1, FGL2, AP1S2, HLA-DQA2, ITGA4, CLEC10A, CPVL, HLA-DMB 
##     TPPP3, HLA-DMA, PEA15, POU2F2, NR4A1, UCP2, HSPA8, CAMK1, EIF3L, EEF1B2

Mono_Classical <- RunUMAP(Mono_Classical, dims = 1:15)

## 16:44:15 UMAP embedding parameters a = 0.9922 b = 1.112

## 16:44:15 Read 78908 rows and found 15 numeric columns

## 16:44:15 Using Annoy for neighbor search, n_neighbors = 30

## 16:44:15 Building Annoy index with metric = cosine, n_trees = 50

## 0%   10   20   30   40   50   60   70   80   90   100%

## [----|----|----|----|----|----|----|----|----|----|

## **************************************************|
## 16:44:26 Writing NN index file to temp file /tmp/Rtmp9V6aqj/file44aaf4de91e
## 16:44:26 Searching Annoy index using 1 thread, search_k = 3000
## 16:45:00 Annoy recall = 100%
## 16:45:01 Commencing smooth kNN distance calibration using 1 thread
## 16:45:05 Initializing from normalized Laplacian + noise
## 16:45:09 Commencing optimization for 200 epochs, with 3352896 positive edges
## 16:46:47 Optimization finished

MonoUMAP.Class <- DimPlot(Mono_Classical, group.by = "Class", cols = c("#00BA38", "#F8766D"))
MonoUMAP.Class <- AugmentPlot(MonoUMAP.Class, width = 6, height = 6)
MonoUMAP.Class

Mono_Classical population UMAP for Figure 2E, colored by DSM (labelled PC1 here) class

MonoUMAP.DSM_cat <- DimPlot(subset(Mono_Classical, PC1_cat %in% c("PC1_low","PC1_high")), group.by = "PC1_cat",cols = c("#619CFF", "#F8766D"))
MonoUMAP.DSM_cat <- AugmentPlot(MonoUMAP.DSM_cat, width = 6, height = 6)
MonoUMAP.DSM_cat

Mono_Classical population UMAP for Figure 2F, colored by days since symptom onset

MonoUMAP.dayssinceonset <- FeaturePlot(subset(Mono_Classical, PC1_cat %in% c("PC1_low","PC1_high") & days_since_onset != "NA"), features = "days_since_onset")
MonoUMAP.dayssinceonset <- AugmentPlot(MonoUMAP.dayssinceonset, width = 6, height = 6)
MonoUMAP.dayssinceonset

Mono_Classical population UMAP for Figure 2G, CD163-high monocytes in black

MonoUMAP.CD163 <- DimPlot(subset(Mono_Classical, WCTmergedcelltype %in% c("Mono_Classical","Mono_Classical_CD163hi")), group.by = "WCTmergedcelltype", cols = c("grey","black"))
MonoUMAP.CD163 <- AugmentPlot(MonoUMAP.CD163, width = 6, height = 6)
MonoUMAP.CD163

Mono_Classical population UMAP for Figure 2G (alternate), CD163 protein expression

MonoUMAP.CD163x <- FeaturePlot(subset(Mono_Classical, WCTmergedcelltype %in% c("Mono_Classical","Mono_Classical_CD163hi")), features = "cite_CD163",max.cutoff = 7.5 )
MonoUMAP.CD163x <- AugmentPlot(MonoUMAP.CD163x, width = 6, height = 6)
MonoUMAP.CD163x

Mono_Classical population UMAP for Figure 2H, HLA-DR protein expression

MonoUMAP.HLADRx <- FeaturePlot(subset(Mono_Classical, WCTmergedcelltype %in% c("Mono_Classical","Mono_Classical_CD163hi")), features = "cite_HLA-DR",max.cutoff = 7.5)
MonoUMAP.HLADRx <- AugmentPlot(MonoUMAP.HLADRx, width = 6, height = 6)
MonoUMAP.HLADRx

Mono_Classical population UMAP for Figure 2I, colored by Donor

MonoUMAP.Donor <- DimPlot(Mono_Classical, group.by = "Donor") + NoLegend()
MonoUMAP.Donor <- AugmentPlot(MonoUMAP.Donor, width = 6, height = 6)
MonoUMAP.Donor

sI <- sessionInfo()
utils:::print.sessionInfo(sI[-c(10,11)])