Load seurat object from GEO and make heatmap of average protein expression for Figure 2A:
#this is tested using R 3.6.1 on a high-performance comupting node with 8 cores and at least 320 gb or ram.
library("Seurat") #load Seurat 3.1
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("matrixStats")
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
library('tidyverse')
## Registered S3 method overwritten by 'rvest':
## method from
## read_xml.response xml2
## Registered S3 method overwritten by 'cli':
## method from
## print.boxx spatstat
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.3.2 ✔ readr 1.3.1
## ✔ tibble 2.1.1 ✔ purrr 0.3.2
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ ggplot2 3.3.2 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ matrixStats::count() masks dplyr::count()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library('parallelDist')
library("pheatmap")
library("viridis")
## Loading required package: viridisLite
library("ggridges")
##
## Attaching package: 'ggridges'
## The following object is masked from 'package:ggplot2':
##
## scale_discrete_manual
library("magrittr")
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library("genefilter")
##
## Attaching package: 'genefilter'
## The following object is masked from 'package:readr':
##
## spec
## The following objects are masked from 'package:matrixStats':
##
## rowSds, rowVars
library("DescTools")
## Registered S3 method overwritten by 'DescTools':
## method from
## reorder.factor gdata
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:genefilter':
##
## AUC
library("scico")
library("grid")
## import the seurat object downloaded from GEO
merge.SNG = readRDS("SeuratObjects/AllBatches_SeuratObj.rds")
##remove the CHI014 technical control sample
merge.SNG <- subset(merge.SNG, Donor == "CHI014", invert = TRUE)
## make a object of the cluster averages for heatmaps
Idents(merge.SNG) <- "WCTcoursecelltype"
source("utilityFunctions/AverageExpression_MeanOnly.r")
environment(AverageExpression_MeanOnly) <- asNamespace('Seurat') # need this to allow the function to call hidden seurat package functions
merge.SNG.Aver = AverageExpression_MeanOnly(merge.SNG, assays = "limmaCITE")
## Finished averaging limmaCITE for cluster Mono_NonClassical
## Finished averaging limmaCITE for cluster B_Naive
## Finished averaging limmaCITE for cluster Mono_Classical
## Finished averaging limmaCITE for cluster NK_CD16hi
## Finished averaging limmaCITE for cluster B_Mem
## Finished averaging limmaCITE for cluster gammadeltaT
## Finished averaging limmaCITE for cluster NK_CD56loCD16lo
## Finished averaging limmaCITE for cluster PB_Plasmablasts
## Finished averaging limmaCITE for cluster gated_out
## Finished averaging limmaCITE for cluster Treg
## Finished averaging limmaCITE for cluster CD4_Mem
## Finished averaging limmaCITE for cluster CD8_Mem
## Finished averaging limmaCITE for cluster CD8_Naive
## Finished averaging limmaCITE for cluster CD4_Naive
## Finished averaging limmaCITE for cluster RBC
## Finished averaging limmaCITE for cluster MAIT
## Finished averaging limmaCITE for cluster pDC
## Finished averaging limmaCITE for cluster Dblt
## Finished averaging limmaCITE for cluster cDC
## Finished averaging limmaCITE for cluster Unknown
## Finished averaging limmaCITE for cluster TissueResMemT
## Finished averaging limmaCITE for cluster Platelets
## Finished averaging limmaCITE for cluster NK_CD56hiCD16lo
## Finished averaging limmaCITE for cluster DPT
## Finished averaging limmaCITE for cluster DNT
## Finished averaging limmaCITE for cluster Granulocytes
## Finished averaging limmaCITE for cluster Tcell
## Finished averaging limmaCITE for cluster dim
## Finished averaging limmaCITE for cluster TCRVbeta13.1pos
## Finished averaging limmaCITE for cluster Mono_Intermediate
quantile_breaks <- function(xs, n = 100) {
breaks <- quantile(xs, probs = seq(0.5, 1, length.out = n))
breaks[!duplicated(breaks)]
}
f2 <- pOverA(0.01, 3)
ffun2 <- filterfun(f2)
merge.SNGADTfilt = genefilter(GetAssayData(merge.SNG[["limmaCITE"]], slot = "data"), ffun2)
mat_breaks <- quantile_breaks(as.matrix(merge.SNG.Aver$limmaCITE[which(merge.SNGADTfilt),]), n = 101)
pheatmap(merge.SNG.Aver$limmaCITE[which(merge.SNGADTfilt),which(!(colnames(merge.SNG.Aver$limmaCITE) %in% c("Dblt","Unknown","gated_out","dim","Tcell","dblt")))], scale = "none", border_color=NA, viridis(n=100), breaks = mat_breaks)
Innate population UMAP for Figure 2B
innate.merge.SNG <- subset(merge.SNG, subset = WCTcoursecelltype %in%
c("Mono_Classical", "Mono_NonClassical", "NK_CD16hi",
"NK_CD56loCD16lo", "pDC", "cDC", "Platelets", "NK_CD56hiCD16lo",
"Mono_Intermediate", "Granulocytes"))
innate.merge.SNGADTfilt = genefilter(GetAssayData(innate.merge.SNG[["limmaCITE"]], slot = "data"), ffun2)
innate.merge.SNG <- RunUMAP(innate.merge.SNG, assay = "limmaCITE", features = rownames(innate.merge.SNG[["limmaCITE"]][which(innate.merge.SNGADTfilt),]),
n.neighbors = 15, min.dist = 0.01, spread = 5)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:30:24 UMAP embedding parameters a = 0.1502 b = 0.7925
## 16:30:24 Read 125117 rows and found 73 numeric columns
## 16:30:24 Using Annoy for neighbor search, n_neighbors = 15
## 16:30:24 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:30:53 Writing NN index file to temp file /tmp/Rtmp9V6aqj/file44aaba2f29b
## 16:30:53 Searching Annoy index using 1 thread, search_k = 1500
## 16:32:06 Annoy recall = 100%
## 16:32:07 Commencing smooth kNN distance calibration using 1 thread
## 16:32:12 Initializing from normalized Laplacian + noise
## 16:32:20 Commencing optimization for 200 epochs, with 3033514 positive edges
## 16:34:35 Optimization finished
innate.p <- DimPlot(innate.merge.SNG,
group.by = "WCTcoursecelltype", label = TRUE) + NoLegend()
innate.p <- AugmentPlot(innate.p, width = 6, height = 6)
innate.p
Adaptive population UMAP for Figure 2B
adaptive.merge.SNG <- subset(merge.SNG, subset = WCTcoursecelltype %in%
c("B_Naive", "B_Mem", "gammadeltaT",
"PB_Plasmablasts", "Treg", "CD4_Mem", "CD8_Mem", "CD8_Naive",
"CD4_Naive", "MAIT", "TissueResMemT", "DPT", "DNT", "TCRVbeta13.1pos"))
adaptive.merge.SNGADTfilt = genefilter(GetAssayData(adaptive.merge.SNG[["limmaCITE"]], slot = "data"), ffun2)
adaptive.merge.SNG <- RunUMAP(adaptive.merge.SNG, assay = "limmaCITE", features = rownames(adaptive.merge.SNG[["limmaCITE"]][which(adaptive.merge.SNGADTfilt),]),
n.neighbors = 15, min.dist = 0.01, spread = 5)
## 16:35:09 UMAP embedding parameters a = 0.1502 b = 0.7925
## 16:35:10 Read 246964 rows and found 72 numeric columns
## 16:35:10 Using Annoy for neighbor search, n_neighbors = 15
## 16:35:10 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:36:02 Writing NN index file to temp file /tmp/Rtmp9V6aqj/file44aa2e9df1
## 16:36:02 Searching Annoy index using 1 thread, search_k = 1500
## 16:37:52 Annoy recall = 100%
## 16:37:52 Commencing smooth kNN distance calibration using 1 thread
## 16:38:00 Initializing from normalized Laplacian + noise
## 16:38:25 Commencing optimization for 200 epochs, with 6036274 positive edges
## 16:42:53 Optimization finished
adaptive.p <- DimPlot(adaptive.merge.SNG,
group.by = "WCTcoursecelltype", label = TRUE) + NoLegend()
adaptive.p <- AugmentPlot(adaptive.p, width = 6, height = 6)
adaptive.p
Average protein expression heatmap of selected T cell populations for Figure 2C
Idents(merge.SNG) <- "WCTmergedcelltype"
CD4_Mem.Aver = AverageExpression_MeanOnly(subset(merge.SNG, WCTcoursecelltype == "CD4_Mem"), assay = "limmaCITE")
## Finished averaging limmaCITE for cluster CD4_Mem_TM
## Finished averaging limmaCITE for cluster CD4_Mem_EM.TE
## Finished averaging limmaCITE for cluster CD4_Mem_CM
## Finished averaging limmaCITE for cluster CD4_Mem_TM.integrin.hi
## Finished averaging limmaCITE for cluster CD4_Mem_Activated.CD38.CD278hi
## Finished averaging limmaCITE for cluster CD4_Mem_KLRG1pos
## Finished averaging limmaCITE for cluster CD4_Mem_CD101pos
## Finished averaging limmaCITE for cluster CD4_Mem_IgG.IsoBinding
## Finished averaging limmaCITE for cluster CD4_Mem_CD4.KLRG1pos.NotCD8
## Finished averaging limmaCITE for cluster CD4_Mem_integrin.hi
## Finished averaging limmaCITE for cluster CD4_Mem_Activated.HLADRhi
## Finished averaging limmaCITE for cluster CD4_Mem_CD146hi
## Finished averaging limmaCITE for cluster CD4_Mem_CD41hi
## Finished averaging limmaCITE for cluster CD4_Mem_MAIT
## Finished averaging limmaCITE for cluster CD4_Mem_CD22hi
## Finished averaging limmaCITE for cluster CD4_Mem_CD69pos
f1 <- pOverA(0.01, 2)
ffun <- filterfun(f1)
CD4_Mem.ADTfilt = genefilter(CD4_Mem.Aver$limmaCITE[,c("CD4_Mem_TM","CD4_Mem_EM.TE","CD4_Mem_CM")], ffun)
#mat_breaks <- quantile_breaks(as.matrix(CD4_Mem.Aver$limmaCITE[which(CD4_Mem.ADTfilt),]), n = 101)
mat_breaks <- quantile_breaks(as.matrix(merge.SNG.Aver$limmaCITE[which(merge.SNGADTfilt),]), n = 101)
pheatmap(CD4_Mem.Aver$limmaCITE[which(CD4_Mem.ADTfilt),c("CD4_Mem_TM","CD4_Mem_EM.TE","CD4_Mem_CM")], scale = "none", border_color=NA, viridis(n=100), breaks = mat_breaks)
Mono_Classical population UMAP for Figure 2D, colored by COVID or healthy control
Mono_Classical <- subset(merge.SNG, subset = WCTcoursecelltype %in% c( "Mono_Classical"))
Mono_Classical <- FindVariableFeatures(Mono_Classical, assay = "RNA")
Mono_Classical <- ScaleData(Mono_Classical)
## Centering and scaling data matrix
Mono_Classical <- RunPCA(Mono_Classical)
## PC_ 1
## Positive: MT-CYB, MT-ND4L, MTRNR2L12, IL1R2, MT-ATP8, MT-ND6, HBB, MTRNR2L8, AREG, ZBTB16
## DAAM2, THBS1, KCNE1, FMN1, GNLY, MIR181A1HG, RALGAPA1, HMGB2, MCTP2, COQ7
## CCL5, IL32, HSPA1B, RP11-701P16.2, CST7, TSPYL2, RNF144B, PAG1, SLC25A37, PTPRCAP
## Negative: HLA-DRA, CD74, HLA-DRB1, SERPINA1, HLA-DPA1, EEF1A1, HLA-DPB1, B2M, HLA-B, PSAP
## HLA-A, HLA-DRB5, ACTG1, CPVL, FCN1, LYZ, RPL26, HLA-DMA, RPL10A, HLA-C
## SLC25A6, LGALS3, RPS5, RPS6, EEF2, SLC25A5, RPL4, GAPDH, AP1S2, ANXA2
## PC_ 2
## Positive: S100A9, GAPDH, IL1R2, S100A8, S100A12, AREG, HMGB2, SLC25A6, LGALS1, RPL5
## RPLP0, ACTG1, CD163, RPS2, EEF1A1, SLC25A5, LYZ, SAP30, RPS6, CEBPD
## RPL10A, RPS5, PLBD1, RPSA, EEF1B2, PKM, CTSD, RNASE2, ADORA3, THBS1
## Negative: MTRNR2L12, CD300E, PHACTR1, CD83, MT-ND4L, MT-CYB, IRF1, DUSP2, IL1B, WARS
## LINC-PINT, PARP14, INSIG1, BHLHE40, RASGEF1B, MT-ND6, PDE4B, ZNF331, DSE, HLA-DPB1
## ATF3, RALGAPA1, STAT1, HLA-DQA1, NR4A1, BTG2, IFRD1, NFKBIZ, FOSB, TSC22D2
## PC_ 3
## Positive: IFITM1, IFI6, ISG15, IFI27, IFITM3, LY6E, PLAC8, IFI44L, MX1, SERPING1
## CLU, TNFSF10, MT2A, SIGLEC1, IFIT3, XAF1, OAS3, OAS1, IFITM2, TYMP
## VAMP5, RSAD2, IFI35, PSMB9, LGALS3BP, GBP1, EPSTI1, IFIT1, MNDA, TNFSF13B
## Negative: IER3, LMNA, FOSB, THBS1, RGCC, IL1B, GABARAPL1, RASGEF1B, GPR183, TIPARP
## CDKN1A, NR4A2, PDE4B, NLRP3, PHLDA1, VIM, BTG1, ZNF331, AREG, PPIF
## CXCR4, EREG, TNFAIP3, EEF1A1, FCAR, INSIG1, KDM6B, NFKBIA, PNRC1, PNP
## PC_ 4
## Positive: MTSS1, VSIG4, THBS1, CD163, KLF9, ADAMTS2, IL1R2, MIR181A1HG, FKBP5, ADORA3
## AREG, MT-ND4L, MT-ATP8, C1QA, PDK4, DAAM2, MTRNR2L8, TXNIP, MT-CYB, DDIT4
## MTRNR2L12, IFITM3, TPST1, RGL1, TCN2, RNASE1, ZBTB16, EPSTI1, FMN1, MAFB
## Negative: IL8, S100A8, S100A12, G0S2, PLAUR, NFKBIA, S100A9, IL1B, IER2, FOSB
## BCL2A1, STXBP2, TREM1, NFKBIZ, TUBA1A, LYZ, RGS2, SOD2, JUN, ANXA1
## VIM, MNDA, RETN, PNRC1, PTGS2, PROK2, SLC2A3, PADI4, GAPDH, AP1S2
## PC_ 5
## Positive: ASPH, RGCC, S100A12, IFNGR2, MAFB, S100A9, S100A8, DUSP6, CLU, CD82
## SLC25A37, SERPINB2, CCR1, PHLDA1, IL8, TNFRSF12A, IFI6, TRMT6, TPST1, IFITM1
## IFI27, OASL, MAP3K8, BCL2A1, CCL2, IER3, AQP9, HIF1A, IFITM2, IFI44L
## Negative: HLA-DPB1, HLA-DPA1, HLA-DQA1, HLA-DQB1, HLA-DRB5, CD74, HLA-DRA, HLA-DRB1, CD4, LGALS2
## ZDHHC1, CSF1R, EEF1A1, FGL2, AP1S2, HLA-DQA2, ITGA4, CLEC10A, CPVL, HLA-DMB
## TPPP3, HLA-DMA, PEA15, POU2F2, NR4A1, UCP2, HSPA8, CAMK1, EIF3L, EEF1B2
Mono_Classical <- RunUMAP(Mono_Classical, dims = 1:15)
## 16:44:15 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:44:15 Read 78908 rows and found 15 numeric columns
## 16:44:15 Using Annoy for neighbor search, n_neighbors = 30
## 16:44:15 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:44:26 Writing NN index file to temp file /tmp/Rtmp9V6aqj/file44aaf4de91e
## 16:44:26 Searching Annoy index using 1 thread, search_k = 3000
## 16:45:00 Annoy recall = 100%
## 16:45:01 Commencing smooth kNN distance calibration using 1 thread
## 16:45:05 Initializing from normalized Laplacian + noise
## 16:45:09 Commencing optimization for 200 epochs, with 3352896 positive edges
## 16:46:47 Optimization finished
MonoUMAP.Class <- DimPlot(Mono_Classical, group.by = "Class", cols = c("#00BA38", "#F8766D"))
MonoUMAP.Class <- AugmentPlot(MonoUMAP.Class, width = 6, height = 6)
MonoUMAP.Class
Mono_Classical population UMAP for Figure 2E, colored by DSM (labelled PC1 here) class
MonoUMAP.DSM_cat <- DimPlot(subset(Mono_Classical, PC1_cat %in% c("PC1_low","PC1_high")), group.by = "PC1_cat",cols = c("#619CFF", "#F8766D"))
MonoUMAP.DSM_cat <- AugmentPlot(MonoUMAP.DSM_cat, width = 6, height = 6)
MonoUMAP.DSM_cat
Mono_Classical population UMAP for Figure 2F, colored by days since symptom onset
MonoUMAP.dayssinceonset <- FeaturePlot(subset(Mono_Classical, PC1_cat %in% c("PC1_low","PC1_high") & days_since_onset != "NA"), features = "days_since_onset")
MonoUMAP.dayssinceonset <- AugmentPlot(MonoUMAP.dayssinceonset, width = 6, height = 6)
MonoUMAP.dayssinceonset
Mono_Classical population UMAP for Figure 2G, CD163-high monocytes in black
MonoUMAP.CD163 <- DimPlot(subset(Mono_Classical, WCTmergedcelltype %in% c("Mono_Classical","Mono_Classical_CD163hi")), group.by = "WCTmergedcelltype", cols = c("grey","black"))
MonoUMAP.CD163 <- AugmentPlot(MonoUMAP.CD163, width = 6, height = 6)
MonoUMAP.CD163
Mono_Classical population UMAP for Figure 2G (alternate), CD163 protein expression
MonoUMAP.CD163x <- FeaturePlot(subset(Mono_Classical, WCTmergedcelltype %in% c("Mono_Classical","Mono_Classical_CD163hi")), features = "cite_CD163",max.cutoff = 7.5 )
MonoUMAP.CD163x <- AugmentPlot(MonoUMAP.CD163x, width = 6, height = 6)
MonoUMAP.CD163x
Mono_Classical population UMAP for Figure 2H, HLA-DR protein expression
MonoUMAP.HLADRx <- FeaturePlot(subset(Mono_Classical, WCTmergedcelltype %in% c("Mono_Classical","Mono_Classical_CD163hi")), features = "cite_HLA-DR",max.cutoff = 7.5)
MonoUMAP.HLADRx <- AugmentPlot(MonoUMAP.HLADRx, width = 6, height = 6)
MonoUMAP.HLADRx
Mono_Classical population UMAP for Figure 2I, colored by Donor
MonoUMAP.Donor <- DimPlot(Mono_Classical, group.by = "Donor") + NoLegend()
MonoUMAP.Donor <- AugmentPlot(MonoUMAP.Donor, width = 6, height = 6)
MonoUMAP.Donor
sI <- sessionInfo()
utils:::print.sessionInfo(sI[-c(10,11)])