Code to reproduce manuscript results &
figures
Matt Mulè
email: mpm63 AT cantab.ac.uk
Data associated with this manuscript used in analysis workflow below
are stored at the following repository:
10.5281/zenodo.10546916
The data repository includes CITE-seq data in additional formats for
reuse with other packages.
flu_vacc_CITEseq_Seurat4.rds
- a Seurat version 4 object
(separate assays for RNA and ADT).
flu_vacc_CITEseq_combinedassay.h5ad
- an Anndata object for
analysis in python / scanpy. (RNA + ADT
combined in single matrix).
Many scripts in this workflow require the scglmmr package associated
with this manuscript. Separate stand alone R functions used in package
can also be found in functions/scglmmr_functions/
and
sourced at the beginning of a script.
The data downloaded from the Zenodo repository should be added to a
data/ directory in the project root folder. The analysis can without
specifying any file paths. Each R script is self-contained, reading data
from the /data folder and writing to figures or results files within
each analysis subdirectory relative to the root directory using the R
package here
. Unless otherwise noted, scripts were run with
R 4.0.5. Package versions are listed in the table.
non CITEseq data used in the analysis is saved in the following
directories:
data
–vand
–CHI_H1N1_data
–full_metadata
–stim
Annotated CITE-seq data in a Seurat object is saved in the project
directory as h1h5_annotated_with_meta.rds
This section is run with R 3.5.1
Calculate and visualize distribution of individuals across protein based
clusters.
Biaxial plots of unsupervised cluster dsb normalized protein
expression.
mid_res/sample_and_protein_distributions/1.sample.lineage.proteinbiaxial.r
## Must be run in R 3.5.1
# umap of joint clustering results
suppressMessages(library(Seurat))
suppressMessages(library(tidyverse))
suppressMessages(library(here))
source("functions/analysis_functions.R")
# Set path
= here("mid_res/sample_and_protein_distributions/figures/");
figpath dir.create(figpath, recursive = TRUE)
# full sample bar plot
= readRDS(file = here("data/h1h5_annotated_with_meta.rds"))@meta.data
md = md$celltype_joint %>% unique() %>% sort()
celltypes = celltypes[c(7:12)]
t4 = celltypes[c(13:16)]
t8 = celltypes[c(4:5, 18, 19, 21, 23)]
myeloid = celltypes[c(1,2,6)]
bc = celltypes[c(22)]
nk = celltypes[c(3,20)]
unconventionalT
= md %>% mutate(lineage =
md if_else(celltype_joint %in% t4, "CD4 T Cell",
if_else(celltype_joint %in% t8, "CD8 T cell",
if_else(celltype_joint %in% myeloid, "Myeloid",
if_else(celltype_joint %in% bc, "B cell",
if_else(celltype_joint %in% nk, "NK cell",
if_else(celltype_joint %in% unconventionalT, "unconventional T",
false = "other")))))))
= md %>% filter(!celltype_joint %in% "DOUBLET")
md2
# calc and vis fraction of total
= md2 %>% group_by(lineage, sample) %>% tally
d
=
p ggplot(d, aes(x = sample, y = n, fill = lineage)) +
geom_bar(position = 'fill', stat = 'identity', show.legend = TRUE) +
::scale_fill_jama() +
ggsciylab("percent of total") +
theme(axis.text.x=element_text(angle = -90, hjust = 0)) +
theme(axis.text.x = element_text(size =6)) +
theme(axis.title.x = element_blank())
ggsave(p, filename = paste0(figpath, "LINEAGE_fullsamplebarplot.pdf"), width = 9, height = 4)
#### #Manual gate plots
= paste0(figpath,"mgplots/") ; dir.create(figpath)
figpath
# Day 1 cohort CD14 monocyte data
= as.data.frame(t(readRDS(file = here("h1h5_annotated.rds"))@assay$CITE@data))
cite colnames(cite) = str_sub(colnames(cite), 1, -6)
= cbind(md,cite)
mdf = mdf %>% filter(cohort == "H1N1")
h1md
# match colors in umap
= readRDS(here("data/celltypes_vector_ordered.rds"))
celltypes = pals::kelly(n = 22) %>% as.vector()
cu = cu[-1]
cu = c("midnightblue", cu, "lightslateblue")
cu 15] = "maroon1"
cu[11] = "darkseagreen1"
cu[= cu[-1]
cu = rev(cu)
cu $celltype_joint = factor(h1md$celltype_joint, levels = celltypes)
h1md
###################
# B cells
=
p ggplot(h1md %>% filter(lineage %in% "B cell"), aes(x = CD27, CD38, color = celltype_joint)) +
theme_bw(base_size = 12) +
scale_color_manual(values = c("lightslateblue","#2B3D26", "#882D17")) +
geom_density_2d() +
labs(color="celltype") +
theme(axis.title.x = element_text(size = 16), axis.title.y = element_text(size = 16)) +
theme(legend.position = c(0.85, 0.4)) +
theme(legend.key.size = unit(0.35, units='cm'))
ggsave(p, filename = paste0(figpath,'Bcell.pdf'), width = 3.5, height = 3)
=
p ggplot(h1md %>% filter(lineage %in% "B cell"), aes(x = CD27, CD38)) +
theme_bw(base_size = 12) +
geom_bin2d(bins = 200) +
scale_fill_viridis(option = "B") +
theme(axis.title.x = element_text(size = 16), axis.title.y = element_text(size = 16)) +
theme(legend.position = c(0.85, 0.4)) +
theme(legend.key.size = unit(0.45, units='cm'))
ggsave(p, filename = paste0(figpath,'Bcell_density.pdf'), width = 3.5, height = 3)
###################
### Myeloid Cells
=
p ggplot(h1md %>% filter(lineage %in% "Myeloid") %>% filter(!celltype_joint %in% 'IgA_CD14_Mono'),
aes(x = CD16, CD303, color = celltype_joint)) +
theme_bw(base_size = 12) +
scale_color_manual(values = c("#654522", "#8DB600", "#BE0032","#875692","#222222"))+
geom_density_2d() +
labs(color="celltype") +
theme(axis.title.x = element_text(size = 16), axis.title.y = element_text(size = 16)) +
theme(legend.position = c(0.75, 0.6))
pggsave(p, filename = paste0(figpath,'myeloid1.pdf'), width = 3.5, height = 3)
=
p ggplot(h1md %>% filter(lineage %in% "Myeloid") %>% filter(!celltype_joint %in% 'IgA_CD14_Mono'),
aes(x = CD16, CD303)) +
theme_bw(base_size = 12) +
geom_bin2d(bins = 200) +
scale_fill_viridis(option = "B") +
theme(axis.title.x = element_text(size = 16), axis.title.y = element_text(size = 16)) +
theme(legend.position = c(0.75, 0.6)) +
theme(legend.key.size = unit(0.45, units = 'cm'))
pggsave(p, filename = paste0(figpath,'myeloid1_density.pdf'), width = 3.5, height = 3)
# prots 2
=
p ggplot(h1md %>% filter(lineage %in% "Myeloid") %>% filter(!celltype_joint %in% 'IgA_CD14_Mono'),
aes(x = CD16, CD14, color = celltype_joint)) +
theme_bw(base_size = 12) +
scale_color_manual(values = c("#654522", "#8DB600", "#BE0032","#875692","#222222"))+
geom_density_2d() +
labs(color="celltype") +
theme(axis.title.x = element_text(size = 16), axis.title.y = element_text(size = 16)) +
theme(legend.position = c(0.8, 0.75)) +
theme(legend.key.size = unit(0.45, units = 'cm'))
pggsave(p, filename = paste0(figpath,'myeloid2.pdf'), width = 3.5, height = 3)
#
# ### T clels
= ggplot(h1md %>% filter(lineage %in% "CD8 T cell"), aes(x = CD161, CD45RO, color = celltype_joint)) +
p theme_bw(base_size = 12) +
::scale_color_jco() +
ggscigeom_density_2d() +
labs(color="celltype") +
theme(axis.title.x = element_text(size = 16), axis.title.y = element_text(size = 16))
pggsave(p, filename = paste0(figpath,'cd8Tcell.pdf'), width = 5, height = 3)
# R version 3.5.3 Patched (2019-03-11 r77192)
# attached base packages:
# [1] stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] viridis_0.5.1 viridisLite_0.3.0 here_0.1 forcats_0.4.0 stringr_1.4.0 dplyr_0.8.5 purrr_0.3.3 readr_1.3.1 tidyr_1.0.2 tibble_2.1.1 tidyverse_1.2.1
# [12] Seurat_2.3.4 Matrix_1.2-15 cowplot_0.9.4 ggplot2_3.1.1
#
# loaded via a namespace (and not attached):
# [1] readxl_1.3.1 snow_0.4-3 backports_1.1.4 Hmisc_4.2-0 plyr_1.8.4 igraph_1.2.4.1 lazyeval_0.2.2 splines_3.5.3
# [9] crosstalk_1.0.0 digest_0.6.25 foreach_1.4.4 htmltools_0.3.6 lars_1.2 gdata_2.18.0 magrittr_2.0.1 checkmate_1.9.3
# [17] cluster_2.0.7-1 mixtools_1.1.0 ROCR_1.0-7 modelr_0.1.4 R.utils_2.8.0 colorspace_1.4-1 rvest_0.3.4 haven_2.1.0
# [25] xfun_0.7 crayon_1.3.4 jsonlite_1.6 survival_2.43-3 zoo_1.8-6 iterators_1.0.10 ape_5.3 glue_1.3.1
# [33] pals_1.5 gtable_0.3.0 webshot_0.5.1 kernlab_0.9-27 prabclus_2.3-1 DEoptimR_1.0-8 maps_3.3.0 scales_1.0.0
# [41] mvtnorm_1.0-10 bibtex_0.4.2 miniUI_0.1.1.1 Rcpp_1.0.1 metap_1.1 dtw_1.20-1 xtable_1.8-4 htmlTable_1.13.1
# [49] reticulate_1.12 foreign_0.8-71 bit_1.1-14 mapproj_1.2.6 proxy_0.4-23 mclust_5.4.5 SDMTools_1.1-221.1 Formula_1.2-3
# [57] stats4_3.5.3 tsne_0.1-3 htmlwidgets_1.3 httr_1.4.0 gplots_3.0.1.1 RColorBrewer_1.1-2 fpc_2.2-1 acepack_1.4.1
# [65] modeltools_0.2-22 ica_1.0-2 pkgconfig_2.0.2 R.methodsS3_1.7.1 flexmix_2.3-15 nnet_7.3-12 manipulateWidget_0.10.0 tidyselect_0.2.5
# [73] labeling_0.3 rlang_0.4.5 reshape2_1.4.3 later_0.8.0 munsell_0.5.0 cellranger_1.1.0 tools_3.5.3 cli_1.1.0
# [81] generics_0.0.2 broom_0.5.2 ggridges_0.5.1 npsurv_0.4-0 knitr_1.23 bit64_0.9-7 fitdistrplus_1.0-14 robustbase_0.93-5
# [89] rgl_0.100.30 caTools_1.17.1.2 RANN_2.6.1 packrat_0.5.0 pbapply_1.4-0 nlme_3.1-137 mime_0.6 R.oo_1.22.0
# [97] xml2_1.2.0 hdf5r_1.2.0 compiler_3.5.3 rstudioapi_0.10 png_0.1-7 lsei_1.2-0 stringi_1.4.3 lattice_0.20-38
# [105] ggsci_2.9 vctrs_0.2.4 pillar_1.4.1 lifecycle_0.1.0 Rdpack_0.11-0 lmtest_0.9-37 data.table_1.12.2 bitops_1.0-6
# [113] irlba_2.3.3 gbRd_0.4-11 httpuv_1.5.1 R6_2.4.0 latticeExtra_0.6-28 promises_1.0.1 KernSmooth_2.23-15 gridExtra_2.3
# [121] codetools_0.2-16 dichromat_2.0-0 MASS_7.3-51.1 gtools_3.8.1 assertthat_0.2.1 rprojroot_1.3-2 withr_2.1.2 diptest_0.75-7
# [129] parallel_3.5.3 doSNOW_1.0.16 hms_0.4.2 grid_3.5.3 rpart_4.1-13 class_7.3-15 segmented_0.5-4.0 Rtsne_0.15
# [137] shiny_1.3.2 lubridate_1.7.4 base64enc_0.1-3
Calculate and visualize distribution of immune cell frequency across individuals.
mid_res/sample_and_protein_distributions/2.cell.frequency.map.r
# this script is run in R 4.0.5
# sample barplot
suppressMessages(library(Seurat))
suppressMessages(library(tidyverse))
suppressMessages(library(here))
= here("mid_res/sample_and_protein_distributions/figures/")
figpath
= readRDS(file = "data/h1h5_annotated_with_meta.rds")
s
= s@meta.data %>%
freq_plot select(celltype_joint, sample) %>%
group_by(celltype_joint, sample) %>%
summarise(n = n()) %>%
group_by(sample) %>%
mutate(log_cell = log10(n)) %>%
select(sample, celltype_joint, log_cell) %>%
spread(celltype_joint, log_cell) %>%
mutate(sample = if_else(str_sub(sample, 1, 2) == 'H5',
true = str_sub(sample, -6, -1),
false = sample)) %>%
column_to_rownames("sample") %>%
t()
= read_delim(file = "data/full_metadata/full_sample_metadata.txt", delim = "\t")
annotation
= s@meta.data %>%
md select(sample) %>%
group_by(sample) %>%
summarise(n_cells = log10(n())) %>%
mutate(subjectID = str_sub(sample, -6, -4)) %>%
mutate(timepoint = str_sub(sample, -2, -1)) %>%
mutate(group = plyr::mapvalues(
x = subjectID,
from = annotation$subjectid,
to = annotation$adjMFC)) %>%
select(sample, group, timepoint) %>%
mutate(group = if_else(str_sub(sample, 1, 2) == 'H5', true = "adjuvant", false = group)) %>%
mutate(sample = if_else(
str_sub(sample, 1, 2) == 'H5',
true = str_sub(sample, -6, -1),
false = sample)) %>%
column_to_rownames("sample")
# quant palette
= list(
mat_colors group = c("grey", "red", "deepskyblue3"),
timepoint = c("grey", "orange", "black")
)names(mat_colors$timepoint) = unique(md$timepoint)
names(mat_colors$group) = unique(md$group)
# plot
rownames(freq_plot) = str_replace_all(rownames(freq_plot),pattern = '_', replacement = ' ')
::pheatmap(freq_plot,
pheatmapannotation_col = md,
display_numbers = FALSE,
color = grDevices::colorRampPalette(colors = c("gray100", "black" ))(100),
cluster_cols = F, border_color = NA,
width = 10, height = 5,treeheight_row = 20,
annotation_colors = mat_colors,
filename = paste0(figpath, 'sample_celltype_map.pdf')
)
sessionInfo()
# R version 4.0.5 Patched (2021-03-31 r80136)
# attached base packages:
# [1] stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] here_1.0.1 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.4 purrr_0.3.4 readr_1.4.0
# [7] tidyr_1.1.2 tibble_3.1.8 ggplot2_3.3.3 tidyverse_1.3.0 sp_1.4-5 SeuratObject_4.1.0
# [13] Seurat_4.0.1
#
# loaded via a namespace (and not attached):
# [1] Rtsne_0.15 colorspace_2.0-0 deldir_1.0-6 ellipsis_0.3.2 ggridges_0.5.3
# [6] rprojroot_2.0.2 fs_1.5.0 rstudioapi_0.13 spatstat.data_2.1-0 farver_2.0.3
# [11] leiden_0.3.7 listenv_0.8.0 ggrepel_0.9.1 lubridate_1.8.0 fansi_0.4.2
# [16] xml2_1.3.2 codetools_0.2-18 splines_4.0.5 polyclip_1.10-0 jsonlite_1.7.2
# [21] packrat_0.7.0 broom_0.7.5 ica_1.0-2 cluster_2.1.2 dbplyr_2.1.0
# [26] png_0.1-7 rgeos_0.5-9 pheatmap_1.0.12 uwot_0.1.10 shiny_1.6.0
# [31] sctransform_0.3.2 spatstat.sparse_2.0-0 compiler_4.0.5 httr_1.4.2 backports_1.2.1
# [36] assertthat_0.2.1 Matrix_1.4-1 fastmap_1.1.0 lazyeval_0.2.2 cli_3.4.1
# [41] later_1.1.0.1 htmltools_0.5.2 tools_4.0.5 igraph_1.2.6 gtable_0.3.0
# [46] glue_1.6.2 RANN_2.6.1 reshape2_1.4.4 Rcpp_1.0.9 scattermore_0.7
# [51] cellranger_1.1.0 vctrs_0.5.1 nlme_3.1-152 progressr_0.10.0 lmtest_0.9-38
# [56] globals_0.14.0 rvest_0.3.6 mime_0.10 miniUI_0.1.1.1 lifecycle_1.0.3
# [61] irlba_2.3.3 goftest_1.2-2 future_1.21.0 MASS_7.3-53.1 zoo_1.8-8
# [66] scales_1.1.1 spatstat.core_2.0-0 hms_1.0.0 promises_1.2.0.1 spatstat.utils_2.3-0
# [71] parallel_4.0.5 RColorBrewer_1.1-2 reticulate_1.18 pbapply_1.4-3 gridExtra_2.3
# [76] rpart_4.1-15 stringi_1.5.3 rlang_1.0.6 pkgconfig_2.0.3 matrixStats_0.58.0
# [81] lattice_0.20-41 ROCR_1.0-11 tensor_1.5 patchwork_1.1.1 htmlwidgets_1.5.3
# [86] cowplot_1.1.1 tidyselect_1.2.0 parallelly_1.23.0 RcppAnnoy_0.0.18 plyr_1.8.6
# [91] magrittr_2.0.3 R6_2.5.0 generics_0.1.2 DBI_1.1.1 withr_2.4.3
# [96] pillar_1.8.1 haven_2.4.3 mgcv_1.8-34 fitdistrplus_1.1-3 survival_3.2-10
# [101] abind_1.4-5 future.apply_1.7.0 crayon_1.4.1 modelr_0.1.8 KernSmooth_2.23-18
# [106] utf8_1.2.2 spatstat.geom_2.4-0 plotly_4.9.3 readxl_1.3.1 grid_4.0.5
# [111] data.table_1.14.0 reprex_1.0.0 digest_0.6.27 xtable_1.8-4 httpuv_1.5.5
# [116] munsell_0.5.0 viridisLite_0.3.0
Clustered protein histogram distribution across cell types
mid_res/histogram_hclust/hclust_histogram_protein.r
# high resolution histogram heatmaps
# script uses R 3.5.1
suppressMessages(library(Seurat))
suppressMessages(library(tidyverse))
suppressMessages(library(magrittr))
suppressMessages(library(ggridges))
suppressMessages(library(ggsci))
suppressMessages(library(viridis))
suppressMessages(library(here))
source(file = "functions/analysis_functions.R")
# save paths
= here("mid_res/histogram_hclust/figures/")
figpath dir.create(figpath)
# Define proteins for hclust and visualization
# T cell markers
= c("CD3_PROT", "CD4_PROT", "CD8_PROT", "CD45RA_PROT", "CD45RO_PROT",
tc_markers "CD161_PROT", "CD127_PROT", "CD57_PROT", "CD27_PROT", "CD62L_PROT",
"KLRG1_PROT", "CD103_PROT", "CD25_PROT", "CD31_PROT")
# B cell markers
= c("CD20_PROT", "CD38_PROT", "IgD_PROT", "CD133_PROT", "IgM_PROT", "CD40_PROT")
bc_markers
# monocyte / dc markers
= c("CD33_PROT", "CD14_PROT", "CD16_PROT", "CD141_PROT", "CD11b_PROT")
mono_markers
# NK markers
= c("CD56_PROT")
nk_markers
# CD markers
= c("CD1c_PROT")
dc_markers
# rare cell markers
= c("CD303_PROT", "CD123_PROT", "CD34_PROT")
rare_markers
# cell activation markers
= c("CD71_PROT", "CD183_PROT", "CD184_PROT", "CD185_PROT", "CD39_PROT",
activation "CD279_PROT", "CD278 _PROT","CD194_PROT", "CD195_PROT", "CD196_PROT",
"CD117_PROT", "CD244_PROT")
= c(tc_markers,
prot_use
bc_markers,
mono_markers,
nk_markers,
dc_markers,
rare_markers,
activation) = str_replace(prot_use, pattern = "_PROT", replacement = "")
prot_use_plot
c("#000000", "#E69F00", "#56B4E9", "#009E73",
"#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# match a color palette to these markers
= rev(c(rep("red3", length(tc_markers)),
my_pal rep("royalblue1", length(bc_markers)),
rep("#009E73", length(mono_markers)),
rep("#0072B2", length(nk_markers)),
rep("#D55E00", length(dc_markers)),
rep("#CC79A7", length(rare_markers)),
rep("black", length(activation))
))
# read in H1 Seurat object
= ReadCohort(joint_object_dir = "data/h1h5_annotated_with_meta.rds", cohort = "H1N1")
h1 = h1 %>% SetAllIdent(id = "celltype_joint") %>% SubsetData(ident.remove = "DOUBLET", subset.raw = TRUE)
h1
# get vector of all clusters
= h1@meta.data$celltype_joint %>% unique
celltypes = SetAllIdent(h1,id = "celltype_joint") %>%
h1 SubsetData(max.cells.per.ident = 1000, random.seed = 1, subset.raw = TRUE)
# convert to tidy ; aggregate as the mean of proteins
= h1@assay$CITE@data %>% t %>% as.data.frame() %>% rownames_to_column("cell")
adt = h1@meta.data %>% select(celltype = celltype_joint)
md = cbind(adt, md)
adt = adt %>%
mean_mtx select(celltype, everything()) %>%
group_by(celltype) %>%
summarize_at(.vars = prot_use, .funs = base::mean) %>%
column_to_rownames("celltype") %>%
%>%
t
as.data.frame
# index for tidying
= rownames(mean_mtx)[1]
index1 = rownames(mean_mtx)[length(rownames(mean_mtx))]
index2
# order by lineage
= h1@meta.data$celltype_joint %>% unique() %>% sort()
celltype_order = celltype_order[c(12,11,7,8,9,10,13,19,14:16,3,17,21,1,2,6,4,5,18,20,22)]
celltype_order
# alt (not used)
# use hclust within pheatmap to get ordered of clustered protein and celltypes
#x = pheatmap::pheatmap(mean_mtx, silent = TRUE)
#celltype_order = colnames(mean_mtx[ ,x$tree_col$order]) %>% rev
# convert tidy and reorder based on hclust
= adt %>%
adt.l select(prot_use, celltype) %>%
gather(key = prot, value = dsb_count, index1:index2) %>%
mutate(prot = str_sub(prot, 1, -6)) %>%
mutate(prot = factor(prot, levels = rev(prot_use_plot))) %>%
mutate(celltype = factor(celltype, levels = celltype_order))
# plot
= length(celltype_order) %>% as.numeric()
col_split =
adt.l %>% filter(dsb_count > -5) %>%
adt.l filter(!celltype=="DOUBLET" )
= ggplot(adt.l, aes(x = dsb_count, y = prot, color = prot, fill = prot)) +
p geom_density_ridges2(show.legend = F, inherit.aes = T, size = 0.1) +
theme_minimal() +
scale_fill_manual(values = my_pal) +
scale_color_manual(values = my_pal) +
geom_vline(xintercept = 0, color = "red", size=0.3) +
xlab("dsb normalized protein expression") +
theme(axis.title.x = element_text(size = 15)) +
facet_wrap(~celltype, ncol = col_split, scales = "free_x") +
theme(panel.spacing.x = unit(0.1, "lines"))+
theme(strip.background = element_blank()) +
theme(strip.text = element_text(colour = 'black', size = 10, angle = 90, hjust = 0)) +
theme(axis.text.x = element_text(size = 5, family = "Helvetica", color = "black")) +
theme(axis.text.y = element_text(size = 10, family = "Helvetica", color = "black"))
ggsave(p, filename = paste0(figpath,"H1_cluster_histogram_heatmap.pdf"), width = 14.5, height =10)
# R version 3.5.3 Patched (2019-03-11 r77192)
# attached base packages:
# [1] stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] viridis_0.5.1 viridisLite_0.3.0 ggsci_2.9 ggridges_0.5.1 magrittr_1.5 forcats_0.4.0 stringr_1.4.0
# [8] dplyr_0.8.5 purrr_0.3.3 readr_1.3.1 tidyr_1.0.2 tibble_2.1.1 tidyverse_1.2.1 Seurat_2.3.4
# [15] Matrix_1.2-15 cowplot_0.9.4 ggplot2_3.1.1
#
# loaded via a namespace (and not attached):
# [1] Rtsne_0.15 colorspace_1.4-1 ellipsis_0.3.0 class_7.3-15 modeltools_0.2-22 mclust_5.4.5
# [7] htmlTable_1.13.1 base64enc_0.1-3 rstudioapi_0.10 proxy_0.4-23 npsurv_0.4-0 flexmix_2.3-15
# [13] bit64_0.9-7 lubridate_1.7.4 mvtnorm_1.0-10 xml2_1.2.0 codetools_0.2-16 splines_3.5.3
# [19] R.methodsS3_1.7.1 lsei_1.2-0 robustbase_0.93-5 knitr_1.23 Formula_1.2-3 jsonlite_1.6
# [25] broom_0.5.2 ica_1.0-2 cluster_2.0.7-1 kernlab_0.9-27 png_0.1-7 R.oo_1.22.0
# [31] pheatmap_1.0.12 compiler_3.5.3 httr_1.4.0 backports_1.1.4 assertthat_0.2.1 lazyeval_0.2.2
# [37] cli_1.1.0 lars_1.2 acepack_1.4.1 htmltools_0.3.6 tools_3.5.3 igraph_1.2.4.1
# [43] gtable_0.3.0 glue_1.3.1 RANN_2.6.1 reshape2_1.4.3 Rcpp_1.0.1 cellranger_1.1.0
# [49] vctrs_0.2.4 gdata_2.18.0 ape_5.3 nlme_3.1-137 iterators_1.0.10 fpc_2.2-1
# [55] gbRd_0.4-11 lmtest_0.9-37 xfun_0.7 rvest_0.3.4 lifecycle_0.1.0 irlba_2.3.3
# [61] gtools_3.8.1 DEoptimR_1.0-8 MASS_7.3-51.1 zoo_1.8-6 scales_1.0.0 hms_0.4.2
# [67] doSNOW_1.0.16 parallel_3.5.3 RColorBrewer_1.1-2 reticulate_1.12 pbapply_1.4-0 gridExtra_2.3
# [73] rpart_4.1-13 segmented_0.5-4.0 latticeExtra_0.6-28 stringi_1.4.3 foreach_1.4.4 checkmate_1.9.3
# [79] caTools_1.17.1.2 bibtex_0.4.2 Rdpack_0.11-0 SDMTools_1.1-221.1 rlang_0.4.5 pkgconfig_2.0.2
# [85] dtw_1.20-1 prabclus_2.3-1 bitops_1.0-6 lattice_0.20-38 ROCR_1.0-7 labeling_0.3
# [91] htmlwidgets_1.3 bit_1.1-14 tidyselect_0.2.5 plyr_1.8.4 R6_2.4.0 generics_0.0.2
# [97] snow_0.4-3 gplots_3.0.1.1 Hmisc_4.2-0 haven_2.1.0 pillar_1.4.1 foreign_0.8-71
# [103] withr_2.1.2 fitdistrplus_1.0-14 mixtools_1.1.0 survival_2.43-3 nnet_7.3-12 tsne_0.1-3
# [109] modelr_0.1.4 crayon_1.3.4 hdf5r_1.2.0 KernSmooth_2.23-15 readxl_1.3.1 grid_3.5.3
# [115] data.table_1.12.2 metap_1.1 digest_0.6.19 diptest_0.75-7 R.utils_2.8.0 stats4_3.5.3
# [121] munsell_0.5.0
Aggregated protein expression heatmap across 780 libraries by sample x timepoint
mid_res/histogram_hclust/hclust_histogram_protein.r
suppressMessages(library(ComplexHeatmap))
suppressMessages(library(Seurat))
suppressMessages(library(tidyverse))
suppressMessages(library(here))
# functions
source("functions/analysis_functions.R")
source("functions/protein_annotation_functions.r")
# save path
= here("mid_res/aggregated_protein_libraries/figures/")
figpath dir.create(figpath)
# cluster information combined heatmap
= readRDS(file = here("data/h1h5_annotated_with_meta.rds")) %>%
h1 SetAllIdent(id = "celltype_joint") %>%
SubsetData(ident.remove = "DOUBLET")
# specify subsets of proteins for fig 1
= c(
prot_order # B
"CD19", "CD20", "IgM", "IgD", "CD40", "CD185",
# pdc
"CD123", "CD303",
# my lin / dc
"HLA-DR", "CD11c",
# DC and mono
"CD71", "CD14", "CD16", "CD11b", "CD1d", "CD1c",
# hsc
"CD34",
# T
"CD3", "CD45RO", "CD45RA" , "CD62L", "CD27",
"CD28", "CD279", "CD4", "CD8", "CD161",
# nk
"CD244", "KLRG1", "CD127", "CD56", "CD57", "CD38",
# state
"CD103" , "CD196", "CD195", "CD25", "CD86",
"CD69", "CD31"
)
# aggregate protein data
# single cell data
= as.data.frame(t(h1@assay$CITE@data))
prot.dat # replact ADT string
colnames(prot.dat) = str_sub(colnames(prot.dat), start = 1, end = -6)
# aggregate (mean)
= cbind(prot.dat, h1@meta.data) %>%
prot_data group_by(sample, subject_id = sampleid , timepoint,
celltype = celltype_joint,
time_cohort, batch, age, gender, antibody_response = adjmfc.group) %>%
summarize_at(.vars = colnames(prot.dat), .funs = median) %>%
ungroup() %>%
select(sample, celltype, prot_order) %>%
arrange(celltype, sample) %>%
unite(col = "sample_celltype", sample:celltype, sep = "_") %>%
column_to_rownames("sample_celltype") %>%
t()
# cell frequency
= h1@meta.data
md = md %>%
df group_by(sample, subject_id = sampleid, timepoint,
time_cohort, batch, celltype = celltype_joint, antibody_response = adjmfc.group) %>%
age, gender, summarize(count = n(), log_lib_size = log10(sum(nUMI))) %>%
group_by(sample) %>%
mutate(cell_freq=count/sum(count)*100) %>%
arrange(celltype, sample) %>%
mutate(log_cell_count = log10(count))
= df$cell_freq
cellfreq
# celltype
= df$celltype
cellt = HeatmapAnnotation(celltype = cellt,
ha col = list(celltype = c(
"BC_Mem" = "lightslateblue",
"BC_Naive" = "#2B3D26",
"CD103_Tcell" = "#E25822",
"CD14_Mono"= "red",
"CD16_Mono" = "firebrick4",
"CD38_Bcell" = "#882D17",
"CD4_CD161_Mem_Tcell" = "navy",
"CD4_CD25_Tcell"= "#B3446C",
"CD4_CD56_Tcell" = "maroon1",
"CD4_CD57_Tcell" = "#604E97",
"CD4_Efct_Mem_Tcell" ="#F99379",
"CD4Naive_Tcell" = "#0067A5",
"CD8_CD161_Tcell" = "olivedrab",
"CD8_Mem_Tcell" = "#008856",
"CD8_Naive_Tcell" = "#848482",
"CD8_NKT" = "#C2B280",
"HSC" = "#BE0032",
"IgA_CD14_Mono" = "#A1CAF1",
"MAIT_Like" = "#F38400",
"mDC" = "#875692",
"NK" = "#F3C300",
"pDC" = "#222222"))
)
# Create annotations
= columnAnnotation(
libraries_map log_library_size = column_anno_points(
$log_lib_size,
dfsize = unit(0.3, 'mm'),
pch = 21, axis = TRUE, border = TRUE,
gp = gpar(color = "black")
),height = unit(1.8, units = "cm")
)
# matrix color values
= circlize::colorRamp2(breaks = c(-1,0,2,4,8,12,16,20),
col_fun colors = viridis::viridis(n = 8, option = "B"))
# organize proteins by lineages
rownames(prot_data) = factor(rownames(prot_data),levels = prot_order)
# cluster by column; save heatmap
pdf(paste0(figpath,"heatmap3.pdf"), width = 7, height = 6.5)
draw(
::Heatmap(
ComplexHeatmapmatrix = prot_data,
name = "",
col = col_fun,
row_names_gp = gpar(color = "black", fontsize = 10),
top_annotation = ha,
bottom_annotation = libraries_map,
show_column_names = FALSE,
cluster_rows = FALSE,
cluster_columns = TRUE,
use_raster = TRUE), show_annotation_legend = FALSE)
dev.off()
sessionInfo()
# R version 3.5.3 Patched (2019-03-11 r77192)
# attached base packages:
# [1] grid stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] ComplexHeatmap_1.20.0 viridis_0.5.1 viridisLite_0.3.0 here_0.1 forcats_0.4.0 stringr_1.4.0 dplyr_0.8.5 purrr_0.3.3
# [9] readr_1.3.1 tidyr_1.0.2 tibble_2.1.1 tidyverse_1.2.1 Seurat_2.3.4 Matrix_1.2-15 cowplot_0.9.4 ggplot2_3.1.1
#
# loaded via a namespace (and not attached):
# [1] circlize_0.4.10 readxl_1.3.1 snow_0.4-3 backports_1.1.4 Hmisc_4.2-0 plyr_1.8.4 igraph_1.2.4.1
# [8] lazyeval_0.2.2 splines_3.5.3 crosstalk_1.0.0 digest_0.6.25 foreach_1.4.4 htmltools_0.3.6 lars_1.2
# [15] fansi_0.4.0 gdata_2.18.0 magrittr_2.0.1 checkmate_1.9.3 cluster_2.0.7-1 mixtools_1.1.0 ROCR_1.0-7
# [22] modelr_0.1.4 R.utils_2.8.0 colorspace_1.4-1 rvest_0.3.4 haven_2.1.0 xfun_0.7 crayon_1.3.4
# [29] jsonlite_1.6 survival_2.43-3 zoo_1.8-6 iterators_1.0.10 ape_5.3 glue_1.3.1 pals_1.5
# [36] gtable_0.3.0 webshot_0.5.1 GetoptLong_1.0.2 kernlab_0.9-27 shape_1.4.4 prabclus_2.3-1 DEoptimR_1.0-8
# [43] maps_3.3.0 scales_1.0.0 pheatmap_1.0.12 mvtnorm_1.0-10 bibtex_0.4.2 miniUI_0.1.1.1 Rcpp_1.0.1
# [50] metap_1.1 dtw_1.20-1 xtable_1.8-4 htmlTable_1.13.1 reticulate_1.12 foreign_0.8-71 bit_1.1-14
# [57] mapproj_1.2.6 proxy_0.4-23 mclust_5.4.5 SDMTools_1.1-221.1 Formula_1.2-3 stats4_3.5.3 tsne_0.1-3
# [64] htmlwidgets_1.3 httr_1.4.0 gplots_3.0.1.1 RColorBrewer_1.1-2 fpc_2.2-1 ellipsis_0.3.0 acepack_1.4.1
# [71] modeltools_0.2-22 ica_1.0-2 pkgconfig_2.0.2 R.methodsS3_1.7.1 flexmix_2.3-15 nnet_7.3-12 utf8_1.1.4
# [78] manipulateWidget_0.10.0 tidyselect_0.2.5 labeling_0.3 rlang_0.4.5 reshape2_1.4.3 later_0.8.0 munsell_0.5.0
# [85] cellranger_1.1.0 tools_3.5.3 cli_1.1.0 generics_0.0.2 broom_0.5.2 ggridges_0.5.1 npsurv_0.4-0
# [92] knitr_1.23 bit64_0.9-7 fitdistrplus_1.0-14 robustbase_0.93-5 rgl_0.100.30 caTools_1.17.1.2 RANN_2.6.1
# [99] packrat_0.5.0 pbapply_1.4-0 nlme_3.1-137 mime_0.6 R.oo_1.22.0 xml2_1.2.0 hdf5r_1.2.0
# [106] compiler_3.5.3 rstudioapi_0.10 png_0.1-7 lsei_1.2-0 stringi_1.4.3 lattice_0.20-38 ggsci_2.9
# [113] vctrs_0.2.4 pillar_1.4.1 lifecycle_0.1.0 GlobalOptions_0.1.2 Rdpack_0.11-0 lmtest_0.9-37 data.table_1.12.2
# [120] bitops_1.0-6 irlba_2.3.3 gbRd_0.4-11 httpuv_1.5.1 R6_2.4.0 latticeExtra_0.6-28 promises_1.0.1
# [127] KernSmooth_2.23-15 gridExtra_2.3 sessioninfo_1.1.1 codetools_0.2-16 dichromat_2.0-0 MASS_7.3-51.1 gtools_3.8.1
# [134] assertthat_0.2.1 rjson_0.2.20 rprojroot_1.3-2 withr_2.1.2 diptest_0.75-7 parallel_3.5.3 doSNOW_1.0.16
# [141] hms_0.4.2 rpart_4.1-13 class_7.3-15 segmented_0.5-4.0 Rtsne_0.15 shiny_1.3.2 lubridate_1.7.4
# [148] base64enc_0.1-3
Analyze transcriptome of manually gated cells using cell type
specific gene signatures
mid_res/pblast_abc_integration/1_pbasc_analysis_gates_modscoresv2.r
# Analysis of plasmablast and Activated_B.
# script uses R 3.5.1
suppressMessages(library(Seurat))
suppressMessages(library(tidyverse))
suppressMessages(library(here))
# source functions
theme_set(theme_bw())
source("functions/analysis_functions.R")
# file path
= here("mid_res/pblast_abc_integration/figures/")
figpath = here("mid_res/pblast_abc_integration/generated_data/")
datapath dir.create(figpath); dir.create(datapath)
# load h1 annotated data
= ReadCohort(joint_object_dir = "data/h1h5_annotated_with_meta.rds", cohort = "H1N1") %>%
h1 SubsetData(accept.high = 28, subset.name = "CD3_PROT", subset.raw = T)
#manually gate activated memory B cells and plasmablasts
= GenePlot4(h1, gene1 = "CD19_PROT", gene2 = "CD14_PROT", pt.size = 0.1)
p ggsave(p, filename = paste0(figpath,"/cd19cells.png"),width = 4, height = 3)
= GenePlot4(h1, gene1 = "CD19_PROT", gene2 = "CD3_PROT", pt.size = 0.1) +
p geom_vline(xintercept = 8) +
geom_hline(yintercept = 5)
ggsave(p, filename = paste0(figpath,"/cd19cells_2.pdf"),width = 4, height = 3)
############## Pt 1 Manual gate asc abc
= function(SeuratObject, return.seurat = F) {
GateBC = as.matrix(SeuratObject@assay$CITE@data)
adt <- names(which(adt["CD19_PROT", ] > 8 &
cells "CD3_PROT", ] < 5 &
adt["CD14_PROT", ] < 5 ))
adt[if(return.seurat == TRUE) {
= SubsetData(SeuratObject, cells.use = cells, subset.raw = TRUE)
sub return(sub)
else { return(cells) }
}
}
= GateBC(SeuratObject = h1, return.seurat = F)
bcells_gate = SubsetData(h1, cells.use = bcells_gate, subset.raw = T)
cd19
# abc asc
= GenePlot4(cd19, gene1 = "CD71_PROT", gene2 = "IgD_PROT",pt.size = 0.1) +
p geom_vline(xintercept = 5) + geom_hline(yintercept = 10)
ggsave(p, filename = paste0(figpath,"/cd71igd_cells.pdf"),width = 4, height = 3)
# naive memory
= GenePlot4(cd19, gene1 = "CD27_PROT", gene2 = "IgD_PROT",pt.size = 0.1) +
p geom_vline(xintercept = 4) + geom_hline(yintercept = 10)
ggsave(p, filename = paste0(figpath,"/cd27_igd_cells.pdf"),width = 4, height = 3)
# activated B and asc gate
= function(SeuratObject) {
Gate_Activated_BASC = as.matrix(SeuratObject@assay$CITE@data)
adt <- names(which(adt["CD71_PROT", ] > 5 & adt["IgD_PROT", ] < 10))
cells return(cells)
}= Gate_Activated_BASC(SeuratObject = cd19)
Activated_B_asc_cells = SubsetData(cd19, cells.use = Activated_B_asc_cells, subset.raw = T)
Activated_Basc
# plot gates
= GenePlot4(Activated_Basc, gene1 = "CD20_PROT", gene2 = "CD38_PROT", pt.size = 0.6) +
p geom_vline(xintercept = 8) +
geom_hline(yintercept = 10)
ggsave(p, filename = paste0(figpath,"/Activated_B_pblast.pdf"),width = 4, height = 3)
= cbind(Activated_Basc@meta.data, as.data.frame(t(Activated_Basc@assay$CITE@data)))
bmd = bmd %>% mutate(
bmd b_type =
if_else(
> 8 &
CD19_PROT > 5 &
CD71_PROT < 10 &
IgD_PROT < 8 & CD38_PROT > 10,
CD20_PROT true = "Plasmablast",
if_else(
> 8 &
CD19_PROT > 5 &
CD71_PROT < 10 &
IgD_PROT > 8 &
CD20_PROT < 10,
CD38_PROT true = "Activated_Bcell",
false = "NA"
)
)%>%
) filter(b_type %in% c("Plasmablast", "Activated_Bcell")) %>%
select(b_type, barcode_check) %>%
column_to_rownames("barcode_check")
= SubsetData(Activated_Basc, cells.use = rownames(bmd), subset.raw = TRUE) %>% AddMetaData(metadata = bmd)
bsub
########## Pt 2 add module scores for ellebedy gene sets.
= read.table(file = here("signature_curation/ellebedy_genes.txt"), sep = "\t", header = T)
bcgenes = bcgenes %>% filter(celltype == "ABC-")
Activated_B.genes = bcgenes %>% filter(celltype == "ASC-")
asc.genes = list(as.character(Activated_B.genes$Gene), as.character(asc.genes$Gene))
module.list names(module.list) = c("Activated_B_module", "ASC_module")
saveRDS(module.list, file = paste0(datapath,"/ellebedy_bcell.rds"))
# pt 2 module s
# add module score for ellebedy genes
= AddModuleScore(bsub,
bsub genes.list = module.list,
enrich.name = names(module.list),
random.seed = 1)
names(bsub@meta.data)[c(33, 34)] = c("Activated_Bcell_Gene_Score", "Plasmablast_Gene_score")
= bsub@meta.data %>% select(Activated_Bcell_Gene_Score, Plasmablast_Gene_score, b_type)
bsubdf
# plot module scores.
= bsub@meta.data
bsubdf = ggpubr::ggviolin(data = bsubdf, x = "b_type",
p y = c("Plasmablast_Gene_score", "Activated_Bcell_Gene_Score"),
combine = TRUE,
fill = "b_type",
palette = "d3")
= p %>% ggpubr::ggadd(add = "jitter", jitter = 0.35, alpha = 0.4, size = 1, shape = 16)
p = p +
p theme(legend.position = "none") +
ylab("module score") + xlab("") +
theme(strip.background = element_blank()) +
::stat_compare_means(method = "wilcox")
ggpubrggsave(p, filename =paste0(figpath,"/pb_asc_modules.pdf"), height = 3, width = 4.5)
sessionInfo()
# R version 3.5.3 Patched (2019-03-11 r77192)
#
# attached base packages:
# [1] stats graphics grDevices utils datasets methods base
#
# other attached packages:
# [1] viridis_0.5.1 viridisLite_0.3.0 here_0.1 forcats_0.4.0 stringr_1.4.0 dplyr_0.8.5
# [7] purrr_0.3.3 readr_1.3.1 tidyr_1.0.2 tibble_2.1.1 tidyverse_1.2.1 Seurat_2.3.4
# [13] Matrix_1.2-15 cowplot_0.9.4 ggplot2_3.1.1
#
# loaded via a namespace (and not attached):
# [1] Rtsne_0.15 colorspace_1.4-1 class_7.3-15 modeltools_0.2-22 ggridges_0.5.1 rprojroot_1.3-2
# [7] mclust_5.4.5 htmlTable_1.13.1 base64enc_0.1-3 rstudioapi_0.10 proxy_0.4-23 ggpubr_0.2
# [13] npsurv_0.4-0 flexmix_2.3-15 bit64_0.9-7 lubridate_1.7.4 mvtnorm_1.0-10 xml2_1.2.0
# [19] codetools_0.2-16 splines_3.5.3 R.methodsS3_1.7.1 lsei_1.2-0 robustbase_0.93-5 knitr_1.23
# [25] Formula_1.2-3 jsonlite_1.6 packrat_0.5.0 broom_0.5.2 ica_1.0-2 cluster_2.0.7-1
# [31] kernlab_0.9-27 png_0.1-7 R.oo_1.22.0 compiler_3.5.3 httr_1.4.0 backports_1.1.4
# [37] assertthat_0.2.1 lazyeval_0.2.2 cli_1.1.0 lars_1.2 acepack_1.4.1 htmltools_0.3.6