library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.0     ✔ purrr   0.3.3
## ✔ tibble  2.1.3     ✔ dplyr   0.8.5
## ✔ tidyr   1.0.2     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
source("figures/color_schemes.R")
## ========================================
## circlize version 0.4.6
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: http://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization 
##   in R. Bioinformatics 2014.
## ========================================
# Input files, both located under the diversity_pipeline/2020_07_30 directory.
# Sample metadata table (tab-separated).
META_IN_PATH <- "data/CITE5p/all_batches/tcr/diversity_pipeline/2020_07_30/sample_meta.tsv"
# Output of the clonality pipeline for the CD8_Mem-Sorted population.
CD8MEM_SORTED_IN_PATH <- "data/CITE5p/all_batches/tcr/diversity_pipeline/2020_07_30/diversity_metrics/CD8_Mem-Sorted_diversity.rds"

# Read the metadata; no col_types are given, so readr guesses column types.
meta <- read_tsv(file = META_IN_PATH)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Donor = col_character(),
##   condition = col_character(),
##   sex = col_character(),
##   ever_admitted_to_icu = col_logical(),
##   intubation_vent = col_logical(),
##   outcome = col_character(),
##   date_of_onset_of_symptoms = col_date(format = ""),
##   hospital_admission_date = col_date(format = ""),
##   hospital_discharge_date = col_date(format = ""),
##   batch = col_character(),
##   severity = col_character(),
##   severity.outcome = col_character(),
##   PC1_cat = col_character(),
##   PC2_cat = col_character(),
##   PLS1_cat = col_character(),
##   Class = col_character(),
##   cond_group = col_character(),
##   Batch = col_character(),
##   Gender = col_character(),
##   Timepoint = col_character()
##   # ... with 5 more columns
## )
## See spec(...) for full column specifications.
# Load the diversity metrics produced by the clonality pipeline.
diversity_dat <- readRDS(CD8MEM_SORTED_IN_PATH)

# Attach sample metadata, recode PC1_cat, and keep only the rows to plot.
# The join keys are spelled out on purpose: an implicit natural join would
# silently change its keys if either table ever gained another shared column.
combined_dat <- diversity_dat %>%
  left_join(meta, by = c("Donor", "Timepoint", "Batch")) %>%
  mutate(
    PC1_cat = as.character(PC1_cat),
    # Rows whose Class is "HC" are labelled "HC" in PC1_cat.
    PC1_cat = replace(PC1_cat, Class == "HC", "HC"),
    # Fix the level order; any value outside these three levels becomes NA.
    PC1_cat = factor(PC1_cat, levels = c("HC", "PC1_low", "PC1_high"))
  ) %>%
  filter(
    # Drop rows without a usable PC1 category.
    !is.na(PC1_cat),
    # Keep only the Simpson measure.
    measure == "simpson",
    # Exclude donors whose ID contains "CHI".
    !grepl("CHI", Donor)
  )
# To save the figure as a PDF instead, uncomment the lines below and the dev.off() call at the end
#FIG_OUT_PATH <- "plots/CITE5p/all_batches/paper_figures/FIG4/2020_09_04/fig4_clonality_boxplots.pdf"
#dir.create(dirname(FIG_OUT_PATH))
#pdf(FIG_OUT_PATH, height =2.5, width = 4)


# Boxplot of median1000 by PC1 category, restricted to the HC and T0 timepoints.
# Boxplot outlier markers are hidden because every observation is also drawn
# as a jittered point (horizontal jitter only), shaped by severity.outcome2.
# severity.shape and PC1class.color are not defined in this script; presumably
# they come from figures/color_schemes.R -- confirm.
p2 <- ggplot(
  data = filter(combined_dat, Timepoint %in% c("HC", "T0")),
  mapping = aes(x = PC1_cat, y = median1000, color = PC1_cat)
) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(mapping = aes(shape = severity.outcome2), height = 0, size = 3) +
  scale_shape_manual(values = severity.shape) +
  scale_color_manual(values = PC1class.color) +
  theme_bw() +
  labs(y = "Clonality (Simpson Index)", title = "Clonality in Sorted CD8 Mem")

print(p2)

#dev.off()