library(DESeq2)
library(ggplot2)
library(cowplot)
library(pheatmap)
library(ggrepel)
library(grid)
library(rstatix)CSF Bulk 3. Gene Enrichment Heatmaps
About the Data
DESeq2 analysis of CSF and PBMC myeloid and lymphoid cells with design = ~ 0 + group the groups include: PBMC_Myel, PBMC_Lym, CSF_Myel, CSF_Lym. Differential expression between CSF and PBMC in either myeloid cells or lymphoid cells. n = 6 CSF samples and 6 PBMC samples each bulk sorted for myeloid and lymphoid cells.
Load Data
load("data/filt_dds_env.rda")Heatmaps
Human Microglia Signature
add.flag <- function(pheatmap,
kept.labels,
repel.degree) {
heatmap <- pheatmap$gtable
new.label <- heatmap$grobs[[which(heatmap$layout$name == "row_names")]]
# keep only labels in kept.labels, replace the rest with ""
new.label$label <- ifelse(new.label$label %in% kept.labels,
new.label$label, "")
repelled.y <- function(d, d.select, k = repel.degree){
strip.npc <- function(dd){
if(!"unit.arithmetic" %in% class(dd)) {
return(as.numeric(dd))
}
d1 <- strip.npc(dd$arg1)
d2 <- strip.npc(dd$arg2)
fn <- dd$fname
return(lazyeval::lazy_eval(paste(d1, fn, d2)))
}
full.range <- sapply(seq_along(d), function(i) strip.npc(d[i]))
selected.range <- sapply(seq_along(d[d.select]), function(i) strip.npc(d[d.select][i]))
return(unit(seq(from = max(selected.range) + k*(max(full.range) - max(selected.range)),
to = min(selected.range) - k*(min(selected.range) - min(full.range)),
length.out = sum(d.select)), "npc"))
}
new.y.positions <- repelled.y(new.label$y, d.select = new.label$label != "")
new.flag <- segmentsGrob(x0 = new.label$x,
x1 = new.label$x + unit(0.15, "npc"),
y0 = new.label$y[new.label$label != ""],
y1 = new.y.positions)
# shift position for selected labels
new.label$x <- new.label$x + unit(0.2, "npc")
new.label$y[new.label$label != ""] <- new.y.positions
# add flag to heatmap
heatmap <- gtable::gtable_add_grob(x = heatmap,
grobs = new.flag,
t = 4,
l = 4
)
# replace label positions in heatmap
heatmap$grobs[[which(heatmap$layout$name == "row_names")]] <- new.label
# plot result
grid.newpage()
grid.draw(heatmap)
# return a copy of the heatmap invisibly
invisible(heatmap)
}df <- as.data.frame(colData(dds)[,c("tissue","cell")])
ann_colors = list(
tissue = c(CSF = "royalblue", PBMC = "tomato"),
cell = c(myeloid = "forestgreen", lymphoid = "darkblue"))
vsd <- vst(dds, blind=FALSE)
rownames(vsd) <- vsd@rowRanges@elementMetadata@listData$gene_name
MG_sig <- read.csv("writes/MG_Sig_Gosselin.csv", stringsAsFactors = F)
p <- which(rownames(vsd) %in% MG_sig$Gosselin_2017)
heat <- pheatmap(assay(vsd)[p,], cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=TRUE, scale="row", col = colorRampPalette(c("navy", "white", "firebrick3"))(50), annotation_col=df, annotation_colors = ann_colors, main="Microglia Genes Gosselin 2017", fontsize_row=6, silent=TRUE, show_colnames = FALSE)
GosselinTop <- c("SPP1", "CD74", "ACTB", "C3", "FTL", "FOS", "CSF1R", "B2M", "C1QC", "C1QA", "C1QB", "PSAP", "A2M", "B2M", "ITM2B", "LAPTM5", "CTSB", "P2RY12", "SLCO2B1", "RGS1", "APOE", "CCL4L2", "RNASET2", "NEAT1", "CX3CR1", "DUSP1", "SAT1", "ZFP36", "CD81", "HLA-DRA", "HLA-B", "TMEM119", "P2RY13", "GPR34", "SALL1")
add.flag(heat, kept.labels = GosselinTop, repel.degree = 0.5)
# For datatable
write.csv(assay(vsd)[p,], file = "writes/GosselinHeatmap.csv")Macrophage Genes
p <- c('MRC1', 'PF4', 'LYVE1','TGFBI','EMP3', 'LYZ', 'MS4A7', 'STAB1', 'CD163', 'CD63', 'ITGAX', 'CD1C', 'FCN1', 'VCAN','FTL','SEPP1','F13A1','DAB2', 'MS4A4A', 'CCR2', 'TREM2', 'APOE', 'CD63', 'CD74', 'CLEC7A', 'FOLR2', 'CCL8','CD1C', "AXL", 'CLEC9A')
pheatmap(assay(vsd)[which(mcols(dds)$gene_name %in% p),], cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=TRUE, annotation_col=df, scale="row", col = colorRampPalette(c("navy", "white", "firebrick3"))(50), annotation_colors = ann_colors, main="Macrophage Genes", show_colnames = FALSE)
write.csv(assay(vsd)[which(mcols(dds)$gene_name %in% p),], file = "writes/MacrophageHeatmap.csv")Identify CSF-Enriched Genes
CSFvPBMCinLym <- read.csv("writes/CSFvPBMCinLym.csv", row.names = 1)
CSFvPBMCinMye <- read.csv("writes/CSFvPBMCinMye.csv", row.names = 1)
all.equal(CSFvPBMCinLym$gene, CSFvPBMCinMye$gene)
merged <- as.data.frame(cbind(CSFvPBMCinMye, CSFvPBMCinLym))
colnames(merged) <- c("MYE_baseMean", "MYE_log2FoldChange", "MYE_lfcSE", "MYE_stat", "MYE_pvalue", "MYE_padj", "MYE_gene", "LYM_baseMean", "LYM_log2FoldChange", "LYM_lfcSE", "LYM_stat", "LYM_pvalue", "LYM_padj", "LYM_gene")
ggplot(merged, aes(x = MYE_log2FoldChange, y = LYM_log2FoldChange)) + geom_point()
ggplot(merged, aes(x = -log2(MYE_padj), y = -log2(LYM_padj))) + geom_point()
#Assign the Group variable for coloring 4-way plot
for (i in c(1:nrow(merged))){
if (merged$MYE_padj[i]<0.05 & (!is.na(merged$MYE_padj[i]))){
merged$MYESig[i]='YES'
}
else {
merged$MYESig[i]='NO'
}
}
for (i in c(1:nrow(merged))){
if (merged$LYM_padj[i]<0.05 & (!is.na(merged$LYM_padj[i]))){
merged$LYMSig[i]='YES'
}
else {
merged$LYMSig[i]='NO'
}
}
for (i in c(1:nrow(merged))){
if (merged$MYESig[i]=='YES' & merged$LYMSig[i]=='NO'){
merged$Group[i]=1 }
else if (merged$MYESig[i]=='NO' & merged$LYMSig[i]=='YES'){
merged$Group[i]=2}
else if (merged$MYESig[i]=='YES' & merged$LYMSig[i]=='YES'){
merged$Group[i]=3}
else {
merged$Group[i]=4}
}
table(merged$Group)
# 4923 SIG Myeloid
# 1661 SIG Lymphoid
# 1409 SIG Both
# 16798 NS
write.csv(merged, "writes/merged.csv")
temp <-subset(merged, merged$Group == 3)
ggplot(temp, aes(x = MYE_log2FoldChange, y=LYM_log2FoldChange)) + geom_point()
group3 <- temp %>% filter(MYE_log2FoldChange > 2 & LYM_log2FoldChange > 2) %>% filter(MYE_gene != "Y_RNA")
group3$MYE_genePlot CSF-Enriched Genes
GOI <- c("RGS16", "PHLDA3", "SCD", "DHCR24", "MSMO1", "CCR5", "ZNF547", "NBL1", "ADAMTS17", "TNFSF8", "PLK3", "PAQR3","HMGCR","TM2D2","INSIG1", "SQLE", "LDLR", "CCL20", "CYYR1", "FUT1", "ERICH5", "PKDCC", "CCL22", "EML1", "EML1", "CEACAM1", "KBTBD12", "TNFRSF12A", "PLCB4", "C6orf223", "TNF", "PTPN13", "EPAS1", "ZFP3", "FSD2")
pheatmap(assay(vsd)[which(mcols(dds)$gene_name %in% GOI),], cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=TRUE, annotation_col=df, scale="row", col = colorRampPalette(c("navy", "white", "firebrick3"))(50), annotation_colors = ann_colors, main="CSF Genes", show_colnames = FALSE)GOIs
# plot genes of interest
GOIs <- c("MSMO1", "CCR5")
for (p in GOIs){
geneid <- rownames(dds)[which(mcols(dds)$gene_name == p)]
d <- plotCounts(dds, gene=geneid, intgroup=c("tissue", "cell"), returnData = TRUE)
plot <- ggplot(d, aes(x=cell, y=count, color=tissue)) +
geom_point(size=3, position=position_jitterdodge(dodge.width = 0.5, jitter.width = 0)) +
stat_summary(fun.y = "mean", geom = "point", aes(group = tissue),
position = position_dodge(width = 0.5), color = "black", shape = "_", size = 10) +
scale_color_manual(values = c("cornflowerblue","red")) +
expand_limits(y=0) + ylab("normalized count") + xlab("") + labs(subtitle = p) +
theme_cowplot() + theme(legend.position = "none")
print(plot)
# CSF = blue, PBMC = red
# save plot
#350x350
ggsave(filename=paste(p,".jpeg", sep = ""), plot = plot, width = 60, height = 80, units = "mm", path = "plots/", bg="white")
}sessionInfo()