CSF Bulk 3. Gene Enrichment Heatmaps

Author

Lindsay N Hayes

Published

December 1, 2025

About the Data

DESeq2 analysis of CSF and PBMC myeloid and lymphoid cells with design = ~ 0 + group the groups include: PBMC_Myel, PBMC_Lym, CSF_Myel, CSF_Lym. Differential expression between CSF and PBMC in either myeloid cells or lymphoid cells. n = 6 CSF samples and 6 PBMC samples each bulk sorted for myeloid and lymphoid cells.

library(DESeq2)
library(ggplot2)
library(cowplot)
library(pheatmap)
library(ggrepel)
library(grid)
library(rstatix)

Load Data

load("data/filt_dds_env.rda")

Heatmaps

Human Microglia Signature

reference

add.flag <- function(pheatmap,
                     kept.labels,
                     repel.degree) {
  
  heatmap <- pheatmap$gtable

  new.label <- heatmap$grobs[[which(heatmap$layout$name == "row_names")]] 

  # keep only labels in kept.labels, replace the rest with ""
  new.label$label <- ifelse(new.label$label %in% kept.labels, 
                            new.label$label, "")

  repelled.y <- function(d, d.select, k = repel.degree){
   
    strip.npc <- function(dd){
      if(!"unit.arithmetic" %in% class(dd)) {
        return(as.numeric(dd))
      }

      d1 <- strip.npc(dd$arg1)
      d2 <- strip.npc(dd$arg2)
      fn <- dd$fname
      return(lazyeval::lazy_eval(paste(d1, fn, d2)))
    }

    full.range <- sapply(seq_along(d), function(i) strip.npc(d[i]))
    selected.range <- sapply(seq_along(d[d.select]), function(i) strip.npc(d[d.select][i]))

    return(unit(seq(from = max(selected.range) + k*(max(full.range) - max(selected.range)),
                    to = min(selected.range) - k*(min(selected.range) - min(full.range)), 
                    length.out = sum(d.select)), "npc"))
  }
  new.y.positions <- repelled.y(new.label$y, d.select = new.label$label != "")
  new.flag <- segmentsGrob(x0 = new.label$x,
                           x1 = new.label$x + unit(0.15, "npc"),
                           y0 = new.label$y[new.label$label != ""],
                           y1 = new.y.positions)

  # shift position for selected labels
  new.label$x <- new.label$x + unit(0.2, "npc")
  new.label$y[new.label$label != ""] <- new.y.positions

  # add flag to heatmap
  heatmap <- gtable::gtable_add_grob(x = heatmap,
                                   grobs = new.flag,
                                   t = 4, 
                                   l = 4
  )

  # replace label positions in heatmap
  heatmap$grobs[[which(heatmap$layout$name == "row_names")]] <- new.label

  # plot result
  grid.newpage()
  grid.draw(heatmap)

  # return a copy of the heatmap invisibly
  invisible(heatmap)
}
df <- as.data.frame(colData(dds)[,c("tissue","cell")])

ann_colors = list(
    tissue = c(CSF = "royalblue", PBMC = "tomato"),
    cell = c(myeloid = "forestgreen", lymphoid = "darkblue"))

vsd <- vst(dds, blind=FALSE)
rownames(vsd) <- vsd@rowRanges@elementMetadata@listData$gene_name
MG_sig <- read.csv("writes/MG_Sig_Gosselin.csv", stringsAsFactors = F)
p <- which(rownames(vsd) %in% MG_sig$Gosselin_2017)


heat <- pheatmap(assay(vsd)[p,], cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=TRUE, scale="row", col = colorRampPalette(c("navy", "white", "firebrick3"))(50), annotation_col=df, annotation_colors = ann_colors, main="Microglia Genes Gosselin 2017", fontsize_row=6, silent=TRUE, show_colnames = FALSE)

GosselinTop <- c("SPP1", "CD74", "ACTB", "C3", "FTL", "FOS", "CSF1R", "B2M", "C1QC", "C1QA", "C1QB", "PSAP", "A2M", "B2M", "ITM2B", "LAPTM5", "CTSB", "P2RY12", "SLCO2B1", "RGS1", "APOE", "CCL4L2", "RNASET2", "NEAT1", "CX3CR1", "DUSP1", "SAT1", "ZFP36", "CD81", "HLA-DRA", "HLA-B", "TMEM119", "P2RY13", "GPR34", "SALL1")

add.flag(heat, kept.labels = GosselinTop, repel.degree = 0.5)

# For datatable
write.csv(assay(vsd)[p,], file = "writes/GosselinHeatmap.csv")

Macrophage Genes

p <- c('MRC1', 'PF4', 'LYVE1','TGFBI','EMP3', 'LYZ', 'MS4A7', 'STAB1', 'CD163', 'CD63', 'ITGAX', 'CD1C', 'FCN1', 'VCAN','FTL','SEPP1','F13A1','DAB2', 'MS4A4A', 'CCR2', 'TREM2', 'APOE', 'CD63', 'CD74', 'CLEC7A', 'FOLR2', 'CCL8','CD1C', "AXL", 'CLEC9A')

pheatmap(assay(vsd)[which(mcols(dds)$gene_name %in% p),], cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=TRUE, annotation_col=df, scale="row", col = colorRampPalette(c("navy", "white", "firebrick3"))(50), annotation_colors = ann_colors, main="Macrophage Genes", show_colnames = FALSE)


write.csv(assay(vsd)[which(mcols(dds)$gene_name %in% p),], file = "writes/MacrophageHeatmap.csv")

Identify CSF-Enriched Genes

CSFvPBMCinLym <- read.csv("writes/CSFvPBMCinLym.csv", row.names = 1)
CSFvPBMCinMye <- read.csv("writes/CSFvPBMCinMye.csv", row.names = 1)
all.equal(CSFvPBMCinLym$gene, CSFvPBMCinMye$gene)
merged <- as.data.frame(cbind(CSFvPBMCinMye, CSFvPBMCinLym))

colnames(merged) <- c("MYE_baseMean", "MYE_log2FoldChange", "MYE_lfcSE", "MYE_stat", "MYE_pvalue", "MYE_padj", "MYE_gene", "LYM_baseMean", "LYM_log2FoldChange", "LYM_lfcSE", "LYM_stat", "LYM_pvalue", "LYM_padj", "LYM_gene")

ggplot(merged, aes(x = MYE_log2FoldChange, y = LYM_log2FoldChange)) + geom_point()
ggplot(merged, aes(x = -log2(MYE_padj), y = -log2(LYM_padj))) + geom_point()

#Assign the Group variable for coloring 4-way plot 
for (i in c(1:nrow(merged))){
  if (merged$MYE_padj[i]<0.05 & (!is.na(merged$MYE_padj[i]))){
    merged$MYESig[i]='YES'
  }
  else {
    merged$MYESig[i]='NO'
  }
}

for (i in c(1:nrow(merged))){
  if (merged$LYM_padj[i]<0.05 & (!is.na(merged$LYM_padj[i]))){
    merged$LYMSig[i]='YES'
  }
  else {
    merged$LYMSig[i]='NO'
  }
}

for (i in c(1:nrow(merged))){
  if (merged$MYESig[i]=='YES' & merged$LYMSig[i]=='NO'){
    merged$Group[i]=1 }
  else if (merged$MYESig[i]=='NO' & merged$LYMSig[i]=='YES'){
    merged$Group[i]=2}
  else if (merged$MYESig[i]=='YES' & merged$LYMSig[i]=='YES'){
    merged$Group[i]=3}
  else {
    merged$Group[i]=4}
  
}
table(merged$Group)
# 4923 SIG Myeloid
# 1661 SIG Lymphoid
# 1409 SIG Both
# 16798 NS

write.csv(merged, "writes/merged.csv")

temp <-subset(merged, merged$Group == 3)

ggplot(temp, aes(x = MYE_log2FoldChange, y=LYM_log2FoldChange)) + geom_point()

group3 <- temp %>% filter(MYE_log2FoldChange > 2 & LYM_log2FoldChange > 2) %>% filter(MYE_gene != "Y_RNA")

group3$MYE_gene

Plot CSF-Enriched Genes

GOI <- c("RGS16", "PHLDA3", "SCD", "DHCR24", "MSMO1", "CCR5", "ZNF547", "NBL1", "ADAMTS17", "TNFSF8", "PLK3", "PAQR3","HMGCR","TM2D2","INSIG1", "SQLE", "LDLR", "CCL20", "CYYR1", "FUT1", "ERICH5",  "PKDCC", "CCL22", "EML1", "EML1", "CEACAM1", "KBTBD12", "TNFRSF12A", "PLCB4", "C6orf223", "TNF", "PTPN13", "EPAS1", "ZFP3", "FSD2")

pheatmap(assay(vsd)[which(mcols(dds)$gene_name %in% GOI),], cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=TRUE, annotation_col=df, scale="row", col = colorRampPalette(c("navy", "white", "firebrick3"))(50), annotation_colors = ann_colors, main="CSF Genes", show_colnames = FALSE)

GOIs

# plot genes of interest
GOIs <- c("MSMO1", "CCR5")

for (p in GOIs){
  geneid <- rownames(dds)[which(mcols(dds)$gene_name == p)]
  d <- plotCounts(dds, gene=geneid, intgroup=c("tissue", "cell"), returnData = TRUE)

  plot <- ggplot(d, aes(x=cell, y=count, color=tissue)) + 
    geom_point(size=3, position=position_jitterdodge(dodge.width = 0.5, jitter.width = 0)) + 
    stat_summary(fun.y = "mean", geom = "point", aes(group = tissue), 
                 position = position_dodge(width = 0.5), color = "black", shape = "_", size = 10) + 
    scale_color_manual(values = c("cornflowerblue","red")) + 
    expand_limits(y=0) + ylab("normalized count") + xlab("") + labs(subtitle = p) + 
    theme_cowplot() + theme(legend.position = "none")
  print(plot)
  # CSF = blue, PBMC = red
  # save plot
  #350x350
  ggsave(filename=paste(p,".jpeg", sep = ""), plot = plot, width = 60, height = 80, units = "mm", path = "plots/", bg="white")
}
sessionInfo()