library(DESeq2)
library(ggplot2)
library(cowplot)
library(pheatmap)
library(ggrepel)
library(grid)
library(rstatix)CSF Bulk 2. Differential Expression CSF vs PBMC
About the Data
Analysis of FACS data from cells derived from 5-6 mL of cerebrospinal fluid (CSF) or 1-2 million peripheral blood cells from the same individuals. All subjects were virally-suppressed people with HIV. CSF-derived cells were processed fresh. PBMCs were analyzed from frozen aliquots. Cells were FACS gated for Live CD45+ cells then sorted for lymphoid (CD3+, CD20+, or CD56+) and (CD3-, CD20-, and CD56-).
Create Count Matrix and Deseq object from Feature Counts Output
DESeqDataSetFromFeatureCounts <- function (sampleTable, directory = ".", design, ignoreRank = FALSE, ...)
{
if (missing(design))
stop("design is missing")
l <- lapply(as.character(sampleTable[, 2]), function(fn) read.table(file.path(directory, fn), skip=2))
if (!all(sapply(l, function(a) all(a$V1 == l[[1]]$V1))))
stop("Gene IDs (first column) differ between files.")
tbl <- sapply(l, function(a) a$V7)
colnames(tbl) <- sampleTable[, 1]
rownames(tbl) <- l[[1]]$V1
rownames(sampleTable) <- sampleTable[, 1]
dds <- DESeqDataSetFromMatrix(countData = tbl, colData = sampleTable[,
-(1:2), drop = FALSE], design = design, ignoreRank, ...)
return(dds)
}
samples <- read.csv("data/samples.csv", stringsAsFactors = TRUE)
genes <- read.csv("data/genes.csv")
dds <- DESeqDataSetFromFeatureCounts(sampleTable = samples,
directory = "FC/",
design = ~ 0 + group)
# check
dds
colData(dds)
all.equal(rownames(dds), genes$gene_id)
rowData(dds) <- genes
save(dds, file = "data/full_dds.rda")Create DDS Results Object
keep <- rowSums(counts(dds) >= 10) >= 6
dds <- dds[keep,]
dds$group <- factor(dds$group, levels = c("PBMC_Myel", "PBMC_Lym", "CSF_Myel", "CSF_Lym"))
dds <- DESeq(dds)
res <- results(dds)
resultsNames(dds)
save(dds, genes, samples, res, file = "data/filt_dds_env.rda")
normalized_counts <- counts(dds, normalized=TRUE)
write.csv(normalized_counts, file = "data/normalized_counts.csv")PCA
vsd <- vst(dds, blind=FALSE)
rownames(vsd) <- rowData(dds)$gene_name
m<-plotPCA(vsd, intgroup=c("tissue", "cell"), returnData = TRUE)
ggplot(m, aes(PC1, PC2, color = group)) + geom_point(size = 3) + scale_color_manual(values = c("purple", "royalblue", "magenta", "tomato")) + theme_cowplot() + xlab("PC1: 72% variance") + ylab("PC2: 14% variance")sessionInfo()