RNA-Seq with Bioconductor in R
Mary Piper
Bioinformatics Consultant and Trainer
dds <- estimateSizeFactors(dds)
normalized_counts <- counts(dds, normalized=TRUE)
# Log transformation of normalized counts
vsd <- vst(dds, blind=TRUE)
vsd %>%
assay() %>% # Extract the vst matrix from the object
cor() %>% # Compute pairwise correlation values
pheatmap(annotation = metadata[ , c("column_name1", "column_name2])
# PCA
plotPCA(vsd, intgroup="condition")
# Create DESeq object
dds <- DESeqDataSetFromMatrix(countData = rawcounts,
colData = metadata,
design = ~ source_of_variation + condition)
# Run analysis
dds <- DESeq(dds)
# Plot dispersion estimates
plotDispEsts(dds)
# Extract results for comparison of interest
res <- results(dds,
contrast = c("condition_factor", "level_to_compare",
"base_level"),
alpha = 0.05)
# Shrink the log2 foldchanges
res <- lfcShrink(dds,
contrast = c("condition_factor", "level_to_compare",
"base_level"),
res = res)
# Extract all results as a data frame
res_all <- data.frame(res) %>%
rownames_to_column(var = "ensgene")
# Add gene annotations
res_all <- left_join(x = res_all,
y = grcm38[, c("ensgene", "symbol", "description")],
by = "ensgene")
res_all <- arrange(res_all, padj)
# Identify significant genes
res_sig <- subset(res_all, padj < 0.05)
RNA-Seq with Bioconductor in R