Introduction to Bioconductor in R
Paula Andrea Martinez, PhD.
Data Scientist
fastq
@ unique sequence identifier
raw sequence string
+ optional id
quality encoding per sequence letter
fasta
> unique sequence identifier
raw sequence string
library(ShortRead)
# read fasta
fasample <- readFasta(dirPath = "data/", pattern = "fasta")
# print fasample
print(fasample)
class: ShortRead
length: 500 reads; width: 50 cycles
# list methods and accessors
methods(class = "ShortRead")
# Write a ShortRead object
writeFasta(fasample, file = "data/sample.fasta")
library(ShortRead)
# read fastq
fqsample <- readFastq(dirPath = "data/", pattern = "fastq")
# print fqsample
fqsample
class: ShortReadQ
length: 500 reads; width: 50 cycles
# list methods and accessors
methods(class = "ShortReadQ")
# Write a ShortReadQ object
writeFastq(fqsample, file = "data/sample.fastq.gz")
library(ShortRead)
# set the seed to draw the same read sequences every time
set.seed(123)
# Sampler that draws a subsample of 500 reads
sampler <- FastqSampler("data/SRR1971253.fastq", 500)
# save the yield of 500 read sequences
sample_small <- yield(sampler)
# Class ShortReadQ
class(sample_small)
# length 500 reads
length(sample_small)
Introduction to Bioconductor in R