Massively parallel cDNA sequencing (RNA-Seq) provides an unbiased way to study a transcriptome, including both coding and noncoding genes. Until now, most RNA-Seq studies have depended crucially on existing annotations and thus focused on expression levels and variation in known transcripts. Here, we present Scripture, a method to reconstruct the transcriptome of a mammalian cell using only RNA-Seq reads and the genome sequence. We applied it to mouse embryonic stem cells, neuronal precursor cells and lung fibroblasts to accurately reconstruct the full-length gene structures for most known expressed genes. We identified substantial variation in protein coding genes, including thousands of novel 5' start sites, 3' ends and internal coding exons. We then determined the gene structures of more than a thousand large intergenic noncoding RNA (lincRNA) and antisense loci. Our results open the way to direct experimental manipulation of thousands of noncoding RNAs and demonstrate the power of ab initio reconstruction to render a comprehensive picture of mammalian transcriptomes.
%0 Journal Article
%1 Guttman2010
%A Guttman, Mitchell
%A Garber, Manuel
%A Levin, Joshua Z
%A Donaghey, Julie
%A Robinson, James
%A Adiconis, Xian
%A Fan, Lin
%A Koziol, Magdalena J
%A Gnirke, Andreas
%A Nusbaum, Chad
%A Rinn, John L
%A Lander, Eric S
%A Regev, Aviv
%D 2010
%J Nat Biotechnol
%K methods;DNA genetics;EmbryonicStemCells;GeneExpressionProfiling RNA Genetic;RNA Intergenic methods;GeneLibrary;Mice;Models Animals;CellLine;ComputationalBiology methods;Transcription Genetic genetics;SequenceAnalysis Messenger
%N 5
%P 503--510
%R 10.1038/nbt.1633
%T Ab initio reconstruction of cell type-specific transcriptomes in mouse reveals the conserved multi-exonic structure of lincRNAs
%U http://dx.doi.org/10.1038/nbt.1633
%V 28
%X Massively parallel cDNA sequencing (RNA-Seq) provides an unbiased way to study a transcriptome, including both coding and noncoding genes. Until now, most RNA-Seq studies have depended crucially on existing annotations and thus focused on expression levels and variation in known transcripts. Here, we present Scripture, a method to reconstruct the transcriptome of a mammalian cell using only RNA-Seq reads and the genome sequence. We applied it to mouse embryonic stem cells, neuronal precursor cells and lung fibroblasts to accurately reconstruct the full-length gene structures for most known expressed genes. We identified substantial variation in protein coding genes, including thousands of novel 5' start sites, 3' ends and internal coding exons. We then determined the gene structures of more than a thousand large intergenic noncoding RNA (lincRNA) and antisense loci. Our results open the way to direct experimental manipulation of thousands of noncoding RNAs and demonstrate the power of ab initio reconstruction to render a comprehensive picture of mammalian transcriptomes.
@article{Guttman2010,
  abstract    = {Massively parallel cDNA sequencing (RNA-Seq) provides an unbiased way to study a transcriptome, including both coding and noncoding genes. Until now, most RNA-Seq studies have depended crucially on existing annotations and thus focused on expression levels and variation in known transcripts. Here, we present Scripture, a method to reconstruct the transcriptome of a mammalian cell using only RNA-Seq reads and the genome sequence. We applied it to mouse embryonic stem cells, neuronal precursor cells and lung fibroblasts to accurately reconstruct the full-length gene structures for most known expressed genes. We identified substantial variation in protein coding genes, including thousands of novel 5' start sites, 3' ends and internal coding exons. We then determined the gene structures of more than a thousand large intergenic noncoding RNA (lincRNA) and antisense loci. Our results open the way to direct experimental manipulation of thousands of noncoding RNAs and demonstrate the power of ab initio reconstruction to render a comprehensive picture of mammalian transcriptomes.},
  added-at    = {2010-12-31T02:55:46.000+0100},
  author      = {Guttman, Mitchell and Garber, Manuel and Levin, Joshua Z and Donaghey, Julie and Robinson, James and Adiconis, Xian and Fan, Lin and Koziol, Magdalena J and Gnirke, Andreas and Nusbaum, Chad and Rinn, John L and Lander, Eric S and Regev, Aviv},
  biburl      = {https://www.bibsonomy.org/bibtex/2b83f305afcd4b2e7f00a59c0e8b87f89/jabreftest},
  doi         = {10.1038/nbt.1633},
  file        = {Guttman2010.pdf:Guttman2010.pdf:PDF;Guttman2010S1.pdf:Guttman2010S1.pdf:PDF},
  institution = {Broad Institute of MIT and Harvard, Cambridge, Massachusetts, USA. mguttman@mit.edu},
  interhash   = {0f4398f8d92b907613c693a640ca2e6e},
  intrahash   = {b83f305afcd4b2e7f00a59c0e8b87f89},
  journal     = {Nature Biotechnology},
  keywords    = {methods;DNA genetics;EmbryonicStemCells;GeneExpressionProfiling RNA Genetic;RNA Intergenic methods;GeneLibrary;Mice;Models Animals;CellLine;ComputationalBiology methods;Transcription Genetic genetics;SequenceAnalysis Messenger},
  language    = {eng},
  medline-pst = {ppublish},
  month       = may,
  number      = {5},
  pages       = {503--510},
  pii         = {nbt.1633},
  pmid        = {20436462},
  timestamp   = {2010-12-31T02:55:46.000+0100},
  title       = {Ab initio reconstruction of cell type-specific transcriptomes in mouse reveals the conserved multi-exonic structure of {lincRNAs}},
  url         = {https://doi.org/10.1038/nbt.1633},
  volume      = {28},
  year        = {2010}
}