It is critical for bioinformaticians to have their work cited so that we can continue to develop exciting tools for you. Please be sure to cite not only searchsra.org but also the tools that we have used to build this amazing resource.
The references are available in BibTeX format below so that you can import them into your favorite reference manager.
This work used the Extreme Science and Engineering Discovery Environment (XSEDE), which is supported by National Science Foundation grant number ACI-1548562. Development of Apache Airavata, which was used to build the science gateway, is supported by NSF award #1339774. XSEDE resources used include Jetstream and Wrangler at Indiana University through allocation TG-MCB170036.
@inproceedings{Levi2018-dv,
  author    = {Levi, Kyle and Rynge, Mats and Abeysinghe, Eroma and Edwards,
               Robert A},
  title     = {Searching the {Sequence Read Archive} using {Jetstream} and
               {Wrangler}},
  booktitle = {Proceedings of the Practice and Experience on Advanced
               Research Computing},
  series    = {PEARC '18},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Pittsburgh, PA, USA},
  pages     = {50:1--50:7},
  month     = jul,
  year      = 2018,
  isbn      = {9781450364461},
  doi       = {10.1145/3219104.3229278},
  url       = {https://doi.org/10.1145/3219104.3229278},
  keywords  = {Apache Airavata; Bacteriophage; Credential Store; Jetstream;
               Metagenomics; Metagenomics Discovery Challenge; SRA; SRA
               Gateway; SciGaP; Search SRA; Sequence Read Archive; Wrangler},
  abstract  = {The Sequence Read Archive (SRA), the world's largest
               database of sequences, hosts approximately 10 petabases
               ($10^{16}$ bp) of sequence data and is growing at the alarming
               rate of 10 TB per day. Yet this rich trove of data is
               inaccessible to most researchers: searching through the SRA
               requires large storage and computing facilities that are
               beyond the capacity of most laboratories. Enabling scientists
               to analyze existing sequence data will provide insight into
               ecology, medicine, and industrial applications.}
}
@misc{Edwards_SearchSRAToolkit,
  author       = {Edwards, Robert A},
  title        = {{SearchSRA} Toolkit. {Tools} for processing data generated by
                  the {Search SRA}},
  howpublished = {GitHub},
  month        = jan,
  year         = 2021,
  doi          = {10.5281/zenodo.5044723},
  url          = {https://doi.org/10.5281/zenodo.5044723},
  abstract     = {Tools for processing data generated by the Search SRA -
                  linsalrob/SearchSRAToolKit}
}
@article{Torres2017-eb,
  author    = {Torres, Pedro J and Edwards, Robert A and McNair, Katelyn A},
  title     = {{PARTIE}: a partition engine to separate metagenomic and
               amplicon projects in the {Sequence Read Archive}},
  journal   = {Bioinformatics},
  publisher = {Oxford University Press},
  volume    = 33,
  number    = 15,
  pages     = {2389--2391},
  month     = aug,
  year      = 2017,
  issn      = {1367-4803, 1367-4811},
  doi       = {10.1093/bioinformatics/btx184},
  url       = {https://doi.org/10.1093/bioinformatics/btx184},
  pmid      = {28369246},
  pmc       = {PMC5860118},
  language  = {en},
  abstract  = {Motivation: The Sequence Read Archive (SRA) contains raw data
               from many different types of sequence projects. As of 2017, the
               SRA contained approximately ten petabases of DNA sequence
               ($10^{16}$ bp). Annotations of the data are provided by the
               submitter, and mining the data in the SRA is complicated by
               both the amount of data and the detail within those
               annotations. Here, we introduce PARTIE, a partition engine
               optimized to differentiate sequence read data into metagenomic
               (random) and amplicon (targeted) sequence data sets. Results:
               PARTIE subsamples reads from the sequencing file and calculates
               four different statistics: k-mer frequency, 16S abundance,
               prokaryotic- and viral-read abundance. These metrics are used
               to create a RandomForest decision tree to classify the
               sequencing data, and PARTIE provides mechanisms for both
               supervised and unsupervised classification. We demonstrate the
               accuracy of PARTIE for classifying SRA data, discuss the
               probable error rates in the SRA annotations and introduce a
               resource assessing SRA data. Availability and Implementation:
               PARTIE and reclassified metagenome SRA entries are available
               from https://github.com/linsalrob/partie. Contact:
               redwards@mail.sdsu.edu. Supplementary information:
               Supplementary data are available at Bioinformatics online.}
}
@article{Towns2014-po,
  author   = {Towns, J and Cockerill, T and Dahan, M and Foster, I and Gaither,
              K and Grimshaw, A and Hazlewood, V and Lathrop, S and Lifka, D
              and Peterson, G D and Roskies, R and Scott, J R and
              Wilkins-Diehr, N},
  title    = {{XSEDE}: Accelerating Scientific Discovery},
  journal  = {Computing in Science \& Engineering},
  volume   = 16,
  number   = 5,
  pages    = {62--74},
  year     = 2014,
  issn     = {1521-9615},
  doi      = {10.1109/MCSE.2014.80},
  url      = {https://doi.org/10.1109/MCSE.2014.80},
  keywords = {engineering computing;natural sciences computing;research and
              development management;research initiatives;Extreme Science and
              Engineering Discovery Environment;XSEDE;advanced digital
              services;campus-based resources;digital services;digitally
              enabled scholars;multidisciplinary collaborations;national
              e-science infrastructure ecosystem;scientific discovery;Digital
              systems;Knowledge discovery;Materials engineering;Scientific
              computing;Supercomputers;HPC;cyberinfrastructure;distributed
              computing;distributed virtual organizations;research
              infrastructures;scientific computing},
  abstract = {Computing in science and engineering is now ubiquitous: digital
              technologies underpin, accelerate, and enable new, even
              transformational, research in all domains. Access to an array of
              integrated and well-supported high-end digital services is
              critical for the advancement of knowledge. Driven by community
              needs, the Extreme Science and Engineering Discovery Environment
              (XSEDE) project substantially enhances the productivity of a
              growing community of scholars, researchers, and engineers
              (collectively referred to as ``scientists'' throughout this
              article) through access to advanced digital services that support
              open research. XSEDE's integrated, comprehensive suite of
              advanced digital services federates with other high-end
              facilities and with campus-based resources, serving as the
              foundation for a national e-science infrastructure ecosystem.
              XSEDE's e-science infrastructure has tremendous potential for
              enabling new advancements in research and education. XSEDE's
              vision is a world of digitally enabled scholars, researchers, and
              engineers participating in multidisciplinary collaborations to
              tackle society's grand challenges.}
}
@inproceedings{Stewart2015-pr,
  author    = {Stewart, Craig A and Cockerill, Timothy M and Foster, Ian and
               Hancock, David and Merchant, Nirav and Skidmore, Edwin and
               Stanzione, Daniel and Taylor, James and Tuecke, Steven and
               Turner, George and Vaughn, Matthew and Gaffney, Niall I},
  title     = {{Jetstream}: a self-provisioned, scalable science and
               engineering cloud environment},
  booktitle = {Proceedings of the 2015 {XSEDE} Conference: Scientific
               Advancements Enabled by Enhanced Cyberinfrastructure},
  publisher = {ACM},
  pages     = {29},
  month     = jul,
  year      = 2015,
  isbn      = {9781450337205},
  doi       = {10.1145/2792745.2792774},
  url       = {https://doi.org/10.1145/2792745.2792774},
  keywords  = {atmosphere; big data; cloud computing; long tail of science}
}
@article{Buchfink2015-nz,
  author   = {Buchfink, Benjamin and Xie, Chao and Huson, Daniel H},
  title    = {Fast and sensitive protein alignment using {DIAMOND}},
  journal  = {Nature Methods},
  volume   = 12,
  number   = 1,
  pages    = {59--60},
  month    = jan,
  year     = 2015,
  issn     = {1548-7091, 1548-7105},
  doi      = {10.1038/nmeth.3176},
  url      = {https://doi.org/10.1038/nmeth.3176},
  pmid     = {25402007},
  language = {en},
  abstract = {The alignment of sequencing reads against a protein reference
              database is a major computational bottleneck in metagenomics and
              data-intensive evolutionary projects. Although recent tools offer
              improved performance over the gold standard BLASTX, they exhibit
              only a modest speedup or low sensitivity. We introduce DIAMOND,
              an open-source algorithm based on double indexing that is 20,000
              times faster than BLASTX on short reads and has a similar degree
              of sensitivity.}
}
@article{Langmead2012-gu,
  author   = {Langmead, Ben and Salzberg, Steven L},
  title    = {Fast gapped-read alignment with {Bowtie} 2},
  journal  = {Nature Methods},
  volume   = 9,
  number   = 4,
  pages    = {357--359},
  month    = apr,
  year     = 2012,
  issn     = {1548-7091, 1548-7105},
  doi      = {10.1038/nmeth.1923},
  url      = {https://doi.org/10.1038/nmeth.1923},
  pmid     = {22388286},
  language = {en},
  abstract = {As the rate of sequencing increases, greater throughput is
              demanded from read aligners. The full-text minute index is often
              used to make alignment very fast and memory-efficient, but the
              approach is ill-suited to finding longer, gapped alignments.
              Bowtie 2 combines the strengths of the full-text minute index
              with the flexibility and speed of hardware-accelerated dynamic
              programming algorithms to achieve a combination of high speed,
              sensitivity and accuracy.}
}