public final class PSBuildReferenceTaxonomyUtils
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
protected static org.apache.logging.log4j.Logger |
logger |
Constructor and Description |
---|
PSBuildReferenceTaxonomyUtils() |
Modifier and Type | Method and Description |
---|---|
protected static java.util.Map<java.lang.String,java.lang.Integer> |
buildAccessionToTaxIdMap(java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties,
PSTree tree,
int minNonVirusContigLength)
Creates a reference_name-to-taxid map (just an inversion of taxIdToProperties)
|
protected static PSTree |
buildTaxonomicTree(java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties)
Returns a PSTree representing a reduced taxonomic tree containing only taxa present in the reference
|
static java.io.BufferedReader |
getBufferedReaderGz(java.lang.String path)
Gets a buffered reader for a gzipped file
|
static java.io.BufferedReader |
getBufferedReaderTarGz(java.lang.String tarPath,
java.lang.String fileName)
Gets a Reader for a file in a gzipped tarball
|
protected static java.util.Set<java.lang.String> |
parseCatalog(java.io.BufferedReader reader,
java.util.Map<java.lang.String,scala.Tuple2<java.lang.String,java.lang.Long>> accessionToNameAndLength,
java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties,
boolean bGenBank,
java.util.Set<java.lang.String> accessionsNotFoundIn)
Builds maps of reference contig accessions to their taxonomic ids and vice versa.
|
protected static void |
parseNamesFile(java.io.BufferedReader reader,
java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties)
Parses scientific name of each taxon and puts it in taxIdToProperties
|
protected static java.util.Collection<java.lang.Integer> |
parseNodesFile(java.io.BufferedReader reader,
java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties)
Gets the rank and parent of each taxon.
|
protected static java.util.Map<java.lang.String,scala.Tuple2<java.lang.String,java.lang.Long>> |
parseReferenceRecords(java.util.List<htsjdk.samtools.SAMSequenceRecord> dictionaryList,
java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties)
Builds the set of accessions contained in the reference.
|
static void |
writeTaxonomyDatabase(java.lang.String filePath,
PSTaxonomyDatabase taxonomyDatabase)
Writes objects using Kryo to specified local file path.
|
protected static java.util.Map<java.lang.String,scala.Tuple2<java.lang.String,java.lang.Long>> parseReferenceRecords(java.util.List<htsjdk.samtools.SAMSequenceRecord> dictionaryList, java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties)
protected static java.util.Set<java.lang.String> parseCatalog(java.io.BufferedReader reader, java.util.Map<java.lang.String,scala.Tuple2<java.lang.String,java.lang.Long>> accessionToNameAndLength, java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties, boolean bGenBank, java.util.Set<java.lang.String> accessionsNotFoundIn)
Returns a collection of reference accessions that could not be found, if any.
protected static void parseNamesFile(java.io.BufferedReader reader, java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties)
protected static java.util.Collection<java.lang.Integer> parseNodesFile(java.io.BufferedReader reader, java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties)
protected static java.util.Map<java.lang.String,java.lang.Integer> buildAccessionToTaxIdMap(java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties, PSTree tree, int minNonVirusContigLength)
protected static PSTree buildTaxonomicTree(java.util.Map<java.lang.Integer,PSPathogenReferenceTaxonProperties> taxIdToProperties)
public static java.io.BufferedReader getBufferedReaderGz(java.lang.String path)
path - File path
public static java.io.BufferedReader getBufferedReaderTarGz(java.lang.String tarPath, java.lang.String fileName)
tarPath - Path to the tarball
fileName - File within the tarball
public static void writeTaxonomyDatabase(java.lang.String filePath, PSTaxonomyDatabase taxonomyDatabase)