public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractOpticalDuplicateFinderCommandLineProgram
Modifier and Type | Class and Description |
---|---|
static class |
AbstractMarkDuplicatesCommandLineProgram.SamHeaderAndIterator
Little class used to package up a header and an iterable/iterator.
|
Modifier and Type | Field and Description |
---|---|
htsjdk.samtools.SAMFileHeader.SortOrder |
ASSUME_SORT_ORDER |
boolean |
ASSUME_SORTED
Deprecated.
|
java.util.List<java.lang.String> |
COMMENT |
htsjdk.samtools.DuplicateScoringStrategy.ScoringStrategy |
DUPLICATE_SCORING_STRATEGY |
java.util.List<java.lang.String> |
INPUT |
java.io.File |
METRICS_FILE |
java.io.File |
OUTPUT |
protected java.util.Set<java.lang.String> |
pgIdsSeen
The program groups that have been seen during the course of examining the input records.
|
protected PGTagArgumentCollection |
pgTagArgumentCollection |
java.lang.String |
PROGRAM_GROUP_COMMAND_LINE |
java.lang.String |
PROGRAM_GROUP_NAME |
java.lang.String |
PROGRAM_GROUP_VERSION |
java.lang.String |
PROGRAM_RECORD_ID |
boolean |
REMOVE_DUPLICATES |
LOG, MAX_OPTICAL_DUPLICATE_SET_SIZE, OPTICAL_DUPLICATE_PIXEL_DISTANCE, opticalDuplicateFinder, READ_NAME_REGEX
COMPRESSION_LEVEL, CREATE_INDEX, CREATE_MD5_FILE, GA4GH_CLIENT_SECRETS, MAX_ALLOWABLE_ONE_LINE_SUMMARY_LENGTH, MAX_RECORDS_IN_RAM, QUIET, REFERENCE_SEQUENCE, referenceSequence, specialArgumentsCollection, TMP_DIR, USE_JDK_DEFLATER, USE_JDK_INFLATER, VALIDATION_STRINGENCY, VERBOSITY
Constructor and Description |
---|
AbstractMarkDuplicatesCommandLineProgram() |
Modifier and Type | Method and Description |
---|---|
static void |
addDuplicateReadToMetrics(htsjdk.samtools.SAMRecord rec,
DuplicationMetrics metrics) |
static DuplicationMetrics |
addReadToLibraryMetrics(htsjdk.samtools.SAMRecord rec,
htsjdk.samtools.SAMFileHeader header,
LibraryIdGenerator libraryIdGenerator) |
static void |
addSingletonToCount(LibraryIdGenerator libraryIdGenerator) |
static void |
finalizeAndWriteMetrics(LibraryIdGenerator libraryIdGenerator,
htsjdk.samtools.metrics.MetricsFile<DuplicationMetrics,java.lang.Double> metricsFile,
java.io.File outputFile)
Writes the metrics given by the libraryIdGenerator to the outputFile.
|
protected java.util.Map<java.lang.String,java.lang.String> |
getChainedPgIds(htsjdk.samtools.SAMFileHeader outputHeader)
We have to re-chain the program groups based on this algorithm.
|
protected AbstractMarkDuplicatesCommandLineProgram.SamHeaderAndIterator |
openInputs(boolean eagerlyDecode)
Since this may read its inputs more than once this method does all the opening
and checking of the inputs.
|
static void |
trackOpticalDuplicates(java.util.List<? extends ReadEnds> ends,
ReadEnds keeper,
OpticalDuplicateFinder opticalDuplicateFinder,
LibraryIdGenerator libraryIdGenerator)
Looks through the set of reads and identifies how many of the duplicates are
in fact optical duplicates, and stores the data in the instance level histogram.
|
customCommandLineValidation, setupOpticalDuplicateFinder
doWork, getCommandLine, getCommandLineParser, getCommandLineParserForArgs, getDefaultHeaders, getFaqLink, getMetricsFile, getStandardUsagePreamble, getStandardUsagePreamble, getVersion, hasWebDocumentation, instanceMain, instanceMainWithExit, makeReferenceArgumentCollection, parseArgs, requiresReference, setDefaultHeaders, useLegacyParser
@ArgumentCollection protected final PGTagArgumentCollection pgTagArgumentCollection
@Argument(shortName="I", doc="One or more input SAM or BAM files to analyze. Must be coordinate sorted.") public java.util.List<java.lang.String> INPUT
@Argument(shortName="O", doc="The output file to write marked records to") public java.io.File OUTPUT
@Argument(shortName="M", doc="File to write duplication metrics to") public java.io.File METRICS_FILE
@Argument(doc="If true do not write duplicates to the output file instead of writing them with appropriate flags set.") public boolean REMOVE_DUPLICATES
@Deprecated @Argument(shortName="AS", doc="If true, assume that the input file is coordinate sorted even if the header says otherwise. Deprecated, used ASSUME_SORT_ORDER=coordinate instead.", mutex="ASSUME_SORT_ORDER") public boolean ASSUME_SORTED
@Argument(shortName="ASO", doc="If not null, assume that the input file has this order even if the header says otherwise.", optional=true, mutex="ASSUME_SORTED") public htsjdk.samtools.SAMFileHeader.SortOrder ASSUME_SORT_ORDER
@Argument(shortName="DS", doc="The scoring strategy for choosing the non-duplicate among candidates.") public htsjdk.samtools.DuplicateScoringStrategy.ScoringStrategy DUPLICATE_SCORING_STRATEGY
@Argument(shortName="PG", doc="The program record ID for the @PG record(s) created by this program. Set to null to disable PG record creation. This string may have a suffix appended to avoid collision with other program record IDs.", optional=true) public java.lang.String PROGRAM_RECORD_ID
@Argument(shortName="PG_VERSION", doc="Value of VN tag of PG record to be created. If not specified, the version will be detected automatically.", optional=true) public java.lang.String PROGRAM_GROUP_VERSION
@Argument(shortName="PG_COMMAND", doc="Value of CL tag of PG record to be created. If not supplied the command line will be detected automatically.", optional=true) public java.lang.String PROGRAM_GROUP_COMMAND_LINE
@Argument(shortName="PG_NAME", doc="Value of PN tag of PG record to be created.") public java.lang.String PROGRAM_GROUP_NAME
@Argument(shortName="CO", doc="Comment(s) to include in the output file\'s header.", optional=true) public java.util.List<java.lang.String> COMMENT
protected final java.util.Set<java.lang.String> pgIdsSeen
public AbstractMarkDuplicatesCommandLineProgram()
protected java.util.Map<java.lang.String,java.lang.String> getChainedPgIds(htsjdk.samtools.SAMFileHeader outputHeader)
public static void finalizeAndWriteMetrics(LibraryIdGenerator libraryIdGenerator, htsjdk.samtools.metrics.MetricsFile<DuplicationMetrics,java.lang.Double> metricsFile, java.io.File outputFile)
libraryIdGenerator - A LibraryIdGenerator object that contains the map from library to DuplicationMetrics for that library
metricsFile - An empty MetricsFile object that will be filled with "finalized" metrics and written out. It needs to be generated from a non-static context so that various commandline information is added to the header when CommandLineProgram.getMetricsFile() is called.
outputFile - The file to write the metrics to
public static DuplicationMetrics addReadToLibraryMetrics(htsjdk.samtools.SAMRecord rec, htsjdk.samtools.SAMFileHeader header, LibraryIdGenerator libraryIdGenerator)
public static void addDuplicateReadToMetrics(htsjdk.samtools.SAMRecord rec, DuplicationMetrics metrics)
protected AbstractMarkDuplicatesCommandLineProgram.SamHeaderAndIterator openInputs(boolean eagerlyDecode)
public static void trackOpticalDuplicates(java.util.List<? extends ReadEnds> ends, ReadEnds keeper, OpticalDuplicateFinder opticalDuplicateFinder, LibraryIdGenerator libraryIdGenerator)
public static void addSingletonToCount(LibraryIdGenerator libraryIdGenerator)