```java
@DocumentedFeature
@BetaFeature
public class ReadsPipelineSpark
extends GATKSparkTool
```
Example usage on a Google Cloud Dataproc cluster:

```
gatk ReadsPipelineSpark \
  -I gs://my-gcs-bucket/aligned_reads.bam \
  -R gs://my-gcs-bucket/reference.fasta \
  --known-sites gs://my-gcs-bucket/sites_of_variation.vcf \
  -O gs://my-gcs-bucket/output.vcf \
  -- \
  --sparkRunner GCS \
  --cluster my-dataproc-cluster
```
To additionally align reads with BWA-MEM:
```
gatk ReadsPipelineSpark \
  -I gs://my-gcs-bucket/unaligned_reads.bam \
  -R gs://my-gcs-bucket/reference.fasta \
  --known-sites gs://my-gcs-bucket/sites_of_variation.vcf \
  --align \
  -O gs://my-gcs-bucket/output.vcf \
  -- \
  --sparkRunner GCS \
  --cluster my-dataproc-cluster
```
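The same invocation can also be driven from Java rather than the gatk wrapper script. The sketch below is illustrative, not taken from this page: it assumes the GATK jar is on the classpath, uses placeholder local paths, and goes through org.broadinstitute.hellbender.Main, GATK's command-line entry point.

```java
// Minimal sketch: launching ReadsPipelineSpark programmatically.
// The file paths are placeholders; adjust them for your environment.
public class RunReadsPipelineSpark {
    public static void main(String[] args) {
        String[] toolArgs = {
            "ReadsPipelineSpark",
            "-I", "aligned_reads.bam",                  // placeholder input BAM
            "-R", "reference.fasta",                    // placeholder reference
            "--known-sites", "sites_of_variation.vcf",  // placeholder known sites
            "-O", "output.vcf"                          // placeholder output VCF
        };
        // Dispatches to the named tool, as the gatk script does.
        org.broadinstitute.hellbender.Main.main(toolArgs);
    }
}
```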
Nested classes/interfaces inherited from class GATKSparkTool: GATKSparkTool.ReadInputMergingPolicy
| Modifier and Type | Field and Description |
|---|---|
| ApplyBQSRUniqueArgumentCollection | applyBqsrArgs: command-line arguments to fine-tune the ApplyBQSR step. |
| AssemblyRegionArgumentCollection | assemblyRegionArgs |
| BwaArgumentCollection | bwaArgs |
| HaplotypeCallerArgumentCollection | hcArgs |
| protected java.util.List<java.lang.String> | knownVariants |
| protected MarkDuplicatesSparkArgumentCollection | markDuplicatesSparkArgumentCollection |
| protected java.lang.String | output |
| protected java.lang.String | outputBam |
| AssemblyRegionReadShardArgumentCollection | shardingArgs |
| boolean | strict |
Fields inherited from class GATKSparkTool: addOutputVCFCommandLine, BAM_PARTITION_SIZE_LONG_NAME, bamPartitionSplitSize, CREATE_OUTPUT_BAM_SPLITTING_INDEX_LONG_NAME, createOutputBamIndex, createOutputBamSplittingIndex, createOutputVariantIndex, features, intervalArgumentCollection, NUM_REDUCERS_LONG_NAME, numReducers, OUTPUT_SHARD_DIR_LONG_NAME, readArguments, referenceArguments, sequenceDictionaryValidationArguments, SHARDED_OUTPUT_LONG_NAME, shardedOutput, shardedPartsDir, USE_NIO, useNio

Fields inherited from class SparkCommandLineProgram: programName, SPARK_PROGRAM_NAME_LONG_NAME, sparkArgs

Fields inherited from class CommandLineProgram: GATK_CONFIG_FILE, logger, NIO_MAX_REOPENS, NIO_PROJECT_FOR_REQUESTER_PAYS, QUIET, specialArgumentsCollection, tmpDir, useJdkDeflater, useJdkInflater, VERBOSITY
| Constructor and Description |
|---|
| ReadsPipelineSpark() |
| Modifier and Type | Method and Description |
|---|---|
| java.util.List<java.lang.Class<? extends Annotation>> | getDefaultVariantAnnotationGroups() |
| java.util.Collection<Annotation> | makeVariantAnnotations() |
| boolean | requiresReads(): Does this tool require reads? Tools that do should override to return true. |
| boolean | requiresReference(): Does this tool require reference data? Tools that do should override to return true. |
| protected void | runTool(org.apache.spark.api.java.JavaSparkContext ctx): Runs the tool itself after initializing and validating inputs. |
| boolean | useVariantAnnotations() |
| protected void | validateSequenceDictionaries(): Validates standard tool inputs against each other. |
Methods inherited from class GATKSparkTool: addReferenceFilesForSpark, addVCFsForSpark, editIntervals, getBestAvailableSequenceDictionary, getDefaultReadFilters, getDefaultToolVCFHeaderLines, getDefaultVariantAnnotations, getGatkReadJavaRDD, getHeaderForReads, getIntervals, getPluginDescriptors, getReadInputMergingPolicy, getReads, getReadSourceHeaderMap, getReadSourceName, getRecommendedNumReducers, getReference, getReferenceSequenceDictionary, getReferenceWindowFunction, getSequenceDictionaryValidationArgumentCollection, getTargetPartitionSize, getUnfilteredReads, hasReads, hasReference, hasUserSuppliedIntervals, makeReadFilter, makeReadFilter, requiresIntervals, runPipeline, writeReads, writeReads

Methods inherited from class SparkCommandLineProgram: afterPipeline, doWork, getProgramName

Methods inherited from class CommandLineProgram: customCommandLineValidation, getCommandLine, getCommandLineParser, getDefaultHeaders, getMetricsFile, getSupportInformation, getToolkitName, getToolkitShortName, getToolStatusWarning, getUsage, getVersion, instanceMain, instanceMainPostParseArgs, isBetaFeature, isExperimentalFeature, onShutdown, onStartup, parseArgs, printLibraryVersions, printSettings, printStartupMessage, runTool, setDefaultHeaders, warnOnToolStatus
@Argument(doc="the known variants", fullName="known-sites", optional=false) protected java.util.List<java.lang.String> knownVariants
@Argument(doc="the output vcf", shortName="O", fullName="output", optional=false) protected java.lang.String output
@Argument(doc="the output bam", fullName="output-bam", optional=true) protected java.lang.String outputBam
@ArgumentCollection protected MarkDuplicatesSparkArgumentCollection markDuplicatesSparkArgumentCollection
@ArgumentCollection public final BwaArgumentCollection bwaArgs
@ArgumentCollection public final AssemblyRegionReadShardArgumentCollection shardingArgs
@ArgumentCollection public final AssemblyRegionArgumentCollection assemblyRegionArgs
@ArgumentCollection public ApplyBQSRUniqueArgumentCollection applyBqsrArgs
@ArgumentCollection public HaplotypeCallerArgumentCollection hcArgs
@Argument(doc="whether to use the strict implementation or not (defaults to the faster implementation that doesn\'t strictly match the walker version)", fullName="strict", optional=true) public boolean strict
```java
public boolean requiresReads()
```

Description copied from class: GATKSparkTool
Does this tool require reads? Tools that do should override to return true.

Overrides: requiresReads in class GATKSparkTool
```java
public boolean requiresReference()
```

Description copied from class: GATKSparkTool
Does this tool require reference data? Tools that do should override to return true.

Overrides: requiresReference in class GATKSparkTool
```java
public boolean useVariantAnnotations()
```

Overrides: useVariantAnnotations in class GATKSparkTool
See Also: GATKTool.useVariantAnnotations()
```java
public java.util.List<java.lang.Class<? extends Annotation>> getDefaultVariantAnnotationGroups()
```

Overrides: getDefaultVariantAnnotationGroups in class GATKSparkTool
See Also: GATKTool.getDefaultVariantAnnotationGroups()
```java
public java.util.Collection<Annotation> makeVariantAnnotations()
```

Overrides: makeVariantAnnotations in class GATKSparkTool
See Also: GATKTool.makeVariantAnnotations()
```java
protected void validateSequenceDictionaries()
```

Description copied from class: GATKSparkTool
Validates standard tool inputs against each other.

Overrides: validateSequenceDictionaries in class GATKSparkTool
```java
protected void runTool(org.apache.spark.api.java.JavaSparkContext ctx)
```

Description copied from class: GATKSparkTool
Runs the tool itself after initializing and validating inputs.

Specified by: runTool in class GATKSparkTool
Parameters: ctx - our Spark context
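For contrast, the skeleton below shows the general GATKSparkTool contract that ReadsPipelineSpark implements through these methods: a subclass declares the inputs it needs via requiresReads() and requiresReference(), then does its work in runTool(ctx). This is an illustrative sketch, not GATK source; the class name and body are invented, while the overridden signatures, getReads(), and logger come from the members listed above.

```java
import org.apache.spark.api.java.JavaSparkContext;
import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;

// Hypothetical minimal Spark tool; a real tool would also carry annotations
// such as @CommandLineProgramProperties to be discoverable on the command line.
public class MyCountReadsSpark extends GATKSparkTool {
    private static final long serialVersionUID = 1L;

    @Override
    public boolean requiresReads() {
        return true;   // this tool needs read input (-I)
    }

    @Override
    public boolean requiresReference() {
        return false;  // counting reads needs no reference
    }

    @Override
    protected void runTool(final JavaSparkContext ctx) {
        // getReads() (inherited, listed above) exposes the input reads as an RDD.
        final long count = getReads().count();
        logger.info("Read count: " + count);
    }
}
```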