rdd

Type Members

class ADAMContext extends Serializable with Logging
trait ADAMSaveAnyArgs extends SaveArgs
abstract class AvroGenomicRDD[T, U <: AvroGenomicRDD[T, U]] extends ADAMRDDFunctions[T] with GenomicRDD[T, U]
abstract class AvroReadGroupGenomicRDD[T, U <: AvroReadGroupGenomicRDD[T, U]] extends AvroGenomicRDD[T, U]
sealed trait BroadcastRegionJoin[T, U, RT] extends RegionJoin[T, U, RT, U]

Contains multiple implementations of a 'region join', an operation that joins two sets of regions based on the spatial overlap between the regions.
Different implementations will have different performance characteristics, and new implementations will likely be added in the future; see the notes on each individual method for more details.
case class FullOuterShuffleRegionJoin[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, Option[T], Option[U]] with Product with Serializable

Extends the ShuffleRegionJoin trait to implement a full outer join.
case class GenomeBins(binSize: Long, seqLengths: Map[String, Long]) extends Serializable with Product

Partition a genome into a set of bins.
Note that this class will not tolerate invalid input, so filter in advance if you use it.
binSize
The size of each bin in nucleotides
seqLengths
A map containing the length of each contig
case class GenomicPositionPartitioner(numParts: Int, seqLengths: Map[String, Long]) extends Partitioner with Logging with Product with Serializable

GenomicPositionPartitioner partitions ReferencePosition objects into separate, spatially coherent regions of the genome.
This can be used to organize genomic data for computation that is spatially distributed (e.g. GATK and Queue's "scatter-and-gather" for locus-parallelizable walkers).
numParts
The number of equally-sized regions into which the total genomic space is partitioned; the total number of partitions is numParts + 1, with the "+1" resulting from one extra partition that is used to capture null or UNMAPPED values of the ReferencePosition type.
seqLengths
A map relating each sequence name to its length, indicating the set and lengths of all extant sequences in the genome.
trait GenomicRDD[T, U <: GenomicRDD[T, U]] extends AnyRef
case class GenomicRegionPartitioner(partitionSize: Long, seqLengths: Map[String, Long], start: Boolean = true) extends Partitioner with Logging with Product with Serializable
case class InnerBroadcastRegionJoin[T, U]() extends BroadcastRegionJoin[T, U, T] with Product with Serializable

Extends the BroadcastRegionJoin trait to implement an inner join.
case class InnerShuffleRegionJoin[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, T, U] with Product with Serializable

Extends the ShuffleRegionJoin trait to implement an inner join.
case class InnerShuffleRegionJoinAndGroupByLeft[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, T, Iterable[U]] with Product with Serializable

Extends the ShuffleRegionJoin trait to implement an inner join followed by grouping by the left value.
case class LeftOuterShuffleRegionJoin[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, T, Option[U]] with Product with Serializable

Extends the ShuffleRegionJoin trait to implement a left outer join.
abstract class MultisampleAvroGenomicRDD[T, U <: MultisampleAvroGenomicRDD[T, U]] extends AvroGenomicRDD[T, U] with MultisampleGenomicRDD[T, U]
trait MultisampleGenomicRDD[T, U <: MultisampleGenomicRDD[T, U]] extends GenomicRDD[T, U]
case class ReferencePartitioner(sd: SequenceDictionary) extends Partitioner with Product with Serializable

Repartitions objects that are keyed by a ReferencePosition or ReferenceRegion into a single partition per contig.
trait RegionJoin[T, U, RT, RU] extends AnyRef
case class RightOuterBroadcastRegionJoin[T, U]() extends BroadcastRegionJoin[T, U, Option[T]] with Product with Serializable

Extends the BroadcastRegionJoin trait to implement a right outer join.
case class RightOuterShuffleRegionJoin[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, Option[T], U] with Product with Serializable

Extends the ShuffleRegionJoin trait to implement a right outer join.
case class RightOuterShuffleRegionJoinAndGroupByLeft[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, Option[T], Iterable[U]] with Product with Serializable

Extends the ShuffleRegionJoin trait to implement a right outer join followed by grouping by all non-null left values.
sealed trait ShuffleRegionJoin[T, U, RT, RU] extends RegionJoin[T, U, RT, RU]
trait Unaligned extends AnyRef

Value Members

object ADAMContext extends Serializable
object FileMerger extends Logging

Helper object to merge sharded files together.
object GenomicPositionPartitioner extends Serializable
object GenomicRegionPartitioner extends Serializable
package contig
package features
package fragment
package read
package variation

package rdd

Type Members

class ADAMContext extends Serializable with Logging

trait ADAMSaveAnyArgs extends SaveArgs

abstract class AvroGenomicRDD[T, U <: AvroGenomicRDD[T, U]] extends ADAMRDDFunctions[T] with GenomicRDD[T, U]

abstract class AvroReadGroupGenomicRDD[T, U <: AvroReadGroupGenomicRDD[T, U]] extends AvroGenomicRDD[T, U]

sealed trait BroadcastRegionJoin[T, U, RT] extends RegionJoin[T, U, RT, U]

case class FullOuterShuffleRegionJoin[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, Option[T], Option[U]] with Product with Serializable

case class GenomeBins(binSize: Long, seqLengths: Map[String, Long]) extends Serializable with Product

case class GenomicPositionPartitioner(numParts: Int, seqLengths: Map[String, Long]) extends Partitioner with Logging with Product with Serializable

trait GenomicRDD[T, U <: GenomicRDD[T, U]] extends AnyRef

case class GenomicRegionPartitioner(partitionSize: Long, seqLengths: Map[String, Long], start: Boolean = true) extends Partitioner with Logging with Product with Serializable

case class InnerBroadcastRegionJoin[T, U]() extends BroadcastRegionJoin[T, U, T] with Product with Serializable

case class InnerShuffleRegionJoin[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, T, U] with Product with Serializable

case class InnerShuffleRegionJoinAndGroupByLeft[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, T, Iterable[U]] with Product with Serializable

case class LeftOuterShuffleRegionJoin[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, T, Option[U]] with Product with Serializable

abstract class MultisampleAvroGenomicRDD[T, U <: MultisampleAvroGenomicRDD[T, U]] extends AvroGenomicRDD[T, U] with MultisampleGenomicRDD[T, U]

trait MultisampleGenomicRDD[T, U <: MultisampleGenomicRDD[T, U]] extends GenomicRDD[T, U]

case class ReferencePartitioner(sd: SequenceDictionary) extends Partitioner with Product with Serializable

trait RegionJoin[T, U, RT, RU] extends AnyRef

case class RightOuterBroadcastRegionJoin[T, U]() extends BroadcastRegionJoin[T, U, Option[T]] with Product with Serializable

case class RightOuterShuffleRegionJoin[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, Option[T], U] with Product with Serializable

case class RightOuterShuffleRegionJoinAndGroupByLeft[T, U](sd: SequenceDictionary, partitionSize: Long, sc: SparkContext) extends ShuffleRegionJoin[T, U, Option[T], Iterable[U]] with Product with Serializable

sealed trait ShuffleRegionJoin[T, U, RT, RU] extends RegionJoin[T, U, RT, RU]

trait Unaligned extends AnyRef

Value Members

object ADAMContext extends Serializable

object FileMerger extends Logging

object GenomicPositionPartitioner extends Serializable

object GenomicRegionPartitioner extends Serializable

package contig

package features

package fragment

package read

package variation

Ungrouped