case class LeftOuterShuffleRegionJoin[T, U](leftRdd: RDD[(ReferenceRegion, T)], rightRdd: RDD[(ReferenceRegion, U)])(implicit evidence$7: ClassTag[T], evidence$8: ClassTag[U]) extends ShuffleRegionJoin[T, U, T, Option[U]] with VictimlessSortedIntervalPartitionJoin[T, U, T, Option[U]] with Product with Serializable
- Alphabetic
- By Inheritance
- LeftOuterShuffleRegionJoin
- Product
- Equals
- VictimlessSortedIntervalPartitionJoin
- ShuffleRegionJoin
- RegionJoin
- Serializable
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- new LeftOuterShuffleRegionJoin(leftRdd: RDD[(ReferenceRegion, T)], rightRdd: RDD[(ReferenceRegion, U)])(implicit arg0: ClassTag[T], arg1: ClassTag[U])
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
advanceCache(cache: SetTheoryCache[U, T, Option[U]], right: BufferedIterator[(ReferenceRegion, U)], until: ReferenceRegion): Unit
Adds elements from right to cache based on the next region encountered.
Adds elements from right to cache based on the next region encountered.
- cache
The cache for this partition.
- right
The right iterator.
- until
The next region to join with.
- Attributes
- protected
- Definition Classes
- VictimlessSortedIntervalPartitionJoin → ShuffleRegionJoin
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )
-
def
compute(): RDD[(T, Option[U])]
Performs a region join between two RDDs (shuffle join).
Performs a region join between two RDDs (shuffle join). All data should be pre-shuffled and copartitioned.
- returns
An RDD of joined pairs (x, y), where x is from leftRdd, y is from rightRdd, and the region corresponding to x overlaps the region corresponding to y.
- Definition Classes
- ShuffleRegionJoin
-
def
emptyFn(left: Iterator[(ReferenceRegion, T)], right: Iterator[(ReferenceRegion, U)]): Iterator[(T, Option[U])]
Handles the case where the left or the right iterator is empty.
Handles the case where the left or the right iterator is empty.
- left
The left iterator.
- right
The right iterator.
- returns
The iterator containing properly formatted tuples.
- Attributes
- protected
- Definition Classes
- LeftOuterShuffleRegionJoin → ShuffleRegionJoin
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
finalize(): Unit
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
finalizeHits(cache: SetTheoryCache[U, T, Option[U]], right: BufferedIterator[(ReferenceRegion, U)]): Iterable[(T, Option[U])]
Computes all victims for the partition.
Computes all victims for the partition. NOTE: These are victimless joins so we have no victims.
- cache
The cache for this partition.
- right
The right iterator.
- returns
An empty iterable.
- Attributes
- protected
- Definition Classes
- VictimlessSortedIntervalPartitionJoin → ShuffleRegionJoin
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
val
leftRdd: RDD[(ReferenceRegion, T)]
- Definition Classes
- LeftOuterShuffleRegionJoin → ShuffleRegionJoin
-
def
makeIterator(leftIter: Iterator[(ReferenceRegion, T)], rightIter: Iterator[(ReferenceRegion, U)]): Iterator[(T, Option[U])]
- Attributes
- protected
- Definition Classes
- ShuffleRegionJoin
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
partitionAndJoin(left: RDD[(ReferenceRegion, T)], right: RDD[(ReferenceRegion, U)]): RDD[(T, Option[U])]
Performs a region join between two RDDs.
Performs a region join between two RDDs.
- returns
An RDD of pairs (x, y), where x is from left, y is from right, and the region corresponding to x overlaps the region corresponding to y.
- Definition Classes
- ShuffleRegionJoin → RegionJoin
-
def
postProcessHits(iter: Iterable[U], currentLeft: T): Iterable[(T, Option[U])]
Computes post processing required to complete the join and properly format hits.
Computes post processing required to complete the join and properly format hits.
- iter
The iterator of hits.
- currentLeft
The current left value.
- returns
The post-processed iterable of hits.
- Attributes
- protected
- Definition Classes
- LeftOuterShuffleRegionJoin → ShuffleRegionJoin
-
def
processHits(cache: SetTheoryCache[U, T, Option[U]], currentLeft: T, currentLeftRegion: ReferenceRegion): Iterable[(T, Option[U])]
Process hits for a given object in left.
Process hits for a given object in left.
- cache
The cache containing potential hits.
- currentLeft
The current object from the left.
- currentLeftRegion
The ReferenceRegion of currentLeft.
- returns
An iterator containing all hits, formatted by postProcessHits.
- Attributes
- protected
- Definition Classes
- ShuffleRegionJoin
-
def
pruneCache(cache: SetTheoryCache[U, T, Option[U]], to: ReferenceRegion): Unit
Removes elements from cache in place that do not meet the condition for the next region.
Removes elements from cache in place that do not meet the condition for the next region.
- cache
The cache for this partition.
- to
The next region in the left iterator.
- Attributes
- protected
- Definition Classes
- VictimlessSortedIntervalPartitionJoin → ShuffleRegionJoin
- Note
At one point these were all variables and we built new collections and reassigned the pointers every time. We fixed this by using trimStart() and ++=() to improve performance. Overall, we see roughly 25% improvement in runtime by doing things this way.
-
val
rightRdd: RDD[(ReferenceRegion, U)]
- Definition Classes
- LeftOuterShuffleRegionJoin → ShuffleRegionJoin
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )