SplitByKeyRDD

Add splitByKey method to any paired RDD: returns a Map from each key (type K) to an RDD[V] with all the values that had that key in the original RDD (in arbitrary order).

The resulting per-key RDDs have been shuffled to actually be separated from each other on disk, allowing subsequent operations to only have to traverse the values corresponding to a given key (as opposed to naive approach that called RDD.filter on the entire original RDD once for each key).

Linear Supertypes

AnyRef, Any

Instance Constructors

new SplitByKeyRDD(rdd: RDD[(K, V)])(implicit arg0: ClassTag[K], arg1: ClassTag[V])

rdd
Paired RDD to split up by key.

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def splitByKey(keyCounts: Map[K, Long]): Map[K, RDD[V]]
def splitByKey(): Map[K, RDD[V]]
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

class SplitByKeyRDD[K, V] extends AnyRef

Instance Constructors

new SplitByKeyRDD(rdd: RDD[(K, V)])(implicit arg0: ClassTag[K], arg1: ClassTag[V])

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def splitByKey(keyCounts: Map[K, Long]): Map[K, RDD[V]]

def splitByKey(): Map[K, RDD[V]]

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from AnyRef

Inherited from Any

Ungrouped