Job

Instance Constructors

new Job(args: Args)

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
def anyToFieldArg(f: Any): Comparable[_]

Attributes
protected
Definition Classes
LowPriorityFieldConversions
val args: Args
final def asInstanceOf[T0]: T0

Definition Classes
Any
def asList(f: Fields): List[Comparable[_]]

Definition Classes
FieldConversions
def asSet(f: Fields): Set[Comparable[_]]

Definition Classes
FieldConversions
def buildFlow: Flow[_]

combine the config, flowDef and the Mode to produce a flow
def classIdentifier: String
def clear: Unit
def clone(nextargs: Args): Job

Copy this job By default, this uses reflection and the single argument Args constructor
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def config: Map[AnyRef, AnyRef]

This is the exact config that is passed to the Cascading FlowConnector.
This is the exact config that is passed to the Cascading FlowConnector. By default: if there are no spill thresholds in mode.config, we replace with defaultSpillThreshold we overwrite io.serializations with ioSerializations we overwrite cascading.tuple.element.comparator.default to defaultComparator we add some scalding keys for debugging/logging
Tip: override this method, call super, and ++ your additional map to add or overwrite more options
This returns Map[AnyRef, AnyRef] for compatibility with older code
implicit def dateParser: DateParser

Override this to control how dates are parsed
def defaultComparator: Option[Class[_ <: Comparator[_]]]

Override this if you want to customize comparisons/hashing for your job the config method overwrites using this before sending to cascading The one we use by default is needed used to make Joins in the Fields-API more robust to Long vs Int differences.
Override this if you want to customize comparisons/hashing for your job the config method overwrites using this before sending to cascading The one we use by default is needed used to make Joins in the Fields-API more robust to Long vs Int differences. If you only use the Typed-API, consider changing this to return None
def defaultMode(fromFields: Fields, toFields: Fields): Fields

Rather than give the full power of cascading's selectors, we have a simpler set of rules encoded below: 1) if the input is non-definite (ALL, GROUP, ARGS, etc.
Rather than give the full power of cascading's selectors, we have a simpler set of rules encoded below: 1) if the input is non-definite (ALL, GROUP, ARGS, etc...) ALL is the output. Perhaps only fromFields=ALL will make sense 2) If one of from or to is a strict super set of the other, SWAP is used. 3) If they are equal, REPLACE is used. 4) Otherwise, ALL is used.

Definition Classes
FieldConversions
def defaultSpillThreshold: Int

Keep 100k tuples in memory by default before spilling Turn this up as high as you can without getting OOM.
Keep 100k tuples in memory by default before spilling Turn this up as high as you can without getting OOM.
This is ignored if there is a value set in the incoming jobConf on Hadoop
final def ensureUniqueFields(left: Fields, right: Fields, rightPipe: Pipe): (Fields, Pipe)

Definition Classes
FieldConversions
implicit def enumValueToFields(x: Value): Fields

Definition Classes
FieldConversions
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
implicit def fieldFields[T <: TraversableOnce[Field[_]]](f: T): RichFields

Definition Classes
FieldConversions
implicit def fieldToFields(f: Field[_]): RichFields

Definition Classes
FieldConversions
implicit def fields[T <: TraversableOnce[Symbol]](f: T): Fields

Definition Classes
FieldConversions
implicit def fieldsToRichFields(fields: Fields): RichFields

We can't set the field Manifests because cascading doesn't (yet) expose field type information in the Fields API.
We can't set the field Manifests because cascading doesn't (yet) expose field type information in the Fields API.

Definition Classes
FieldConversions
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
implicit val flowDef: FlowDef

Attributes
protected
implicit def fromEnum[T <: Enumeration](enumeration: T): Fields

Multi-entry fields.
Multi-entry fields. This are higher priority than Product conversions so that List will not conflict with Product.

Definition Classes
FieldConversions
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getField(f: Fields, idx: Int): Fields

Definition Classes
FieldConversions
def handleStats(statsData: CascadingStats): Unit

Attributes
protected
def hasInts(f: Fields): Boolean

Definition Classes
FieldConversions
def hashCode(): Int

Definition Classes
AnyRef → Any
implicit def intFields[T <: TraversableOnce[Int]](f: T): Fields

Definition Classes
FieldConversions
implicit def intToFields(x: Int): Fields

Definition Classes
FieldConversions
implicit def integerToFields(x: Integer): Fields

Definition Classes
FieldConversions
def ioSerializations: List[Class[_ <: Serialization[_]]]

These are user-defined serializations IN-ADDITION to (but deduped) with the required serializations
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
implicit def iterableToRichPipe[T](iter: Iterable[T])(implicit set: TupleSetter[T], conv: TupleConverter[T]): RichPipe
def keepAlive: Unit

Use this if a map or reduce phase takes a while before emitting tuples.
def listeners: List[FlowListener]
implicit def mode: Mode
def name: String
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def newSymbol(avoid: Set[Symbol], guess: Symbol, trial: Int = 0): Symbol

Definition Classes
FieldConversions
Annotations
@tailrec()
def next: Option[Job]

Implement this method if you want some other jobs to run after the current job.
Implement this method if you want some other jobs to run after the current job. These will not execute until the current job has run successfully.
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
implicit def parseAnySeqToFields[T <: TraversableOnce[Any]](anyf: T): Fields

Useful to convert f : Any* to Fields.
Useful to convert f : Any* to Fields. This handles mixed cases ("hey", 'you). Not sure we should be this flexible, but given that Cascading will throw an exception before scheduling the job, I guess this is okay.

Definition Classes
FieldConversions
implicit def pipeToRichPipe(pipe: Pipe): RichPipe

you should never call this directly, it is here to make the DSL work.
you should never call this directly, it is here to make the DSL work. Just know, you can treat a Pipe as a RichPipe within a Job
implicit def productToFields(f: Product): Fields

Handles treating any TupleN as a Fields object.
Handles treating any TupleN as a Fields object. This is low priority because List is also a Product, but this method will not work for List (because List is Product2(head, tail) and so productIterator won't work as expected. Lists are handled by an implicit in FieldConversions, which have higher priority.

Definition Classes
LowPriorityFieldConversions
implicit def read(src: Source): Pipe

This is implicit so that a Source can be used as the argument to a join or other method that accepts Pipe.
def run: Boolean
implicit def scaldingConfig: Config

This is here so that Mappable.
This is here so that Mappable.toIterator can find an implicit config

Attributes
protected
def skipStrategy: Option[FlowSkipStrategy]
implicit def sourceToRichPipe(src: Source): RichPipe

This implicit is to enable RichPipe methods directly on Source objects, such as map/flatMap, etc.
This implicit is to enable RichPipe methods directly on Source objects, such as map/flatMap, etc...
Note that Mappable is a subclass of Source, and Mappable already has mapTo and flatMapTo BUT WITHOUT incoming fields used (see the Mappable trait). This creates some confusion when using these methods (this is an unfortunate mistake in our design that was not noticed until later). To remove ambiguity, explicitly call .read on any Source that you begin operating with a mapTo/flatMapTo.
def stepListeners: List[FlowStepListener]
def stepStrategy: Option[FlowStepStrategy[_]]

Specify a callback to run before the start of each flow step.
Specify a callback to run before the start of each flow step.
Defaults to what Config.getReducerEstimator specifies.

See also
ExecutionContext.buildFlow
implicit def strFields[T <: TraversableOnce[String]](f: T): Fields

Definition Classes
FieldConversions
implicit def stringToFields(x: String): Fields

Definition Classes
FieldConversions
implicit def symbolToFields(x: Symbol): Fields

'* means Fields.
'* means Fields.ALL, otherwise we take the .name

Definition Classes
FieldConversions
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def timeout[T](timeout: AbsoluteDuration)(t: ⇒ T): Option[T]
implicit def toPipe[T](iter: Iterable[T])(implicit set: TupleSetter[T], conv: TupleConverter[T]): Pipe
def toString(): String

Definition Classes
AnyRef → Any
implicit def tuple2ToFieldsPair[T, U](pair: (T, U))(implicit tf: (T) ⇒ Fields, uf: (U) ⇒ Fields): (Fields, Fields)

Definition Classes
FieldConversions
implicit def unitToFields(u: Unit): Fields

Definition Classes
FieldConversions
def validate: Unit
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def write(pipe: Pipe, src: Source): Unit

This is only here for Java jobs which cannot automatically access the implicit Pipe => RichPipe which makes: pipe.
This is only here for Java jobs which cannot automatically access the implicit Pipe => RichPipe which makes: pipe.write( ) convenient

class Job extends FieldConversions with Serializable

Instance Constructors

new Job(args: Args)

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

def anyToFieldArg(f: Any): Comparable[_]

val args: Args

final def asInstanceOf[T0]: T0

def asList(f: Fields): List[Comparable[_]]

def asSet(f: Fields): Set[Comparable[_]]

def buildFlow: Flow[_]

def classIdentifier: String

def clear: Unit

def clone(nextargs: Args): Job

def clone(): AnyRef

def config: Map[AnyRef, AnyRef]

implicit def dateParser: DateParser

def defaultComparator: Option[Class[_ <: Comparator[_]]]

def defaultMode(fromFields: Fields, toFields: Fields): Fields

def defaultSpillThreshold: Int

final def ensureUniqueFields(left: Fields, right: Fields, rightPipe: Pipe): (Fields, Pipe)

implicit def enumValueToFields(x: Value): Fields

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

implicit def fieldFields[T <: TraversableOnce[Field[_]]](f: T): RichFields

implicit def fieldToFields(f: Field[_]): RichFields

implicit def fields[T <: TraversableOnce[Symbol]](f: T): Fields

implicit def fieldsToRichFields(fields: Fields): RichFields

def finalize(): Unit

implicit val flowDef: FlowDef

implicit def fromEnum[T <: Enumeration](enumeration: T): Fields

final def getClass(): Class[_]

def getField(f: Fields, idx: Int): Fields

def handleStats(statsData: CascadingStats): Unit

def hasInts(f: Fields): Boolean

def hashCode(): Int

implicit def intFields[T <: TraversableOnce[Int]](f: T): Fields

implicit def intToFields(x: Int): Fields

implicit def integerToFields(x: Integer): Fields

def ioSerializations: List[Class[_ <: Serialization[_]]]

final def isInstanceOf[T0]: Boolean

implicit def iterableToRichPipe[T](iter: Iterable[T])(implicit set: TupleSetter[T], conv: TupleConverter[T]): RichPipe

def keepAlive: Unit

def listeners: List[FlowListener]

implicit def mode: Mode

def name: String

final def ne(arg0: AnyRef): Boolean

final def newSymbol(avoid: Set[Symbol], guess: Symbol, trial: Int = 0): Symbol

def next: Option[Job]

final def notify(): Unit

final def notifyAll(): Unit

implicit def parseAnySeqToFields[T <: TraversableOnce[Any]](anyf: T): Fields

implicit def pipeToRichPipe(pipe: Pipe): RichPipe

implicit def productToFields(f: Product): Fields

implicit def read(src: Source): Pipe

def run: Boolean

implicit def scaldingConfig: Config

def skipStrategy: Option[FlowSkipStrategy]

implicit def sourceToRichPipe(src: Source): RichPipe

def stepListeners: List[FlowStepListener]

def stepStrategy: Option[FlowStepStrategy[_]]

implicit def strFields[T <: TraversableOnce[String]](f: T): Fields

implicit def stringToFields(x: String): Fields

implicit def symbolToFields(x: Symbol): Fields

final def synchronized[T0](arg0: ⇒ T0): T0

def timeout[T](timeout: AbsoluteDuration)(t: ⇒ T): Option[T]

implicit def toPipe[T](iter: Iterable[T])(implicit set: TupleSetter[T], conv: TupleConverter[T]): Pipe

def toString(): String

implicit def tuple2ToFieldsPair[T, U](pair: (T, U))(implicit tf: (T) ⇒ Fields, uf: (U) ⇒ Fields): (Fields, Fields)

implicit def unitToFields(u: Unit): Fields

def validate: Unit

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

def write(pipe: Pipe, src: Source): Unit