LocalTextSet

Instance Constructors

new LocalTextSet(array: Array[TextFeature])

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
def ->(transformer: Preprocessing[TextFeature, TextFeature]): TextSet

Definition Classes
TextSet
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
var array: Array[TextFeature]
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def generateSample(): TextSet

Generate BigDL Sample.
Generate BigDL Sample. See TextFeatureToSample for more details.

Definition Classes
TextSet
def generateWordIndexMap(removeTopN: Int = 0, maxWordsNum: Int = -1): Map[String, Int]

Generate wordIndex map based on sorted word frequencies in descending order.
Generate wordIndex map based on sorted word frequencies in descending order. Return the result map, which will also be stored in 'wordIndex'. Make sure you call this after tokenize. Otherwise you will get an exception. See word2idx for more details.

Definition Classes
LocalTextSet → TextSet
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getWordIndex: Map[String, Int]

Get the word index map of this TextSet.
Get the word index map of this TextSet. If the TextSet hasn't been transformed from word to index, null will be returned.

Definition Classes
TextSet
def hashCode(): Int

Definition Classes
AnyRef → Any
def isDistributed: Boolean

Whether it is a DistributedTextSet.
Whether it is a DistributedTextSet.

Definition Classes
LocalTextSet → TextSet
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def isLocal: Boolean

Whether it is a LocalTextSet.
Whether it is a LocalTextSet.

Definition Classes
LocalTextSet → TextSet
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def normalize(): TextSet

Do normalization on tokens.
Do normalization on tokens. See Normalizer for more details.

Definition Classes
TextSet
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def randomSplit(weights: Array[Double]): Array[TextSet]

Randomly split into array of TextSet with provided weights.
Randomly split into array of TextSet with provided weights. Only available for DistributedTextSet for now.
weights
Array of Double indicating the split portions.

Definition Classes
LocalTextSet → TextSet
def setWordIndex(map: Map[String, Int]): LocalTextSet.this.type

Definition Classes
TextSet
def shapeSequence(len: Int, truncMode: TruncMode = TruncMode.pre): TextSet

Shape the sequence of tokens to a fixed length.
Shape the sequence of tokens to a fixed length. Padding element will be "##". See SequenceShaper for more details.

Definition Classes
TextSet
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toDataSet: DataSet[Sample[Float]]

Convert TextSet to DataSet of Sample.
Convert TextSet to DataSet of Sample.

Definition Classes
LocalTextSet → TextSet
def toDistributed(sc: SparkContext, partitionNum: Int = 4): DistributedTextSet

Convert to a DistributedTextSet.
Convert to a DistributedTextSet.
Need to specify SparkContext to convert a LocalTextSet to a DistributedTextSet. In this case, you may also want to specify partitionNum, the default of which is 4.

Definition Classes
LocalTextSet → TextSet
def toLocal(): LocalTextSet

Convert to a LocalTextSet.
Convert to a LocalTextSet.

Definition Classes
LocalTextSet → TextSet
def toString(): String

Definition Classes
AnyRef → Any
def tokenize(): TextSet

Do tokenization on original text.
Do tokenization on original text. See Tokenizer for more details.

Definition Classes
TextSet
def transform(transformer: Preprocessing[TextFeature, TextFeature]): TextSet

Transform from one TextSet to another.
Transform from one TextSet to another.

Definition Classes
LocalTextSet → TextSet
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def word2idx(removeTopN: Int = 0, maxWordsNum: Int = -1): TextSet

Map word tokens to indices.
Map word tokens to indices. Indices start from 1 and correspond to the occurrence frequency of each word, sorted in descending order. See WordIndexer for more details. After word2idx, you can get the wordIndex map by calling 'getWordIndex'.
removeTopN
Integer. Remove the topN words with highest frequencies in the case where those are treated as stopwords. Default is 0, namely remove nothing.
maxWordsNum
Integer. The maximum number of words to be taken into consideration. Default is -1, namely all words will be considered.

Definition Classes
TextSet

class LocalTextSet extends TextSet

Instance Constructors

new LocalTextSet(array: Array[TextFeature])

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

def ->(transformer: Preprocessing[TextFeature, TextFeature]): TextSet

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

var array: Array[TextFeature]

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

def generateSample(): TextSet

def generateWordIndexMap(removeTopN: Int = 0, maxWordsNum: Int = -1): Map[String, Int]

final def getClass(): Class[_]

def getWordIndex: Map[String, Int]

def hashCode(): Int

def isDistributed: Boolean

final def isInstanceOf[T0]: Boolean

def isLocal: Boolean

final def ne(arg0: AnyRef): Boolean

def normalize(): TextSet

final def notify(): Unit

final def notifyAll(): Unit

def randomSplit(weights: Array[Double]): Array[TextSet]

def setWordIndex(map: Map[String, Int]): LocalTextSet.this.type

def shapeSequence(len: Int, truncMode: TruncMode = TruncMode.pre): TextSet

final def synchronized[T0](arg0: ⇒ T0): T0

def toDataSet: DataSet[Sample[Float]]

def toDistributed(sc: SparkContext, partitionNum: Int = 4): DistributedTextSet

def toLocal(): LocalTextSet

def toString(): String

def tokenize(): TextSet

def transform(transformer: Preprocessing[TextFeature, TextFeature]): TextSet

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

def word2idx(removeTopN: Int = 0, maxWordsNum: Int = -1): TextSet

Inherited from TextSet

Inherited from AnyRef

Inherited from Any

Ungrouped