public class DataSet extends Object implements DataSet
Constructor and Description |
---|
DataSet() |
DataSet(INDArray first,
INDArray second)
Creates a dataset with the specified input matrix and labels
|
Modifier and Type | Method and Description |
---|---|
void |
addFeatureVector(INDArray toAdd)
Adds a feature for each example on to the current feature vector
|
void |
addFeatureVector(INDArray feature,
int example)
The feature to add, and the example/row number
|
void |
addRow(DataSet d,
int i) |
void |
apply(Condition condition,
com.google.common.base.Function<Number,Number> function) |
List<DataSet> |
asList() |
List<List<DataSet>> |
batchBy(int num)
Partitions a dataset in to mini batches where
each dataset in each list is of the specified number of examples
|
List<List<DataSet>> |
batchByNumLabels() |
void |
binarize()
Same as calling binarize(0)
|
void |
binarize(double cutoff)
Binarizes the dataset such that any number greater than cutoff is 1 otherwise zero
|
DataSet |
copy()
Clone the dataset
|
List<DataSet> |
dataSetBatches(int num)
Partitions the data set by the specified number.
|
void |
divideBy(int num) |
static DataSet |
empty()
Returns a single dataset
|
INDArray |
exampleMaxs() |
INDArray |
exampleMeans() |
INDArray |
exampleSums() |
void |
filterAndStrip(int[] labels)
Strips the dataset down to the specified labels
and remaps them
|
DataSet |
filterBy(int[] labels)
Strips the data set of all but the passed in labels
|
DataSet |
get(int i)
Gets a copy of example i
|
DataSet |
get(int[] i)
Gets a copy of example i
|
List<String> |
getColumnNames()
Optional column names of the data set; this is mainly used
for interpreting what columns are in the dataset
|
INDArray |
getFeatureMatrix()
Get the feature matrix (inputs for the data)
|
INDArray |
getFeatures() |
List<String> |
getLabelNames()
Gets the optional label names
|
INDArray |
getLabels()
Returns the labels for the dataset
|
Iterator<DataSet> |
iterator() |
Map<Integer,Double> |
labelCounts() |
static DataSet |
merge(List<DataSet> data)
Merge the list of datasets in to one list.
|
void |
multiplyBy(double num) |
void |
normalize() |
void |
normalizeZeroMeanZeroUnitVariance()
Subtract by the column means and divide by the standard deviation
|
int |
numExamples() |
int |
numInputs()
The number of inputs in the feature matrix
|
int |
numOutcomes() |
int |
outcome() |
DataSet |
reshape(int rows,
int cols)
Reshapes the input in to the given rows and columns
|
void |
roundToTheNearest(int roundTo) |
DataSet |
sample(int numSamples)
Sample without replacement and a random rng
|
DataSet |
sample(int numSamples,
boolean withReplacement)
Sample a dataset numSamples times
|
DataSet |
sample(int numSamples,
Random rng)
Sample without replacement
|
DataSet |
sample(int numSamples,
Random rng,
boolean withReplacement)
Sample a dataset
|
void |
scale()
Divides the input data set by the max number in each row
|
void |
scaleMinAndMax(double min,
double max) |
void |
setColumnNames(List<String> columnNames)
Sets the column names, will throw an exception if the column names
don't match the number of columns
|
void |
setFeatures(INDArray features) |
void |
setLabelNames(List<String> labelNames)
Sets the label names, will throw an exception if the passed
in label names doesn't equal the number of outcomes
|
void |
setLabels(INDArray labels) |
void |
setNewNumberOfLabels(int labels)
Clears the outcome matrix setting a new number of labels
|
void |
setOutcome(int example,
int label)
Sets the outcome of a particular example
|
void |
shuffle() |
List<List<DataSet>> |
sortAndBatchByNumLabels()
Sorts the dataset by label:
Splits the data set such that examples are sorted by their labels.
|
void |
sortByLabel()
Organizes the dataset to minimize sampling error
while still allowing efficient batching.
|
SplitTestAndTrain |
splitTestAndTrain(int numHoldout)
Splits a dataset in to test and train
|
void |
squishToRange(double min,
double max)
Squeezes input data to a max and a min
|
String |
toString() |
void |
validate() |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
forEach, spliterator
public DataSet()
public DataSet(INDArray first, INDArray second)
first
- the feature matrix
second
- the labels (these should be binarized label matrices such that the specified label
has a value of 1 in the desired column with the label)
public static DataSet empty()
public static DataSet merge(List<DataSet> data)
data
- the data to merge
public INDArray getFeatures()
getFeatures
in interface DataSet
public void setFeatures(INDArray features)
setFeatures
in interface DataSet
public Map<Integer,Double> labelCounts()
labelCounts
in interface DataSet
public void apply(Condition condition, com.google.common.base.Function<Number,Number> function)
public DataSet copy()
public DataSet reshape(int rows, int cols)
public void multiplyBy(double num)
multiplyBy
in interface DataSet
public void squishToRange(double min, double max)
squishToRange
in interface DataSet
min
- the min value to occur in the dataset
max
- the max value to occur in the dataset
public void scaleMinAndMax(double min, double max)
scaleMinAndMax
in interface DataSet
public void scale()
public void addFeatureVector(INDArray toAdd)
addFeatureVector
in interface DataSet
toAdd
- the feature vector to add
public void addFeatureVector(INDArray feature, int example)
addFeatureVector
in interface DataSet
feature
- the feature vector to add
example
- the number of the example to append to
public void binarize()
public void binarize(double cutoff)
public void normalizeZeroMeanZeroUnitVariance()
normalizeZeroMeanZeroUnitVariance
in interface DataSet
public int numInputs()
public void setNewNumberOfLabels(int labels)
setNewNumberOfLabels
in interface DataSet
labels
- the number of labels/columns in the outcome matrix
Note that this clears the labels for each example
public void setOutcome(int example, int label)
setOutcome
in interface DataSet
example
- the example to set
label
- the label of the outcome
public DataSet get(int i)
public DataSet get(int[] i)
public List<List<DataSet>> batchBy(int num)
public DataSet filterBy(int[] labels)
public void filterAndStrip(int[] labels)
filterAndStrip
in interface DataSet
labels
- the labels to strip down to
public List<DataSet> dataSetBatches(int num)
dataSetBatches
in interface DataSet
num
- the number to split by
public List<List<DataSet>> sortAndBatchByNumLabels()
sortAndBatchByNumLabels
in interface DataSet
public List<List<DataSet>> batchByNumLabels()
batchByNumLabels
in interface DataSet
public SplitTestAndTrain splitTestAndTrain(int numHoldout)
splitTestAndTrain
in interface DataSet
numHoldout
- the number to hold out for training
public INDArray getLabels()
public INDArray getFeatureMatrix()
getFeatureMatrix
in interface DataSet
public void sortByLabel()
sortByLabel
in interface DataSet
public INDArray exampleSums()
exampleSums
in interface DataSet
public INDArray exampleMaxs()
exampleMaxs
in interface DataSet
public INDArray exampleMeans()
exampleMeans
in interface DataSet
public DataSet sample(int numSamples)
public DataSet sample(int numSamples, boolean withReplacement)
public void roundToTheNearest(int roundTo)
roundToTheNearest
in interface DataSet
public int numOutcomes()
numOutcomes
in interface DataSet
public int numExamples()
numExamples
in interface DataSet
public List<String> getLabelNames()
getLabelNames
in interface DataSet
public void setLabelNames(List<String> labelNames)
setLabelNames
in interface DataSet
labelNames
- the label names to use
public List<String> getColumnNames()
getColumnNames
in interface DataSet
public void setColumnNames(List<String> columnNames)
setColumnNames
in interface DataSet
columnNames
- Copyright © 2015. All Rights Reserved.