|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.apache.hadoop.mapreduce.InputFormat<Key,Value> org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat
public class AccumuloInputFormat
This class allows MapReduce jobs to use Accumulo as the source of data. This input format provides keys and values of type Key and Value to the Map() and Reduce() functions. The user must specify the following via static methods:
Nested Class Summary | |
---|---|
static class |
AccumuloInputFormat.RangeInputSplit
The Class RangeInputSplit. |
static class |
AccumuloInputFormat.RegexType
|
Constructor Summary | |
---|---|
AccumuloInputFormat()
|
Method Summary | |
---|---|
org.apache.hadoop.mapreduce.RecordReader<Key,Value> |
createRecordReader(org.apache.hadoop.mapreduce.InputSplit inSplit,
org.apache.hadoop.mapreduce.TaskAttemptContext attempt)
|
static void |
disableAutoAdjustRanges(org.apache.hadoop.mapreduce.JobContext job)
|
static void |
fetchColumns(org.apache.hadoop.mapreduce.JobContext job,
java.util.Collection<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> columnFamilyColumnQualifierPairs)
|
protected static Authorizations |
getAuthorizations(org.apache.hadoop.mapreduce.JobContext job)
|
protected static boolean |
getAutoAdjustRanges(org.apache.hadoop.mapreduce.JobContext job)
|
protected static java.util.Set<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> |
getFetchedColumns(org.apache.hadoop.mapreduce.JobContext job)
|
protected static Instance |
getInstance(org.apache.hadoop.mapreduce.JobContext job)
|
protected static java.util.List<org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat.AccumuloIteratorOption> |
getIteratorOptions(org.apache.hadoop.mapreduce.JobContext job)
|
protected static java.util.List<org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat.AccumuloIterator> |
getIterators(org.apache.hadoop.mapreduce.JobContext job)
|
protected static org.apache.log4j.Level |
getLogLevel(org.apache.hadoop.mapreduce.JobContext job)
|
protected static int |
getMaxVersions(org.apache.hadoop.mapreduce.JobContext job)
|
protected static byte[] |
getPassword(org.apache.hadoop.mapreduce.JobContext job)
WARNING: The password is stored in the Configuration and shared with all MapReduce tasks; it is BASE64 encoded to provide a charset-safe conversion to a string, and is not intended to be secure. |
protected static java.util.List<Range> |
getRanges(org.apache.hadoop.mapreduce.JobContext job)
|
protected static java.lang.String |
getRegex(org.apache.hadoop.mapreduce.JobContext job,
AccumuloInputFormat.RegexType type)
|
java.util.List<org.apache.hadoop.mapreduce.InputSplit> |
getSplits(org.apache.hadoop.mapreduce.JobContext job)
Reads the metadata table to get the tablets of interest; each of these becomes a split. |
protected static java.lang.String |
getTablename(org.apache.hadoop.mapreduce.JobContext job)
|
protected static TabletLocator |
getTabletLocator(org.apache.hadoop.mapreduce.JobContext job)
|
protected static java.lang.String |
getUsername(org.apache.hadoop.mapreduce.JobContext job)
|
protected static boolean |
isIsolated(org.apache.hadoop.mapreduce.JobContext job)
|
static void |
setInputInfo(org.apache.hadoop.mapreduce.JobContext job,
java.lang.String user,
byte[] passwd,
java.lang.String table,
Authorizations auths)
|
static void |
setIsolated(org.apache.hadoop.mapreduce.JobContext job,
boolean enable)
Enable or disable use of the IsolatedScanner . |
static void |
setIterator(org.apache.hadoop.mapreduce.JobContext job,
int priority,
java.lang.String iteratorClass,
java.lang.String iteratorName)
Specify an Accumulo iterator type to manage the behavior of the underlying table scan that this InputFormat's Record Reader will conduct, with priority dictating the order in which the specified iterators are applied. |
static void |
setIteratorOption(org.apache.hadoop.mapreduce.JobContext job,
java.lang.String iteratorName,
java.lang.String key,
java.lang.String value)
Specify an option for a named Accumulo iterator, further specifying that iterator's behavior. |
static void |
setLogLevel(org.apache.hadoop.mapreduce.JobContext job,
org.apache.log4j.Level level)
|
static void |
setMaxVersions(org.apache.hadoop.mapreduce.JobContext job,
int maxVersions)
Sets the maximum number of values that may be returned for an individual Accumulo cell. |
static void |
setMockInstance(org.apache.hadoop.mapreduce.JobContext job,
java.lang.String instanceName)
|
static void |
setRanges(org.apache.hadoop.mapreduce.JobContext job,
java.util.Collection<Range> ranges)
|
static void |
setRegex(org.apache.hadoop.mapreduce.JobContext job,
AccumuloInputFormat.RegexType type,
java.lang.String regex)
|
static void |
setZooKeeperInstance(org.apache.hadoop.mapreduce.JobContext job,
java.lang.String instanceName,
java.lang.String zooKeepers)
|
protected static void |
validateOptions(org.apache.hadoop.mapreduce.JobContext job)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public AccumuloInputFormat()
Method Detail |
---|
public static void setIsolated(org.apache.hadoop.mapreduce.JobContext job, boolean enable)
Enable or disable use of the IsolatedScanner. By default it is not enabled.
Parameters:
job -
enable -
public static void setInputInfo(org.apache.hadoop.mapreduce.JobContext job, java.lang.String user, byte[] passwd, java.lang.String table, Authorizations auths)
public static void setZooKeeperInstance(org.apache.hadoop.mapreduce.JobContext job, java.lang.String instanceName, java.lang.String zooKeepers)
public static void setMockInstance(org.apache.hadoop.mapreduce.JobContext job, java.lang.String instanceName)
public static void setRanges(org.apache.hadoop.mapreduce.JobContext job, java.util.Collection<Range> ranges)
public static void disableAutoAdjustRanges(org.apache.hadoop.mapreduce.JobContext job)
public static void setRegex(org.apache.hadoop.mapreduce.JobContext job, AccumuloInputFormat.RegexType type, java.lang.String regex)
public static void setMaxVersions(org.apache.hadoop.mapreduce.JobContext job, int maxVersions) throws java.io.IOException
Parameters:
job - the job
maxVersions - the max versions
Throws:
java.io.IOException
public static void fetchColumns(org.apache.hadoop.mapreduce.JobContext job, java.util.Collection<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> columnFamilyColumnQualifierPairs)
Parameters:
columnFamilyColumnQualifierPairs - A pair of Text objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is selected. An empty set is the default and is equivalent to scanning all the columns.
public static void setLogLevel(org.apache.hadoop.mapreduce.JobContext job, org.apache.log4j.Level level)
public static void setIterator(org.apache.hadoop.mapreduce.JobContext job, int priority, java.lang.String iteratorClass, java.lang.String iteratorName)
Parameters:
job - the job
priority - the priority
iteratorClass - the iterator class
iteratorName - the iterator name
public static void setIteratorOption(org.apache.hadoop.mapreduce.JobContext job, java.lang.String iteratorName, java.lang.String key, java.lang.String value)
Parameters:
job - the job
iteratorName - the iterator name. Should correspond to an iterator set with a prior setIterator call.
key - the key
value - the value
protected static boolean isIsolated(org.apache.hadoop.mapreduce.JobContext job)
protected static java.lang.String getUsername(org.apache.hadoop.mapreduce.JobContext job)
protected static byte[] getPassword(org.apache.hadoop.mapreduce.JobContext job)
protected static java.lang.String getTablename(org.apache.hadoop.mapreduce.JobContext job)
protected static Authorizations getAuthorizations(org.apache.hadoop.mapreduce.JobContext job)
protected static Instance getInstance(org.apache.hadoop.mapreduce.JobContext job)
protected static TabletLocator getTabletLocator(org.apache.hadoop.mapreduce.JobContext job) throws TableNotFoundException
TableNotFoundException
protected static java.util.List<Range> getRanges(org.apache.hadoop.mapreduce.JobContext job) throws java.io.IOException
java.io.IOException
protected static java.lang.String getRegex(org.apache.hadoop.mapreduce.JobContext job, AccumuloInputFormat.RegexType type)
protected static java.util.Set<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> getFetchedColumns(org.apache.hadoop.mapreduce.JobContext job)
protected static boolean getAutoAdjustRanges(org.apache.hadoop.mapreduce.JobContext job)
protected static org.apache.log4j.Level getLogLevel(org.apache.hadoop.mapreduce.JobContext job)
protected static void validateOptions(org.apache.hadoop.mapreduce.JobContext job) throws java.io.IOException
java.io.IOException
protected static int getMaxVersions(org.apache.hadoop.mapreduce.JobContext job)
protected static java.util.List<org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat.AccumuloIterator> getIterators(org.apache.hadoop.mapreduce.JobContext job)
protected static java.util.List<org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat.AccumuloIteratorOption> getIteratorOptions(org.apache.hadoop.mapreduce.JobContext job)
public org.apache.hadoop.mapreduce.RecordReader<Key,Value> createRecordReader(org.apache.hadoop.mapreduce.InputSplit inSplit, org.apache.hadoop.mapreduce.TaskAttemptContext attempt) throws java.io.IOException, java.lang.InterruptedException
Specified by:
createRecordReader in class org.apache.hadoop.mapreduce.InputFormat<Key,Value>
Throws:
java.io.IOException
java.lang.InterruptedException
public java.util.List<org.apache.hadoop.mapreduce.InputSplit> getSplits(org.apache.hadoop.mapreduce.JobContext job) throws java.io.IOException
Specified by:
getSplits in class org.apache.hadoop.mapreduce.InputFormat<Key,Value>
Throws:
java.io.IOException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |