@InterfaceAudience.Public @InterfaceStability.Unstable public class MagicS3GuardCommitter extends AbstractS3ACommitter
AbstractS3ACommitter.ActiveCommit, AbstractS3ACommitter.JobUUIDSource
Modifier and Type | Field and Description |
---|---|
static String |
NAME
Name: "magic".
|
E_SELF_GENERATED_JOB_UUID, THREAD_PREFIX
Constructor and Description |
---|
MagicS3GuardCommitter(org.apache.hadoop.fs.Path outputPath,
org.apache.hadoop.mapreduce.TaskAttemptContext context)
Create a task committer.
|
Modifier and Type | Method and Description |
---|---|
void |
abortTask(org.apache.hadoop.mapreduce.TaskAttemptContext context)
Abort a task.
|
void |
cleanupStagingDirs()
Delete the magic directory.
|
void |
commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext context) |
protected org.apache.hadoop.fs.Path |
getBaseTaskAttemptPath(org.apache.hadoop.mapreduce.TaskAttemptContext context)
Compute the base path where the output of a task attempt is written.
|
protected org.apache.hadoop.fs.Path |
getJobAttemptPath(int appAttemptId)
Compute the path where the output of a given job attempt will be placed.
|
protected org.apache.hadoop.fs.Path |
getJobPath()
Compute the path under which all job attempts will be placed.
|
String |
getName()
Get the name of this committer.
|
org.apache.hadoop.fs.Path |
getTaskAttemptPath(org.apache.hadoop.mapreduce.TaskAttemptContext context)
Compute the path where the output of a task attempt is stored until
that task is committed.
|
org.apache.hadoop.fs.Path |
getTempTaskAttemptPath(org.apache.hadoop.mapreduce.TaskAttemptContext context)
Get a temporary directory for data.
|
protected AbstractS3ACommitter.ActiveCommit |
listPendingUploadsToCommit(CommitContext commitContext)
Get the list of pending uploads for this job attempt, by listing
all .pendingset files in the job attempt directory.
|
protected PendingSet |
loadPendingCommits(org.apache.hadoop.mapreduce.TaskAttemptContext context)
Loads pending commits from either memory or from the remote store (S3) based on the config.
|
boolean |
needsTaskCommit(org.apache.hadoop.mapreduce.TaskAttemptContext context)
Did this task write any files in the work directory?
Probes for a task existing by looking to see if the attempt dir exists.
|
protected boolean |
requiresDelayedCommitOutputInFileSystem()
Require magic paths in the FS client.
|
void |
setupJob(org.apache.hadoop.mapreduce.JobContext context)
Base job setup (optionally) deletes the success marker and
always creates the destination directory.
|
String |
toString() |
abortJob, abortJobInternal, abortPendingUploads, abortPendingUploads, abortPendingUploadsInCleanup, buildJobUUID, cleanup, cleanupJob, commitJob, commitJobInternal, commitPendingUploads, deleteTaskAttemptPathQuietly, getAuditSpanSource, getCommitOperations, getConf, getDestFS, getDestinationFS, getDestS3AFS, getIOStatistics, getJobAttemptPath, getJobContext, getOutputPath, getRole, getTaskAttemptFilesystem, getUUID, getUUIDSource, getWorkPath, initiateJobOperation, initiateTaskOperation, initOutput, jobCompleted, maybeCreateSuccessMarker, maybeCreateSuccessMarkerFromCommits, maybeIgnore, maybeIgnore, precommitCheckPendingFiles, preCommitJob, recoverTask, setConf, setDestFS, setOutputPath, setupTask, setWorkPath, startOperation, updateCommonContext, warnOnActiveUploads
hasOutputPath
public static final String NAME
public MagicS3GuardCommitter(org.apache.hadoop.fs.Path outputPath, org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException
outputPath
- the job's output path
context
- the task's context
IOException
- on a failure
public String getName()
AbstractS3ACommitter
getName
in class AbstractS3ACommitter
protected boolean requiresDelayedCommitOutputInFileSystem()
requiresDelayedCommitOutputInFileSystem
in class AbstractS3ACommitter
public void setupJob(org.apache.hadoop.mapreduce.JobContext context) throws IOException
AbstractS3ACommitter
The option InternalCommitterConstants.FS_S3A_COMMITTER_UUID
is set to the job UUID; if it was generated locally,
InternalCommitterConstants.SPARK_WRITE_UUID
is also patched.
The field AbstractS3ACommitter.jobSetup
is set to true to note that
this specific committer instance was used to set up a job.
setupJob
in class AbstractS3ACommitter
context
- context
IOException
- IO failure
protected AbstractS3ACommitter.ActiveCommit listPendingUploadsToCommit(CommitContext commitContext) throws IOException
listPendingUploadsToCommit
in class AbstractS3ACommitter
commitContext
- job context
IOException
- Any IO failure
public void cleanupStagingDirs()
cleanupStagingDirs
in class AbstractS3ACommitter
public boolean needsTaskCommit(org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException
needsTaskCommit
in class org.apache.hadoop.mapreduce.OutputCommitter
context
- the task's context
IOException
- failure to list the path
public void commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException
commitTask
in class org.apache.hadoop.mapreduce.OutputCommitter
IOException
protected PendingSet loadPendingCommits(org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException
context
- TaskAttemptContext
IOException
- if there is an error trying to read the commit data
public void abortTask(org.apache.hadoop.mapreduce.TaskAttemptContext context) throws IOException
abortTask
in class org.apache.hadoop.mapreduce.OutputCommitter
context
- task context
IOException
- if there was some problem querying the path other
than it not actually existing.
protected org.apache.hadoop.fs.Path getJobPath()
getJobPath
in class AbstractS3ACommitter
protected final org.apache.hadoop.fs.Path getJobAttemptPath(int appAttemptId)
getJobAttemptPath
in class AbstractS3ACommitter
appAttemptId
- the ID of the application attempt for this job.
public final org.apache.hadoop.fs.Path getTaskAttemptPath(org.apache.hadoop.mapreduce.TaskAttemptContext context)
getTaskAttemptPath
in class AbstractS3ACommitter
context
- the context of the task attempt.
protected final org.apache.hadoop.fs.Path getBaseTaskAttemptPath(org.apache.hadoop.mapreduce.TaskAttemptContext context)
AbstractS3ACommitter
getBaseTaskAttemptPath
in class AbstractS3ACommitter
context
- the context of the task attempt.
public org.apache.hadoop.fs.Path getTempTaskAttemptPath(org.apache.hadoop.mapreduce.TaskAttemptContext context)
getTempTaskAttemptPath
in class AbstractS3ACommitter
context
- task context
public String toString()
toString
in class AbstractS3ACommitter
Copyright © 2008–2024 Apache Software Foundation. All rights reserved.