@SideEffectFree @TriggerWhenEmpty @InputRequirement(value=INPUT_REQUIRED) @Tags(value={"merge","content","correlation","tar","zip","stream","concatenation","archive","flowfile-stream","flowfile-stream-v3"}) @CapabilityDescription(value="Merges a Group of FlowFiles together based on a user-defined strategy and packages them into a single FlowFile. It is recommended that the Processor be configured with only a single incoming connection, as a Group of FlowFiles will not be created from FlowFiles in different connections. This processor updates the mime.type attribute as appropriate.") @ReadsAttribute(attribute="fragment.identifier",description="Applicable only if the <Merge Strategy> property is set to Defragment. All FlowFiles with the same value for this attribute will be bundled together.") @ReadsAttribute(attribute="fragment.index",description="Applicable only if the <Merge Strategy> property is set to Defragment. This attribute indicates the order in which the fragments should be assembled. This attribute must be present on all FlowFiles when using the Defragment Merge Strategy and must be a unique (i.e., unique across all FlowFiles that have the same value for the \"fragment.identifier\" attribute) integer between 0 and the value of the fragment.count attribute. If two or more FlowFiles have the same value for the \"fragment.identifier\" attribute and the same value for the \"fragment.index\" attribute, the first FlowFile processed will be accepted and subsequent FlowFiles will not be accepted into the Bin.") @ReadsAttribute(attribute="fragment.count",description="Applicable only if the <Merge Strategy> property is set to Defragment. This attribute must be present on all FlowFiles with the same value for the fragment.identifier attribute. All FlowFiles in the same bundle must have the same value for this attribute. 
The value of this attribute indicates how many FlowFiles should be expected in the given bundle.") @ReadsAttribute(attribute="segment.original.filename",description="Applicable only if the <Merge Strategy> property is set to Defragment. This attribute must be present on all FlowFiles with the same value for the fragment.identifier attribute. All FlowFiles in the same bundle must have the same value for this attribute. The value of this attribute will be used for the filename of the completed merged FlowFile.") @ReadsAttribute(attribute="tar.permissions",description="Applicable only if the <Merge Format> property is set to TAR. The value of this attribute must be 3 characters; each character must be in the range 0 to 7 (inclusive) and indicates the file permissions that should be used for the FlowFile's TAR entry. If this attribute is missing or has an invalid value, the default value of 644 will be used.") @WritesAttribute(attribute="filename",description="When more than 1 file is merged, the filename comes from the segment.original.filename attribute. If that attribute does not exist in the source FlowFiles, then the filename is set to the number of nanoseconds matching system time. Then a filename extension may be applied: if Merge Format is TAR, then the filename will be appended with .tar, if Merge Format is ZIP, then the filename will be appended with .zip, if Merge Format is FlowFileStream, then the filename will be appended with .pkg") @WritesAttribute(attribute="merge.count",description="The number of FlowFiles that were merged into this bundle") @WritesAttribute(attribute="merge.bin.age",description="The age of the bin, in milliseconds, when it was merged and output. 
Effectively this is the greatest amount of time that any FlowFile in this bundle remained waiting in this processor before it was output") @WritesAttribute(attribute="merge.uuid",description="UUID of the merged FlowFile that will be added to the original FlowFiles' attributes.") @SeeAlso(value={SegmentContent.class,MergeRecord.class}) @SystemResourceConsideration(resource=MEMORY, description="While content is not stored in memory, the FlowFiles' attributes are. The configuration of MergeContent (maximum bin size, maximum group size, maximum bin age, max number of entries) will influence how much memory is used. If merging together many small FlowFiles, a two-stage approach may be necessary in order to avoid excessive use of memory.") public class MergeContent extends BinFiles
Modifier and Type | Class and Description |
---|---|
private class |
MergeContent.AvroMerge |
private class |
MergeContent.BinaryConcatenationMerge |
private class |
MergeContent.FlowFileStreamMerger |
private static class |
MergeContent.FragmentComparator |
private static interface |
MergeContent.MergeBin |
private class |
MergeContent.TarMerge |
private class |
MergeContent.ZipMerge |
MAX_BIN_AGE, MAX_BIN_COUNT, MAX_ENTRIES, MAX_SIZE, MIN_ENTRIES, MIN_SIZE, REL_FAILURE, REL_ORIGINAL
Constructor and Description |
---|
MergeContent() |
Modifier and Type | Method and Description |
---|---|
protected Collection<ValidationResult> |
additionalCustomValidation(ValidationContext context) |
private String |
createFilename(List<FlowFile> flowFiles) |
private String |
getDefragmentValidationError(List<FlowFile> binContents) |
protected String |
getGroupId(ProcessContext context,
FlowFile flowFile,
ProcessSession session) |
private String |
getPath(FlowFile flowFile) |
Set<Relationship> |
getRelationships() |
protected List<PropertyDescriptor> |
getSupportedPropertyDescriptors() |
private boolean |
isNumber(String value) |
protected FlowFile |
preprocessFlowFile(ProcessContext context,
ProcessSession session,
FlowFile flowFile) |
protected BinProcessingResult |
processBin(Bin bin,
ProcessContext context) |
private byte[] |
readContent(String filename) |
protected void |
setUpBinManager(BinManager binManager,
ProcessContext context) |
customValidate, onScheduled, onTrigger, resetState
getControllerServiceLookup, getIdentifier, getLogger, getNodeTypeProvider, init, initialize, isConfigurationRestored, isScheduled, toString, updateConfiguredRestoredTrue, updateScheduledFalse, updateScheduledTrue
equals, getPropertyDescriptor, getPropertyDescriptors, getSupportedDynamicPropertyDescriptor, hashCode, onPropertyModified, validate
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
getPropertyDescriptor, getPropertyDescriptors, onPropertyModified, validate
public static final String FRAGMENT_ID_ATTRIBUTE
public static final String FRAGMENT_INDEX_ATTRIBUTE
public static final String FRAGMENT_COUNT_ATTRIBUTE
public static final String SEGMENT_ID_ATTRIBUTE
public static final String SEGMENT_INDEX_ATTRIBUTE
public static final String SEGMENT_COUNT_ATTRIBUTE
public static final String SEGMENT_ORIGINAL_FILENAME
public static final AllowableValue METADATA_STRATEGY_USE_FIRST
public static final AllowableValue METADATA_STRATEGY_ALL_COMMON
public static final AllowableValue METADATA_STRATEGY_IGNORE
public static final AllowableValue METADATA_STRATEGY_DO_NOT_MERGE
public static final PropertyDescriptor METADATA_STRATEGY
public static final AllowableValue MERGE_STRATEGY_BIN_PACK
public static final AllowableValue MERGE_STRATEGY_DEFRAGMENT
public static final AllowableValue DELIMITER_STRATEGY_FILENAME
public static final AllowableValue DELIMITER_STRATEGY_TEXT
public static final String MERGE_FORMAT_TAR_VALUE
public static final String MERGE_FORMAT_ZIP_VALUE
public static final String MERGE_FORMAT_FLOWFILE_STREAM_V3_VALUE
public static final String MERGE_FORMAT_FLOWFILE_STREAM_V2_VALUE
public static final String MERGE_FORMAT_FLOWFILE_TAR_V1_VALUE
public static final String MERGE_FORMAT_CONCAT_VALUE
public static final String MERGE_FORMAT_AVRO_VALUE
public static final AllowableValue MERGE_FORMAT_TAR
public static final AllowableValue MERGE_FORMAT_ZIP
public static final AllowableValue MERGE_FORMAT_FLOWFILE_STREAM_V3
public static final AllowableValue MERGE_FORMAT_FLOWFILE_STREAM_V2
public static final AllowableValue MERGE_FORMAT_FLOWFILE_TAR_V1
public static final AllowableValue MERGE_FORMAT_CONCAT
public static final AllowableValue MERGE_FORMAT_AVRO
public static final String TAR_PERMISSIONS_ATTRIBUTE
public static final String MERGE_COUNT_ATTRIBUTE
public static final String MERGE_BIN_AGE_ATTRIBUTE
public static final String MERGE_UUID_ATTRIBUTE
public static final PropertyDescriptor MERGE_STRATEGY
public static final PropertyDescriptor MERGE_FORMAT
public static final PropertyDescriptor CORRELATION_ATTRIBUTE_NAME
public static final PropertyDescriptor DELIMITER_STRATEGY
public static final PropertyDescriptor HEADER
public static final PropertyDescriptor FOOTER
public static final PropertyDescriptor DEMARCATOR
public static final PropertyDescriptor COMPRESSION_LEVEL
public static final PropertyDescriptor KEEP_PATH
public static final PropertyDescriptor TAR_MODIFIED_TIME
public static final Relationship REL_MERGED
public static final Pattern NUMBER_PATTERN
public Set<Relationship> getRelationships()
getRelationships
in interface Processor
getRelationships
in class AbstractSessionFactoryProcessor
protected List<PropertyDescriptor> getSupportedPropertyDescriptors()
getSupportedPropertyDescriptors
in class AbstractConfigurableComponent
protected Collection<ValidationResult> additionalCustomValidation(ValidationContext context)
additionalCustomValidation
in class BinFiles
private byte[] readContent(String filename) throws IOException
IOException
protected FlowFile preprocessFlowFile(ProcessContext context, ProcessSession session, FlowFile flowFile)
preprocessFlowFile
in class BinFiles
protected String getGroupId(ProcessContext context, FlowFile flowFile, ProcessSession session)
getGroupId
in class BinFiles
protected void setUpBinManager(BinManager binManager, ProcessContext context)
setUpBinManager
in class BinFiles
protected BinProcessingResult processBin(Bin bin, ProcessContext context) throws ProcessException
processBin
in class BinFiles
ProcessException
private String getDefragmentValidationError(List<FlowFile> binContents)
private boolean isNumber(String value)
Copyright © 2020 Apache NiFi Project. All rights reserved.