@EventDriven @SideEffectFree @SupportsBatching @Tags(value={"count","text","line","word","character"}) @InputRequirement(value=INPUT_REQUIRED) @CapabilityDescription(value="Counts various metrics on incoming text. The requested results will be recorded as attributes. The resulting flowfile will not have its content modified.") @WritesAttribute(attribute="text.line.count",description="The number of lines of text present in the FlowFile content") @WritesAttribute(attribute="text.line.nonempty.count",description="The number of lines of text (with at least one non-whitespace character) present in the original FlowFile") @WritesAttribute(attribute="text.word.count",description="The number of words present in the original FlowFile") @WritesAttribute(attribute="text.character.count",description="The number of characters (given the specified character encoding) present in the original FlowFile") @SeeAlso(value=SplitText.class) public class CountText extends AbstractProcessor
Constructor and Description |
---|
CountText() |
Modifier and Type | Method and Description |
---|---|
(package private) int | countWordsInLine(String line, boolean splitWordsOnSymbols) |
private String | generateMetricsMessage(int lineCount, int lineNonEmptyCount, int wordCount, int characterCount) |
Set<Relationship> | getRelationships() |
private static Set<String> | getStandardCharsetNames() |
protected List<PropertyDescriptor> | getSupportedPropertyDescriptors() |
void | onSchedule(ProcessContext context) |
void | onTrigger(ProcessContext context, ProcessSession processSession) Will count text attributes of the incoming stream. |
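Methods inherited from class AbstractProcessor: onTrigger
Methods inherited from class AbstractSessionFactoryProcessor: getControllerServiceLookup, getIdentifier, getLogger, getNodeTypeProvider, init, initialize, isConfigurationRestored, isScheduled, toString, updateConfiguredRestoredTrue, updateScheduledFalse, updateScheduledTrue
Methods inherited from class AbstractConfigurableComponent: customValidate, equals, getPropertyDescriptor, getPropertyDescriptors, getSupportedDynamicPropertyDescriptor, hashCode, onPropertyModified, validate
Methods inherited from class java.lang.Object: clone, finalize, getClass, notify, notifyAll, wait, wait, wait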
Methods inherited from interface ConfigurableComponent: getPropertyDescriptor, getPropertyDescriptors, onPropertyModified, validate
private static final Pattern SYMBOL_PATTERN
private static final Pattern WHITESPACE_ONLY_PATTERN
public static final String TEXT_LINE_COUNT
public static final String TEXT_LINE_NONEMPTY_COUNT
public static final String TEXT_WORD_COUNT
public static final String TEXT_CHARACTER_COUNT
public static final PropertyDescriptor TEXT_LINE_COUNT_PD
public static final PropertyDescriptor TEXT_LINE_NONEMPTY_COUNT_PD
public static final PropertyDescriptor TEXT_WORD_COUNT_PD
public static final PropertyDescriptor TEXT_CHARACTER_COUNT_PD
public static final PropertyDescriptor SPLIT_WORDS_ON_SYMBOLS_PD
public static final PropertyDescriptor CHARACTER_ENCODING_PD
public static final PropertyDescriptor ADJUST_IMMEDIATELY
public static final Relationship REL_SUCCESS
public static final Relationship REL_FAILURE
private static final List<PropertyDescriptor> properties
private static final Set<Relationship> relationships
private volatile boolean countLines
private volatile boolean countLinesNonEmpty
private volatile boolean countWords
private volatile boolean countCharacters
private volatile boolean splitWordsOnSymbols
private volatile boolean adjustImmediately
private volatile String characterEncoding
public Set<Relationship> getRelationships()
Specified by: getRelationships in interface Processor
Overrides: getRelationships in class AbstractSessionFactoryProcessor
@OnScheduled public void onSchedule(ProcessContext context)
public void onTrigger(ProcessContext context, ProcessSession processSession) throws ProcessException
Specified by: onTrigger in class AbstractProcessor
Throws: ProcessException
private String generateMetricsMessage(int lineCount, int lineNonEmptyCount, int wordCount, int characterCount)
int countWordsInLine(String line, boolean splitWordsOnSymbols) throws IOException
Throws: IOException
protected List<PropertyDescriptor> getSupportedPropertyDescriptors()
Overrides: getSupportedPropertyDescriptors in class AbstractConfigurableComponent
Copyright © 2020 Apache NiFi Project. All rights reserved.