@EventDriven @SideEffectFree @SupportsBatching @Tags(value={"sql","query","calcite","route","record","transform","select","update","modify","etl","filter","record","csv","json","logs","text","avro","aggregate"}) @InputRequirement(value=INPUT_REQUIRED) @CapabilityDescription(value="Evaluates one or more SQL queries against the contents of a FlowFile. The result of the SQL query then becomes the content of the output FlowFile. This can be used, for example, for field-specific filtering, transformation, and row-level filtering. Columns can be renamed, simple calculations and aggregations performed, etc. The Processor is configured with a Record Reader Controller Service and a Record Writer service so as to allow flexibility in incoming and outgoing data formats. The Processor must be configured with at least one user-defined property. The name of the Property is the Relationship to route data to, and the value of the Property is a SQL SELECT statement that is used to specify how input data should be transformed/filtered. The SQL statement must be valid ANSI SQL and is powered by Apache Calcite. If the transformation fails, the original FlowFile is routed to the \'failure\' relationship. Otherwise, the data selected will be routed to the associated relationship. If the Record Writer chooses to inherit the schema from the Record, it is important to note that the schema that is inherited will be from the ResultSet, rather than the input Record. This allows a single instance of the QueryRecord processor to have multiple queries, each of which returns a different set of columns and aggregations. As a result, though, the schema that is derived will have no schema name, so it is important that the configured Record Writer not attempt to write the Schema Name as an attribute if inheriting the Schema from the Record. 
See the Processor Usage documentation for more information.") @DynamicRelationship(name="<Property Name>", description="Each user-defined property defines a new Relationship for this Processor.") @DynamicProperty(name="The name of the relationship to route data to", value="A SQL SELECT statement that is used to determine what data should be routed to this relationship.", expressionLanguageScope=FLOWFILE_ATTRIBUTES, description="Each user-defined property specifies a SQL SELECT statement to run over the data, with the data that is selected being routed to the relationship whose name is the property name") @WritesAttribute(attribute="mime.type",description="Sets the mime.type attribute to the MIME Type specified by the Record Writer") @WritesAttribute(attribute="record.count",description="The number of records selected by the query") public class QueryRecord extends AbstractProcessor
Modifier and Type | Class and Description |
---|---|
private static class |
QueryRecord.CachedStatement |
static class |
QueryRecord.DateRecordPath |
static class |
QueryRecord.DoubleRecordPath |
static class |
QueryRecord.FloatRecordPath |
static class |
QueryRecord.IntegerRecordPath |
static class |
QueryRecord.LongRecordPath |
static class |
QueryRecord.ObjectRecordPath |
private static interface |
QueryRecord.QueryResult |
static class |
QueryRecord.RecordPathFunction |
private static class |
QueryRecord.SqlValidator |
static class |
QueryRecord.StringRecordPath |
Modifier and Type | Field and Description |
---|---|
(package private) static PropertyDescriptor |
CACHE_SCHEMA |
(package private) static PropertyDescriptor |
INCLUDE_ZERO_RECORD_FLOWFILES |
private List<PropertyDescriptor> |
properties |
(package private) static PropertyDescriptor |
RECORD_READER_FACTORY |
(package private) static PropertyDescriptor |
RECORD_WRITER_FACTORY |
static Relationship |
REL_FAILURE |
static Relationship |
REL_ORIGINAL |
private Set<Relationship> |
relationships |
private com.github.benmanes.caffeine.cache.Cache<Tuple<String,RecordSchema>,BlockingQueue<QueryRecord.CachedStatement>> |
statementQueues |
Constructor and Description |
---|
QueryRecord() |
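Methods inherited from class AbstractProcessor: onTrigger
Methods inherited from class AbstractSessionFactoryProcessor: getControllerServiceLookup, getIdentifier, getLogger, getNodeTypeProvider, initialize, isConfigurationRestored, isScheduled, toString, updateConfiguredRestoredTrue, updateScheduledFalse, updateScheduledTrue
Methods inherited from class AbstractConfigurableComponent: customValidate, equals, getPropertyDescriptor, getPropertyDescriptors, hashCode, validate
Methods inherited from class java.lang.Object: clone, finalize, getClass, notify, notifyAll, wait, wait, wait
Methods inherited from interface ConfigurableComponent: getPropertyDescriptor, getPropertyDescriptors, validate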
static final PropertyDescriptor RECORD_READER_FACTORY
static final PropertyDescriptor RECORD_WRITER_FACTORY
static final PropertyDescriptor INCLUDE_ZERO_RECORD_FLOWFILES
static final PropertyDescriptor CACHE_SCHEMA
public static final Relationship REL_ORIGINAL
public static final Relationship REL_FAILURE
private List<PropertyDescriptor> properties
private final Set<Relationship> relationships
private final com.github.benmanes.caffeine.cache.Cache<Tuple<String,RecordSchema>,BlockingQueue<QueryRecord.CachedStatement>> statementQueues
protected void init(ProcessorInitializationContext context)
Overrides: init in class AbstractSessionFactoryProcessor
public Set<Relationship> getRelationships()
Specified by: getRelationships in interface Processor
Overrides: getRelationships in class AbstractSessionFactoryProcessor
protected List<PropertyDescriptor> getSupportedPropertyDescriptors()
Overrides: getSupportedPropertyDescriptors in class AbstractConfigurableComponent
public void onPropertyModified(PropertyDescriptor descriptor, String oldValue, String newValue)
Specified by: onPropertyModified in interface ConfigurableComponent
Overrides: onPropertyModified in class AbstractConfigurableComponent
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(String propertyDescriptorName)
Overrides: getSupportedDynamicPropertyDescriptor in class AbstractConfigurableComponent
@OnStopped public void cleanup()
private void onCacheEviction(Tuple<String,RecordSchema> key, BlockingQueue<QueryRecord.CachedStatement> queue, com.github.benmanes.caffeine.cache.RemovalCause cause)
private void clearQueue(BlockingQueue<QueryRecord.CachedStatement> statementQueue)
public void onTrigger(ProcessContext context, ProcessSession session)
Specified by: onTrigger in class AbstractProcessor
private QueryRecord.CachedStatement getStatement(String sql, RecordSchema schema, Supplier<QueryRecord.CachedStatement> statementBuilder)
private QueryRecord.CachedStatement buildCachedStatement(String sql, ProcessSession session, FlowFile flowFile, RecordSchema schema, RecordReaderFactory recordReaderFactory)
private org.apache.calcite.jdbc.CalciteConnection createConnection()
protected QueryRecord.QueryResult query(ProcessSession session, FlowFile flowFile, RecordSchema schema, String sql, RecordReaderFactory recordReaderFactory) throws SQLException
Throws: SQLException
private org.apache.calcite.schema.SchemaPlus createRootSchema(org.apache.calcite.jdbc.CalciteConnection calciteConnection)
private void closeQuietly(AutoCloseable... closeables)
Copyright © 2020 Apache NiFi Project. All rights reserved.