class MetadataLogFileIndex extends PartitioningAwareFileIndex
A FileIndex that generates the list of files to process by reading them from the metadata log files generated by the FileStreamSink.
- Alphabetic
- By Inheritance
- MetadataLogFileIndex
- PartitioningAwareFileIndex
- Logging
- FileIndex
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- 
      
      
      
        
      
    
      
        
        new
      
      
        MetadataLogFileIndex(sparkSession: SparkSession, path: Path, parameters: Map[String, String], userSpecifiedSchema: Option[StructType])
      
      
      - userSpecifiedSchema
- an optional user-specified schema that will be used to provide types for the discovered partitions 
 
Value Members
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        !=(arg0: Any): Boolean
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        ##(): Int
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        ==(arg0: Any): Boolean
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        allFiles(): Seq[FileStatus]
      
      
      - Definition Classes
- PartitioningAwareFileIndex
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        asInstanceOf[T0]: T0
      
      
      - Definition Classes
- Any
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        clone(): AnyRef
      
      
      - Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        eq(arg0: AnyRef): Boolean
      
      
      - Definition Classes
- AnyRef
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        equals(arg0: Any): Boolean
      
      
      - Definition Classes
- AnyRef → Any
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        finalize(): Unit
      
      
      - Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        getClass(): Class[_]
      
      
      - Definition Classes
- AnyRef → Any
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        
        val
      
      
        hadoopConf: Configuration
      
      
      - Attributes
- protected
- Definition Classes
- PartitioningAwareFileIndex
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        hashCode(): Int
      
      
      - Definition Classes
- AnyRef → Any
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        inferPartitioning(): PartitionSpec
      
      
      - Attributes
- protected
- Definition Classes
- PartitioningAwareFileIndex
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        initializeLogIfNecessary(isInterpreter: Boolean): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        inputFiles: Array[String]
      
      
      Returns the list of files that will be read when scanning this relation.
      - Definition Classes
- PartitioningAwareFileIndex → FileIndex
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        isInstanceOf[T0]: Boolean
      
      
      - Definition Classes
- Any
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        isTraceEnabled(): Boolean
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        val
      
      
        leafDirToChildrenFiles: Map[Path, Array[FileStatus]]
      
      
      - Attributes
- protected
- Definition Classes
- MetadataLogFileIndex → PartitioningAwareFileIndex
 
- 
      
      
      
        
      
    
      
        
        val
      
      
        leafFiles: LinkedHashMap[Path, FileStatus]
      
      
      - Attributes
- protected
- Definition Classes
- MetadataLogFileIndex → PartitioningAwareFileIndex
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        listFiles(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Seq[PartitionDirectory]
      
      
      Returns all valid files grouped into partitions when the data is partitioned. If the data is unpartitioned, this will return a single partition with no partition values.
      - partitionFilters
- The filters used to prune which partitions are returned. These filters must only refer to partition columns and this method will only return files where these predicates are guaranteed to evaluate to `true`. Thus, these filters will not need to be evaluated again on the returned data.
- dataFilters
- Filters that can be applied on non-partitioned columns. The implementation does not need to guarantee these filters are applied, i.e. the execution engine will ensure these filters are still applied on the returned files. 
 - Definition Classes
- PartitioningAwareFileIndex → FileIndex
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        log: Logger
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logDebug(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logDebug(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logError(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logError(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logInfo(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logInfo(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logName: String
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logTrace(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logTrace(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logWarning(msg: ⇒ String, throwable: Throwable): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        logWarning(msg: ⇒ String): Unit
      
      
      - Attributes
- protected
- Definition Classes
- Logging
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        matchPathPattern(file: FileStatus): Boolean
      
      
      - Attributes
- protected
- Definition Classes
- PartitioningAwareFileIndex
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        metadataOpsTimeNs: Option[Long]
      
      
      Returns an optional metadata operation time, in nanoseconds, for listing files. We do file listing in query optimization (in order to get the proper statistics) and we want to account for file listing time in physical execution (as metrics). To do that, we save the file listing time in some implementations and physical execution calls it in this method to update the metrics.
      - Definition Classes
- FileIndex
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        ne(arg0: AnyRef): Boolean
      
      
      - Definition Classes
- AnyRef
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        notify(): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        notifyAll(): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @native()
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        partitionSchema: StructType
      
      
      Schema of the partitioning columns, or the empty schema if the table is not partitioned.
      - Definition Classes
- PartitioningAwareFileIndex → FileIndex
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        partitionSpec(): PartitionSpec
      
      
      Returns the specification of the partitions inferred from the data.
      - Definition Classes
- MetadataLogFileIndex → PartitioningAwareFileIndex
 
- 
      
      
      
        
      
    
      
        
        lazy val
      
      
        recursiveFileLookup: Boolean
      
      
      - Attributes
- protected
- Definition Classes
- PartitioningAwareFileIndex
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        refresh(): Unit
      
      
      Refresh any cached file listings.
      - Definition Classes
- MetadataLogFileIndex → FileIndex
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        rootPaths: Seq[Path]
      
      
      Returns the list of root input paths from which the catalog will get files. There may be a single root path from which partitions are discovered, or individual partitions may be specified by each path.
      - Definition Classes
- MetadataLogFileIndex → FileIndex
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        sizeInBytes: Long
      
      
      Sum of table file sizes, in bytes.
      - Definition Classes
- PartitioningAwareFileIndex → FileIndex
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        synchronized[T0](arg0: ⇒ T0): T0
      
      
      - Definition Classes
- AnyRef
 
- 
      
      
      
        
      
    
      
        
        def
      
      
        toString(): String
      
      
      - Definition Classes
- FileIndex → AnyRef → Any
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @throws( ... )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(arg0: Long, arg1: Int): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @throws( ... )
 
- 
      
      
      
        
      
    
      
        final 
        def
      
      
        wait(arg0: Long): Unit
      
      
      - Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()