Object

it.agilelab.bigdata.wasp.consumers.spark.plugins.raw.tools

FolderCompactionUtils

Related Doc: package tools

Permalink

object FolderCompactionUtils

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. FolderCompactionUtils
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Type Members

  1. type ColumnName = String

    Permalink
  2. type ColumnValue = String

    Permalink

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. val INPUT_MODEL_CONF_KEY: String

    Permalink
  5. val NUM_PARTITIONS_CONF_KEY: String

    Permalink
  6. val OUTPUT_MODEL_CONF_KEY: String

    Permalink
  7. val PARTITIONS_CONF_KEY: String

    Permalink
  8. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  9. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  10. def discoverPartitionFiles(fileSystem: FileSystem, basePath: Path)(partitions: Map[ColumnName, List[ColumnValue]]): List[Path]

    Permalink

    Given a basePath, finds all files associated to the specified partitions

    Given a basePath, finds all files associated to the specified partitions

    fileSystem

    the file system from which to read the files

    basePath

    the base path from which to begin the search

    partitions

    the partitions to search

    returns

    the list of Path of files found

  11. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  12. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  13. def filterPartitionsCombination(folders: Array[String], combination: PartitionsCombination): Boolean

    Permalink

    folders

    the list of directories that a file path contains E.g. [ "journey", "raw", "a=1", "b=2" ]

    combination

    the combination of partitions E.g. [ "a=1", "b=2" ]

    returns

    true if all the partitions of combination are present in folders, false otherwise

  14. def filterPath(path: Path)(combinations: List[String]): Boolean

    Permalink

    path

    the path of the file

    combinations

    the combination of partitions

    returns

    true if the partitions of combinations are all present in path, false otherwise

  15. def filterSingleQuery(whereCondition: WhereCondition)(file: Path): Boolean

    Permalink

    whereCondition

    the WhereCondition to filter

    file

    the file to check

    returns

    true if whereCondition covers the path of file, false otherwise

  16. def filterWhereCondition(files: List[Path])(whereCondition: WhereCondition): Boolean

    Permalink

    files

    the list of files to check against whereCondition

    whereCondition

    the WhereCondition to filter

    returns

    true if at least one file is present for this WhereCondition, false otherwise

  17. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  18. def generateCombinations(partitions: List[(ColumnName, List[ColumnValue])]): List[PartitionsCombination]

    Permalink

    Generates all the possible combinations of columnName and columnValue. Example: partitions = List( "a" -> List("1"), "b" -> List("2", "3"), "c" -> List("4", "5", "6") ) output = List( ("a", "1") :: ("b", "2") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "6") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "6") :: Nil )

    Generates all the possible combinations of columnName and columnValue. Example: partitions = List( "a" -> List("1"), "b" -> List("2", "3"), "c" -> List("4", "5", "6") ) output = List( ("a", "1") :: ("b", "2") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "6") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "6") :: Nil )

    partitions

    the list of partitions to generate the combinations

    returns

    all the possible combinations obtained from the input partitions

  19. def generateWhereConditions(partitions: Map[ColumnName, List[ColumnValue]], inputModel: RawModel, outputModel: RawModel): List[WhereCondition]

    Permalink

    Builds the list of WhereCondition used to filter the original DataFrame read from the input model.

    Builds the list of WhereCondition used to filter the original DataFrame read from the input model. This list has one element for each output partition combination, in order to write the correct number of files to the partitions specified by the output model. Each of these combinations is put in AND with all the input partition combinations, in order to write only the files of the requested partitions. Example: inputModel.partitions = [a, b, c, d]; outputModel.partitions = [a, b]; partitions = a -> [1, 2], b -> [3, 4], c -> [5, 6], d -> [7, 8]

    output = [ (a=1 AND b=3) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ) (a=1 AND b=4) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ) (a=2 AND b=3) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ) (a=2 AND b=4) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ) ]

    partitions

    the list of partitions to generate the conditions

    inputModel

    the inputModel defining the input partitions

    outputModel

    the outputModel defining the output partitions

    returns

    the list of WhereCondition generated

  20. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  21. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  22. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  23. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  24. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  25. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  26. def parseConfigModel(conf: Config): RawModel

    Permalink
  27. def parseModel(conf: Config, key: String): RawModel

    Permalink
  28. def parsePartitions(conf: Config): Map[ColumnName, List[ColumnValue]]

    Permalink
  29. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  30. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  31. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  32. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  33. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from AnyRef

Inherited from Any

Ungrouped