Class ParquetMetadataConverter


  • public class ParquetMetadataConverter
    extends Object
    • Constructor Detail

      • ParquetMetadataConverter

        public ParquetMetadataConverter()
      • ParquetMetadataConverter

        public ParquetMetadataConverter​(int statisticsTruncateLength)
      • ParquetMetadataConverter

        @Deprecated
        public ParquetMetadataConverter​(org.apache.hadoop.conf.Configuration conf)
        Deprecated.
        Will be removed in 2.0.0; use ParquetMetadataConverter(ParquetReadOptions) instead.
        Parameters:
        conf - a configuration
      • ParquetMetadataConverter

        public ParquetMetadataConverter​(ParquetReadOptions options)
    • Method Detail

      • toParquetMetadata

        public org.apache.parquet.format.FileMetaData toParquetMetadata​(int currentVersion,
                                                                        ParquetMetadata parquetMetadata)
      • toParquetMetadata

        public org.apache.parquet.format.FileMetaData toParquetMetadata​(int currentVersion,
                                                                        ParquetMetadata parquetMetadata,
                                                                        InternalFileEncryptor fileEncryptor)
      • getEncoding

        public org.apache.parquet.column.Encoding getEncoding​(org.apache.parquet.format.Encoding encoding)
      • getEncoding

        public org.apache.parquet.format.Encoding getEncoding​(org.apache.parquet.column.Encoding encoding)
      • convertEncodingStats

        public org.apache.parquet.column.EncodingStats convertEncodingStats​(List<org.apache.parquet.format.PageEncodingStats> stats)
      • convertEncodingStats

        public List<org.apache.parquet.format.PageEncodingStats> convertEncodingStats​(org.apache.parquet.column.EncodingStats stats)
      • toParquetStatistics

        public static org.apache.parquet.format.Statistics toParquetStatistics​(org.apache.parquet.column.statistics.Statistics stats)
      • toParquetStatistics

        public static org.apache.parquet.format.Statistics toParquetStatistics​(org.apache.parquet.column.statistics.Statistics stats,
                                                                               int truncateLength)
      • fromParquetStatistics

        @Deprecated
        public static org.apache.parquet.column.statistics.Statistics fromParquetStatistics​(org.apache.parquet.format.Statistics statistics,
                                                                                            org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName type)
        Deprecated.
        will be removed in 2.0.0.
        Parameters:
        statistics - parquet format statistics
        type - a primitive type name
        Returns:
        the statistics
      • fromParquetStatistics

        @Deprecated
        public static org.apache.parquet.column.statistics.Statistics fromParquetStatistics​(String createdBy,
                                                                                            org.apache.parquet.format.Statistics statistics,
                                                                                            org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName type)
        Deprecated.
        will be removed in 2.0.0.
        Parameters:
        createdBy - the created-by string from the file
        statistics - parquet format statistics
        type - a primitive type name
        Returns:
        the statistics
      • fromParquetStatistics

        public org.apache.parquet.column.statistics.Statistics fromParquetStatistics​(String createdBy,
                                                                                     org.apache.parquet.format.Statistics statistics,
                                                                                     org.apache.parquet.schema.PrimitiveType type)
      • getPrimitive

        public org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName getPrimitive​(org.apache.parquet.format.Type type)
      • range

        public static ParquetMetadataConverter.MetadataFilter range​(long startOffset,
                                                                    long endOffset)
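        Creates a metadata filter for the half-open offset range [startOffset, endOffset): the start offset is included and the end offset is excluded.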
        Parameters:
        startOffset - a start offset (inclusive)
        endOffset - an end offset (exclusive)
        Returns:
        a range filter from the offsets
      • buildColumnChunkMetaData

        public ColumnChunkMetaData buildColumnChunkMetaData​(org.apache.parquet.format.ColumnMetaData metaData,
                                                            org.apache.parquet.hadoop.metadata.ColumnPath columnPath,
                                                            org.apache.parquet.schema.PrimitiveType type,
                                                            String createdBy)
      • writeDataPageHeader

        @Deprecated
        public void writeDataPageHeader​(int uncompressedSize,
                                        int compressedSize,
                                        int valueCount,
                                        org.apache.parquet.column.Encoding rlEncoding,
                                        org.apache.parquet.column.Encoding dlEncoding,
                                        org.apache.parquet.column.Encoding valuesEncoding,
                                        OutputStream to)
                                 throws IOException
        Deprecated.
        Throws:
        IOException
      • writeDataPageHeader

        @Deprecated
        public void writeDataPageHeader​(int uncompressedSize,
                                        int compressedSize,
                                        int valueCount,
                                        org.apache.parquet.column.statistics.Statistics statistics,
                                        org.apache.parquet.column.Encoding rlEncoding,
                                        org.apache.parquet.column.Encoding dlEncoding,
                                        org.apache.parquet.column.Encoding valuesEncoding,
                                        OutputStream to)
                                 throws IOException
        Deprecated.
        Throws:
        IOException
      • writeDataPageV2Header

        @Deprecated
        public void writeDataPageV2Header​(int uncompressedSize,
                                          int compressedSize,
                                          int valueCount,
                                          int nullCount,
                                          int rowCount,
                                          org.apache.parquet.column.statistics.Statistics statistics,
                                          org.apache.parquet.column.Encoding dataEncoding,
                                          int rlByteLength,
                                          int dlByteLength,
                                          OutputStream to)
                                   throws IOException
        Deprecated.
        Throws:
        IOException
      • writeDataPageV1Header

        public void writeDataPageV1Header​(int uncompressedSize,
                                          int compressedSize,
                                          int valueCount,
                                          org.apache.parquet.column.Encoding rlEncoding,
                                          org.apache.parquet.column.Encoding dlEncoding,
                                          org.apache.parquet.column.Encoding valuesEncoding,
                                          OutputStream to)
                                   throws IOException
        Throws:
        IOException
      • writeDataPageV1Header

        public void writeDataPageV1Header​(int uncompressedSize,
                                          int compressedSize,
                                          int valueCount,
                                          org.apache.parquet.column.Encoding rlEncoding,
                                          org.apache.parquet.column.Encoding dlEncoding,
                                          org.apache.parquet.column.Encoding valuesEncoding,
                                          OutputStream to,
                                          org.apache.parquet.format.BlockCipher.Encryptor blockEncryptor,
                                          byte[] pageHeaderAAD)
                                   throws IOException
        Throws:
        IOException
      • writeDataPageV1Header

        public void writeDataPageV1Header​(int uncompressedSize,
                                          int compressedSize,
                                          int valueCount,
                                          org.apache.parquet.column.Encoding rlEncoding,
                                          org.apache.parquet.column.Encoding dlEncoding,
                                          org.apache.parquet.column.Encoding valuesEncoding,
                                          int crc,
                                          OutputStream to)
                                   throws IOException
        Throws:
        IOException
      • writeDataPageV1Header

        public void writeDataPageV1Header​(int uncompressedSize,
                                          int compressedSize,
                                          int valueCount,
                                          org.apache.parquet.column.Encoding rlEncoding,
                                          org.apache.parquet.column.Encoding dlEncoding,
                                          org.apache.parquet.column.Encoding valuesEncoding,
                                          int crc,
                                          OutputStream to,
                                          org.apache.parquet.format.BlockCipher.Encryptor blockEncryptor,
                                          byte[] pageHeaderAAD)
                                   throws IOException
        Throws:
        IOException
      • writeDataPageV2Header

        public void writeDataPageV2Header​(int uncompressedSize,
                                          int compressedSize,
                                          int valueCount,
                                          int nullCount,
                                          int rowCount,
                                          org.apache.parquet.column.Encoding dataEncoding,
                                          int rlByteLength,
                                          int dlByteLength,
                                          OutputStream to)
                                   throws IOException
        Throws:
        IOException
      • writeDataPageV2Header

        public void writeDataPageV2Header​(int uncompressedSize,
                                          int compressedSize,
                                          int valueCount,
                                          int nullCount,
                                          int rowCount,
                                          org.apache.parquet.column.Encoding dataEncoding,
                                          int rlByteLength,
                                          int dlByteLength,
                                          OutputStream to,
                                          org.apache.parquet.format.BlockCipher.Encryptor blockEncryptor,
                                          byte[] pageHeaderAAD)
                                   throws IOException
        Throws:
        IOException
      • writeDictionaryPageHeader

        public void writeDictionaryPageHeader​(int uncompressedSize,
                                              int compressedSize,
                                              int valueCount,
                                              org.apache.parquet.column.Encoding valuesEncoding,
                                              OutputStream to)
                                       throws IOException
        Throws:
        IOException
      • writeDictionaryPageHeader

        public void writeDictionaryPageHeader​(int uncompressedSize,
                                              int compressedSize,
                                              int valueCount,
                                              org.apache.parquet.column.Encoding valuesEncoding,
                                              OutputStream to,
                                              org.apache.parquet.format.BlockCipher.Encryptor blockEncryptor,
                                              byte[] pageHeaderAAD)
                                       throws IOException
        Throws:
        IOException
      • writeDictionaryPageHeader

        public void writeDictionaryPageHeader​(int uncompressedSize,
                                              int compressedSize,
                                              int valueCount,
                                              org.apache.parquet.column.Encoding valuesEncoding,
                                              int crc,
                                              OutputStream to)
                                       throws IOException
        Throws:
        IOException
      • writeDictionaryPageHeader

        public void writeDictionaryPageHeader​(int uncompressedSize,
                                              int compressedSize,
                                              int valueCount,
                                              org.apache.parquet.column.Encoding valuesEncoding,
                                              int crc,
                                              OutputStream to,
                                              org.apache.parquet.format.BlockCipher.Encryptor blockEncryptor,
                                              byte[] pageHeaderAAD)
                                       throws IOException
        Throws:
        IOException
      • toParquetColumnIndex

        public static org.apache.parquet.format.ColumnIndex toParquetColumnIndex​(org.apache.parquet.schema.PrimitiveType type,
                                                                                 org.apache.parquet.internal.column.columnindex.ColumnIndex columnIndex)
      • fromParquetColumnIndex

        public static org.apache.parquet.internal.column.columnindex.ColumnIndex fromParquetColumnIndex​(org.apache.parquet.schema.PrimitiveType type,
                                                                                                        org.apache.parquet.format.ColumnIndex parquetColumnIndex)
      • toParquetOffsetIndex

        public static org.apache.parquet.format.OffsetIndex toParquetOffsetIndex​(org.apache.parquet.internal.column.columnindex.OffsetIndex offsetIndex)
      • fromParquetOffsetIndex

        public static org.apache.parquet.internal.column.columnindex.OffsetIndex fromParquetOffsetIndex​(org.apache.parquet.format.OffsetIndex parquetOffsetIndex)
      • toBloomFilterHeader

        public static org.apache.parquet.format.BloomFilterHeader toBloomFilterHeader​(org.apache.parquet.column.values.bloomfilter.BloomFilter bloomFilter)