/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.lz4;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.utils.ByteUtils;

/**
 * CompressorOutputStream for the LZ4 frame format.
 *
 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
 *
 * <p>Data passed to the {@code write} methods is collected in a
 * buffer of the configured block size. A buffered block is
 * compressed and written to the wrapped stream when an incoming
 * write does not fit into the remaining space of the buffer or
 * when {@link #finish} is called.</p>
 *
 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
 * @since 1.14
 * @NotThreadSafe
 */
public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {

    /** Four zero bytes that terminate the sequence of blocks. */
    private static final byte[] END_MARK = new byte[4];

    // used in one-arg write method
    private final byte[] oneByte = new byte[1];

    // collects the uncompressed data of the block currently being filled
    private final byte[] blockData;
    private final OutputStream out;
    private final Parameters params;
    private boolean finished = false;
    // number of valid bytes inside of blockData
    private int currentIndex = 0;

    // used for frame header checksum and content checksum, if requested
    private final XXHash32 contentHash = new XXHash32();
    // used for block checksum, if requested
    private final XXHash32 blockHash;

    // only created if the config requires block dependency
    private final byte[] blockDependencyBuffer;
    // number of valid bytes at the end of blockDependencyBuffer
    private int collectedBlockDependencyBytes;

    /**
     * The block sizes supported by the format.
     */
    public enum BlockSize {
        /** Block size of 64K */
        K64(64 * 1024, 4),
        /** Block size of 256K */
        K256(256 * 1024, 5),
        /** Block size of 1M */
        M1(1024 * 1024, 6),
        /** Block size of 4M */
        M4(4096 * 1024, 7);

        private final int size;
        private final int index;

        BlockSize(int size, int index) {
            this.size = size;
            this.index = index;
        }

        /** @return the size of a block in bytes */
        int getSize() {
            return size;
        }

        /** @return the value written to the frame descriptor for this block size */
        int getIndex() {
            return index;
        }
    }

    /**
     * Parameters of the LZ4 frame format.
     */
    public static class Parameters {
        private final BlockSize blockSize;
        private final boolean withContentChecksum;
        private final boolean withBlockChecksum;
        private final boolean withBlockDependency;
        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;

        /**
         * The default parameters of 4M block size, enabled content
         * checksum, disabled block checksums and independent blocks.
         *
         * <p>This matches the defaults of the lz4 command line utility.</p>
         */
        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);

        /**
         * Sets up a custom block size for the LZ4 stream but
         * otherwise uses the defaults of enabled content checksum,
         * disabled block checksums and independent blocks.
         * @param blockSize the size of a single block.
         */
        public Parameters(BlockSize blockSize) {
            this(blockSize, true, false, false);
        }

        /**
         * Sets up a custom block size and custom compression
         * parameters for the LZ4 stream but otherwise uses the
         * defaults of enabled content checksum, disabled block
         * checksums and independent blocks.
         * @param blockSize the size of a single block.
         * @param lz77params parameters used to fine-tune compression,
         * in particular to balance compression ratio vs compression
         * speed.
         */
        public Parameters(BlockSize blockSize,
            org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this(blockSize, true, false, false, lz77params);
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         * @param blockSize the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum whether to write a block checksum.
         * Note that block checksums are not supported by the lz4
         * command line utility
         * @param withBlockDependency whether a block may depend on
         * the content of a previous block. Enabling this may improve
         * compression ratio but makes it impossible to decompress the
         * output in parallel.
         */
        public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum,
            boolean withBlockDependency) {
            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency,
                 BlockLZ4CompressorOutputStream.createParameterBuilder().build());
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         * @param blockSize the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum whether to write a block checksum.
         * Note that block checksums are not supported by the lz4
         * command line utility
         * @param withBlockDependency whether a block may depend on
         * the content of a previous block. Enabling this may improve
         * compression ratio but makes it impossible to decompress the
         * output in parallel.
         * @param lz77params parameters used to fine-tune compression,
         * in particular to balance compression ratio vs compression
         * speed.
         */
        public Parameters(BlockSize blockSize, boolean withContentChecksum, boolean withBlockChecksum,
            boolean withBlockDependency,
            org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this.blockSize = blockSize;
            this.withContentChecksum = withContentChecksum;
            this.withBlockChecksum = withBlockChecksum;
            this.withBlockDependency = withBlockDependency;
            this.lz77params = lz77params;
        }

        @Override
        public String toString() {
            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum
                + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency;
        }
    }

    /**
     * Constructs a new output stream that compresses data using the
     * LZ4 frame format using the default block size of 4MB.
     * @param out the OutputStream to which to write the compressed data
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(OutputStream out) throws IOException {
        this(out, Parameters.DEFAULT);
    }

    /**
     * Constructs a new output stream that compresses data using the
     * LZ4 frame format using the given parameters.
     *
     * <p>The frame signature and the frame descriptor are written
     * to {@code out} immediately.</p>
     *
     * @param out the OutputStream to which to write the compressed data
     * @param params the parameters to use
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(OutputStream out, Parameters params) throws IOException {
        this.params = params;
        blockData = new byte[params.blockSize.getSize()];
        this.out = out;
        blockHash = params.withBlockChecksum ? new XXHash32() : null;
        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
        writeFrameDescriptor();
        blockDependencyBuffer = params.withBlockDependency
            ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]
            : null;
    }

    @Override
    public void write(int b) throws IOException {
        oneByte[0] = (byte) (b & 0xff);
        write(oneByte);
    }

    /**
     * Buffers the given data, compressing and writing out blocks
     * whenever the data does not fit into the current block.
     *
     * @param data the array holding the bytes to write
     * @param off offset of the first byte to write
     * @param len number of bytes to write
     * @throws IOException if writing a block to the wrapped stream fails
     */
    @Override
    public void write(byte[] data, int off, int len) throws IOException {
        if (params.withContentChecksum) {
            contentHash.update(data, off, len);
        }
        // expressed as a subtraction so that a huge len cannot overflow
        // the int addition and silently skip the flush
        if (len > blockData.length - currentIndex) {
            // flushBlock is a no-op if nothing has been buffered so far
            flushBlock();
            while (len > blockData.length) {
                System.arraycopy(data, off, blockData, 0, blockData.length);
                off += blockData.length;
                len -= blockData.length;
                currentIndex = blockData.length;
                flushBlock();
            }
        }
        System.arraycopy(data, off, blockData, currentIndex, len);
        currentIndex += len;
    }

    /**
     * Finishes the frame and closes the underlying stream.
     *
     * <p>The underlying stream is closed even if finishing the
     * frame fails.</p>
     *
     * @throws IOException if finishing the frame or closing the
     * underlying stream fails
     */
    @Override
    public void close() throws IOException {
        try {
            finish();
        } finally {
            out.close();
        }
    }

    /**
     * Compresses all remaining data and writes it to the stream,
     * doesn't close the underlying stream.
     *
     * <p>Calling this method more than once has no additional effect.</p>
     *
     * @throws IOException if an error occurs
     */
    public void finish() throws IOException {
        if (!finished) {
            if (currentIndex > 0) {
                flushBlock();
            }
            writeTrailer();
            finished = true;
        }
    }

    /**
     * Writes the flag byte, the block descriptor byte and the header
     * checksum byte derived from the configured parameters.
     */
    private void writeFrameDescriptor() throws IOException {
        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
        if (!params.withBlockDependency) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
        }
        if (params.withContentChecksum) {
            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
        }
        if (params.withBlockChecksum) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
        }
        out.write(flags);
        contentHash.update(flags);
        int bd = (params.blockSize.getIndex() << 4) & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
        out.write(bd);
        contentHash.update(bd);
        // the header checksum is the second-lowest byte of the hash of the two descriptor bytes
        out.write((int) ((contentHash.getValue() >> 8) & 0xff));
        // the same hash instance is reused for the content checksum
        contentHash.reset();
    }

    /**
     * Compresses the currently buffered data and writes it as a
     * single block, falling back to an uncompressed block if
     * compression would increase the size.
     *
     * <p>Does nothing if no data has been buffered.</p>
     */
    private void flushBlock() throws IOException {
        if (currentIndex == 0) {
            // Never emit an empty block: if the compressor produced no output
            // for it, the block's size field would be four zero bytes and thus
            // identical to END_MARK, terminating the frame prematurely.
            return;
        }
        final boolean withBlockDependency = params.withBlockDependency;
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
            if (withBlockDependency) {
                // the collected bytes live at the end of the dependency buffer
                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes,
                    collectedBlockDependencyBytes);
            }
            o.write(blockData, 0, currentIndex);
        }
        if (withBlockDependency) {
            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
        }
        byte[] b = baos.toByteArray();
        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK,
                4);
            out.write(blockData, 0, currentIndex);
            if (params.withBlockChecksum) {
                blockHash.update(blockData, 0, currentIndex);
            }
        } else {
            ByteUtils.toLittleEndian(out, b.length, 4);
            out.write(b);
            if (params.withBlockChecksum) {
                blockHash.update(b, 0, b.length);
            }
        }
        if (params.withBlockChecksum) {
            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
            blockHash.reset();
        }
        currentIndex = 0;
    }

    /**
     * Writes the end mark followed by the content checksum, if one
     * has been requested.
     */
    private void writeTrailer() throws IOException {
        out.write(END_MARK);
        if (params.withContentChecksum) {
            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
        }
    }

    /**
     * Appends the given data to the end of the block dependency
     * buffer, discarding the oldest bytes if the buffer is full.
     *
     * <p>If more bytes are given than the buffer can hold, only the
     * first {@code blockDependencyBuffer.length} bytes starting at
     * {@code off} are copied.</p>
     */
    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
        len = Math.min(len, blockDependencyBuffer.length);
        if (len > 0) {
            int keep = blockDependencyBuffer.length - len;
            if (keep > 0) {
                // move last keep bytes towards the start of the buffer
                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
            }
            // append new data
            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len,
                blockDependencyBuffer.length);
        }
    }

}