001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 package org.apache.commons.compress.archivers.zip; 020 021 import java.io.ByteArrayInputStream; 022 import java.io.ByteArrayOutputStream; 023 import java.io.EOFException; 024 import java.io.IOException; 025 import java.io.InputStream; 026 import java.io.PushbackInputStream; 027 import java.util.zip.CRC32; 028 import java.util.zip.DataFormatException; 029 import java.util.zip.Inflater; 030 import java.util.zip.ZipException; 031 032 import org.apache.commons.compress.archivers.ArchiveEntry; 033 import org.apache.commons.compress.archivers.ArchiveInputStream; 034 035 /** 036 * Implements an input stream that can read Zip archives. 037 * <p> 038 * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information 039 * is not available from the header. 040 * <p> 041 * The {@link ZipFile} class is preferred when reading from files. 042 * 043 * @see ZipFile 044 * @NotThreadSafe 045 */ 046 public class ZipArchiveInputStream extends ArchiveInputStream { 047 048 private static final int SHORT = 2; 049 private static final int WORD = 4; 050 051 /** 052 * The zip encoding to use for filenames and the file comment. 053 */ 054 private final ZipEncoding zipEncoding; 055 056 /** 057 * Whether to look for and use Unicode extra fields. 058 */ 059 private final boolean useUnicodeExtraFields; 060 061 private final InputStream in; 062 063 private final Inflater inf = new Inflater(true); 064 private final CRC32 crc = new CRC32(); 065 066 private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE]; 067 068 private ZipArchiveEntry current = null; 069 private boolean closed = false; 070 private boolean hitCentralDirectory = false; 071 private int readBytesOfEntry = 0, offsetInBuffer = 0; 072 private int bytesReadFromStream = 0; 073 private int lengthOfLastRead = 0; 074 private boolean hasDataDescriptor = false; 075 private ByteArrayInputStream lastStoredEntry = null; 076 077 private boolean allowStoredEntriesWithDataDescriptor = false; 078 079 private static final int LFH_LEN = 30; 080 /* 081 local file header signature 4 bytes (0x04034b50) 082 version needed to extract 2 bytes 083 general purpose bit flag 2 bytes 084 compression method 2 bytes 085 last mod file time 2 bytes 086 last mod file date 2 bytes 087 crc-32 4 bytes 088 compressed size 4 bytes 089 uncompressed size 4 bytes 090 file name length 2 bytes 091 extra field length 2 bytes 092 */ 093 094 public ZipArchiveInputStream(InputStream inputStream) { 095 this(inputStream, ZipEncodingHelper.UTF8, true); 096 } 097 098 /** 099 * @param encoding the encoding to use for file names, use null 100 * for the platform's default encoding 101 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 102 * Extra Fields (if present) to set the file names. 103 */ 104 public ZipArchiveInputStream(InputStream inputStream, 105 String encoding, 106 boolean useUnicodeExtraFields) { 107 this(inputStream, encoding, useUnicodeExtraFields, false); 108 } 109 110 /** 111 * @param encoding the encoding to use for file names, use null 112 * for the platform's default encoding 113 * @param useUnicodeExtraFields whether to use InfoZIP Unicode 114 * Extra Fields (if present) to set the file names. 115 * @param allowStoredEntriesWithDataDescriptor whether the stream 116 * will try to read STORED entries that use a data descriptor 117 * @since Apache Commons Compress 1.1 118 */ 119 public ZipArchiveInputStream(InputStream inputStream, 120 String encoding, 121 boolean useUnicodeExtraFields, 122 boolean allowStoredEntriesWithDataDescriptor) { 123 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 124 this.useUnicodeExtraFields = useUnicodeExtraFields; 125 in = new PushbackInputStream(inputStream, buf.length); 126 this.allowStoredEntriesWithDataDescriptor = 127 allowStoredEntriesWithDataDescriptor; 128 } 129 130 public ZipArchiveEntry getNextZipEntry() throws IOException { 131 if (closed || hitCentralDirectory) { 132 return null; 133 } 134 if (current != null) { 135 closeEntry(); 136 } 137 byte[] lfh = new byte[LFH_LEN]; 138 try { 139 readFully(lfh); 140 } catch (EOFException e) { 141 return null; 142 } 143 ZipLong sig = new ZipLong(lfh); 144 if (sig.equals(ZipLong.CFH_SIG)) { 145 hitCentralDirectory = true; 146 return null; 147 } 148 if (!sig.equals(ZipLong.LFH_SIG)) { 149 return null; 150 } 151 152 int off = WORD; 153 current = new ZipArchiveEntry(); 154 155 int versionMadeBy = ZipShort.getValue(lfh, off); 156 off += SHORT; 157 current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) 158 & ZipFile.NIBLET_MASK); 159 160 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfh, off); 161 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames(); 162 final ZipEncoding entryEncoding = 163 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding; 164 hasDataDescriptor = gpFlag.usesDataDescriptor(); 165 current.setGeneralPurposeBit(gpFlag); 166 167 off += SHORT; 168 169 current.setMethod(ZipShort.getValue(lfh, off)); 170 off += SHORT; 171 172 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off)); 173 current.setTime(time); 174 off += WORD; 175 176 if (!hasDataDescriptor) { 177 current.setCrc(ZipLong.getValue(lfh, off)); 178 off += WORD; 179 180 current.setCompressedSize(ZipLong.getValue(lfh, off)); 181 off += WORD; 182 183 current.setSize(ZipLong.getValue(lfh, off)); 184 off += WORD; 185 } else { 186 off += 3 * WORD; 187 } 188 189 int fileNameLen = ZipShort.getValue(lfh, off); 190 191 off += SHORT; 192 193 int extraLen = ZipShort.getValue(lfh, off); 194 off += SHORT; 195 196 byte[] fileName = new byte[fileNameLen]; 197 readFully(fileName); 198 current.setName(entryEncoding.decode(fileName)); 199 200 byte[] extraData = new byte[extraLen]; 201 readFully(extraData); 202 current.setExtra(extraData); 203 204 if (!hasUTF8Flag && useUnicodeExtraFields) { 205 ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null); 206 } 207 return current; 208 } 209 210 /** {@inheritDoc} */ 211 public ArchiveEntry getNextEntry() throws IOException { 212 return getNextZipEntry(); 213 } 214 215 /** 216 * Whether this class is able to read the given entry. 217 * 218 * <p>May return false if it is set up to use encryption or a 219 * compression method that hasn't been implemented yet.</p> 220 * @since Apache Commons Compress 1.1 221 */ 222 public boolean canReadEntryData(ArchiveEntry ae) { 223 if (ae instanceof ZipArchiveEntry) { 224 ZipArchiveEntry ze = (ZipArchiveEntry) ae; 225 return ZipUtil.canHandleEntryData(ze) 226 && supportsDataDescriptorFor(ze); 227 228 } 229 return false; 230 } 231 232 public int read(byte[] buffer, int start, int length) throws IOException { 233 if (closed) { 234 throw new IOException("The stream is closed"); 235 } 236 if (inf.finished() || current == null) { 237 return -1; 238 } 239 240 // avoid int overflow, check null buffer 241 if (start <= buffer.length && length >= 0 && start >= 0 242 && buffer.length - start >= length) { 243 ZipUtil.checkRequestedFeatures(current); 244 if (!supportsDataDescriptorFor(current)) { 245 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException 246 .Feature 247 .DATA_DESCRIPTOR, 248 current); 249 } 250 251 if (current.getMethod() == ZipArchiveOutputStream.STORED) { 252 if (hasDataDescriptor) { 253 if (lastStoredEntry == null) { 254 readStoredEntry(); 255 } 256 return lastStoredEntry.read(buffer, start, length); 257 } 258 259 int csize = (int) current.getSize(); 260 if (readBytesOfEntry >= csize) { 261 return -1; 262 } 263 if (offsetInBuffer >= lengthOfLastRead) { 264 offsetInBuffer = 0; 265 if ((lengthOfLastRead = in.read(buf)) == -1) { 266 return -1; 267 } 268 count(lengthOfLastRead); 269 bytesReadFromStream += lengthOfLastRead; 270 } 271 int toRead = length > lengthOfLastRead 272 ? lengthOfLastRead - offsetInBuffer 273 : length; 274 if ((csize - readBytesOfEntry) < toRead) { 275 toRead = csize - readBytesOfEntry; 276 } 277 System.arraycopy(buf, offsetInBuffer, buffer, start, toRead); 278 offsetInBuffer += toRead; 279 readBytesOfEntry += toRead; 280 crc.update(buffer, start, toRead); 281 return toRead; 282 } 283 284 if (inf.needsInput()) { 285 fill(); 286 if (lengthOfLastRead > 0) { 287 bytesReadFromStream += lengthOfLastRead; 288 } 289 } 290 int read = 0; 291 try { 292 read = inf.inflate(buffer, start, length); 293 } catch (DataFormatException e) { 294 throw new ZipException(e.getMessage()); 295 } 296 if (read == 0) { 297 if (inf.finished()) { 298 return -1; 299 } else if (lengthOfLastRead == -1) { 300 throw new IOException("Truncated ZIP file"); 301 } 302 } 303 crc.update(buffer, start, read); 304 return read; 305 } 306 throw new ArrayIndexOutOfBoundsException(); 307 } 308 309 public void close() throws IOException { 310 if (!closed) { 311 closed = true; 312 in.close(); 313 } 314 } 315 316 public long skip(long value) throws IOException { 317 if (value >= 0) { 318 long skipped = 0; 319 byte[] b = new byte[1024]; 320 while (skipped != value) { 321 long rem = value - skipped; 322 int x = read(b, 0, (int) (b.length > rem ? rem : b.length)); 323 if (x == -1) { 324 return skipped; 325 } 326 skipped += x; 327 } 328 return skipped; 329 } 330 throw new IllegalArgumentException(); 331 } 332 333 /** 334 * Checks if the signature matches what is expected for a zip file. 335 * Does not currently handle self-extracting zips which may have arbitrary 336 * leading content. 337 * 338 * @param signature 339 * the bytes to check 340 * @param length 341 * the number of bytes to check 342 * @return true, if this stream is a zip archive stream, false otherwise 343 */ 344 public static boolean matches(byte[] signature, int length) { 345 if (length < ZipArchiveOutputStream.LFH_SIG.length) { 346 return false; 347 } 348 349 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file 350 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip 351 } 352 353 private static boolean checksig(byte[] signature, byte[] expected){ 354 for (int i = 0; i < expected.length; i++) { 355 if (signature[i] != expected[i]) { 356 return false; 357 } 358 } 359 return true; 360 } 361 362 /** 363 * Closes the current ZIP archive entry and positions the underlying 364 * stream to the beginning of the next entry. All per-entry variables 365 * and data structures are cleared. 366 * <p> 367 * If the compressed size of this entry is included in the entry header, 368 * then any outstanding bytes are simply skipped from the underlying 369 * stream without uncompressing them. This allows an entry to be safely 370 * closed even if the compression method is unsupported. 371 * <p> 372 * In case we don't know the compressed size of this entry or have 373 * already buffered too much data from the underlying stream to support 374 * uncompression, then the uncompression process is completed and the 375 * end position of the stream is adjusted based on the result of that 376 * process. 377 * 378 * @throws IOException if an error occurs 379 */ 380 private void closeEntry() throws IOException { 381 if (closed) { 382 throw new IOException("The stream is closed"); 383 } 384 if (current == null) { 385 return; 386 } 387 388 // Ensure all entry bytes are read 389 if (bytesReadFromStream <= current.getCompressedSize() 390 && !hasDataDescriptor) { 391 long remaining = current.getCompressedSize() - bytesReadFromStream; 392 while (remaining > 0) { 393 long n = in.read(buf, 0, (int) Math.min(buf.length, remaining)); 394 if (n < 0) { 395 throw new EOFException( 396 "Truncated ZIP entry: " + current.getName()); 397 } else { 398 count(n); 399 remaining -= n; 400 } 401 } 402 } else { 403 skip(Long.MAX_VALUE); 404 405 int inB; 406 if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) { 407 inB = inf.getTotalIn(); 408 } else { 409 inB = readBytesOfEntry; 410 } 411 int diff = 0; 412 413 // Pushback any required bytes 414 if ((diff = bytesReadFromStream - inB) != 0) { 415 ((PushbackInputStream) in).unread( 416 buf, lengthOfLastRead - diff, diff); 417 pushedBackBytes(diff); 418 } 419 } 420 421 if (lastStoredEntry == null && hasDataDescriptor) { 422 readDataDescriptor(); 423 } 424 425 inf.reset(); 426 readBytesOfEntry = offsetInBuffer = bytesReadFromStream = 427 lengthOfLastRead = 0; 428 crc.reset(); 429 current = null; 430 lastStoredEntry = null; 431 } 432 433 private void fill() throws IOException { 434 if (closed) { 435 throw new IOException("The stream is closed"); 436 } 437 if ((lengthOfLastRead = in.read(buf)) > 0) { 438 count(lengthOfLastRead); 439 inf.setInput(buf, 0, lengthOfLastRead); 440 } 441 } 442 443 private void readFully(byte[] b) throws IOException { 444 int count = 0, x = 0; 445 while (count != b.length) { 446 count += x = in.read(b, count, b.length - count); 447 if (x == -1) { 448 throw new EOFException(); 449 } 450 count(x); 451 } 452 } 453 454 private void readDataDescriptor() throws IOException { 455 byte[] b = new byte[WORD]; 456 readFully(b); 457 ZipLong val = new ZipLong(b); 458 if (ZipLong.DD_SIG.equals(val)) { 459 // data descriptor with signature, skip sig 460 readFully(b); 461 val = new ZipLong(b); 462 } 463 current.setCrc(val.getValue()); 464 readFully(b); 465 current.setCompressedSize(new ZipLong(b).getValue()); 466 readFully(b); 467 current.setSize(new ZipLong(b).getValue()); 468 } 469 470 /** 471 * Whether this entry requires a data descriptor this library can work with. 472 * 473 * @return true if allowStoredEntriesWithDataDescriptor is true, 474 * the entry doesn't require any data descriptor or the method is 475 * DEFLATED. 476 */ 477 private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) { 478 return allowStoredEntriesWithDataDescriptor || 479 !entry.getGeneralPurposeBit().usesDataDescriptor() 480 || entry.getMethod() == ZipArchiveEntry.DEFLATED; 481 } 482 483 /** 484 * Caches a stored entry that uses the data descriptor. 485 * 486 * <ul> 487 * <li>Reads a stored entry until the signature of a local file 488 * header, central directory header or data descriptor has been 489 * found.</li> 490 * <li>Stores all entry data in lastStoredEntry.</p> 491 * <li>Rewinds the stream to position at the data 492 * descriptor.</li> 493 * <li>reads the data descriptor</li> 494 * </ul> 495 * 496 * <p>After calling this method the entry should know its size, 497 * the entry's data is cached and the stream is positioned at the 498 * next local file or central directory header.</p> 499 */ 500 private void readStoredEntry() throws IOException { 501 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 502 byte[] LFH = ZipLong.LFH_SIG.getBytes(); 503 byte[] CFH = ZipLong.CFH_SIG.getBytes(); 504 byte[] DD = ZipLong.DD_SIG.getBytes(); 505 int off = 0; 506 boolean done = false; 507 508 while (!done) { 509 int r = in.read(buf, off, ZipArchiveOutputStream.BUFFER_SIZE - off); 510 if (r <= 0) { 511 // read the whole archive without ever finding a 512 // central directory 513 throw new IOException("Truncated ZIP file"); 514 } 515 if (r + off < 4) { 516 // buf is too small to check for a signature, loop 517 off += r; 518 continue; 519 } 520 521 int readTooMuch = 0; 522 for (int i = 0; !done && i < r - 4; i++) { 523 if (buf[i] == LFH[0] && buf[i + 1] == LFH[1]) { 524 if ((buf[i + 2] == LFH[2] && buf[i + 3] == LFH[3]) 525 || (buf[i] == CFH[2] && buf[i + 3] == CFH[3])) { 526 // found a LFH or CFH: 527 readTooMuch = off + r - i - 12 /* dd without signature */; 528 done = true; 529 } 530 else if (buf[i + 2] == DD[2] && buf[i + 3] == DD[3]) { 531 // found DD: 532 readTooMuch = off + r - i; 533 done = true; 534 } 535 if (done) { 536 // * push back bytes read in excess as well as the data 537 // descriptor 538 // * copy the remaining bytes to cache 539 // * read data descriptor 540 ((PushbackInputStream) in).unread(buf, off + r - readTooMuch, readTooMuch); 541 bos.write(buf, 0, i); 542 readDataDescriptor(); 543 } 544 } 545 } 546 if (!done) { 547 // worst case we've read a data descriptor without a 548 // signature (12 bytes) plus the first three bytes of 549 // a LFH or CFH signature 550 // save the last 15 bytes in the buffer, cache 551 // anything in front of that, read on 552 if (off + r > 15) { 553 bos.write(buf, 0, off + r - 15); 554 System.arraycopy(buf, off + r - 15, buf, 0, 15); 555 off = 15; 556 } else { 557 off += r; 558 } 559 } 560 } 561 562 byte[] b = bos.toByteArray(); 563 lastStoredEntry = new ByteArrayInputStream(b); 564 } 565 }