/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    // Archive summary information parsed from the first (header) record.
    private DumpArchiveSummary summary;
    // Segment header of the entry currently being processed.
    private DumpArchiveEntry active;
    // Set once close() has been called (also set when the END segment is seen).
    private boolean isClosed;
    // Set once the end-of-volume (END) segment has been seen.
    private boolean hasHitEOF;
    // Size in bytes of the current entry's file data.
    private long entrySize;
    // Number of bytes of the current entry already handed out by read().
    private long entryOffset;
    // Index of the next record within the current segment.
    private int readIdx;
    // Holds one tape record's worth of the current entry's file data.
    private byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    // Scratch buffer for directory segments; grown on demand in readDirectoryEntry().
    private byte[] blockBuffer;
    // Read position within readBuf; readBuf.length means "buffer exhausted".
    private int recordOffset;
    // Byte offset in the raw stream at which the current entry's header starts.
    private long filepos;
    // Underlying record/block-oriented tape stream.
    protected TapeInputStream raw;

    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();

    // queue of (directory) entries where we now have the full path.
    private Queue<DumpArchiveEntry> queue;

    /**
     * Constructor. Wraps the raw stream, verifies and parses the archive
     * header record, resizes the tape buffer to the archive's actual block
     * size, and skips the CLRI and BITS segments so the stream is positioned
     * at the first real entry.
     *
     * @param is the dump archive to read
     * @throws ArchiveException if the stream is not a recognizable dump
     *         archive or an I/O error occurs while reading the header
     */
    public DumpArchiveInputStream(InputStream is) throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;

        try {
            // read header, verify it's a dump archive.
            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // Put in a dummy record for the root node so path reconstruction
        // terminates. Ino 2 maps to itself as its own parent ("."), which is
        // the loop-exit condition in getPath().
        Dirent root = new Dirent(2, 2, 4, ".");
        names.put(Integer.valueOf(2), root);

        // use priority based on queue to ensure parent directories are
        // released first.
        // NOTE(review): returning Integer.MAX_VALUE when either name is null
        // violates the Comparator symmetry contract. In practice entries are
        // named via setName() before being queued, so the null branch should
        // not fire — confirm before relying on the ordering for null names.
        queue = new PriorityQueue<DumpArchiveEntry>(10,
                new Comparator<DumpArchiveEntry>() {
                    public int compare(DumpArchiveEntry p, DumpArchiveEntry q) {
                        if ((p.getOriginalName() == null) || (q.getOriginalName() == null)) {
                            return Integer.MAX_VALUE;
                        }

                        return p.getOriginalName().compareTo(q.getOriginalName());
                    }
                });
    }

    /**
     * Returns the number of bytes consumed, truncated to an int.
     *
     * @deprecated use {@link #getBytesRead()} instead, which does not truncate.
     */
    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    /**
     * Returns the number of raw bytes consumed from the underlying stream.
     */
    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    /**
     * Return the archive summary information.
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Read the CLRI (deleted inode) segment: verify its header, then skip
     * its data records — the contents are not used yet.
     *
     * @throws IOException on read error
     * @throws InvalidFormatException if the record fails verification or is
     *         not a CLRI segment
     * @throws EOFException if the stream ends inside the segment
     */
    private void readCLRI() throws IOException {
        byte[] readBuf = raw.readRecord();

        if (!DumpArchiveUtil.verify(readBuf)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(readBuf);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet: skip one TP_SIZE record per
        // header-count entry.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        // mark the whole segment as consumed.
        readIdx = active.getHeaderCount();
    }

    /**
     * Read the BITS (allocated inode bitmap) segment: verify its header,
     * then skip its data records — the contents are not used yet.
     *
     * @throws IOException on read error
     * @throws InvalidFormatException if the record fails verification or is
     *         not a BITS segment
     * @throws EOFException if the stream ends inside the segment
     */
    private void readBITS() throws IOException {
        byte[] readBuf = raw.readRecord();

        if (!DumpArchiveUtil.verify(readBuf)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(readBuf);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet: skip one TP_SIZE record per
        // header-count entry.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        // mark the whole segment as consumed.
        readIdx = active.getHeaderCount();
    }

    /**
     * Read the next entry. Covariant convenience alias for
     * {@link #getNextEntry()}.
     *
     * @return the next entry, or null at end of archive
     * @throws IOException on read error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }

    /**
     * Read the next entry. Entries whose full path could only be resolved
     * later (parents seen after children) are drained from the internal
     * queue first; otherwise the stream advances past any unread remainder
     * of the previous entry and parses the next INODE segment. Directory
     * entries are consumed eagerly (to build the ino -> name map) and are
     * returned with an empty data stream.
     *
     * @return the next entry, or null once the END segment is reached
     * @throws IOException on read error
     * @throws InvalidFormatException if a record fails verification
     * @throws EOFException if the stream ends unexpectedly
     */
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        // loop until we find an entry whose full path can be resolved;
        // unresolvable ones are parked in 'pending' by getPath().
        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                // sparse records occupy no space on tape, so only skip
                // records that are actually present.
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            // remember where this entry's header starts in the raw stream.
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                // holes (header holes) take no space, so subtract them from
                // the record count before skipping.
                if (raw.skip(DumpArchiveConstants.TP_SIZE
                        * (active.getHeaderCount()
                        - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;
                isClosed = true;
                raw.close();

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                // consume the directory data now to populate the ino -> name
                // map; callers see directories as zero-length entries.
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            // force read() to load a fresh record before copying any data.
            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                // parent directory not seen yet — getPath() parked the entry
                // in 'pending'; keep scanning.
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(Integer.valueOf(entry.getIno())).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Read a directory entry's data segments and record every contained
     * dirent in the ino -> name map. Any entries in 'pending' whose paths
     * become resolvable as a result are named and moved to 'queue'.
     *
     * @param entry the directory entry whose header was just read
     * @throws IOException on read error
     * @throws EOFException if the stream ends inside the directory data
     * @throws InvalidFormatException if a peeked record fails verification
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        // a large directory may continue into ADDR segments; keep going
        // until the peeked header is no longer an ADDR continuation.
        while (first ||
                (DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType())) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            // remember directories we haven't named yet so they can be
            // released once their parents appear.
            if (!names.containsKey(Integer.valueOf(entry.getIno())) &&
                    (DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType())) {
                pending.put(Integer.valueOf(entry.getIno()), entry);
            }

            int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = new byte[datalen];
            }

            if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            // Each on-disk dirent record is laid out as:
            //   ino (4 bytes), reclen (2), type (1), namelen (1), name bytes.
            // The 8-byte fixed part explains the "- 8" loop bounds.
            for (int i = 0; (i < (datalen - 8)) && (i < (size - 8));
                    i += reclen) {
                int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                byte type = blockBuffer[i + 6];

                // NOTE(review): decodes with the platform default charset —
                // TODO confirm the intended encoding for dirent names.
                String name = new String(blockBuffer, i + 8, blockBuffer[i + 7]);

                if (".".equals(name) || "..".equals(name)) {
                    // self/parent links carry no new path information.
                    continue;
                }

                Dirent d = new Dirent(ino, entry.getIno(), type, name);

                // NOTE(review): a directory ino may already be present in
                // 'names'; the newer dirent simply replaces it here.
                names.put(Integer.valueOf(ino), d);

                // check whether this allows us to fill anything in the pending list.
                for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
                    String path = getPath(e.getValue());

                    if (path != null) {
                        e.getValue().setName(path);
                        e.getValue()
                         .setSimpleName(names.get(e.getKey()).getName());
                        queue.add(e.getValue());
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                for (DumpArchiveEntry e : queue) {
                    pending.remove(Integer.valueOf(e.getIno()));
                }
            }

            // peek at the next header without consuming it; the outer while
            // decides whether it is an ADDR continuation of this directory.
            byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

    /**
     * Get full path for specified archive entry, or null if there's a gap.
     *
     * @param entry the entry to resolve
     * @return full path for specified archive entry, or null if there's a gap.
     */
    private String getPath(DumpArchiveEntry entry) {
        // Walk parent links from the entry's ino up to the root, pushing each
        // name. If any intermediate directory is missing from 'names', the
        // path cannot be built yet — defer the entry and try again later.
        Stack<String> elements = new Stack<String>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(Integer.valueOf(i))) {
                elements.clear();
                break;
            }

            dirent = names.get(Integer.valueOf(i));
            elements.push(dirent.getName());

            // the root dirent is its own parent (see constructor) — stop.
            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing defer the work and read next entry.
        if (elements.isEmpty()) {
            pending.put(Integer.valueOf(entry.getIno()), entry);

            return null;
        }

        // generate full path from stack of elements.
        StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        int totalRead = 0;

        if (isClosed || (entryOffset >= entrySize)) {
            return -1;
        }

        // never read past the end of the current entry.
        if ((len + entryOffset) > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            // copy at most what remains in the current record buffer.
            int sz = (len > (readBuf.length - recordOffset))
                ? (readBuf.length - recordOffset) : len;

            // copy any data we have
            if ((recordOffset + sz) <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                // NOTE(review): 512 appears to be the maximum record count
                // per segment header — confirm against the dump format
                // constants before changing.
                if (readIdx >= 512) {
                    byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    // a real record: read one full record from tape.
                    int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    // a hole: synthesize a zero-filled record.
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes the stream for this entry. Idempotent: subsequent calls are
     * no-ops.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
     *
     * @param buffer bytes from the start of a candidate stream
     * @param length number of valid bytes in buffer
     * @return true if the bytes look like a dump archive header
     */
    public static boolean matches(byte[] buffer, int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test: a full record lets us verify the checksum.
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch: the magic number lives at offset 24.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer,
            24);
    }
}