/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
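 * <p>A minimal usage sketch; the file name and stream variables are
 * illustrative only:</p>
 * <pre>{@code
 * DumpArchiveInputStream dump =
 *     new DumpArchiveInputStream(new FileInputStream("backup.dump"));
 * DumpArchiveEntry entry;
 * while ((entry = dump.getNextEntry()) != null) {
 *     // entry.getName() holds the full path; read() returns the
 *     // contents of this entry until its end is reached.
 * }
 * dump.close();
 * }</pre>
 *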
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream {
    private DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;
    protected TapeInputStream raw;

    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private Map<Integer, Dirent> names = new HashMap<Integer, Dirent>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private Map<Integer, DumpArchiveEntry> pending = new HashMap<Integer, DumpArchiveEntry>();

    // queue of (directory) entries where we now have the full path.
    private Queue<DumpArchiveEntry> queue;
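
    // How the three structures above interact: an entry whose full path
    // can't be resolved yet (a parent dirent hasn't been read) is parked
    // in 'pending'; once the missing dirents arrive, the entry moves to
    // 'queue', which getNextEntry() drains before reading new segments.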

    /**
     * Constructor.
     *
     * @param is stream to read from
     * @throws ArchiveException if the stream is not a recognizable dump
     *         archive or an I/O error occurs while reading the header
     */
    public DumpArchiveInputStream(InputStream is) throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;

        try {
            // read header, verify it's a dump archive.
            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes);

            // reset buffer with actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // put in a dummy record for the root node.
        Dirent root = new Dirent(2, 2, 4, ".");
        names.put(Integer.valueOf(2), root);

        // use a priority queue to ensure parent directories are
        // released first.
        queue = new PriorityQueue<DumpArchiveEntry>(10,
                new Comparator<DumpArchiveEntry>() {
                    public int compare(DumpArchiveEntry p, DumpArchiveEntry q) {
                        if ((p.getOriginalName() == null) || (q.getOriginalName() == null)) {
                            return Integer.MAX_VALUE;
                        }

                        return p.getOriginalName().compareTo(q.getOriginalName());
                    }
                });
    }

    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    /**
     * Return the archive summary information.
     *
     * @return the summary record read from the archive header
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }

    /**
     * Read CLRI (deleted inode) segment.
     */
    private void readCLRI() throws IOException {
        byte[] readBuf = raw.readRecord();

        if (!DumpArchiveUtil.verify(readBuf)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(readBuf);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read BITS segment.
     */
    private void readBITS() throws IOException {
        byte[] readBuf = raw.readRecord();

        if (!DumpArchiveUtil.verify(readBuf)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(readBuf);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip(DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Read the next entry.
     *
     * @return the next entry, or null if there are no more
     * @throws IOException on error
     */
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }

    /**
     * Read the next entry.
     *
     * @return the next entry, or null if there are no more
     * @throws IOException on error
     */
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for prior file.
            // we might still have holes... easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip(DumpArchiveConstants.TP_SIZE
                             * (active.getHeaderCount()
                                - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;
                isClosed = true;
                raw.close();

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(Integer.valueOf(entry.getIno())).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Read directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
                (DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType())) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(Integer.valueOf(entry.getIno())) &&
                    (DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType())) {
                pending.put(Integer.valueOf(entry.getIno()), entry);
            }

            int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = new byte[datalen];
            }

            if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

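            // Each record in this block appears to follow the BSD
            // "struct direct" layout (inferred from the offsets used
            // below): bytes 0-3 inode number, bytes 4-5 record length,
            // byte 6 file type, byte 7 name length, bytes 8+ the name.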
            for (int i = 0; (i < (datalen - 8)) && (i < (size - 8));
                    i += reclen) {
                int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                byte type = blockBuffer[i + 6];

                String name = new String(blockBuffer, i + 8, blockBuffer[i + 7]); // TODO default charset?

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                Dirent d = new Dirent(ino, entry.getIno(), type, name);

                /*
                if ((type == 4) && names.containsKey(ino)) {
                    System.out.println("we already have ino: " +
                                       names.get(ino));
                }
                */

                names.put(Integer.valueOf(ino), d);

                // check whether this allows us to fill anything in the pending list.
                for (Map.Entry<Integer, DumpArchiveEntry> e : pending.entrySet()) {
                    String path = getPath(e.getValue());

                    if (path != null) {
                        e.getValue().setName(path);
                        e.getValue()
                         .setSimpleName(names.get(e.getKey()).getName());
                        queue.add(e.getValue());
                    }
                }

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                for (DumpArchiveEntry e : queue) {
                    pending.remove(Integer.valueOf(e.getIno()));
                }
            }

            byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }

    /**
     * Get full path for specified archive entry, or null if there's a gap.
     *
     * @param entry the entry whose full path should be reconstructed
     * @return full path for specified archive entry, or null if there's a gap
     */
    private String getPath(DumpArchiveEntry entry) {
        // build the stack of elements. It's possible that we're
        // still missing an intermediate value and if so we defer
        // this entry until the missing dirent arrives.
        Stack<String> elements = new Stack<String>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(Integer.valueOf(i))) {
                elements.clear();
                break;
            }

            dirent = names.get(Integer.valueOf(i));
            elements.push(dirent.getName());

            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing defer the work and read next entry.
        if (elements.isEmpty()) {
            pending.put(Integer.valueOf(entry.getIno()), entry);

            return null;
        }

        // generate full path from stack of elements.
        StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
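     * <p>A draining sketch; {@code dump} is this stream and {@code out}
     * is any OutputStream, both names being illustrative only:</p>
     * <pre>{@code
     * byte[] chunk = new byte[1024];
     * int n;
     * while ((n = dump.read(chunk, 0, chunk.length)) != -1) {
     *     out.write(chunk, 0, n);
     * }
     * }</pre>
     *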
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        int totalRead = 0;

        if (isClosed || (entryOffset >= entrySize)) {
            return -1;
        }

        if ((len + entryOffset) > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            int sz = (len > (readBuf.length - recordOffset))
                ? (readBuf.length - recordOffset) : len;

            // copy any data we have
            if ((recordOffset + sz) <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load next block if necessary.
            if (len > 0) {
                // each segment header maps at most 512 records, so fetch
                // the next header once they have all been consumed.
                if (readIdx >= 512) {
                    byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Closes the stream for this entry.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    /**
     * Look at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value, with a full
     * 1k we can verify the checksum.
     *
     * @param buffer data to inspect
     * @param length number of valid bytes in the buffer
     * @return true if this looks like a dump archive
     */
    public static boolean matches(byte[] buffer, int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer,
            24);
    }

}