001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.archivers.tar;
020
021import java.io.IOException;
022import java.math.BigInteger;
023import java.nio.ByteBuffer;
024import org.apache.commons.compress.archivers.zip.ZipEncoding;
025import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
026
027import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
028import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;
029import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_NUMBYTES_LEN;
030import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_OFFSET_LEN;
031
032/**
033 * This class provides static utility methods to work with byte streams.
034 *
035 * @Immutable
036 */
037// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
038public class TarUtils {
039
040    private static final int BYTE_MASK = 255;
041
042    static final ZipEncoding DEFAULT_ENCODING =
043        ZipEncodingHelper.getZipEncoding(null);
044
045    /**
046     * Encapsulates the algorithms used up to Commons Compress 1.3 as
047     * ZipEncoding.
048     */
049    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
050            @Override
051            public boolean canEncode(final String name) { return true; }
052
053            @Override
054            public ByteBuffer encode(final String name) {
055                final int length = name.length();
056                final byte[] buf = new byte[length];
057
058                // copy until end of input or output is reached.
059                for (int i = 0; i < length; ++i) {
060                    buf[i] = (byte) name.charAt(i);
061                }
062                return ByteBuffer.wrap(buf);
063            }
064
065            @Override
066            public String decode(final byte[] buffer) {
067                final int length = buffer.length;
068                final StringBuilder result = new StringBuilder(length);
069
070                for (final byte b : buffer) {
071                    if (b == 0) { // Trailing null
072                        break;
073                    }
074                    result.append((char) (b & 0xFF)); // Allow for sign-extension
075                }
076
077                return result.toString();
078            }
079        };
080
081    /** Private constructor to prevent instantiation of this utility class. */
082    private TarUtils(){
083    }
084
085    /**
086     * Parse an octal string from a buffer.
087     *
088     * <p>Leading spaces are ignored.
089     * The buffer must contain a trailing space or NUL,
090     * and may contain an additional trailing space or NUL.</p>
091     *
092     * <p>The input buffer is allowed to contain all NULs,
093     * in which case the method returns 0L
094     * (this allows for missing fields).</p>
095     *
096     * <p>To work-around some tar implementations that insert a
097     * leading NUL this method returns 0 if it detects a leading NUL
098     * since Commons Compress 1.4.</p>
099     *
100     * @param buffer The buffer from which to parse.
101     * @param offset The offset into the buffer from which to parse.
102     * @param length The maximum number of bytes to parse - must be at least 2 bytes.
103     * @return The long value of the octal string.
104     * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected.
105     */
106    public static long parseOctal(final byte[] buffer, final int offset, final int length) {
107        long    result = 0;
108        int     end = offset + length;
109        int     start = offset;
110
111        if (length < 2){
112            throw new IllegalArgumentException("Length "+length+" must be at least 2");
113        }
114
115        if (buffer[start] == 0) {
116            return 0L;
117        }
118
119        // Skip leading spaces
120        while (start < end){
121            if (buffer[start] == ' '){
122                start++;
123            } else {
124                break;
125            }
126        }
127
128        // Trim all trailing NULs and spaces.
129        // The ustar and POSIX tar specs require a trailing NUL or
130        // space but some implementations use the extra digit for big
131        // sizes/uids/gids ...
132        byte trailer = buffer[end - 1];
133        while (start < end && (trailer == 0 || trailer == ' ')) {
134            end--;
135            trailer = buffer[end - 1];
136        }
137
138        for ( ;start < end; start++) {
139            final byte currentByte = buffer[start];
140            // CheckStyle:MagicNumber OFF
141            if (currentByte < '0' || currentByte > '7'){
142                throw new IllegalArgumentException(
143                        exceptionMessage(buffer, offset, length, start, currentByte));
144            }
145            result = (result << 3) + (currentByte - '0'); // convert from ASCII
146            // CheckStyle:MagicNumber ON
147        }
148
149        return result;
150    }
151
152    /**
153     * Compute the value contained in a byte buffer.  If the most
154     * significant bit of the first byte in the buffer is set, this
155     * bit is ignored and the rest of the buffer is interpreted as a
156     * binary number.  Otherwise, the buffer is interpreted as an
157     * octal number as per the parseOctal function above.
158     *
159     * @param buffer The buffer from which to parse.
160     * @param offset The offset into the buffer from which to parse.
161     * @param length The maximum number of bytes to parse.
162     * @return The long value of the octal or binary string.
163     * @throws IllegalArgumentException if the trailing space/NUL is
164     * missing or an invalid byte is detected in an octal number, or
165     * if a binary number would exceed the size of a signed long
166     * 64-bit integer.
167     * @since 1.4
168     */
169    public static long parseOctalOrBinary(final byte[] buffer, final int offset,
170                                          final int length) {
171
172        if ((buffer[offset] & 0x80) == 0) {
173            return parseOctal(buffer, offset, length);
174        }
175        final boolean negative = buffer[offset] == (byte) 0xff;
176        if (length < 9) {
177            return parseBinaryLong(buffer, offset, length, negative);
178        }
179        return parseBinaryBigInteger(buffer, offset, length, negative);
180    }
181
182    private static long parseBinaryLong(final byte[] buffer, final int offset,
183                                        final int length,
184                                        final boolean negative) {
185        if (length >= 9) {
186            throw new IllegalArgumentException("At offset " + offset + ", "
187                                               + length + " byte binary number"
188                                               + " exceeds maximum signed long"
189                                               + " value");
190        }
191        long val = 0;
192        for (int i = 1; i < length; i++) {
193            val = (val << 8) + (buffer[offset + i] & 0xff);
194        }
195        if (negative) {
196            // 2's complement
197            val--;
198            val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
199        }
200        return negative ? -val : val;
201    }
202
203    private static long parseBinaryBigInteger(final byte[] buffer,
204                                              final int offset,
205                                              final int length,
206                                              final boolean negative) {
207        final byte[] remainder = new byte[length - 1];
208        System.arraycopy(buffer, offset + 1, remainder, 0, length - 1);
209        BigInteger val = new BigInteger(remainder);
210        if (negative) {
211            // 2's complement
212            val = val.add(BigInteger.valueOf(-1)).not();
213        }
214        if (val.bitLength() > 63) {
215            throw new IllegalArgumentException("At offset " + offset + ", "
216                                               + length + " byte binary number"
217                                               + " exceeds maximum signed long"
218                                               + " value");
219        }
220        return negative ? -val.longValue() : val.longValue();
221    }
222
223    /**
224     * Parse a boolean byte from a buffer.
225     * Leading spaces and NUL are ignored.
226     * The buffer may contain trailing spaces or NULs.
227     *
228     * @param buffer The buffer from which to parse.
229     * @param offset The offset into the buffer from which to parse.
230     * @return The boolean value of the bytes.
231     * @throws IllegalArgumentException if an invalid byte is detected.
232     */
233    public static boolean parseBoolean(final byte[] buffer, final int offset) {
234        return buffer[offset] == 1;
235    }
236
237    // Helper method to generate the exception message
238    private static String exceptionMessage(final byte[] buffer, final int offset,
239            final int length, final int current, final byte currentByte) {
240        // default charset is good enough for an exception message,
241        //
242        // the alternative was to modify parseOctal and
243        // parseOctalOrBinary to receive the ZipEncoding of the
244        // archive (deprecating the existing public methods, of
245        // course) and dealing with the fact that ZipEncoding#decode
246        // can throw an IOException which parseOctal* doesn't declare
247        String string = new String(buffer, offset, length);
248
249        string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
250        return "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
251    }
252
253    /**
254     * Parse an entry name from a buffer.
255     * Parsing stops when a NUL is found
256     * or the buffer length is reached.
257     *
258     * @param buffer The buffer from which to parse.
259     * @param offset The offset into the buffer from which to parse.
260     * @param length The maximum number of bytes to parse.
261     * @return The entry name.
262     */
263    public static String parseName(final byte[] buffer, final int offset, final int length) {
264        try {
265            return parseName(buffer, offset, length, DEFAULT_ENCODING);
266        } catch (final IOException ex) { // NOSONAR
267            try {
268                return parseName(buffer, offset, length, FALLBACK_ENCODING);
269            } catch (final IOException ex2) {
270                // impossible
271                throw new RuntimeException(ex2); //NOSONAR
272            }
273        }
274    }
275
276    /**
277     * Parse an entry name from a buffer.
278     * Parsing stops when a NUL is found
279     * or the buffer length is reached.
280     *
281     * @param buffer The buffer from which to parse.
282     * @param offset The offset into the buffer from which to parse.
283     * @param length The maximum number of bytes to parse.
284     * @param encoding name of the encoding to use for file names
285     * @since 1.4
286     * @return The entry name.
287     * @throws IOException on error
288     */
289    public static String parseName(final byte[] buffer, final int offset,
290                                   final int length,
291                                   final ZipEncoding encoding)
292        throws IOException {
293
294        int len = 0;
295        for (int i = offset; len < length && buffer[i] != 0; i++) {
296            len++;
297        }
298        if (len > 0) {
299            final byte[] b = new byte[len];
300            System.arraycopy(buffer, offset, b, 0, len);
301            return encoding.decode(b);
302        }
303        return "";
304    }
305
306    /**
307     * Parses the content of a PAX 1.0 sparse block.
308     * @since 1.20
309     * @param buffer The buffer from which to parse.
310     * @param offset The offset into the buffer from which to parse.
311     * @return a parsed sparse struct
312     */
313    public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) {
314        long sparseOffset = parseOctalOrBinary(buffer, offset, SPARSE_OFFSET_LEN);
315        long sparseNumbytes = parseOctalOrBinary(buffer, offset + SPARSE_OFFSET_LEN, SPARSE_NUMBYTES_LEN);
316
317        return new TarArchiveStructSparse(sparseOffset, sparseNumbytes);
318    }
319
320    /**
321     * Copy a name into a buffer.
322     * Copies characters from the name into the buffer
323     * starting at the specified offset.
324     * If the buffer is longer than the name, the buffer
325     * is filled with trailing NULs.
326     * If the name is longer than the buffer,
327     * the output is truncated.
328     *
329     * @param name The header name from which to copy the characters.
330     * @param buf The buffer where the name is to be stored.
331     * @param offset The starting offset into the buffer
332     * @param length The maximum number of header bytes to copy.
333     * @return The updated offset, i.e. offset + length
334     */
335    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
336        try {
337            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
338        } catch (final IOException ex) { // NOSONAR
339            try {
340                return formatNameBytes(name, buf, offset, length,
341                                       FALLBACK_ENCODING);
342            } catch (final IOException ex2) {
343                // impossible
344                throw new RuntimeException(ex2); //NOSONAR
345            }
346        }
347    }
348
349    /**
350     * Copy a name into a buffer.
351     * Copies characters from the name into the buffer
352     * starting at the specified offset.
353     * If the buffer is longer than the name, the buffer
354     * is filled with trailing NULs.
355     * If the name is longer than the buffer,
356     * the output is truncated.
357     *
358     * @param name The header name from which to copy the characters.
359     * @param buf The buffer where the name is to be stored.
360     * @param offset The starting offset into the buffer
361     * @param length The maximum number of header bytes to copy.
362     * @param encoding name of the encoding to use for file names
363     * @since 1.4
364     * @return The updated offset, i.e. offset + length
365     * @throws IOException on error
366     */
367    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
368                                      final int length,
369                                      final ZipEncoding encoding)
370        throws IOException {
371        int len = name.length();
372        ByteBuffer b = encoding.encode(name);
373        while (b.limit() > length && len > 0) {
374            b = encoding.encode(name.substring(0, --len));
375        }
376        final int limit = b.limit() - b.position();
377        System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit);
378
379        // Pad any remaining output bytes with NUL
380        for (int i = limit; i < length; ++i) {
381            buf[offset + i] = 0;
382        }
383
384        return offset + length;
385    }
386
387    /**
388     * Fill buffer with unsigned octal number, padded with leading zeroes.
389     *
390     * @param value number to convert to octal - treated as unsigned
391     * @param buffer destination buffer
392     * @param offset starting offset in buffer
393     * @param length length of buffer to fill
394     * @throws IllegalArgumentException if the value will not fit in the buffer
395     */
396    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
397            final int offset, final int length) {
398        int remaining = length;
399        remaining--;
400        if (value == 0) {
401            buffer[offset + remaining--] = (byte) '0';
402        } else {
403            long val = value;
404            for (; remaining >= 0 && val != 0; --remaining) {
405                // CheckStyle:MagicNumber OFF
406                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
407                val = val >>> 3;
408                // CheckStyle:MagicNumber ON
409            }
410            if (val != 0){
411                throw new IllegalArgumentException
412                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
413            }
414        }
415
416        for (; remaining >= 0; --remaining) { // leading zeros
417            buffer[offset + remaining] = (byte) '0';
418        }
419    }
420
421    /**
422     * Write an octal integer into a buffer.
423     *
424     * Uses {@link #formatUnsignedOctalString} to format
425     * the value as an octal string with leading zeros.
426     * The converted number is followed by space and NUL
427     *
428     * @param value The value to write
429     * @param buf The buffer to receive the output
430     * @param offset The starting offset into the buffer
431     * @param length The size of the output buffer
432     * @return The updated offset, i.e offset+length
433     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
434     */
435    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
436
437        int idx=length-2; // For space and trailing null
438        formatUnsignedOctalString(value, buf, offset, idx);
439
440        buf[offset + idx++] = (byte) ' '; // Trailing space
441        buf[offset + idx]   = 0; // Trailing null
442
443        return offset + length;
444    }
445
446    /**
447     * Write an octal long integer into a buffer.
448     *
449     * Uses {@link #formatUnsignedOctalString} to format
450     * the value as an octal string with leading zeros.
451     * The converted number is followed by a space.
452     *
453     * @param value The value to write as octal
454     * @param buf The destinationbuffer.
455     * @param offset The starting offset into the buffer.
456     * @param length The length of the buffer
457     * @return The updated offset
458     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
459     */
460    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
461
462        final int idx=length-1; // For space
463
464        formatUnsignedOctalString(value, buf, offset, idx);
465        buf[offset + idx] = (byte) ' '; // Trailing space
466
467        return offset + length;
468    }
469
470    /**
471     * Write an long integer into a buffer as an octal string if this
472     * will fit, or as a binary number otherwise.
473     *
474     * Uses {@link #formatUnsignedOctalString} to format
475     * the value as an octal string with leading zeros.
476     * The converted number is followed by a space.
477     *
478     * @param value The value to write into the buffer.
479     * @param buf The destination buffer.
480     * @param offset The starting offset into the buffer.
481     * @param length The length of the buffer.
482     * @return The updated offset.
483     * @throws IllegalArgumentException if the value (and trailer)
484     * will not fit in the buffer.
485     * @since 1.4
486     */
487    public static int formatLongOctalOrBinaryBytes(
488        final long value, final byte[] buf, final int offset, final int length) {
489
490        // Check whether we are dealing with UID/GID or SIZE field
491        final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE;
492
493        final boolean negative = value < 0;
494        if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars
495            return formatLongOctalBytes(value, buf, offset, length);
496        }
497
498        if (length < 9) {
499            formatLongBinary(value, buf, offset, length, negative);
500        } else {
501            formatBigIntegerBinary(value, buf, offset, length, negative);
502        }
503
504        buf[offset] = (byte) (negative ? 0xff : 0x80);
505        return offset + length;
506    }
507
508    private static void formatLongBinary(final long value, final byte[] buf,
509                                         final int offset, final int length,
510                                         final boolean negative) {
511        final int bits = (length - 1) * 8;
512        final long max = 1L << bits;
513        long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
514        if (val < 0 || val >= max) {
515            throw new IllegalArgumentException("Value " + value +
516                " is too large for " + length + " byte field.");
517        }
518        if (negative) {
519            val ^= max - 1;
520            val++;
521            val |= 0xffL << bits;
522        }
523        for (int i = offset + length - 1; i >= offset; i--) {
524            buf[i] = (byte) val;
525            val >>= 8;
526        }
527    }
528
529    private static void formatBigIntegerBinary(final long value, final byte[] buf,
530                                               final int offset,
531                                               final int length,
532                                               final boolean negative) {
533        final BigInteger val = BigInteger.valueOf(value);
534        final byte[] b = val.toByteArray();
535        final int len = b.length;
536        if (len > length - 1) {
537            throw new IllegalArgumentException("Value " + value +
538                " is too large for " + length + " byte field.");
539        }
540        final int off = offset + length - len;
541        System.arraycopy(b, 0, buf, off, len);
542        final byte fill = (byte) (negative ? 0xff : 0);
543        for (int i = offset + 1; i < off; i++) {
544            buf[i] = fill;
545        }
546    }
547
548    /**
549     * Writes an octal value into a buffer.
550     *
551     * Uses {@link #formatUnsignedOctalString} to format
552     * the value as an octal string with leading zeros.
553     * The converted number is followed by NUL and then space.
554     *
555     * @param value The value to convert
556     * @param buf The destination buffer
557     * @param offset The starting offset into the buffer.
558     * @param length The size of the buffer.
559     * @return The updated value of offset, i.e. offset+length
560     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
561     */
562    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
563
564        int idx=length-2; // for NUL and space
565        formatUnsignedOctalString(value, buf, offset, idx);
566
567        buf[offset + idx++]   = 0; // Trailing null
568        buf[offset + idx]     = (byte) ' '; // Trailing space
569
570        return offset + length;
571    }
572
573    /**
574     * Compute the checksum of a tar entry header.
575     *
576     * @param buf The tar entry's header buffer.
577     * @return The computed checksum.
578     */
579    public static long computeCheckSum(final byte[] buf) {
580        long sum = 0;
581
582        for (final byte element : buf) {
583            sum += BYTE_MASK & element;
584        }
585
586        return sum;
587    }
588
589    /**
590     * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>:
591     * <blockquote>
592     * The checksum is calculated by taking the sum of the unsigned byte values
593     * of the header block with the eight checksum bytes taken to be ascii
594     * spaces (decimal value 32). It is stored as a six digit octal number with
595     * leading zeroes followed by a NUL and then a space. Various
596     * implementations do not adhere to this format. For better compatibility,
597     * ignore leading and trailing whitespace, and get the first six digits. In
598     * addition, some historic tar implementations treated bytes as signed.
599     * Implementations typically calculate the checksum both ways, and treat it
600     * as good if either the signed or unsigned sum matches the included
601     * checksum.
602     * </blockquote>
603     * <p>
604     * The return value of this method should be treated as a best-effort
605     * heuristic rather than an absolute and final truth. The checksum
606     * verification logic may well evolve over time as more special cases
607     * are encountered.
608     *
609     * @param header tar header
610     * @return whether the checksum is reasonably good
611     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
612     * @since 1.5
613     */
614    public static boolean verifyCheckSum(final byte[] header) {
615        final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN);
616        long unsignedSum = 0;
617        long signedSum = 0;
618
619        for (int i = 0; i < header.length; i++) {
620            byte b = header[i];
621            if (CHKSUM_OFFSET  <= i && i < CHKSUM_OFFSET + CHKSUMLEN) {
622                b = ' ';
623            }
624            unsignedSum += 0xff & b;
625            signedSum += b;
626        }
627        return storedSum == unsignedSum || storedSum == signedSum;
628    }
629
630}