001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.archivers.tar; 020 021import java.io.IOException; 022import java.math.BigInteger; 023import java.nio.ByteBuffer; 024import org.apache.commons.compress.archivers.zip.ZipEncoding; 025import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; 026 027import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN; 028import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET; 029import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_NUMBYTES_LEN; 030import static org.apache.commons.compress.archivers.tar.TarConstants.SPARSE_OFFSET_LEN; 031 032/** 033 * This class provides static utility methods to work with byte streams. 034 * 035 * @Immutable 036 */ 037// CheckStyle:HideUtilityClassConstructorCheck OFF (bc) 038public class TarUtils { 039 040 private static final int BYTE_MASK = 255; 041 042 static final ZipEncoding DEFAULT_ENCODING = 043 ZipEncodingHelper.getZipEncoding(null); 044 045 /** 046 * Encapsulates the algorithms used up to Commons Compress 1.3 as 047 * ZipEncoding. 048 */ 049 static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() { 050 @Override 051 public boolean canEncode(final String name) { return true; } 052 053 @Override 054 public ByteBuffer encode(final String name) { 055 final int length = name.length(); 056 final byte[] buf = new byte[length]; 057 058 // copy until end of input or output is reached. 059 for (int i = 0; i < length; ++i) { 060 buf[i] = (byte) name.charAt(i); 061 } 062 return ByteBuffer.wrap(buf); 063 } 064 065 @Override 066 public String decode(final byte[] buffer) { 067 final int length = buffer.length; 068 final StringBuilder result = new StringBuilder(length); 069 070 for (final byte b : buffer) { 071 if (b == 0) { // Trailing null 072 break; 073 } 074 result.append((char) (b & 0xFF)); // Allow for sign-extension 075 } 076 077 return result.toString(); 078 } 079 }; 080 081 /** Private constructor to prevent instantiation of this utility class. */ 082 private TarUtils(){ 083 } 084 085 /** 086 * Parse an octal string from a buffer. 087 * 088 * <p>Leading spaces are ignored. 089 * The buffer must contain a trailing space or NUL, 090 * and may contain an additional trailing space or NUL.</p> 091 * 092 * <p>The input buffer is allowed to contain all NULs, 093 * in which case the method returns 0L 094 * (this allows for missing fields).</p> 095 * 096 * <p>To work-around some tar implementations that insert a 097 * leading NUL this method returns 0 if it detects a leading NUL 098 * since Commons Compress 1.4.</p> 099 * 100 * @param buffer The buffer from which to parse. 101 * @param offset The offset into the buffer from which to parse. 102 * @param length The maximum number of bytes to parse - must be at least 2 bytes. 103 * @return The long value of the octal string. 104 * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected. 105 */ 106 public static long parseOctal(final byte[] buffer, final int offset, final int length) { 107 long result = 0; 108 int end = offset + length; 109 int start = offset; 110 111 if (length < 2){ 112 throw new IllegalArgumentException("Length "+length+" must be at least 2"); 113 } 114 115 if (buffer[start] == 0) { 116 return 0L; 117 } 118 119 // Skip leading spaces 120 while (start < end){ 121 if (buffer[start] == ' '){ 122 start++; 123 } else { 124 break; 125 } 126 } 127 128 // Trim all trailing NULs and spaces. 129 // The ustar and POSIX tar specs require a trailing NUL or 130 // space but some implementations use the extra digit for big 131 // sizes/uids/gids ... 132 byte trailer = buffer[end - 1]; 133 while (start < end && (trailer == 0 || trailer == ' ')) { 134 end--; 135 trailer = buffer[end - 1]; 136 } 137 138 for ( ;start < end; start++) { 139 final byte currentByte = buffer[start]; 140 // CheckStyle:MagicNumber OFF 141 if (currentByte < '0' || currentByte > '7'){ 142 throw new IllegalArgumentException( 143 exceptionMessage(buffer, offset, length, start, currentByte)); 144 } 145 result = (result << 3) + (currentByte - '0'); // convert from ASCII 146 // CheckStyle:MagicNumber ON 147 } 148 149 return result; 150 } 151 152 /** 153 * Compute the value contained in a byte buffer. If the most 154 * significant bit of the first byte in the buffer is set, this 155 * bit is ignored and the rest of the buffer is interpreted as a 156 * binary number. Otherwise, the buffer is interpreted as an 157 * octal number as per the parseOctal function above. 158 * 159 * @param buffer The buffer from which to parse. 160 * @param offset The offset into the buffer from which to parse. 161 * @param length The maximum number of bytes to parse. 162 * @return The long value of the octal or binary string. 163 * @throws IllegalArgumentException if the trailing space/NUL is 164 * missing or an invalid byte is detected in an octal number, or 165 * if a binary number would exceed the size of a signed long 166 * 64-bit integer. 167 * @since 1.4 168 */ 169 public static long parseOctalOrBinary(final byte[] buffer, final int offset, 170 final int length) { 171 172 if ((buffer[offset] & 0x80) == 0) { 173 return parseOctal(buffer, offset, length); 174 } 175 final boolean negative = buffer[offset] == (byte) 0xff; 176 if (length < 9) { 177 return parseBinaryLong(buffer, offset, length, negative); 178 } 179 return parseBinaryBigInteger(buffer, offset, length, negative); 180 } 181 182 private static long parseBinaryLong(final byte[] buffer, final int offset, 183 final int length, 184 final boolean negative) { 185 if (length >= 9) { 186 throw new IllegalArgumentException("At offset " + offset + ", " 187 + length + " byte binary number" 188 + " exceeds maximum signed long" 189 + " value"); 190 } 191 long val = 0; 192 for (int i = 1; i < length; i++) { 193 val = (val << 8) + (buffer[offset + i] & 0xff); 194 } 195 if (negative) { 196 // 2's complement 197 val--; 198 val ^= (long) Math.pow(2.0, (length - 1) * 8.0) - 1; 199 } 200 return negative ? -val : val; 201 } 202 203 private static long parseBinaryBigInteger(final byte[] buffer, 204 final int offset, 205 final int length, 206 final boolean negative) { 207 final byte[] remainder = new byte[length - 1]; 208 System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); 209 BigInteger val = new BigInteger(remainder); 210 if (negative) { 211 // 2's complement 212 val = val.add(BigInteger.valueOf(-1)).not(); 213 } 214 if (val.bitLength() > 63) { 215 throw new IllegalArgumentException("At offset " + offset + ", " 216 + length + " byte binary number" 217 + " exceeds maximum signed long" 218 + " value"); 219 } 220 return negative ? -val.longValue() : val.longValue(); 221 } 222 223 /** 224 * Parse a boolean byte from a buffer. 225 * Leading spaces and NUL are ignored. 226 * The buffer may contain trailing spaces or NULs. 227 * 228 * @param buffer The buffer from which to parse. 229 * @param offset The offset into the buffer from which to parse. 230 * @return The boolean value of the bytes. 231 * @throws IllegalArgumentException if an invalid byte is detected. 232 */ 233 public static boolean parseBoolean(final byte[] buffer, final int offset) { 234 return buffer[offset] == 1; 235 } 236 237 // Helper method to generate the exception message 238 private static String exceptionMessage(final byte[] buffer, final int offset, 239 final int length, final int current, final byte currentByte) { 240 // default charset is good enough for an exception message, 241 // 242 // the alternative was to modify parseOctal and 243 // parseOctalOrBinary to receive the ZipEncoding of the 244 // archive (deprecating the existing public methods, of 245 // course) and dealing with the fact that ZipEncoding#decode 246 // can throw an IOException which parseOctal* doesn't declare 247 String string = new String(buffer, offset, length); 248 249 string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed 250 return "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length; 251 } 252 253 /** 254 * Parse an entry name from a buffer. 255 * Parsing stops when a NUL is found 256 * or the buffer length is reached. 257 * 258 * @param buffer The buffer from which to parse. 259 * @param offset The offset into the buffer from which to parse. 260 * @param length The maximum number of bytes to parse. 261 * @return The entry name. 262 */ 263 public static String parseName(final byte[] buffer, final int offset, final int length) { 264 try { 265 return parseName(buffer, offset, length, DEFAULT_ENCODING); 266 } catch (final IOException ex) { // NOSONAR 267 try { 268 return parseName(buffer, offset, length, FALLBACK_ENCODING); 269 } catch (final IOException ex2) { 270 // impossible 271 throw new RuntimeException(ex2); //NOSONAR 272 } 273 } 274 } 275 276 /** 277 * Parse an entry name from a buffer. 278 * Parsing stops when a NUL is found 279 * or the buffer length is reached. 280 * 281 * @param buffer The buffer from which to parse. 282 * @param offset The offset into the buffer from which to parse. 283 * @param length The maximum number of bytes to parse. 284 * @param encoding name of the encoding to use for file names 285 * @since 1.4 286 * @return The entry name. 287 * @throws IOException on error 288 */ 289 public static String parseName(final byte[] buffer, final int offset, 290 final int length, 291 final ZipEncoding encoding) 292 throws IOException { 293 294 int len = 0; 295 for (int i = offset; len < length && buffer[i] != 0; i++) { 296 len++; 297 } 298 if (len > 0) { 299 final byte[] b = new byte[len]; 300 System.arraycopy(buffer, offset, b, 0, len); 301 return encoding.decode(b); 302 } 303 return ""; 304 } 305 306 /** 307 * Parses the content of a PAX 1.0 sparse block. 308 * @since 1.20 309 * @param buffer The buffer from which to parse. 310 * @param offset The offset into the buffer from which to parse. 311 * @return a parsed sparse struct 312 */ 313 public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) { 314 long sparseOffset = parseOctalOrBinary(buffer, offset, SPARSE_OFFSET_LEN); 315 long sparseNumbytes = parseOctalOrBinary(buffer, offset + SPARSE_OFFSET_LEN, SPARSE_NUMBYTES_LEN); 316 317 return new TarArchiveStructSparse(sparseOffset, sparseNumbytes); 318 } 319 320 /** 321 * Copy a name into a buffer. 322 * Copies characters from the name into the buffer 323 * starting at the specified offset. 324 * If the buffer is longer than the name, the buffer 325 * is filled with trailing NULs. 326 * If the name is longer than the buffer, 327 * the output is truncated. 328 * 329 * @param name The header name from which to copy the characters. 330 * @param buf The buffer where the name is to be stored. 331 * @param offset The starting offset into the buffer 332 * @param length The maximum number of header bytes to copy. 333 * @return The updated offset, i.e. offset + length 334 */ 335 public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) { 336 try { 337 return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); 338 } catch (final IOException ex) { // NOSONAR 339 try { 340 return formatNameBytes(name, buf, offset, length, 341 FALLBACK_ENCODING); 342 } catch (final IOException ex2) { 343 // impossible 344 throw new RuntimeException(ex2); //NOSONAR 345 } 346 } 347 } 348 349 /** 350 * Copy a name into a buffer. 351 * Copies characters from the name into the buffer 352 * starting at the specified offset. 353 * If the buffer is longer than the name, the buffer 354 * is filled with trailing NULs. 355 * If the name is longer than the buffer, 356 * the output is truncated. 357 * 358 * @param name The header name from which to copy the characters. 359 * @param buf The buffer where the name is to be stored. 360 * @param offset The starting offset into the buffer 361 * @param length The maximum number of header bytes to copy. 362 * @param encoding name of the encoding to use for file names 363 * @since 1.4 364 * @return The updated offset, i.e. offset + length 365 * @throws IOException on error 366 */ 367 public static int formatNameBytes(final String name, final byte[] buf, final int offset, 368 final int length, 369 final ZipEncoding encoding) 370 throws IOException { 371 int len = name.length(); 372 ByteBuffer b = encoding.encode(name); 373 while (b.limit() > length && len > 0) { 374 b = encoding.encode(name.substring(0, --len)); 375 } 376 final int limit = b.limit() - b.position(); 377 System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); 378 379 // Pad any remaining output bytes with NUL 380 for (int i = limit; i < length; ++i) { 381 buf[offset + i] = 0; 382 } 383 384 return offset + length; 385 } 386 387 /** 388 * Fill buffer with unsigned octal number, padded with leading zeroes. 389 * 390 * @param value number to convert to octal - treated as unsigned 391 * @param buffer destination buffer 392 * @param offset starting offset in buffer 393 * @param length length of buffer to fill 394 * @throws IllegalArgumentException if the value will not fit in the buffer 395 */ 396 public static void formatUnsignedOctalString(final long value, final byte[] buffer, 397 final int offset, final int length) { 398 int remaining = length; 399 remaining--; 400 if (value == 0) { 401 buffer[offset + remaining--] = (byte) '0'; 402 } else { 403 long val = value; 404 for (; remaining >= 0 && val != 0; --remaining) { 405 // CheckStyle:MagicNumber OFF 406 buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); 407 val = val >>> 3; 408 // CheckStyle:MagicNumber ON 409 } 410 if (val != 0){ 411 throw new IllegalArgumentException 412 (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); 413 } 414 } 415 416 for (; remaining >= 0; --remaining) { // leading zeros 417 buffer[offset + remaining] = (byte) '0'; 418 } 419 } 420 421 /** 422 * Write an octal integer into a buffer. 423 * 424 * Uses {@link #formatUnsignedOctalString} to format 425 * the value as an octal string with leading zeros. 426 * The converted number is followed by space and NUL 427 * 428 * @param value The value to write 429 * @param buf The buffer to receive the output 430 * @param offset The starting offset into the buffer 431 * @param length The size of the output buffer 432 * @return The updated offset, i.e offset+length 433 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 434 */ 435 public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 436 437 int idx=length-2; // For space and trailing null 438 formatUnsignedOctalString(value, buf, offset, idx); 439 440 buf[offset + idx++] = (byte) ' '; // Trailing space 441 buf[offset + idx] = 0; // Trailing null 442 443 return offset + length; 444 } 445 446 /** 447 * Write an octal long integer into a buffer. 448 * 449 * Uses {@link #formatUnsignedOctalString} to format 450 * the value as an octal string with leading zeros. 451 * The converted number is followed by a space. 452 * 453 * @param value The value to write as octal 454 * @param buf The destinationbuffer. 455 * @param offset The starting offset into the buffer. 456 * @param length The length of the buffer 457 * @return The updated offset 458 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 459 */ 460 public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 461 462 final int idx=length-1; // For space 463 464 formatUnsignedOctalString(value, buf, offset, idx); 465 buf[offset + idx] = (byte) ' '; // Trailing space 466 467 return offset + length; 468 } 469 470 /** 471 * Write an long integer into a buffer as an octal string if this 472 * will fit, or as a binary number otherwise. 473 * 474 * Uses {@link #formatUnsignedOctalString} to format 475 * the value as an octal string with leading zeros. 476 * The converted number is followed by a space. 477 * 478 * @param value The value to write into the buffer. 479 * @param buf The destination buffer. 480 * @param offset The starting offset into the buffer. 481 * @param length The length of the buffer. 482 * @return The updated offset. 483 * @throws IllegalArgumentException if the value (and trailer) 484 * will not fit in the buffer. 485 * @since 1.4 486 */ 487 public static int formatLongOctalOrBinaryBytes( 488 final long value, final byte[] buf, final int offset, final int length) { 489 490 // Check whether we are dealing with UID/GID or SIZE field 491 final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; 492 493 final boolean negative = value < 0; 494 if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars 495 return formatLongOctalBytes(value, buf, offset, length); 496 } 497 498 if (length < 9) { 499 formatLongBinary(value, buf, offset, length, negative); 500 } else { 501 formatBigIntegerBinary(value, buf, offset, length, negative); 502 } 503 504 buf[offset] = (byte) (negative ? 0xff : 0x80); 505 return offset + length; 506 } 507 508 private static void formatLongBinary(final long value, final byte[] buf, 509 final int offset, final int length, 510 final boolean negative) { 511 final int bits = (length - 1) * 8; 512 final long max = 1L << bits; 513 long val = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE 514 if (val < 0 || val >= max) { 515 throw new IllegalArgumentException("Value " + value + 516 " is too large for " + length + " byte field."); 517 } 518 if (negative) { 519 val ^= max - 1; 520 val++; 521 val |= 0xffL << bits; 522 } 523 for (int i = offset + length - 1; i >= offset; i--) { 524 buf[i] = (byte) val; 525 val >>= 8; 526 } 527 } 528 529 private static void formatBigIntegerBinary(final long value, final byte[] buf, 530 final int offset, 531 final int length, 532 final boolean negative) { 533 final BigInteger val = BigInteger.valueOf(value); 534 final byte[] b = val.toByteArray(); 535 final int len = b.length; 536 if (len > length - 1) { 537 throw new IllegalArgumentException("Value " + value + 538 " is too large for " + length + " byte field."); 539 } 540 final int off = offset + length - len; 541 System.arraycopy(b, 0, buf, off, len); 542 final byte fill = (byte) (negative ? 0xff : 0); 543 for (int i = offset + 1; i < off; i++) { 544 buf[i] = fill; 545 } 546 } 547 548 /** 549 * Writes an octal value into a buffer. 550 * 551 * Uses {@link #formatUnsignedOctalString} to format 552 * the value as an octal string with leading zeros. 553 * The converted number is followed by NUL and then space. 554 * 555 * @param value The value to convert 556 * @param buf The destination buffer 557 * @param offset The starting offset into the buffer. 558 * @param length The size of the buffer. 559 * @return The updated value of offset, i.e. offset+length 560 * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer 561 */ 562 public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) { 563 564 int idx=length-2; // for NUL and space 565 formatUnsignedOctalString(value, buf, offset, idx); 566 567 buf[offset + idx++] = 0; // Trailing null 568 buf[offset + idx] = (byte) ' '; // Trailing space 569 570 return offset + length; 571 } 572 573 /** 574 * Compute the checksum of a tar entry header. 575 * 576 * @param buf The tar entry's header buffer. 577 * @return The computed checksum. 578 */ 579 public static long computeCheckSum(final byte[] buf) { 580 long sum = 0; 581 582 for (final byte element : buf) { 583 sum += BYTE_MASK & element; 584 } 585 586 return sum; 587 } 588 589 /** 590 * Wikipedia <a href="https://en.wikipedia.org/wiki/Tar_(file_format)#File_header">says</a>: 591 * <blockquote> 592 * The checksum is calculated by taking the sum of the unsigned byte values 593 * of the header block with the eight checksum bytes taken to be ascii 594 * spaces (decimal value 32). It is stored as a six digit octal number with 595 * leading zeroes followed by a NUL and then a space. Various 596 * implementations do not adhere to this format. For better compatibility, 597 * ignore leading and trailing whitespace, and get the first six digits. In 598 * addition, some historic tar implementations treated bytes as signed. 599 * Implementations typically calculate the checksum both ways, and treat it 600 * as good if either the signed or unsigned sum matches the included 601 * checksum. 602 * </blockquote> 603 * <p> 604 * The return value of this method should be treated as a best-effort 605 * heuristic rather than an absolute and final truth. The checksum 606 * verification logic may well evolve over time as more special cases 607 * are encountered. 608 * 609 * @param header tar header 610 * @return whether the checksum is reasonably good 611 * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a> 612 * @since 1.5 613 */ 614 public static boolean verifyCheckSum(final byte[] header) { 615 final long storedSum = parseOctal(header, CHKSUM_OFFSET, CHKSUMLEN); 616 long unsignedSum = 0; 617 long signedSum = 0; 618 619 for (int i = 0; i < header.length; i++) { 620 byte b = header[i]; 621 if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) { 622 b = ' '; 623 } 624 unsignedSum += 0xff & b; 625 signedSum += b; 626 } 627 return storedSum == unsignedSum || storedSum == signedSum; 628 } 629 630}