001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.annotations.J2ktIncompatible;
030import com.google.common.base.Ascii;
031import com.google.errorprone.annotations.concurrent.LazyInit;
032import java.io.IOException;
033import java.io.InputStream;
034import java.io.OutputStream;
035import java.io.Reader;
036import java.io.Writer;
037import java.util.Arrays;
038import java.util.Objects;
039import javax.annotation.CheckForNull;
040
041/**
042 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
043 * strings. This class includes several constants for encoding schemes specified by <a
044 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
045 *
046 * <pre>{@code
047 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))
048 * }</pre>
049 *
050 * <p>returns the string {@code "MZXW6==="}, and
051 *
052 * <pre>{@code
053 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
054 * }</pre>
055 *
056 * <p>...returns the ASCII bytes of the string {@code "foo"}.
057 *
058 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
059 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
060 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
061 * behavior:
062 *
063 * <pre>{@code
064 * BaseEncoding.base16().lowerCase().decode("deadbeef");
065 * }</pre>
066 *
067 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
068 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
069 *
070 * <pre>{@code
071 * // Do NOT do this
072 * BaseEncoding hex = BaseEncoding.base16();
073 * hex.lowerCase(); // does nothing!
074 * return hex.decode("deadbeef"); // throws an IllegalArgumentException
075 * }</pre>
076 *
077 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code
078 * x}, but the reverse does not necessarily hold.
079 *
080 * <table>
081 * <caption>Encodings</caption>
082 * <tr>
083 * <th>Encoding
084 * <th>Alphabet
085 * <th>{@code char:byte} ratio
086 * <th>Default padding
087 * <th>Comments
088 * <tr>
089 * <td>{@link #base16()}
090 * <td>0-9 A-F
091 * <td>2.00
092 * <td>N/A
093 * <td>Traditional hexadecimal. Defaults to upper case.
094 * <tr>
095 * <td>{@link #base32()}
096 * <td>A-Z 2-7
097 * <td>1.60
098 * <td>=
099 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
100 * <tr>
101 * <td>{@link #base32Hex()}
102 * <td>0-9 A-V
103 * <td>1.60
104 * <td>=
105 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
106 * <tr>
107 * <td>{@link #base64()}
108 * <td>A-Z a-z 0-9 + /
109 * <td>1.33
110 * <td>=
111 * <td>
112 * <tr>
113 * <td>{@link #base64Url()}
114 * <td>A-Z a-z 0-9 - _
115 * <td>1.33
116 * <td>=
117 * <td>Safe to use as filenames, or to pass in URLs without escaping
118 * </table>
119 *
120 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
121 *
122 * @author Louis Wasserman
123 * @since 14.0
124 */
125@GwtCompatible(emulated = true)
126@ElementTypesAreNonnullByDefault
127public abstract class BaseEncoding {
128  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
129
130  BaseEncoding() {} // no access modifier: this constructor is package-private
131
132  /**
133   * Exception indicating invalid base-encoded input encountered while decoding.
134   *
135   * @author Louis Wasserman
136   * @since 15.0
137   */
138  public static final class DecodingException extends IOException {
139    DecodingException(String message) { // package-private; forwards the message to IOException
140      super(message);
141    }
142
143    DecodingException(Throwable cause) { // package-private; forwards the cause to IOException
144      super(cause);
145    }
146  }
147
148  /** Encodes the specified byte array, and returns the encoded {@code String}. */
149  public String encode(byte[] bytes) {
150    return encode(bytes, 0, bytes.length);
151  }
152
153  /**
154   * Encodes the specified range of the specified byte array, and returns the encoded {@code
155   * String}.
156   */
157  public final String encode(byte[] bytes, int off, int len) {
158    checkPositionIndexes(off, off + len, bytes.length);
159    StringBuilder result = new StringBuilder(maxEncodedSize(len));
160    try {
161      encodeTo(result, bytes, off, len);
162    } catch (IOException impossible) {
163      throw new AssertionError(impossible);
164    }
165    return result.toString();
166  }
167
168  /**
169   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
170   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code
171   * Writer}.
   *
   * @param writer the {@code Writer} that receives the encoded characters
172   */
173  @J2ktIncompatible
174  @GwtIncompatible // Writer,OutputStream
175  public abstract OutputStream encodingStream(Writer writer);
176
177  /**
178   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
179   */
180  @J2ktIncompatible
181  @GwtIncompatible // ByteSink,CharSink
182  public final ByteSink encodingSink(CharSink encodedSink) {
183    checkNotNull(encodedSink);
184    return new ByteSink() {
185      @Override
186      public OutputStream openStream() throws IOException {
187        return encodingStream(encodedSink.openStream());
188      }
189    };
190  }
191
192  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
193
194  private static byte[] extract(byte[] result, int length) {
195    if (length == result.length) {
196      return result;
197    }
198    byte[] trunc = new byte[length];
199    System.arraycopy(result, 0, trunc, 0, length);
200    return trunc;
201  }
202
203  /**
204   * Determines whether the specified character sequence is a valid encoded string according to this
205   * encoding.
206   *
   * @param chars the character sequence to check
   * @return {@code true} if {@code chars} is a valid encoded string for this encoding
207   * @since 20.0
208   */
209  public abstract boolean canDecode(CharSequence chars);
210
211  /**
212   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
213   * inverse operation to {@link #encode(byte[])}.
214   *
215   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
216   *     encoding.
217   */
218  public final byte[] decode(CharSequence chars) {
219    try {
220      return decodeChecked(chars);
221    } catch (DecodingException badInput) {
222      throw new IllegalArgumentException(badInput);
223    }
224  }
225
226  /**
227   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
228   * inverse operation to {@link #encode(byte[])}.
229   *
230   * @throws DecodingException if the input is not a valid encoded string according to this
231   *     encoding.
232   */
233  final byte[] decodeChecked(CharSequence chars)
234      throws DecodingException {
235    chars = trimTrailingPadding(chars);
236    byte[] tmp = new byte[maxDecodedSize(chars.length())];
237    int len = decodeTo(tmp, chars);
238    return extract(tmp, len);
239  }
240
241  /**
242   * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code
243   * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors.
   *
   * @param reader the {@code Reader} supplying the encoded characters
244   */
245  @J2ktIncompatible
246  @GwtIncompatible // Reader,InputStream
247  public abstract InputStream decodingStream(Reader reader);
248
249  /**
250   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code
251   * CharSource}.
252   */
253  @J2ktIncompatible
254  @GwtIncompatible // ByteSource,CharSource
255  public final ByteSource decodingSource(CharSource encodedSource) {
256    checkNotNull(encodedSource);
257    return new ByteSource() {
258      @Override
259      public InputStream openStream() throws IOException {
260        return decodingStream(encodedSource.openStream());
261      }
262    };
263  }
264
265  // Implementations for encoding/decoding
266
267  abstract int maxEncodedSize(int bytes); // sizes the StringBuilder in encode(byte[], int, int)
268
  // Appends the encoded form of bytes[off, off + len) to target.
269  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;
270
271  abstract int maxDecodedSize(int chars); // sizes the temporary buffer in decodeChecked
272
  // Decodes chars into target; the returned int is used by decodeChecked as the number of
  // leading bytes of target to keep.
273  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
274
275  CharSequence trimTrailingPadding(CharSequence chars) {
276    return checkNotNull(chars);
277  }
278
279  // Modified encoding generators
280
281  /**
282   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
283   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
284   * section 3.2</a>, Padding of Encoded Data.
   *
   * @return an encoding equivalent to this one that omits padding characters
285   */
286  public abstract BaseEncoding omitPadding();
287
288  /**
289   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
290   * for padding.
291   *
   * @param padChar the character to use for padding
   * @return an encoding equivalent to this one that pads with {@code padChar}
292   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
293   *     separator
294   */
295  public abstract BaseEncoding withPadChar(char padChar);
296
297  /**
298   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
299   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
300   * are skipped over in decoding.
301   *
   * @param separator the string to insert between groups of encoded characters
   * @param n the number of encoded characters between consecutive separators
302   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
303   *     string, or if {@code n <= 0}
304   * @throws UnsupportedOperationException if this encoding already uses a separator
305   */
306  public abstract BaseEncoding withSeparator(String separator, int n);
307
308  /**
309   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
310   * uppercase letters. Padding and separator characters remain in their original case.
311   *
   * @return an encoding equivalent to this one that uses uppercase letters
312   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
313   *     lower-case characters
314   */
315  public abstract BaseEncoding upperCase();
316
317  /**
318   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
319   * lowercase letters. Padding and separator characters remain in their original case.
320   *
   * @return an encoding equivalent to this one that uses lowercase letters
321   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
322   *     lower-case characters
323   */
324  public abstract BaseEncoding lowerCase();
325
326  /**
327   * Returns an encoding that behaves equivalently to this encoding, but decodes letters without
328   * regard to case.
329   *
   * @return an encoding equivalent to this one that decodes letters case-insensitively
330   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
331   *     lower-case characters
332   * @since 32.0.0
333   */
334  public abstract BaseEncoding ignoreCase();
335
336  private static final BaseEncoding BASE64 = // the single instance handed out by base64()
337      new Base64Encoding(
338          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
339
340  /**
341   * The "base64" base encoding specified by <a
342   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
343   * (This is the same as the base 64 encoding from <a
344   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
345   *
346   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
347   * omitted} or {@linkplain #withPadChar(char) replaced}.
348   *
349   * <p>No line feeds are added by default, as per <a
350   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
351   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
352   */
353  public static BaseEncoding base64() {
354    return BASE64; // static final constant, so every call returns the same object
355  }
356
357  private static final BaseEncoding BASE64_URL = // the single instance handed out by base64Url()
358      new Base64Encoding(
359          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');
360
361  /**
362   * The "base64url" encoding specified by <a
363   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
364   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
365   * is the same as the base 64 encoding with URL and filename safe alphabet from <a
366   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
367   *
368   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
369   * omitted} or {@linkplain #withPadChar(char) replaced}.
370   *
371   * <p>No line feeds are added by default, as per <a
372   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
373   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
374   */
375  public static BaseEncoding base64Url() {
376    return BASE64_URL; // static final constant, so every call returns the same object
377  }
378
379  private static final BaseEncoding BASE32 = // the single instance handed out by base32()
380      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
381
382  /**
383   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
384   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
385   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
386   *
387   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
388   * omitted} or {@linkplain #withPadChar(char) replaced}.
389   *
390   * <p>No line feeds are added by default, as per <a
391   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
392   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
393   */
394  public static BaseEncoding base32() {
395    return BASE32; // static final constant, so every call returns the same object
396  }
397
398  private static final BaseEncoding BASE32_HEX = // the single instance handed out by base32Hex()
399      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');
400
401  /**
402   * The "base32hex" encoding specified by <a
403   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
404   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
405   *
406   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
407   * omitted} or {@linkplain #withPadChar(char) replaced}.
408   *
409   * <p>No line feeds are added by default, as per <a
410   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
411   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
412   */
413  public static BaseEncoding base32Hex() {
414    return BASE32_HEX; // static final constant, so every call returns the same object
415  }
416
417  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");
418  // BASE16 above is the single instance handed out by base16(); it is built without a pad char.
419  /**
420   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
421   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a
422   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
423   * "hexadecimal" format.
424   *
425   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
426   * have no effect.
427   *
428   * <p>No line feeds are added by default, as per <a
429   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
430   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
431   */
432  public static BaseEncoding base16() {
433    return BASE16; // static final constant, so every call returns the same object
434  }
435
436  private static final class Alphabet {
437    private final String name;
438    // this is meant to be immutable -- don't modify it!
439    private final char[] chars;
440    final int mask;
441    final int bitsPerChar;
442    final int charsPerChunk;
443    final int bytesPerChunk;
444    private final byte[] decodabet;
445    private final boolean[] validPadding;
446    private final boolean ignoreCase;
447
448    Alphabet(String name, char[] chars) {
449      this(name, chars, decodabetFor(chars), /* ignoreCase= */ false);
450    }
451
452    private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) {
453      this.name = checkNotNull(name);
454      this.chars = checkNotNull(chars);
455      try {
456        this.bitsPerChar = log2(chars.length, UNNECESSARY);
457      } catch (ArithmeticException e) {
458        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
459      }
460
461      // Compute how input bytes are chunked. For example, with base64 we chunk every 3 bytes into
462      // 4 characters. We have bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3.
463      // We're looking for the smallest charsPerChunk such that bitsPerChar * charsPerChunk is a
464      // multiple of 8. A multiple of 8 has 3 low zero bits, so we just need to figure out how many
465      // extra zero bits we need to add to the end of bitsPerChar to get 3 in total.
466      // The logic here would be wrong for bitsPerChar > 8, but since we require distinct ASCII
467      // characters that can't happen.
468      int zeroesInBitsPerChar = Integer.numberOfTrailingZeros(bitsPerChar);
469      this.charsPerChunk = 1 << (3 - zeroesInBitsPerChar);
470      this.bytesPerChunk = bitsPerChar >> zeroesInBitsPerChar;
471
472      this.mask = chars.length - 1;
473
474      this.decodabet = decodabet;
475
476      boolean[] validPadding = new boolean[charsPerChunk];
477      for (int i = 0; i < bytesPerChunk; i++) {
478        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
479      }
480      this.validPadding = validPadding;
481      this.ignoreCase = ignoreCase;
482    }
483
484    private static byte[] decodabetFor(char[] chars) {
485      byte[] decodabet = new byte[Ascii.MAX + 1];
486      Arrays.fill(decodabet, (byte) -1);
487      for (int i = 0; i < chars.length; i++) {
488        char c = chars[i];
489        checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
490        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
491        decodabet[c] = (byte) i;
492      }
493      return decodabet;
494    }
495
496    /** Returns an equivalent {@code Alphabet} except it ignores case. */
497    Alphabet ignoreCase() {
498      if (ignoreCase) {
499        return this;
500      }
501
502      // We can't use .clone() because of GWT.
503      byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length);
504      for (int upper = 'A'; upper <= 'Z'; upper++) {
505        int lower = upper | 0x20;
506        byte decodeUpper = decodabet[upper];
507        byte decodeLower = decodabet[lower];
508        if (decodeUpper == -1) {
509          newDecodabet[upper] = decodeLower;
510        } else {
511          checkState(
512              decodeLower == -1,
513              "Can't ignoreCase() since '%s' and '%s' encode different values",
514              (char) upper,
515              (char) lower);
516          newDecodabet[lower] = decodeUpper;
517        }
518      }
519      return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true);
520    }
521
522    char encode(int bits) {
523      return chars[bits];
524    }
525
526    boolean isValidPaddingStartPosition(int index) {
527      return validPadding[index % charsPerChunk];
528    }
529
530    boolean canDecode(char ch) {
531      return ch <= Ascii.MAX && decodabet[ch] != -1;
532    }
533
534    int decode(char ch) throws DecodingException {
535      if (ch > Ascii.MAX) {
536        throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
537      }
538      int result = decodabet[ch];
539      if (result == -1) {
540        if (ch <= 0x20 || ch == Ascii.MAX) {
541          throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
542        } else {
543          throw new DecodingException("Unrecognized character: " + ch);
544        }
545      }
546      return result;
547    }
548
549    private boolean hasLowerCase() {
550      for (char c : chars) {
551        if (Ascii.isLowerCase(c)) {
552          return true;
553        }
554      }
555      return false;
556    }
557
558    private boolean hasUpperCase() {
559      for (char c : chars) {
560        if (Ascii.isUpperCase(c)) {
561          return true;
562        }
563      }
564      return false;
565    }
566
567    Alphabet upperCase() {
568      if (!hasLowerCase()) {
569        return this;
570      }
571      checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
572      char[] upperCased = new char[chars.length];
573      for (int i = 0; i < chars.length; i++) {
574        upperCased[i] = Ascii.toUpperCase(chars[i]);
575      }
576      Alphabet upperCase = new Alphabet(name + ".upperCase()", upperCased);
577      return ignoreCase ? upperCase.ignoreCase() : upperCase;
578    }
579
580    Alphabet lowerCase() {
581      if (!hasUpperCase()) {
582        return this;
583      }
584      checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
585      char[] lowerCased = new char[chars.length];
586      for (int i = 0; i < chars.length; i++) {
587        lowerCased[i] = Ascii.toLowerCase(chars[i]);
588      }
589      Alphabet lowerCase = new Alphabet(name + ".lowerCase()", lowerCased);
590      return ignoreCase ? lowerCase.ignoreCase() : lowerCase;
591    }
592
593    public boolean matches(char c) {
594      return c < decodabet.length && decodabet[c] != -1;
595    }
596
597    @Override
598    public String toString() {
599      return name;
600    }
601
602    @Override
603    public boolean equals(@CheckForNull Object other) {
604      if (other instanceof Alphabet) {
605        Alphabet that = (Alphabet) other;
606        return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars);
607      }
608      return false;
609    }
610
611    @Override
612    public int hashCode() {
613      return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237);
614    }
615  }
616
617  static class StandardBaseEncoding extends BaseEncoding {
618    final Alphabet alphabet; // checked non-null by the constructor
619
620    @CheckForNull final Character paddingChar; // null: the encoder emits no padding characters
621
622    StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
623      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
624    }
625
626    StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
627      this.alphabet = checkNotNull(alphabet);
628      checkArgument(
629          paddingChar == null || !alphabet.matches(paddingChar),
630          "Padding character %s was already in alphabet",
631          paddingChar);
632      this.paddingChar = paddingChar;
633    }
634
635    @Override
636    int maxEncodedSize(int bytes) {
637      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
638    }
639
640    @J2ktIncompatible
641    @GwtIncompatible // Writer,OutputStream
642    @Override
643    public OutputStream encodingStream(Writer out) {
644      checkNotNull(out);
645      return new OutputStream() {
646        int bitBuffer = 0;
647        int bitBufferLength = 0;
648        int writtenChars = 0;
649
650        @Override
651        public void write(int b) throws IOException {
652          bitBuffer <<= 8;
653          bitBuffer |= b & 0xFF;
654          bitBufferLength += 8;
655          while (bitBufferLength >= alphabet.bitsPerChar) {
656            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
657            out.write(alphabet.encode(charIndex));
658            writtenChars++;
659            bitBufferLength -= alphabet.bitsPerChar;
660          }
661        }
662
663        @Override
664        public void flush() throws IOException {
665          out.flush();
666        }
667
668        @Override
669        public void close() throws IOException {
670          if (bitBufferLength > 0) {
671            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
672            out.write(alphabet.encode(charIndex));
673            writtenChars++;
674            if (paddingChar != null) {
675              while (writtenChars % alphabet.charsPerChunk != 0) {
676                out.write(paddingChar.charValue());
677                writtenChars++;
678              }
679            }
680          }
681          out.close();
682        }
683      };
684    }
685
686    @Override
687    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
688      checkNotNull(target);
689      checkPositionIndexes(off, off + len, bytes.length);
690      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
691        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
692      }
693    }
694
695    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
696      checkNotNull(target);
697      checkPositionIndexes(off, off + len, bytes.length);
698      checkArgument(len <= alphabet.bytesPerChunk);
699      long bitBuffer = 0;
700      for (int i = 0; i < len; ++i) {
701        bitBuffer |= bytes[off + i] & 0xFF;
702        bitBuffer <<= 8; // Add additional zero byte in the end.
703      }
704      // Position of first character is length of bitBuffer minus bitsPerChar.
705      int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
706      int bitsProcessed = 0;
707      while (bitsProcessed < len * 8) {
708        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
709        target.append(alphabet.encode(charIndex));
710        bitsProcessed += alphabet.bitsPerChar;
711      }
712      if (paddingChar != null) {
713        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
714          target.append(paddingChar.charValue());
715          bitsProcessed += alphabet.bitsPerChar;
716        }
717      }
718    }
719
720    @Override
721    int maxDecodedSize(int chars) {
722      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
723    }
724
725    @Override
726    CharSequence trimTrailingPadding(CharSequence chars) {
727      checkNotNull(chars);
728      if (paddingChar == null) {
729        return chars;
730      }
731      char padChar = paddingChar.charValue();
732      int l;
733      for (l = chars.length() - 1; l >= 0; l--) {
734        if (chars.charAt(l) != padChar) {
735          break;
736        }
737      }
738      return chars.subSequence(0, l + 1);
739    }
740
741    @Override
742    public boolean canDecode(CharSequence chars) {
743      checkNotNull(chars);
744      chars = trimTrailingPadding(chars);
745      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
746        return false;
747      }
748      for (int i = 0; i < chars.length(); i++) {
749        if (!alphabet.canDecode(chars.charAt(i))) {
750          return false;
751        }
752      }
753      return true;
754    }
755
756    @Override
757    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
758      checkNotNull(target);
759      chars = trimTrailingPadding(chars);
760      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
761        throw new DecodingException("Invalid input length " + chars.length());
762      }
763      int bytesWritten = 0;
764      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
765        long chunk = 0;
766        int charsProcessed = 0;
767        for (int i = 0; i < alphabet.charsPerChunk; i++) {
768          chunk <<= alphabet.bitsPerChar;
769          if (charIdx + i < chars.length()) {
770            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
771          }
772        }
773        int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
774        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
775          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
776        }
777      }
778      return bytesWritten;
779    }
780
781    @Override
782    @J2ktIncompatible
783    @GwtIncompatible // Reader,InputStream
784    public InputStream decodingStream(Reader reader) {
785      checkNotNull(reader);
786      return new InputStream() {
787        int bitBuffer = 0;
788        int bitBufferLength = 0;
789        int readChars = 0;
790        boolean hitPadding = false;
791
792        @Override
793        public int read() throws IOException {
794          while (true) {
795            int readChar = reader.read();
796            if (readChar == -1) {
797              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
798                throw new DecodingException("Invalid input length " + readChars);
799              }
800              return -1;
801            }
802            readChars++;
803            char ch = (char) readChar;
804            if (paddingChar != null && paddingChar.charValue() == ch) {
805              if (!hitPadding
806                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
807                throw new DecodingException("Padding cannot start at index " + readChars);
808              }
809              hitPadding = true;
810            } else if (hitPadding) {
811              throw new DecodingException(
812                  "Expected padding character but found '" + ch + "' at index " + readChars);
813            } else {
814              bitBuffer <<= alphabet.bitsPerChar;
815              bitBuffer |= alphabet.decode(ch);
816              bitBufferLength += alphabet.bitsPerChar;
817
818              if (bitBufferLength >= 8) {
819                bitBufferLength -= 8;
820                return (bitBuffer >> bitBufferLength) & 0xFF;
821              }
822            }
823          }
824        }
825
826        @Override
827        public int read(byte[] buf, int off, int len) throws IOException {
828          // Overriding this to work around the fact that InputStream's default implementation of
829          // this method will silently swallow exceptions thrown by the single-byte read() method
830          // (other than on the first call to it), which in this case can cause invalid encoded
831          // strings to not throw an exception.
832          // See https://github.com/google/guava/issues/3542
833          checkPositionIndexes(off, off + len, buf.length);
834
835          int i = off;
836          for (; i < off + len; i++) {
837            int b = read();
838            if (b == -1) {
839              int read = i - off;
840              return read == 0 ? -1 : read;
841            }
842            buf[i] = (byte) b;
843          }
844          return i - off;
845        }
846
        // Closing this stream closes the reader that the read methods pull characters from.
        @Override
        public void close() throws IOException {
          reader.close();
        }
851      };
852    }
853
854    @Override
855    public BaseEncoding omitPadding() {
856      return (paddingChar == null) ? this : newInstance(alphabet, null);
857    }
858
859    @Override
860    public BaseEncoding withPadChar(char padChar) {
861      if (8 % alphabet.bitsPerChar == 0
862          || (paddingChar != null && paddingChar.charValue() == padChar)) {
863        return this;
864      } else {
865        return newInstance(alphabet, padChar);
866      }
867    }
868
869    @Override
870    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
871      for (int i = 0; i < separator.length(); i++) {
872        checkArgument(
873            !alphabet.matches(separator.charAt(i)),
874            "Separator (%s) cannot contain alphabet characters",
875            separator);
876      }
877      if (paddingChar != null) {
878        checkArgument(
879            separator.indexOf(paddingChar.charValue()) < 0,
880            "Separator (%s) cannot contain padding character",
881            separator);
882      }
883      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
884    }
885
    // Caches for the results of upperCase(), lowerCase() and ignoreCase(). Each field is assigned
    // by the method of the same name when that method finds it still null.
    @LazyInit @CheckForNull private volatile BaseEncoding upperCase;
    @LazyInit @CheckForNull private volatile BaseEncoding lowerCase;
    @LazyInit @CheckForNull private volatile BaseEncoding ignoreCase;
889
890    @Override
891    public BaseEncoding upperCase() {
892      BaseEncoding result = upperCase;
893      if (result == null) {
894        Alphabet upper = alphabet.upperCase();
895        result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
896      }
897      return result;
898    }
899
900    @Override
901    public BaseEncoding lowerCase() {
902      BaseEncoding result = lowerCase;
903      if (result == null) {
904        Alphabet lower = alphabet.lowerCase();
905        result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
906      }
907      return result;
908    }
909
910    @Override
911    public BaseEncoding ignoreCase() {
912      BaseEncoding result = ignoreCase;
913      if (result == null) {
914        Alphabet ignore = alphabet.ignoreCase();
915        result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar);
916      }
917      return result;
918    }
919
    /**
     * Creates the encoding handed back by this class's derivation methods ({@code omitPadding},
     * {@code withPadChar}, {@code upperCase}, {@code lowerCase} and {@code ignoreCase}) for the
     * given alphabet and padding character. This implementation always constructs a plain {@code
     * StandardBaseEncoding}.
     */
    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
      return new StandardBaseEncoding(alphabet, paddingChar);
    }
923
924    @Override
925    public String toString() {
926      StringBuilder builder = new StringBuilder("BaseEncoding.");
927      builder.append(alphabet);
928      if (8 % alphabet.bitsPerChar != 0) {
929        if (paddingChar == null) {
930          builder.append(".omitPadding()");
931        } else {
932          builder.append(".withPadChar('").append(paddingChar).append("')");
933        }
934      }
935      return builder.toString();
936    }
937
938    @Override
939    public boolean equals(@CheckForNull Object other) {
940      if (other instanceof StandardBaseEncoding) {
941        StandardBaseEncoding that = (StandardBaseEncoding) other;
942        return this.alphabet.equals(that.alphabet)
943            && Objects.equals(this.paddingChar, that.paddingChar);
944      }
945      return false;
946    }
947
948    @Override
949    public int hashCode() {
950      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
951    }
952  }
953
954  static final class Base16Encoding extends StandardBaseEncoding {
955    final char[] encoding = new char[512];
956
957    Base16Encoding(String name, String alphabetChars) {
958      this(new Alphabet(name, alphabetChars.toCharArray()));
959    }
960
961    private Base16Encoding(Alphabet alphabet) {
962      super(alphabet, null);
963      checkArgument(alphabet.chars.length == 16);
964      for (int i = 0; i < 256; ++i) {
965        encoding[i] = alphabet.encode(i >>> 4);
966        encoding[i | 0x100] = alphabet.encode(i & 0xF);
967      }
968    }
969
970    @Override
971    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
972      checkNotNull(target);
973      checkPositionIndexes(off, off + len, bytes.length);
974      for (int i = 0; i < len; ++i) {
975        int b = bytes[off + i] & 0xFF;
976        target.append(encoding[b]);
977        target.append(encoding[b | 0x100]);
978      }
979    }
980
981    @Override
982    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
983      checkNotNull(target);
984      if (chars.length() % 2 == 1) {
985        throw new DecodingException("Invalid input length " + chars.length());
986      }
987      int bytesWritten = 0;
988      for (int i = 0; i < chars.length(); i += 2) {
989        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
990        target[bytesWritten++] = (byte) decoded;
991      }
992      return bytesWritten;
993    }
994
995    @Override
996    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
997      return new Base16Encoding(alphabet);
998    }
999  }
1000
1001  static final class Base64Encoding extends StandardBaseEncoding {
1002    Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
1003      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
1004    }
1005
1006    private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
1007      super(alphabet, paddingChar);
1008      checkArgument(alphabet.chars.length == 64);
1009    }
1010
1011    @Override
1012    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1013      checkNotNull(target);
1014      checkPositionIndexes(off, off + len, bytes.length);
1015      int i = off;
1016      for (int remaining = len; remaining >= 3; remaining -= 3) {
1017        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
1018        target.append(alphabet.encode(chunk >>> 18));
1019        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
1020        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
1021        target.append(alphabet.encode(chunk & 0x3F));
1022      }
1023      if (i < off + len) {
1024        encodeChunkTo(target, bytes, i, off + len - i);
1025      }
1026    }
1027
1028    @Override
1029    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1030      checkNotNull(target);
1031      chars = trimTrailingPadding(chars);
1032      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
1033        throw new DecodingException("Invalid input length " + chars.length());
1034      }
1035      int bytesWritten = 0;
1036      for (int i = 0; i < chars.length(); ) {
1037        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
1038        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
1039        target[bytesWritten++] = (byte) (chunk >>> 16);
1040        if (i < chars.length()) {
1041          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
1042          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
1043          if (i < chars.length()) {
1044            chunk |= alphabet.decode(chars.charAt(i++));
1045            target[bytesWritten++] = (byte) (chunk & 0xFF);
1046          }
1047        }
1048      }
1049      return bytesWritten;
1050    }
1051
1052    @Override
1053    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
1054      return new Base64Encoding(alphabet, paddingChar);
1055    }
1056  }
1057
1058  @J2ktIncompatible
1059  @GwtIncompatible
1060  static Reader ignoringReader(Reader delegate, String toIgnore) {
1061    checkNotNull(delegate);
1062    checkNotNull(toIgnore);
1063    return new Reader() {
1064      @Override
1065      public int read() throws IOException {
1066        int readChar;
1067        do {
1068          readChar = delegate.read();
1069        } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
1070        return readChar;
1071      }
1072
1073      @Override
1074      public int read(char[] cbuf, int off, int len) throws IOException {
1075        throw new UnsupportedOperationException();
1076      }
1077
1078      @Override
1079      public void close() throws IOException {
1080        delegate.close();
1081      }
1082    };
1083  }
1084
1085  static Appendable separatingAppendable(
1086      Appendable delegate, String separator, int afterEveryChars) {
1087    checkNotNull(delegate);
1088    checkNotNull(separator);
1089    checkArgument(afterEveryChars > 0);
1090    return new Appendable() {
1091      int charsUntilSeparator = afterEveryChars;
1092
1093      @Override
1094      public Appendable append(char c) throws IOException {
1095        if (charsUntilSeparator == 0) {
1096          delegate.append(separator);
1097          charsUntilSeparator = afterEveryChars;
1098        }
1099        delegate.append(c);
1100        charsUntilSeparator--;
1101        return this;
1102      }
1103
1104      @Override
1105      public Appendable append(@CheckForNull CharSequence chars, int off, int len) {
1106        throw new UnsupportedOperationException();
1107      }
1108
1109      @Override
1110      public Appendable append(@CheckForNull CharSequence chars) {
1111        throw new UnsupportedOperationException();
1112      }
1113    };
1114  }
1115
1116  @J2ktIncompatible
1117  @GwtIncompatible // Writer
1118  static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) {
1119    Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars);
1120    return new Writer() {
1121      @Override
1122      public void write(int c) throws IOException {
1123        separatingAppendable.append((char) c);
1124      }
1125
1126      @Override
1127      public void write(char[] chars, int off, int len) throws IOException {
1128        throw new UnsupportedOperationException();
1129      }
1130
1131      @Override
1132      public void flush() throws IOException {
1133        delegate.flush();
1134      }
1135
1136      @Override
1137      public void close() throws IOException {
1138        delegate.close();
1139      }
1140    };
1141  }
1142
1143  static final class SeparatedBaseEncoding extends BaseEncoding {
1144    private final BaseEncoding delegate;
1145    private final String separator;
1146    private final int afterEveryChars;
1147
1148    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1149      this.delegate = checkNotNull(delegate);
1150      this.separator = checkNotNull(separator);
1151      this.afterEveryChars = afterEveryChars;
1152      checkArgument(
1153          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1154    }
1155
1156    @Override
1157    CharSequence trimTrailingPadding(CharSequence chars) {
1158      return delegate.trimTrailingPadding(chars);
1159    }
1160
1161    @Override
1162    int maxEncodedSize(int bytes) {
1163      int unseparatedSize = delegate.maxEncodedSize(bytes);
1164      return unseparatedSize
1165          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1166    }
1167
1168    @J2ktIncompatible
1169    @GwtIncompatible // Writer,OutputStream
1170    @Override
1171    public OutputStream encodingStream(Writer output) {
1172      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1173    }
1174
1175    @Override
1176    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1177      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1178    }
1179
1180    @Override
1181    int maxDecodedSize(int chars) {
1182      return delegate.maxDecodedSize(chars);
1183    }
1184
1185    @Override
1186    public boolean canDecode(CharSequence chars) {
1187      StringBuilder builder = new StringBuilder();
1188      for (int i = 0; i < chars.length(); i++) {
1189        char c = chars.charAt(i);
1190        if (separator.indexOf(c) < 0) {
1191          builder.append(c);
1192        }
1193      }
1194      return delegate.canDecode(builder);
1195    }
1196
1197    @Override
1198    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1199      StringBuilder stripped = new StringBuilder(chars.length());
1200      for (int i = 0; i < chars.length(); i++) {
1201        char c = chars.charAt(i);
1202        if (separator.indexOf(c) < 0) {
1203          stripped.append(c);
1204        }
1205      }
1206      return delegate.decodeTo(target, stripped);
1207    }
1208
1209    @Override
1210    @J2ktIncompatible
1211    @GwtIncompatible // Reader,InputStream
1212    public InputStream decodingStream(Reader reader) {
1213      return delegate.decodingStream(ignoringReader(reader, separator));
1214    }
1215
1216    @Override
1217    public BaseEncoding omitPadding() {
1218      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1219    }
1220
1221    @Override
1222    public BaseEncoding withPadChar(char padChar) {
1223      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1224    }
1225
1226    @Override
1227    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1228      throw new UnsupportedOperationException("Already have a separator");
1229    }
1230
1231    @Override
1232    public BaseEncoding upperCase() {
1233      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1234    }
1235
1236    @Override
1237    public BaseEncoding lowerCase() {
1238      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1239    }
1240
1241    @Override
1242    public BaseEncoding ignoreCase() {
1243      return delegate.ignoreCase().withSeparator(separator, afterEveryChars);
1244    }
1245
1246    @Override
1247    public String toString() {
1248      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1249    }
1250  }
1251}