001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.Ascii;
030import com.google.errorprone.annotations.concurrent.LazyInit;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.OutputStream;
034import java.io.Reader;
035import java.io.Writer;
036import java.util.Arrays;
037import java.util.Objects;
038import javax.annotation.CheckForNull;
039
040/**
041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
042 * strings. This class includes several constants for encoding schemes specified by <a
043 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
044 *
045 * <pre>{@code
046 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))
047 * }</pre>
048 *
049 * <p>returns the string {@code "MZXW6==="}, and
050 *
051 * <pre>{@code
052 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
053 * }</pre>
054 *
055 * <p>...returns the ASCII bytes of the string {@code "foo"}.
056 *
057 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
058 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
059 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
060 * behavior:
061 *
062 * <pre>{@code
063 * BaseEncoding.base16().lowerCase().decode("deadbeef");
064 * }</pre>
065 *
066 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
067 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
068 *
069 * <pre>{@code
070 * // Do NOT do this
071 * BaseEncoding hex = BaseEncoding.base16();
072 * hex.lowerCase(); // does nothing!
073 * return hex.decode("deadbeef"); // throws an IllegalArgumentException
074 * }</pre>
075 *
076 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code
077 * x}, but the reverse does not necessarily hold.
078 *
079 * <table>
080 * <caption>Encodings</caption>
081 * <tr>
082 * <th>Encoding
083 * <th>Alphabet
084 * <th>{@code char:byte} ratio
085 * <th>Default padding
086 * <th>Comments
087 * <tr>
088 * <td>{@link #base16()}
089 * <td>0-9 A-F
090 * <td>2.00
091 * <td>N/A
092 * <td>Traditional hexadecimal. Defaults to upper case.
093 * <tr>
094 * <td>{@link #base32()}
095 * <td>A-Z 2-7
096 * <td>1.60
097 * <td>=
098 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
099 * <tr>
100 * <td>{@link #base32Hex()}
101 * <td>0-9 A-V
102 * <td>1.60
103 * <td>=
104 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
105 * <tr>
106 * <td>{@link #base64()}
107 * <td>A-Z a-z 0-9 + /
108 * <td>1.33
109 * <td>=
110 * <td>
111 * <tr>
112 * <td>{@link #base64Url()}
113 * <td>A-Z a-z 0-9 - _
114 * <td>1.33
115 * <td>=
116 * <td>Safe to use as filenames, or to pass in URLs without escaping
117 * </table>
118 *
119 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
120 *
121 * @author Louis Wasserman
122 * @since 14.0
123 */
124@GwtCompatible(emulated = true)
125@ElementTypesAreNonnullByDefault
126public abstract class BaseEncoding {
127  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
128
129  BaseEncoding() {}
130
131  /**
132   * Exception indicating invalid base-encoded input encountered while decoding.
133   *
134   * @author Louis Wasserman
135   * @since 15.0
136   */
137  public static final class DecodingException extends IOException {
138    DecodingException(String message) {
139      super(message);
140    }
141
142    DecodingException(Throwable cause) {
143      super(cause);
144    }
145  }
146
147  /** Encodes the specified byte array, and returns the encoded {@code String}. */
148  public String encode(byte[] bytes) {
149    return encode(bytes, 0, bytes.length);
150  }
151
152  /**
153   * Encodes the specified range of the specified byte array, and returns the encoded {@code
154   * String}.
155   */
156  public final String encode(byte[] bytes, int off, int len) {
157    checkPositionIndexes(off, off + len, bytes.length);
158    StringBuilder result = new StringBuilder(maxEncodedSize(len));
159    try {
160      encodeTo(result, bytes, off, len);
161    } catch (IOException impossible) {
162      throw new AssertionError(impossible);
163    }
164    return result.toString();
165  }
166
167  /**
168   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
169   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code
170   * Writer}.
171   */
172  @GwtIncompatible // Writer,OutputStream
173  public abstract OutputStream encodingStream(Writer writer);
174
175  /**
176   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
177   */
178  @GwtIncompatible // ByteSink,CharSink
179  public final ByteSink encodingSink(CharSink encodedSink) {
180    checkNotNull(encodedSink);
181    return new ByteSink() {
182      @Override
183      public OutputStream openStream() throws IOException {
184        return encodingStream(encodedSink.openStream());
185      }
186    };
187  }
188
189  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
190
191  private static byte[] extract(byte[] result, int length) {
192    if (length == result.length) {
193      return result;
194    }
195    byte[] trunc = new byte[length];
196    System.arraycopy(result, 0, trunc, 0, length);
197    return trunc;
198  }
199
200  /**
201   * Determines whether the specified character sequence is a valid encoded string according to this
202   * encoding.
203   *
204   * @since 20.0
205   */
206  public abstract boolean canDecode(CharSequence chars);
207
208  /**
209   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
210   * inverse operation to {@link #encode(byte[])}.
211   *
212   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
213   *     encoding.
214   */
215  public final byte[] decode(CharSequence chars) {
216    try {
217      return decodeChecked(chars);
218    } catch (DecodingException badInput) {
219      throw new IllegalArgumentException(badInput);
220    }
221  }
222
223  /**
224   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
225   * inverse operation to {@link #encode(byte[])}.
226   *
227   * @throws DecodingException if the input is not a valid encoded string according to this
228   *     encoding.
229   */
230  final byte[] decodeChecked(CharSequence chars)
231      throws DecodingException {
232    chars = trimTrailingPadding(chars);
233    byte[] tmp = new byte[maxDecodedSize(chars.length())];
234    int len = decodeTo(tmp, chars);
235    return extract(tmp, len);
236  }
237
238  /**
239   * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code
240   * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors.
241   */
242  @GwtIncompatible // Reader,InputStream
243  public abstract InputStream decodingStream(Reader reader);
244
245  /**
246   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code
247   * CharSource}.
248   */
249  @GwtIncompatible // ByteSource,CharSource
250  public final ByteSource decodingSource(CharSource encodedSource) {
251    checkNotNull(encodedSource);
252    return new ByteSource() {
253      @Override
254      public InputStream openStream() throws IOException {
255        return decodingStream(encodedSource.openStream());
256      }
257    };
258  }
259
260  // Implementations for encoding/decoding
261
262  abstract int maxEncodedSize(int bytes);
263
264  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;
265
266  abstract int maxDecodedSize(int chars);
267
268  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
269
270  CharSequence trimTrailingPadding(CharSequence chars) {
271    return checkNotNull(chars);
272  }
273
274  // Modified encoding generators
275
276  /**
277   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
278   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
279   * section 3.2</a>, Padding of Encoded Data.
280   */
281  public abstract BaseEncoding omitPadding();
282
283  /**
284   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
285   * for padding.
286   *
287   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
288   *     separator
289   */
290  public abstract BaseEncoding withPadChar(char padChar);
291
292  /**
293   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
294   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
295   * are skipped over in decoding.
296   *
297   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
298   *     string, or if {@code n <= 0}
299   * @throws UnsupportedOperationException if this encoding already uses a separator
300   */
301  public abstract BaseEncoding withSeparator(String separator, int n);
302
303  /**
304   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
305   * uppercase letters. Padding and separator characters remain in their original case.
306   *
307   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
308   *     lower-case characters
309   */
310  public abstract BaseEncoding upperCase();
311
312  /**
313   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
314   * lowercase letters. Padding and separator characters remain in their original case.
315   *
316   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
317   *     lower-case characters
318   */
319  public abstract BaseEncoding lowerCase();
320
321  /**
322   * Returns an encoding that behaves equivalently to this encoding, but decodes letters without
323   * regard to case.
324   *
325   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
326   *     lower-case characters
327   */
328  public abstract BaseEncoding ignoreCase();
329
330  private static final BaseEncoding BASE64 =
331      new Base64Encoding(
332          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
333
334  /**
335   * The "base64" base encoding specified by <a
336   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
337   * (This is the same as the base 64 encoding from <a
338   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
339   *
340   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
341   * omitted} or {@linkplain #withPadChar(char) replaced}.
342   *
343   * <p>No line feeds are added by default, as per <a
344   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
345   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
346   */
347  public static BaseEncoding base64() {
348    return BASE64;
349  }
350
351  private static final BaseEncoding BASE64_URL =
352      new Base64Encoding(
353          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');
354
355  /**
356   * The "base64url" encoding specified by <a
357   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
358   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
359   * is the same as the base 64 encoding with URL and filename safe alphabet from <a
360   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
361   *
362   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
363   * omitted} or {@linkplain #withPadChar(char) replaced}.
364   *
365   * <p>No line feeds are added by default, as per <a
366   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
367   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
368   */
369  public static BaseEncoding base64Url() {
370    return BASE64_URL;
371  }
372
373  private static final BaseEncoding BASE32 =
374      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
375
376  /**
377   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
378   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
379   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
380   *
381   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
382   * omitted} or {@linkplain #withPadChar(char) replaced}.
383   *
384   * <p>No line feeds are added by default, as per <a
385   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
386   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
387   */
388  public static BaseEncoding base32() {
389    return BASE32;
390  }
391
392  private static final BaseEncoding BASE32_HEX =
393      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');
394
395  /**
396   * The "base32hex" encoding specified by <a
397   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
398   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
399   *
400   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
401   * omitted} or {@linkplain #withPadChar(char) replaced}.
402   *
403   * <p>No line feeds are added by default, as per <a
404   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
405   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
406   */
407  public static BaseEncoding base32Hex() {
408    return BASE32_HEX;
409  }
410
411  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");
412
413  /**
414   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
415   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a
416   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
417   * "hexadecimal" format.
418   *
419   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
420   * have no effect.
421   *
422   * <p>No line feeds are added by default, as per <a
423   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
424   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
425   */
426  public static BaseEncoding base16() {
427    return BASE16;
428  }
429
430  private static final class Alphabet {
431    private final String name;
432    // this is meant to be immutable -- don't modify it!
433    private final char[] chars;
434    final int mask;
435    final int bitsPerChar;
436    final int charsPerChunk;
437    final int bytesPerChunk;
438    private final byte[] decodabet;
439    private final boolean[] validPadding;
440    private final boolean ignoreCase;
441
442    Alphabet(String name, char[] chars) {
443      this(name, chars, decodabetFor(chars), /* ignoreCase= */ false);
444    }
445
446    private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) {
447      this.name = checkNotNull(name);
448      this.chars = checkNotNull(chars);
449      try {
450        this.bitsPerChar = log2(chars.length, UNNECESSARY);
451      } catch (ArithmeticException e) {
452        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
453      }
454
455      // Compute how input bytes are chunked. For example, with base64 we chunk every 3 bytes into
456      // 4 characters. We have bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3.
457      // We're looking for the smallest charsPerChunk such that bitsPerChar * charsPerChunk is a
458      // multiple of 8. A multiple of 8 has 3 low zero bits, so we just need to figure out how many
459      // extra zero bits we need to add to the end of bitsPerChar to get 3 in total.
460      // The logic here would be wrong for bitsPerChar > 8, but since we require distinct ASCII
461      // characters that can't happen.
462      int zeroesInBitsPerChar = Integer.numberOfTrailingZeros(bitsPerChar);
463      this.charsPerChunk = 1 << (3 - zeroesInBitsPerChar);
464      this.bytesPerChunk = bitsPerChar >> zeroesInBitsPerChar;
465
466      this.mask = chars.length - 1;
467
468      this.decodabet = decodabet;
469
470      boolean[] validPadding = new boolean[charsPerChunk];
471      for (int i = 0; i < bytesPerChunk; i++) {
472        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
473      }
474      this.validPadding = validPadding;
475      this.ignoreCase = ignoreCase;
476    }
477
478    private static byte[] decodabetFor(char[] chars) {
479      byte[] decodabet = new byte[Ascii.MAX + 1];
480      Arrays.fill(decodabet, (byte) -1);
481      for (int i = 0; i < chars.length; i++) {
482        char c = chars[i];
483        checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
484        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
485        decodabet[c] = (byte) i;
486      }
487      return decodabet;
488    }
489
490    /** Returns an equivalent {@code Alphabet} except it ignores case. */
491    Alphabet ignoreCase() {
492      if (ignoreCase) {
493        return this;
494      }
495
496      // We can't use .clone() because of GWT.
497      byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length);
498      for (int upper = 'A'; upper <= 'Z'; upper++) {
499        int lower = upper | 0x20;
500        byte decodeUpper = decodabet[upper];
501        byte decodeLower = decodabet[lower];
502        if (decodeUpper == -1) {
503          newDecodabet[upper] = decodeLower;
504        } else {
505          checkState(
506              decodeLower == -1,
507              "Can't ignoreCase() since '%s' and '%s' encode different values",
508              (char) upper,
509              (char) lower);
510          newDecodabet[lower] = decodeUpper;
511        }
512      }
513      return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true);
514    }
515
516    char encode(int bits) {
517      return chars[bits];
518    }
519
520    boolean isValidPaddingStartPosition(int index) {
521      return validPadding[index % charsPerChunk];
522    }
523
524    boolean canDecode(char ch) {
525      return ch <= Ascii.MAX && decodabet[ch] != -1;
526    }
527
528    int decode(char ch) throws DecodingException {
529      if (ch > Ascii.MAX) {
530        throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
531      }
532      int result = decodabet[ch];
533      if (result == -1) {
534        if (ch <= 0x20 || ch == Ascii.MAX) {
535          throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
536        } else {
537          throw new DecodingException("Unrecognized character: " + ch);
538        }
539      }
540      return result;
541    }
542
543    private boolean hasLowerCase() {
544      for (char c : chars) {
545        if (Ascii.isLowerCase(c)) {
546          return true;
547        }
548      }
549      return false;
550    }
551
552    private boolean hasUpperCase() {
553      for (char c : chars) {
554        if (Ascii.isUpperCase(c)) {
555          return true;
556        }
557      }
558      return false;
559    }
560
561    Alphabet upperCase() {
562      if (!hasLowerCase()) {
563        return this;
564      }
565      checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
566      char[] upperCased = new char[chars.length];
567      for (int i = 0; i < chars.length; i++) {
568        upperCased[i] = Ascii.toUpperCase(chars[i]);
569      }
570      Alphabet upperCase = new Alphabet(name + ".upperCase()", upperCased);
571      return ignoreCase ? upperCase.ignoreCase() : upperCase;
572    }
573
574    Alphabet lowerCase() {
575      if (!hasUpperCase()) {
576        return this;
577      }
578      checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
579      char[] lowerCased = new char[chars.length];
580      for (int i = 0; i < chars.length; i++) {
581        lowerCased[i] = Ascii.toLowerCase(chars[i]);
582      }
583      Alphabet lowerCase = new Alphabet(name + ".lowerCase()", lowerCased);
584      return ignoreCase ? lowerCase.ignoreCase() : lowerCase;
585    }
586
587    public boolean matches(char c) {
588      return c < decodabet.length && decodabet[c] != -1;
589    }
590
591    @Override
592    public String toString() {
593      return name;
594    }
595
596    @Override
597    public boolean equals(@CheckForNull Object other) {
598      if (other instanceof Alphabet) {
599        Alphabet that = (Alphabet) other;
600        return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars);
601      }
602      return false;
603    }
604
605    @Override
606    public int hashCode() {
607      return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237);
608    }
609  }
610
611  static class StandardBaseEncoding extends BaseEncoding {
612    final Alphabet alphabet;
613
614    @CheckForNull final Character paddingChar;
615
616    StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
617      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
618    }
619
620    StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
621      this.alphabet = checkNotNull(alphabet);
622      checkArgument(
623          paddingChar == null || !alphabet.matches(paddingChar),
624          "Padding character %s was already in alphabet",
625          paddingChar);
626      this.paddingChar = paddingChar;
627    }
628
629    @Override
630    int maxEncodedSize(int bytes) {
631      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
632    }
633
634    @GwtIncompatible // Writer,OutputStream
635    @Override
636    public OutputStream encodingStream(Writer out) {
637      checkNotNull(out);
638      return new OutputStream() {
639        int bitBuffer = 0;
640        int bitBufferLength = 0;
641        int writtenChars = 0;
642
643        @Override
644        public void write(int b) throws IOException {
645          bitBuffer <<= 8;
646          bitBuffer |= b & 0xFF;
647          bitBufferLength += 8;
648          while (bitBufferLength >= alphabet.bitsPerChar) {
649            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
650            out.write(alphabet.encode(charIndex));
651            writtenChars++;
652            bitBufferLength -= alphabet.bitsPerChar;
653          }
654        }
655
656        @Override
657        public void flush() throws IOException {
658          out.flush();
659        }
660
661        @Override
662        public void close() throws IOException {
663          if (bitBufferLength > 0) {
664            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
665            out.write(alphabet.encode(charIndex));
666            writtenChars++;
667            if (paddingChar != null) {
668              while (writtenChars % alphabet.charsPerChunk != 0) {
669                out.write(paddingChar.charValue());
670                writtenChars++;
671              }
672            }
673          }
674          out.close();
675        }
676      };
677    }
678
679    @Override
680    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
681      checkNotNull(target);
682      checkPositionIndexes(off, off + len, bytes.length);
683      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
684        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
685      }
686    }
687
688    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
689      checkNotNull(target);
690      checkPositionIndexes(off, off + len, bytes.length);
691      checkArgument(len <= alphabet.bytesPerChunk);
692      long bitBuffer = 0;
693      for (int i = 0; i < len; ++i) {
694        bitBuffer |= bytes[off + i] & 0xFF;
695        bitBuffer <<= 8; // Add additional zero byte in the end.
696      }
697      // Position of first character is length of bitBuffer minus bitsPerChar.
698      int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
699      int bitsProcessed = 0;
700      while (bitsProcessed < len * 8) {
701        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
702        target.append(alphabet.encode(charIndex));
703        bitsProcessed += alphabet.bitsPerChar;
704      }
705      if (paddingChar != null) {
706        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
707          target.append(paddingChar.charValue());
708          bitsProcessed += alphabet.bitsPerChar;
709        }
710      }
711    }
712
713    @Override
714    int maxDecodedSize(int chars) {
715      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
716    }
717
718    @Override
719    CharSequence trimTrailingPadding(CharSequence chars) {
720      checkNotNull(chars);
721      if (paddingChar == null) {
722        return chars;
723      }
724      char padChar = paddingChar.charValue();
725      int l;
726      for (l = chars.length() - 1; l >= 0; l--) {
727        if (chars.charAt(l) != padChar) {
728          break;
729        }
730      }
731      return chars.subSequence(0, l + 1);
732    }
733
734    @Override
735    public boolean canDecode(CharSequence chars) {
736      checkNotNull(chars);
737      chars = trimTrailingPadding(chars);
738      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
739        return false;
740      }
741      for (int i = 0; i < chars.length(); i++) {
742        if (!alphabet.canDecode(chars.charAt(i))) {
743          return false;
744        }
745      }
746      return true;
747    }
748
749    @Override
750    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
751      checkNotNull(target);
752      chars = trimTrailingPadding(chars);
753      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
754        throw new DecodingException("Invalid input length " + chars.length());
755      }
756      int bytesWritten = 0;
757      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
758        long chunk = 0;
759        int charsProcessed = 0;
760        for (int i = 0; i < alphabet.charsPerChunk; i++) {
761          chunk <<= alphabet.bitsPerChar;
762          if (charIdx + i < chars.length()) {
763            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
764          }
765        }
766        int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
767        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
768          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
769        }
770      }
771      return bytesWritten;
772    }
773
774    @Override
775    @GwtIncompatible // Reader,InputStream
776    public InputStream decodingStream(Reader reader) {
777      checkNotNull(reader);
778      return new InputStream() {
779        int bitBuffer = 0;
780        int bitBufferLength = 0;
781        int readChars = 0;
782        boolean hitPadding = false;
783
784        @Override
785        public int read() throws IOException {
786          while (true) {
787            int readChar = reader.read();
788            if (readChar == -1) {
789              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
790                throw new DecodingException("Invalid input length " + readChars);
791              }
792              return -1;
793            }
794            readChars++;
795            char ch = (char) readChar;
796            if (paddingChar != null && paddingChar.charValue() == ch) {
797              if (!hitPadding
798                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
799                throw new DecodingException("Padding cannot start at index " + readChars);
800              }
801              hitPadding = true;
802            } else if (hitPadding) {
803              throw new DecodingException(
804                  "Expected padding character but found '" + ch + "' at index " + readChars);
805            } else {
806              bitBuffer <<= alphabet.bitsPerChar;
807              bitBuffer |= alphabet.decode(ch);
808              bitBufferLength += alphabet.bitsPerChar;
809
810              if (bitBufferLength >= 8) {
811                bitBufferLength -= 8;
812                return (bitBuffer >> bitBufferLength) & 0xFF;
813              }
814            }
815          }
816        }
817
818        @Override
819        public int read(byte[] buf, int off, int len) throws IOException {
820          // Overriding this to work around the fact that InputStream's default implementation of
821          // this method will silently swallow exceptions thrown by the single-byte read() method
822          // (other than on the first call to it), which in this case can cause invalid encoded
823          // strings to not throw an exception.
824          // See https://github.com/google/guava/issues/3542
825          checkPositionIndexes(off, off + len, buf.length);
826
827          int i = off;
828          for (; i < off + len; i++) {
829            int b = read();
830            if (b == -1) {
831              int read = i - off;
832              return read == 0 ? -1 : read;
833            }
834            buf[i] = (byte) b;
835          }
836          return i - off;
837        }
838
839        @Override
840        public void close() throws IOException {
841          reader.close();
842        }
843      };
844    }
845
846    @Override
847    public BaseEncoding omitPadding() {
848      return (paddingChar == null) ? this : newInstance(alphabet, null);
849    }
850
851    @Override
852    public BaseEncoding withPadChar(char padChar) {
853      if (8 % alphabet.bitsPerChar == 0
854          || (paddingChar != null && paddingChar.charValue() == padChar)) {
855        return this;
856      } else {
857        return newInstance(alphabet, padChar);
858      }
859    }
860
861    @Override
862    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
863      for (int i = 0; i < separator.length(); i++) {
864        checkArgument(
865            !alphabet.matches(separator.charAt(i)),
866            "Separator (%s) cannot contain alphabet characters",
867            separator);
868      }
869      if (paddingChar != null) {
870        checkArgument(
871            separator.indexOf(paddingChar.charValue()) < 0,
872            "Separator (%s) cannot contain padding character",
873            separator);
874      }
875      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
876    }
877
878    @LazyInit @CheckForNull private volatile BaseEncoding upperCase;
879    @LazyInit @CheckForNull private volatile BaseEncoding lowerCase;
880    @LazyInit @CheckForNull private volatile BaseEncoding ignoreCase;
881
882    @Override
883    public BaseEncoding upperCase() {
884      BaseEncoding result = upperCase;
885      if (result == null) {
886        Alphabet upper = alphabet.upperCase();
887        result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
888      }
889      return result;
890    }
891
892    @Override
893    public BaseEncoding lowerCase() {
894      BaseEncoding result = lowerCase;
895      if (result == null) {
896        Alphabet lower = alphabet.lowerCase();
897        result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
898      }
899      return result;
900    }
901
902    @Override
903    public BaseEncoding ignoreCase() {
904      BaseEncoding result = ignoreCase;
905      if (result == null) {
906        Alphabet ignore = alphabet.ignoreCase();
907        result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar);
908      }
909      return result;
910    }
911
912    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
913      return new StandardBaseEncoding(alphabet, paddingChar);
914    }
915
916    @Override
917    public String toString() {
918      StringBuilder builder = new StringBuilder("BaseEncoding.");
919      builder.append(alphabet);
920      if (8 % alphabet.bitsPerChar != 0) {
921        if (paddingChar == null) {
922          builder.append(".omitPadding()");
923        } else {
924          builder.append(".withPadChar('").append(paddingChar).append("')");
925        }
926      }
927      return builder.toString();
928    }
929
930    @Override
931    public boolean equals(@CheckForNull Object other) {
932      if (other instanceof StandardBaseEncoding) {
933        StandardBaseEncoding that = (StandardBaseEncoding) other;
934        return this.alphabet.equals(that.alphabet)
935            && Objects.equals(this.paddingChar, that.paddingChar);
936      }
937      return false;
938    }
939
940    @Override
941    public int hashCode() {
942      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
943    }
944  }
945
946  static final class Base16Encoding extends StandardBaseEncoding {
947    final char[] encoding = new char[512];
948
949    Base16Encoding(String name, String alphabetChars) {
950      this(new Alphabet(name, alphabetChars.toCharArray()));
951    }
952
953    private Base16Encoding(Alphabet alphabet) {
954      super(alphabet, null);
955      checkArgument(alphabet.chars.length == 16);
956      for (int i = 0; i < 256; ++i) {
957        encoding[i] = alphabet.encode(i >>> 4);
958        encoding[i | 0x100] = alphabet.encode(i & 0xF);
959      }
960    }
961
962    @Override
963    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
964      checkNotNull(target);
965      checkPositionIndexes(off, off + len, bytes.length);
966      for (int i = 0; i < len; ++i) {
967        int b = bytes[off + i] & 0xFF;
968        target.append(encoding[b]);
969        target.append(encoding[b | 0x100]);
970      }
971    }
972
973    @Override
974    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
975      checkNotNull(target);
976      if (chars.length() % 2 == 1) {
977        throw new DecodingException("Invalid input length " + chars.length());
978      }
979      int bytesWritten = 0;
980      for (int i = 0; i < chars.length(); i += 2) {
981        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
982        target[bytesWritten++] = (byte) decoded;
983      }
984      return bytesWritten;
985    }
986
987    @Override
988    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
989      return new Base16Encoding(alphabet);
990    }
991  }
992
993  static final class Base64Encoding extends StandardBaseEncoding {
994    Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
995      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
996    }
997
998    private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
999      super(alphabet, paddingChar);
1000      checkArgument(alphabet.chars.length == 64);
1001    }
1002
1003    @Override
1004    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1005      checkNotNull(target);
1006      checkPositionIndexes(off, off + len, bytes.length);
1007      int i = off;
1008      for (int remaining = len; remaining >= 3; remaining -= 3) {
1009        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
1010        target.append(alphabet.encode(chunk >>> 18));
1011        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
1012        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
1013        target.append(alphabet.encode(chunk & 0x3F));
1014      }
1015      if (i < off + len) {
1016        encodeChunkTo(target, bytes, i, off + len - i);
1017      }
1018    }
1019
1020    @Override
1021    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1022      checkNotNull(target);
1023      chars = trimTrailingPadding(chars);
1024      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
1025        throw new DecodingException("Invalid input length " + chars.length());
1026      }
1027      int bytesWritten = 0;
1028      for (int i = 0; i < chars.length(); ) {
1029        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
1030        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
1031        target[bytesWritten++] = (byte) (chunk >>> 16);
1032        if (i < chars.length()) {
1033          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
1034          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
1035          if (i < chars.length()) {
1036            chunk |= alphabet.decode(chars.charAt(i++));
1037            target[bytesWritten++] = (byte) (chunk & 0xFF);
1038          }
1039        }
1040      }
1041      return bytesWritten;
1042    }
1043
1044    @Override
1045    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
1046      return new Base64Encoding(alphabet, paddingChar);
1047    }
1048  }
1049
1050  @GwtIncompatible
1051  static Reader ignoringReader(Reader delegate, String toIgnore) {
1052    checkNotNull(delegate);
1053    checkNotNull(toIgnore);
1054    return new Reader() {
1055      @Override
1056      public int read() throws IOException {
1057        int readChar;
1058        do {
1059          readChar = delegate.read();
1060        } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
1061        return readChar;
1062      }
1063
1064      @Override
1065      public int read(char[] cbuf, int off, int len) throws IOException {
1066        throw new UnsupportedOperationException();
1067      }
1068
1069      @Override
1070      public void close() throws IOException {
1071        delegate.close();
1072      }
1073    };
1074  }
1075
1076  static Appendable separatingAppendable(
1077      Appendable delegate, String separator, int afterEveryChars) {
1078    checkNotNull(delegate);
1079    checkNotNull(separator);
1080    checkArgument(afterEveryChars > 0);
1081    return new Appendable() {
1082      int charsUntilSeparator = afterEveryChars;
1083
1084      @Override
1085      public Appendable append(char c) throws IOException {
1086        if (charsUntilSeparator == 0) {
1087          delegate.append(separator);
1088          charsUntilSeparator = afterEveryChars;
1089        }
1090        delegate.append(c);
1091        charsUntilSeparator--;
1092        return this;
1093      }
1094
1095      @Override
1096      public Appendable append(@CheckForNull CharSequence chars, int off, int len) {
1097        throw new UnsupportedOperationException();
1098      }
1099
1100      @Override
1101      public Appendable append(@CheckForNull CharSequence chars) {
1102        throw new UnsupportedOperationException();
1103      }
1104    };
1105  }
1106
1107  @GwtIncompatible // Writer
1108  static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) {
1109    Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars);
1110    return new Writer() {
1111      @Override
1112      public void write(int c) throws IOException {
1113        separatingAppendable.append((char) c);
1114      }
1115
1116      @Override
1117      public void write(char[] chars, int off, int len) throws IOException {
1118        throw new UnsupportedOperationException();
1119      }
1120
1121      @Override
1122      public void flush() throws IOException {
1123        delegate.flush();
1124      }
1125
1126      @Override
1127      public void close() throws IOException {
1128        delegate.close();
1129      }
1130    };
1131  }
1132
1133  static final class SeparatedBaseEncoding extends BaseEncoding {
1134    private final BaseEncoding delegate;
1135    private final String separator;
1136    private final int afterEveryChars;
1137
1138    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1139      this.delegate = checkNotNull(delegate);
1140      this.separator = checkNotNull(separator);
1141      this.afterEveryChars = afterEveryChars;
1142      checkArgument(
1143          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1144    }
1145
1146    @Override
1147    CharSequence trimTrailingPadding(CharSequence chars) {
1148      return delegate.trimTrailingPadding(chars);
1149    }
1150
1151    @Override
1152    int maxEncodedSize(int bytes) {
1153      int unseparatedSize = delegate.maxEncodedSize(bytes);
1154      return unseparatedSize
1155          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1156    }
1157
1158    @GwtIncompatible // Writer,OutputStream
1159    @Override
1160    public OutputStream encodingStream(Writer output) {
1161      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1162    }
1163
1164    @Override
1165    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1166      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1167    }
1168
1169    @Override
1170    int maxDecodedSize(int chars) {
1171      return delegate.maxDecodedSize(chars);
1172    }
1173
1174    @Override
1175    public boolean canDecode(CharSequence chars) {
1176      StringBuilder builder = new StringBuilder();
1177      for (int i = 0; i < chars.length(); i++) {
1178        char c = chars.charAt(i);
1179        if (separator.indexOf(c) < 0) {
1180          builder.append(c);
1181        }
1182      }
1183      return delegate.canDecode(builder);
1184    }
1185
1186    @Override
1187    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1188      StringBuilder stripped = new StringBuilder(chars.length());
1189      for (int i = 0; i < chars.length(); i++) {
1190        char c = chars.charAt(i);
1191        if (separator.indexOf(c) < 0) {
1192          stripped.append(c);
1193        }
1194      }
1195      return delegate.decodeTo(target, stripped);
1196    }
1197
1198    @Override
1199    @GwtIncompatible // Reader,InputStream
1200    public InputStream decodingStream(Reader reader) {
1201      return delegate.decodingStream(ignoringReader(reader, separator));
1202    }
1203
1204    @Override
1205    public BaseEncoding omitPadding() {
1206      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1207    }
1208
1209    @Override
1210    public BaseEncoding withPadChar(char padChar) {
1211      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1212    }
1213
1214    @Override
1215    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1216      throw new UnsupportedOperationException("Already have a separator");
1217    }
1218
1219    @Override
1220    public BaseEncoding upperCase() {
1221      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1222    }
1223
1224    @Override
1225    public BaseEncoding lowerCase() {
1226      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1227    }
1228
1229    @Override
1230    public BaseEncoding ignoreCase() {
1231      return delegate.ignoreCase().withSeparator(separator, afterEveryChars);
1232    }
1233
1234    @Override
1235    public String toString() {
1236      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1237    }
1238  }
1239}