001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.lang.Math.max;
024import static java.lang.Math.min;
025import static java.math.RoundingMode.CEILING;
026import static java.math.RoundingMode.FLOOR;
027import static java.math.RoundingMode.UNNECESSARY;
028
029import com.google.common.annotations.GwtCompatible;
030import com.google.common.annotations.GwtIncompatible;
031import com.google.common.annotations.J2ktIncompatible;
032import com.google.common.base.Ascii;
033import com.google.errorprone.annotations.concurrent.LazyInit;
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.OutputStream;
037import java.io.Reader;
038import java.io.Writer;
039import java.util.Arrays;
040import java.util.Objects;
041import javax.annotation.CheckForNull;
042import org.checkerframework.checker.nullness.qual.Nullable;
043
044/**
045 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
046 * strings. This class includes several constants for encoding schemes specified by <a
047 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
048 *
049 * <pre>{@code
050 * BaseEncoding.base32().encode("foo".getBytes(US_ASCII))
051 * }</pre>
052 *
053 * <p>returns the string {@code "MZXW6==="}, and
054 *
055 * <pre>{@code
056 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
057 * }</pre>
058 *
059 * <p>...returns the ASCII bytes of the string {@code "foo"}.
060 *
061 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
062 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
063 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
064 * behavior:
065 *
066 * <pre>{@code
067 * BaseEncoding.base16().lowerCase().decode("deadbeef");
068 * }</pre>
069 *
070 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
071 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
072 *
073 * <pre>{@code
074 * // Do NOT do this
075 * BaseEncoding hex = BaseEncoding.base16();
076 * hex.lowerCase(); // does nothing!
077 * return hex.decode("deadbeef"); // throws an IllegalArgumentException
078 * }</pre>
079 *
080 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code
081 * x}, but the reverse does not necessarily hold.
082 *
083 * <table>
084 * <caption>Encodings</caption>
085 * <tr>
086 * <th>Encoding
087 * <th>Alphabet
088 * <th>{@code char:byte} ratio
089 * <th>Default padding
090 * <th>Comments
091 * <tr>
092 * <td>{@link #base16()}
093 * <td>0-9 A-F
094 * <td>2.00
095 * <td>N/A
096 * <td>Traditional hexadecimal. Defaults to upper case.
097 * <tr>
098 * <td>{@link #base32()}
099 * <td>A-Z 2-7
100 * <td>1.60
101 * <td>=
102 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
103 * <tr>
104 * <td>{@link #base32Hex()}
105 * <td>0-9 A-V
106 * <td>1.60
107 * <td>=
108 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
109 * <tr>
110 * <td>{@link #base64()}
111 * <td>A-Z a-z 0-9 + /
112 * <td>1.33
113 * <td>=
114 * <td>
115 * <tr>
116 * <td>{@link #base64Url()}
117 * <td>A-Z a-z 0-9 - _
118 * <td>1.33
119 * <td>=
120 * <td>Safe to use as filenames, or to pass in URLs without escaping
121 * </table>
122 *
123 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
124 *
125 * @author Louis Wasserman
126 * @since 14.0
127 */
128@GwtCompatible(emulated = true)
129public abstract class BaseEncoding {
130  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
131
132  BaseEncoding() {}
133
134  /**
135   * Exception indicating invalid base-encoded input encountered while decoding.
136   *
137   * @author Louis Wasserman
138   * @since 15.0
139   */
140  public static final class DecodingException extends IOException {
141    DecodingException(@Nullable String message) {
142      super(message);
143    }
144  }
145
146  /** Encodes the specified byte array, and returns the encoded {@code String}. */
147  public String encode(byte[] bytes) {
148    return encode(bytes, 0, bytes.length);
149  }
150
151  /**
152   * Encodes the specified range of the specified byte array, and returns the encoded {@code
153   * String}.
154   */
155  public final String encode(byte[] bytes, int off, int len) {
156    checkPositionIndexes(off, off + len, bytes.length);
157    StringBuilder result = new StringBuilder(maxEncodedSize(len));
158    try {
159      encodeTo(result, bytes, off, len);
160    } catch (IOException impossible) {
161      throw new AssertionError(impossible);
162    }
163    return result.toString();
164  }
165
166  /**
167   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
168   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code
169   * Writer}.
170   */
171  @J2ktIncompatible
172  @GwtIncompatible // Writer,OutputStream
173  public abstract OutputStream encodingStream(Writer writer);
174
175  /**
176   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
177   */
178  @J2ktIncompatible
179  @GwtIncompatible // ByteSink,CharSink
180  public final ByteSink encodingSink(CharSink encodedSink) {
181    checkNotNull(encodedSink);
182    return new ByteSink() {
183      @Override
184      public OutputStream openStream() throws IOException {
185        return encodingStream(encodedSink.openStream());
186      }
187    };
188  }
189
190  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
191
192  private static byte[] extract(byte[] result, int length) {
193    if (length == result.length) {
194      return result;
195    }
196    byte[] trunc = new byte[length];
197    System.arraycopy(result, 0, trunc, 0, length);
198    return trunc;
199  }
200
201  /**
202   * Determines whether the specified character sequence is a valid encoded string according to this
203   * encoding.
204   *
205   * @since 20.0
206   */
207  public abstract boolean canDecode(CharSequence chars);
208
209  /**
210   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
211   * inverse operation to {@link #encode(byte[])}.
212   *
213   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
214   *     encoding.
215   */
216  public final byte[] decode(CharSequence chars) {
217    try {
218      return decodeChecked(chars);
219    } catch (DecodingException badInput) {
220      throw new IllegalArgumentException(badInput);
221    }
222  }
223
224  /**
225   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
226   * inverse operation to {@link #encode(byte[])}.
227   *
228   * @throws DecodingException if the input is not a valid encoded string according to this
229   *     encoding.
230   */
231  final byte[] decodeChecked(CharSequence chars)
232      throws DecodingException {
233    chars = trimTrailingPadding(chars);
234    byte[] tmp = new byte[maxDecodedSize(chars.length())];
235    int len = decodeTo(tmp, chars);
236    return extract(tmp, len);
237  }
238
239  /**
240   * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code
241   * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors.
242   */
243  @J2ktIncompatible
244  @GwtIncompatible // Reader,InputStream
245  public abstract InputStream decodingStream(Reader reader);
246
247  /**
248   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code
249   * CharSource}.
250   */
251  @J2ktIncompatible
252  @GwtIncompatible // ByteSource,CharSource
253  public final ByteSource decodingSource(CharSource encodedSource) {
254    checkNotNull(encodedSource);
255    return new ByteSource() {
256      @Override
257      public InputStream openStream() throws IOException {
258        return decodingStream(encodedSource.openStream());
259      }
260    };
261  }
262
263  // Implementations for encoding/decoding
264
265  abstract int maxEncodedSize(int bytes);
266
267  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;
268
269  abstract int maxDecodedSize(int chars);
270
271  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
272
273  CharSequence trimTrailingPadding(CharSequence chars) {
274    return checkNotNull(chars);
275  }
276
277  // Modified encoding generators
278
279  /**
280   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
281   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
282   * section 3.2</a>, Padding of Encoded Data.
283   */
284  public abstract BaseEncoding omitPadding();
285
286  /**
287   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
288   * for padding.
289   *
290   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
291   *     separator
292   */
293  public abstract BaseEncoding withPadChar(char padChar);
294
295  /**
296   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
297   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
298   * are skipped over in decoding.
299   *
300   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
301   *     string, or if {@code n <= 0}
302   * @throws UnsupportedOperationException if this encoding already uses a separator
303   */
304  public abstract BaseEncoding withSeparator(String separator, int n);
305
306  /**
307   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
308   * uppercase letters. Padding and separator characters remain in their original case.
309   *
310   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
311   *     lower-case characters
312   */
313  public abstract BaseEncoding upperCase();
314
315  /**
316   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
317   * lowercase letters. Padding and separator characters remain in their original case.
318   *
319   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
320   *     lower-case characters
321   */
322  public abstract BaseEncoding lowerCase();
323
324  /**
325   * Returns an encoding that behaves equivalently to this encoding, but decodes letters without
326   * regard to case.
327   *
328   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
329   *     lower-case characters
330   * @since 32.0.0
331   */
332  public abstract BaseEncoding ignoreCase();
333
334  private static final BaseEncoding BASE64 =
335      new Base64Encoding(
336          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
337
338  /**
339   * The "base64" base encoding specified by <a
340   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
341   * (This is the same as the base 64 encoding from <a
342   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
343   *
344   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
345   * omitted} or {@linkplain #withPadChar(char) replaced}.
346   *
347   * <p>No line feeds are added by default, as per <a
348   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
349   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
350   */
351  public static BaseEncoding base64() {
352    return BASE64;
353  }
354
355  private static final BaseEncoding BASE64_URL =
356      new Base64Encoding(
357          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');
358
359  /**
360   * The "base64url" encoding specified by <a
361   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
362   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
363   * is the same as the base 64 encoding with URL and filename safe alphabet from <a
364   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
365   *
366   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
367   * omitted} or {@linkplain #withPadChar(char) replaced}.
368   *
369   * <p>No line feeds are added by default, as per <a
370   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
371   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
372   */
373  public static BaseEncoding base64Url() {
374    return BASE64_URL;
375  }
376
377  private static final BaseEncoding BASE32 =
378      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
379
380  /**
381   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
382   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
383   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
384   *
385   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
386   * omitted} or {@linkplain #withPadChar(char) replaced}.
387   *
388   * <p>No line feeds are added by default, as per <a
389   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
390   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
391   */
392  public static BaseEncoding base32() {
393    return BASE32;
394  }
395
396  private static final BaseEncoding BASE32_HEX =
397      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');
398
399  /**
400   * The "base32hex" encoding specified by <a
401   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
402   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
403   *
404   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
405   * omitted} or {@linkplain #withPadChar(char) replaced}.
406   *
407   * <p>No line feeds are added by default, as per <a
408   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
409   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
410   */
411  public static BaseEncoding base32Hex() {
412    return BASE32_HEX;
413  }
414
415  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");
416
417  /**
418   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
419   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a
420   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
421   * "hexadecimal" format.
422   *
423   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
424   * have no effect.
425   *
426   * <p>No line feeds are added by default, as per <a
427   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
428   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
429   */
430  public static BaseEncoding base16() {
431    return BASE16;
432  }
433
434  static final class Alphabet {
435    private final String name;
436    // this is meant to be immutable -- don't modify it!
437    private final char[] chars;
438    final int mask;
439    final int bitsPerChar;
440    final int charsPerChunk;
441    final int bytesPerChunk;
442    private final byte[] decodabet;
443    private final boolean[] validPadding;
444    private final boolean ignoreCase;
445
446    Alphabet(String name, char[] chars) {
447      this(name, chars, decodabetFor(chars), /* ignoreCase= */ false);
448    }
449
450    private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) {
451      this.name = checkNotNull(name);
452      this.chars = checkNotNull(chars);
453      try {
454        this.bitsPerChar = log2(chars.length, UNNECESSARY);
455      } catch (ArithmeticException e) {
456        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
457      }
458
459      // Compute how input bytes are chunked. For example, with base64 we chunk every 3 bytes into
460      // 4 characters. We have bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3.
461      // We're looking for the smallest charsPerChunk such that bitsPerChar * charsPerChunk is a
462      // multiple of 8. A multiple of 8 has 3 low zero bits, so we just need to figure out how many
463      // extra zero bits we need to add to the end of bitsPerChar to get 3 in total.
464      // The logic here would be wrong for bitsPerChar > 8, but since we require distinct ASCII
465      // characters that can't happen.
466      int zeroesInBitsPerChar = Integer.numberOfTrailingZeros(bitsPerChar);
467      this.charsPerChunk = 1 << (3 - zeroesInBitsPerChar);
468      this.bytesPerChunk = bitsPerChar >> zeroesInBitsPerChar;
469
470      this.mask = chars.length - 1;
471
472      this.decodabet = decodabet;
473
474      boolean[] validPadding = new boolean[charsPerChunk];
475      for (int i = 0; i < bytesPerChunk; i++) {
476        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
477      }
478      this.validPadding = validPadding;
479      this.ignoreCase = ignoreCase;
480    }
481
482    private static byte[] decodabetFor(char[] chars) {
483      byte[] decodabet = new byte[Ascii.MAX + 1];
484      Arrays.fill(decodabet, (byte) -1);
485      for (int i = 0; i < chars.length; i++) {
486        char c = chars[i];
487        checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
488        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
489        decodabet[c] = (byte) i;
490      }
491      return decodabet;
492    }
493
494    /** Returns an equivalent {@code Alphabet} except it ignores case. */
495    Alphabet ignoreCase() {
496      if (ignoreCase) {
497        return this;
498      }
499
500      // We can't use .clone() because of GWT.
501      byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length);
502      for (int upper = 'A'; upper <= 'Z'; upper++) {
503        int lower = upper | 0x20;
504        byte decodeUpper = decodabet[upper];
505        byte decodeLower = decodabet[lower];
506        if (decodeUpper == -1) {
507          newDecodabet[upper] = decodeLower;
508        } else {
509          checkState(
510              decodeLower == -1,
511              "Can't ignoreCase() since '%s' and '%s' encode different values",
512              (char) upper,
513              (char) lower);
514          newDecodabet[lower] = decodeUpper;
515        }
516      }
517      return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true);
518    }
519
520    char encode(int bits) {
521      return chars[bits];
522    }
523
524    boolean isValidPaddingStartPosition(int index) {
525      return validPadding[index % charsPerChunk];
526    }
527
528    boolean canDecode(char ch) {
529      return ch <= Ascii.MAX && decodabet[ch] != -1;
530    }
531
532    int decode(char ch) throws DecodingException {
533      if (ch > Ascii.MAX) {
534        throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
535      }
536      int result = decodabet[ch];
537      if (result == -1) {
538        if (ch <= 0x20 || ch == Ascii.MAX) {
539          throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
540        } else {
541          throw new DecodingException("Unrecognized character: " + ch);
542        }
543      }
544      return result;
545    }
546
547    private boolean hasLowerCase() {
548      for (char c : chars) {
549        if (Ascii.isLowerCase(c)) {
550          return true;
551        }
552      }
553      return false;
554    }
555
556    private boolean hasUpperCase() {
557      for (char c : chars) {
558        if (Ascii.isUpperCase(c)) {
559          return true;
560        }
561      }
562      return false;
563    }
564
565    Alphabet upperCase() {
566      if (!hasLowerCase()) {
567        return this;
568      }
569      checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
570      char[] upperCased = new char[chars.length];
571      for (int i = 0; i < chars.length; i++) {
572        upperCased[i] = Ascii.toUpperCase(chars[i]);
573      }
574      Alphabet upperCase = new Alphabet(name + ".upperCase()", upperCased);
575      return ignoreCase ? upperCase.ignoreCase() : upperCase;
576    }
577
578    Alphabet lowerCase() {
579      if (!hasUpperCase()) {
580        return this;
581      }
582      checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
583      char[] lowerCased = new char[chars.length];
584      for (int i = 0; i < chars.length; i++) {
585        lowerCased[i] = Ascii.toLowerCase(chars[i]);
586      }
587      Alphabet lowerCase = new Alphabet(name + ".lowerCase()", lowerCased);
588      return ignoreCase ? lowerCase.ignoreCase() : lowerCase;
589    }
590
591    public boolean matches(char c) {
592      return c < decodabet.length && decodabet[c] != -1;
593    }
594
595    @Override
596    public String toString() {
597      return name;
598    }
599
600    @Override
601    public boolean equals(@CheckForNull Object other) {
602      if (other instanceof Alphabet) {
603        Alphabet that = (Alphabet) other;
604        return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars);
605      }
606      return false;
607    }
608
609    @Override
610    public int hashCode() {
611      return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237);
612    }
613  }
614
615  private static class StandardBaseEncoding extends BaseEncoding {
616    final Alphabet alphabet;
617
618    @CheckForNull final Character paddingChar;
619
620    StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
621      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
622    }
623
624    StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
625      this.alphabet = checkNotNull(alphabet);
626      checkArgument(
627          paddingChar == null || !alphabet.matches(paddingChar),
628          "Padding character %s was already in alphabet",
629          paddingChar);
630      this.paddingChar = paddingChar;
631    }
632
633    @Override
634    int maxEncodedSize(int bytes) {
635      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
636    }
637
638    @J2ktIncompatible
639    @GwtIncompatible // Writer,OutputStream
640    @Override
641    public OutputStream encodingStream(Writer out) {
642      checkNotNull(out);
643      return new OutputStream() {
644        int bitBuffer = 0;
645        int bitBufferLength = 0;
646        int writtenChars = 0;
647
648        @Override
649        public void write(int b) throws IOException {
650          bitBuffer <<= 8;
651          bitBuffer |= b & 0xFF;
652          bitBufferLength += 8;
653          while (bitBufferLength >= alphabet.bitsPerChar) {
654            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
655            out.write(alphabet.encode(charIndex));
656            writtenChars++;
657            bitBufferLength -= alphabet.bitsPerChar;
658          }
659        }
660
661        @Override
662        public void flush() throws IOException {
663          out.flush();
664        }
665
666        @Override
667        public void close() throws IOException {
668          if (bitBufferLength > 0) {
669            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
670            out.write(alphabet.encode(charIndex));
671            writtenChars++;
672            if (paddingChar != null) {
673              while (writtenChars % alphabet.charsPerChunk != 0) {
674                out.write(paddingChar.charValue());
675                writtenChars++;
676              }
677            }
678          }
679          out.close();
680        }
681      };
682    }
683
684    @Override
685    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
686      checkNotNull(target);
687      checkPositionIndexes(off, off + len, bytes.length);
688      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
689        encodeChunkTo(target, bytes, off + i, min(alphabet.bytesPerChunk, len - i));
690      }
691    }
692
693    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
694      checkNotNull(target);
695      checkPositionIndexes(off, off + len, bytes.length);
696      checkArgument(len <= alphabet.bytesPerChunk);
697      long bitBuffer = 0;
698      for (int i = 0; i < len; ++i) {
699        bitBuffer |= bytes[off + i] & 0xFF;
700        bitBuffer <<= 8; // Add additional zero byte in the end.
701      }
702      // Position of first character is length of bitBuffer minus bitsPerChar.
703      int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
704      int bitsProcessed = 0;
705      while (bitsProcessed < len * 8) {
706        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
707        target.append(alphabet.encode(charIndex));
708        bitsProcessed += alphabet.bitsPerChar;
709      }
710      if (paddingChar != null) {
711        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
712          target.append(paddingChar.charValue());
713          bitsProcessed += alphabet.bitsPerChar;
714        }
715      }
716    }
717
718    @Override
719    int maxDecodedSize(int chars) {
720      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
721    }
722
723    @Override
724    CharSequence trimTrailingPadding(CharSequence chars) {
725      checkNotNull(chars);
726      if (paddingChar == null) {
727        return chars;
728      }
729      char padChar = paddingChar.charValue();
730      int l;
731      for (l = chars.length() - 1; l >= 0; l--) {
732        if (chars.charAt(l) != padChar) {
733          break;
734        }
735      }
736      return chars.subSequence(0, l + 1);
737    }
738
739    @Override
740    public boolean canDecode(CharSequence chars) {
741      checkNotNull(chars);
742      chars = trimTrailingPadding(chars);
743      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
744        return false;
745      }
746      for (int i = 0; i < chars.length(); i++) {
747        if (!alphabet.canDecode(chars.charAt(i))) {
748          return false;
749        }
750      }
751      return true;
752    }
753
754    @Override
755    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
756      checkNotNull(target);
757      chars = trimTrailingPadding(chars);
758      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
759        throw new DecodingException("Invalid input length " + chars.length());
760      }
761      int bytesWritten = 0;
762      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
763        long chunk = 0;
764        int charsProcessed = 0;
765        for (int i = 0; i < alphabet.charsPerChunk; i++) {
766          chunk <<= alphabet.bitsPerChar;
767          if (charIdx + i < chars.length()) {
768            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
769          }
770        }
771        int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
772        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
773          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
774        }
775      }
776      return bytesWritten;
777    }
778
779    @Override
780    @J2ktIncompatible
781    @GwtIncompatible // Reader,InputStream
782    public InputStream decodingStream(Reader reader) {
783      checkNotNull(reader);
784      return new InputStream() {
785        int bitBuffer = 0;
786        int bitBufferLength = 0;
787        int readChars = 0;
788        boolean hitPadding = false;
789
790        @Override
791        public int read() throws IOException {
792          while (true) {
793            int readChar = reader.read();
794            if (readChar == -1) {
795              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
796                throw new DecodingException("Invalid input length " + readChars);
797              }
798              return -1;
799            }
800            readChars++;
801            char ch = (char) readChar;
802            if (paddingChar != null && paddingChar.charValue() == ch) {
803              if (!hitPadding
804                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
805                throw new DecodingException("Padding cannot start at index " + readChars);
806              }
807              hitPadding = true;
808            } else if (hitPadding) {
809              throw new DecodingException(
810                  "Expected padding character but found '" + ch + "' at index " + readChars);
811            } else {
812              bitBuffer <<= alphabet.bitsPerChar;
813              bitBuffer |= alphabet.decode(ch);
814              bitBufferLength += alphabet.bitsPerChar;
815
816              if (bitBufferLength >= 8) {
817                bitBufferLength -= 8;
818                return (bitBuffer >> bitBufferLength) & 0xFF;
819              }
820            }
821          }
822        }
823
824        @Override
825        public int read(byte[] buf, int off, int len) throws IOException {
826          // Overriding this to work around the fact that InputStream's default implementation of
827          // this method will silently swallow exceptions thrown by the single-byte read() method
828          // (other than on the first call to it), which in this case can cause invalid encoded
829          // strings to not throw an exception.
830          // See https://github.com/google/guava/issues/3542
831          checkPositionIndexes(off, off + len, buf.length);
832
833          int i = off;
834          for (; i < off + len; i++) {
835            int b = read();
836            if (b == -1) {
837              int read = i - off;
838              return read == 0 ? -1 : read;
839            }
840            buf[i] = (byte) b;
841          }
842          return i - off;
843        }
844
845        @Override
846        public void close() throws IOException {
847          reader.close();
848        }
849      };
850    }
851
852    @Override
853    public BaseEncoding omitPadding() {
854      return (paddingChar == null) ? this : newInstance(alphabet, null);
855    }
856
857    @Override
858    public BaseEncoding withPadChar(char padChar) {
859      if (8 % alphabet.bitsPerChar == 0
860          || (paddingChar != null && paddingChar.charValue() == padChar)) {
861        return this;
862      } else {
863        return newInstance(alphabet, padChar);
864      }
865    }
866
867    @Override
868    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
869      for (int i = 0; i < separator.length(); i++) {
870        checkArgument(
871            !alphabet.matches(separator.charAt(i)),
872            "Separator (%s) cannot contain alphabet characters",
873            separator);
874      }
875      if (paddingChar != null) {
876        checkArgument(
877            separator.indexOf(paddingChar.charValue()) < 0,
878            "Separator (%s) cannot contain padding character",
879            separator);
880      }
881      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
882    }
883
    // Cached results of upperCase(), lowerCase() and ignoreCase() respectively. Each stays null
    // until the corresponding method is first called, which computes and stores the value.
    @LazyInit @CheckForNull private volatile BaseEncoding upperCase;
    @LazyInit @CheckForNull private volatile BaseEncoding lowerCase;
    @LazyInit @CheckForNull private volatile BaseEncoding ignoreCase;
887
888    @Override
889    public BaseEncoding upperCase() {
890      BaseEncoding result = upperCase;
891      if (result == null) {
892        Alphabet upper = alphabet.upperCase();
893        result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
894      }
895      return result;
896    }
897
898    @Override
899    public BaseEncoding lowerCase() {
900      BaseEncoding result = lowerCase;
901      if (result == null) {
902        Alphabet lower = alphabet.lowerCase();
903        result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
904      }
905      return result;
906    }
907
908    @Override
909    public BaseEncoding ignoreCase() {
910      BaseEncoding result = ignoreCase;
911      if (result == null) {
912        Alphabet ignore = alphabet.ignoreCase();
913        result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar);
914      }
915      return result;
916    }
917
918    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
919      return new StandardBaseEncoding(alphabet, paddingChar);
920    }
921
922    @Override
923    public String toString() {
924      StringBuilder builder = new StringBuilder("BaseEncoding.");
925      builder.append(alphabet);
926      if (8 % alphabet.bitsPerChar != 0) {
927        if (paddingChar == null) {
928          builder.append(".omitPadding()");
929        } else {
930          builder.append(".withPadChar('").append(paddingChar).append("')");
931        }
932      }
933      return builder.toString();
934    }
935
936    @Override
937    public boolean equals(@CheckForNull Object other) {
938      if (other instanceof StandardBaseEncoding) {
939        StandardBaseEncoding that = (StandardBaseEncoding) other;
940        return this.alphabet.equals(that.alphabet)
941            && Objects.equals(this.paddingChar, that.paddingChar);
942      }
943      return false;
944    }
945
946    @Override
947    public int hashCode() {
948      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
949    }
950  }
951
952  private static final class Base16Encoding extends StandardBaseEncoding {
953    final char[] encoding = new char[512];
954
955    Base16Encoding(String name, String alphabetChars) {
956      this(new Alphabet(name, alphabetChars.toCharArray()));
957    }
958
959    private Base16Encoding(Alphabet alphabet) {
960      super(alphabet, null);
961      checkArgument(alphabet.chars.length == 16);
962      for (int i = 0; i < 256; ++i) {
963        encoding[i] = alphabet.encode(i >>> 4);
964        encoding[i | 0x100] = alphabet.encode(i & 0xF);
965      }
966    }
967
968    @Override
969    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
970      checkNotNull(target);
971      checkPositionIndexes(off, off + len, bytes.length);
972      for (int i = 0; i < len; ++i) {
973        int b = bytes[off + i] & 0xFF;
974        target.append(encoding[b]);
975        target.append(encoding[b | 0x100]);
976      }
977    }
978
979    @Override
980    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
981      checkNotNull(target);
982      if (chars.length() % 2 == 1) {
983        throw new DecodingException("Invalid input length " + chars.length());
984      }
985      int bytesWritten = 0;
986      for (int i = 0; i < chars.length(); i += 2) {
987        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
988        target[bytesWritten++] = (byte) decoded;
989      }
990      return bytesWritten;
991    }
992
993    @Override
994    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
995      return new Base16Encoding(alphabet);
996    }
997  }
998
999  private static final class Base64Encoding extends StandardBaseEncoding {
1000    Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
1001      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
1002    }
1003
1004    private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
1005      super(alphabet, paddingChar);
1006      checkArgument(alphabet.chars.length == 64);
1007    }
1008
1009    @Override
1010    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1011      checkNotNull(target);
1012      checkPositionIndexes(off, off + len, bytes.length);
1013      int i = off;
1014      for (int remaining = len; remaining >= 3; remaining -= 3) {
1015        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
1016        target.append(alphabet.encode(chunk >>> 18));
1017        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
1018        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
1019        target.append(alphabet.encode(chunk & 0x3F));
1020      }
1021      if (i < off + len) {
1022        encodeChunkTo(target, bytes, i, off + len - i);
1023      }
1024    }
1025
1026    @Override
1027    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1028      checkNotNull(target);
1029      chars = trimTrailingPadding(chars);
1030      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
1031        throw new DecodingException("Invalid input length " + chars.length());
1032      }
1033      int bytesWritten = 0;
1034      for (int i = 0; i < chars.length(); ) {
1035        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
1036        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
1037        target[bytesWritten++] = (byte) (chunk >>> 16);
1038        if (i < chars.length()) {
1039          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
1040          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
1041          if (i < chars.length()) {
1042            chunk |= alphabet.decode(chars.charAt(i++));
1043            target[bytesWritten++] = (byte) (chunk & 0xFF);
1044          }
1045        }
1046      }
1047      return bytesWritten;
1048    }
1049
1050    @Override
1051    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
1052      return new Base64Encoding(alphabet, paddingChar);
1053    }
1054  }
1055
1056  @J2ktIncompatible
1057  @GwtIncompatible
1058  static Reader ignoringReader(Reader delegate, String toIgnore) {
1059    checkNotNull(delegate);
1060    checkNotNull(toIgnore);
1061    return new Reader() {
1062      @Override
1063      public int read() throws IOException {
1064        int readChar;
1065        do {
1066          readChar = delegate.read();
1067        } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
1068        return readChar;
1069      }
1070
1071      @Override
1072      public int read(char[] cbuf, int off, int len) throws IOException {
1073        throw new UnsupportedOperationException();
1074      }
1075
1076      @Override
1077      public void close() throws IOException {
1078        delegate.close();
1079      }
1080    };
1081  }
1082
1083  static Appendable separatingAppendable(
1084      Appendable delegate, String separator, int afterEveryChars) {
1085    checkNotNull(delegate);
1086    checkNotNull(separator);
1087    checkArgument(afterEveryChars > 0);
1088    return new Appendable() {
1089      int charsUntilSeparator = afterEveryChars;
1090
1091      @Override
1092      public Appendable append(char c) throws IOException {
1093        if (charsUntilSeparator == 0) {
1094          delegate.append(separator);
1095          charsUntilSeparator = afterEveryChars;
1096        }
1097        delegate.append(c);
1098        charsUntilSeparator--;
1099        return this;
1100      }
1101
1102      @Override
1103      public Appendable append(@CheckForNull CharSequence chars, int off, int len) {
1104        throw new UnsupportedOperationException();
1105      }
1106
1107      @Override
1108      public Appendable append(@CheckForNull CharSequence chars) {
1109        throw new UnsupportedOperationException();
1110      }
1111    };
1112  }
1113
1114  @J2ktIncompatible
1115  @GwtIncompatible // Writer
1116  static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) {
1117    Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars);
1118    return new Writer() {
1119      @Override
1120      public void write(int c) throws IOException {
1121        separatingAppendable.append((char) c);
1122      }
1123
1124      @Override
1125      public void write(char[] chars, int off, int len) throws IOException {
1126        throw new UnsupportedOperationException();
1127      }
1128
1129      @Override
1130      public void flush() throws IOException {
1131        delegate.flush();
1132      }
1133
1134      @Override
1135      public void close() throws IOException {
1136        delegate.close();
1137      }
1138    };
1139  }
1140
1141  static final class SeparatedBaseEncoding extends BaseEncoding {
1142    private final BaseEncoding delegate;
1143    private final String separator;
1144    private final int afterEveryChars;
1145
1146    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1147      this.delegate = checkNotNull(delegate);
1148      this.separator = checkNotNull(separator);
1149      this.afterEveryChars = afterEveryChars;
1150      checkArgument(
1151          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1152    }
1153
1154    @Override
1155    CharSequence trimTrailingPadding(CharSequence chars) {
1156      return delegate.trimTrailingPadding(chars);
1157    }
1158
1159    @Override
1160    int maxEncodedSize(int bytes) {
1161      int unseparatedSize = delegate.maxEncodedSize(bytes);
1162      return unseparatedSize
1163          + separator.length() * divide(max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1164    }
1165
1166    @J2ktIncompatible
1167    @GwtIncompatible // Writer,OutputStream
1168    @Override
1169    public OutputStream encodingStream(Writer output) {
1170      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1171    }
1172
1173    @Override
1174    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1175      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1176    }
1177
1178    @Override
1179    int maxDecodedSize(int chars) {
1180      return delegate.maxDecodedSize(chars);
1181    }
1182
1183    @Override
1184    public boolean canDecode(CharSequence chars) {
1185      StringBuilder builder = new StringBuilder();
1186      for (int i = 0; i < chars.length(); i++) {
1187        char c = chars.charAt(i);
1188        if (separator.indexOf(c) < 0) {
1189          builder.append(c);
1190        }
1191      }
1192      return delegate.canDecode(builder);
1193    }
1194
1195    @Override
1196    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1197      StringBuilder stripped = new StringBuilder(chars.length());
1198      for (int i = 0; i < chars.length(); i++) {
1199        char c = chars.charAt(i);
1200        if (separator.indexOf(c) < 0) {
1201          stripped.append(c);
1202        }
1203      }
1204      return delegate.decodeTo(target, stripped);
1205    }
1206
1207    @Override
1208    @J2ktIncompatible
1209    @GwtIncompatible // Reader,InputStream
1210    public InputStream decodingStream(Reader reader) {
1211      return delegate.decodingStream(ignoringReader(reader, separator));
1212    }
1213
1214    @Override
1215    public BaseEncoding omitPadding() {
1216      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1217    }
1218
1219    @Override
1220    public BaseEncoding withPadChar(char padChar) {
1221      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1222    }
1223
1224    @Override
1225    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1226      throw new UnsupportedOperationException("Already have a separator");
1227    }
1228
1229    @Override
1230    public BaseEncoding upperCase() {
1231      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1232    }
1233
1234    @Override
1235    public BaseEncoding lowerCase() {
1236      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1237    }
1238
1239    @Override
1240    public BaseEncoding ignoreCase() {
1241      return delegate.ignoreCase().withSeparator(separator, afterEveryChars);
1242    }
1243
1244    @Override
1245    public String toString() {
1246      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1247    }
1248  }
1249}