001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.Ascii;
030import com.google.common.base.Objects;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.OutputStream;
034import java.io.Reader;
035import java.io.Writer;
036import java.util.Arrays;
037import org.checkerframework.checker.nullness.compatqual.MonotonicNonNullDecl;
038import org.checkerframework.checker.nullness.compatqual.NullableDecl;
039
040/**
041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
042 * strings. This class includes several constants for encoding schemes specified by <a
043 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
044 *
045 * <pre>{@code
046 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))
047 * }</pre>
048 *
049 * <p>returns the string {@code "MZXW6==="}, and
050 *
051 * <pre>{@code
052 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
053 * }</pre>
054 *
055 * <p>...returns the ASCII bytes of the string {@code "foo"}.
056 *
057 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
058 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
059 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
060 * behavior:
061 *
062 * <pre>{@code
063 * BaseEncoding.base16().lowerCase().decode("deadbeef");
064 * }</pre>
065 *
066 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
067 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
068 *
069 * <pre>{@code
070 * // Do NOT do this
071 * BaseEncoding hex = BaseEncoding.base16();
072 * hex.lowerCase(); // does nothing!
073 * return hex.decode("deadbeef"); // throws an IllegalArgumentException
074 * }</pre>
075 *
076 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code
077 * x}, but the reverse does not necessarily hold.
078 *
079 * <table>
080 * <caption>Encodings</caption>
081 * <tr>
082 * <th>Encoding
083 * <th>Alphabet
084 * <th>{@code char:byte} ratio
085 * <th>Default padding
086 * <th>Comments
087 * <tr>
088 * <td>{@link #base16()}
089 * <td>0-9 A-F
090 * <td>2.00
091 * <td>N/A
092 * <td>Traditional hexadecimal. Defaults to upper case.
093 * <tr>
094 * <td>{@link #base32()}
095 * <td>A-Z 2-7
096 * <td>1.60
097 * <td>=
098 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
099 * <tr>
100 * <td>{@link #base32Hex()}
101 * <td>0-9 A-V
102 * <td>1.60
103 * <td>=
104 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
105 * <tr>
106 * <td>{@link #base64()}
107 * <td>A-Z a-z 0-9 + /
108 * <td>1.33
109 * <td>=
110 * <td>
111 * <tr>
112 * <td>{@link #base64Url()}
113 * <td>A-Z a-z 0-9 - _
114 * <td>1.33
115 * <td>=
116 * <td>Safe to use as filenames, or to pass in URLs without escaping
117 * </table>
118 *
119 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
120 *
121 * @author Louis Wasserman
122 * @since 14.0
123 */
124@GwtCompatible(emulated = true)
125public abstract class BaseEncoding {
126  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
127
128  BaseEncoding() {}
129
130  /**
131   * Exception indicating invalid base-encoded input encountered while decoding.
132   *
133   * @author Louis Wasserman
134   * @since 15.0
135   */
136  public static final class DecodingException extends IOException {
137    DecodingException(String message) {
138      super(message);
139    }
140
141    DecodingException(Throwable cause) {
142      super(cause);
143    }
144  }
145
146  /** Encodes the specified byte array, and returns the encoded {@code String}. */
147  public String encode(byte[] bytes) {
148    return encode(bytes, 0, bytes.length);
149  }
150
151  /**
152   * Encodes the specified range of the specified byte array, and returns the encoded {@code
153   * String}.
154   */
155  public final String encode(byte[] bytes, int off, int len) {
156    checkPositionIndexes(off, off + len, bytes.length);
157    StringBuilder result = new StringBuilder(maxEncodedSize(len));
158    try {
159      encodeTo(result, bytes, off, len);
160    } catch (IOException impossible) {
161      throw new AssertionError(impossible);
162    }
163    return result.toString();
164  }
165
166  /**
167   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
168   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code
169   * Writer}.
170   */
171  @GwtIncompatible // Writer,OutputStream
172  public abstract OutputStream encodingStream(Writer writer);
173
174  /**
175   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
176   */
177  @GwtIncompatible // ByteSink,CharSink
178  public final ByteSink encodingSink(final CharSink encodedSink) {
179    checkNotNull(encodedSink);
180    return new ByteSink() {
181      @Override
182      public OutputStream openStream() throws IOException {
183        return encodingStream(encodedSink.openStream());
184      }
185    };
186  }
187
188  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
189
190  private static byte[] extract(byte[] result, int length) {
191    if (length == result.length) {
192      return result;
193    } else {
194      byte[] trunc = new byte[length];
195      System.arraycopy(result, 0, trunc, 0, length);
196      return trunc;
197    }
198  }
199
200  /**
201   * Determines whether the specified character sequence is a valid encoded string according to this
202   * encoding.
203   *
204   * @since 20.0
205   */
206  public abstract boolean canDecode(CharSequence chars);
207
208  /**
209   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
210   * inverse operation to {@link #encode(byte[])}.
211   *
212   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
213   *     encoding.
214   */
215  public final byte[] decode(CharSequence chars) {
216    try {
217      return decodeChecked(chars);
218    } catch (DecodingException badInput) {
219      throw new IllegalArgumentException(badInput);
220    }
221  }
222
223  /**
224   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
225   * inverse operation to {@link #encode(byte[])}.
226   *
227   * @throws DecodingException if the input is not a valid encoded string according to this
228   *     encoding.
229   */ final byte[] decodeChecked(CharSequence chars)
230      throws DecodingException {
231    chars = trimTrailingPadding(chars);
232    byte[] tmp = new byte[maxDecodedSize(chars.length())];
233    int len = decodeTo(tmp, chars);
234    return extract(tmp, len);
235  }
236
237  /**
238   * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code
239   * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors.
240   */
241  @GwtIncompatible // Reader,InputStream
242  public abstract InputStream decodingStream(Reader reader);
243
244  /**
245   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code
246   * CharSource}.
247   */
248  @GwtIncompatible // ByteSource,CharSource
249  public final ByteSource decodingSource(final CharSource encodedSource) {
250    checkNotNull(encodedSource);
251    return new ByteSource() {
252      @Override
253      public InputStream openStream() throws IOException {
254        return decodingStream(encodedSource.openStream());
255      }
256    };
257  }
258
259  // Implementations for encoding/decoding
260
261  abstract int maxEncodedSize(int bytes);
262
263  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;
264
265  abstract int maxDecodedSize(int chars);
266
267  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
268
269  CharSequence trimTrailingPadding(CharSequence chars) {
270    return checkNotNull(chars);
271  }
272
273  // Modified encoding generators
274
275  /**
276   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
277   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
278   * section 3.2</a>, Padding of Encoded Data.
279   */
280  public abstract BaseEncoding omitPadding();
281
282  /**
283   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
284   * for padding.
285   *
286   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
287   *     separator
288   */
289  public abstract BaseEncoding withPadChar(char padChar);
290
291  /**
292   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
293   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
294   * are skipped over in decoding.
295   *
296   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
297   *     string, or if {@code n <= 0}
298   * @throws UnsupportedOperationException if this encoding already uses a separator
299   */
300  public abstract BaseEncoding withSeparator(String separator, int n);
301
302  /**
303   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
304   * uppercase letters. Padding and separator characters remain in their original case.
305   *
306   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
307   *     lower-case characters
308   */
309  public abstract BaseEncoding upperCase();
310
311  /**
312   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
313   * lowercase letters. Padding and separator characters remain in their original case.
314   *
315   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
316   *     lower-case characters
317   */
318  public abstract BaseEncoding lowerCase();
319
320  private static final BaseEncoding BASE64 =
321      new Base64Encoding(
322          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
323
324  /**
325   * The "base64" base encoding specified by <a
326   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
327   * (This is the same as the base 64 encoding from <a
328   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
329   *
330   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
331   * omitted} or {@linkplain #withPadChar(char) replaced}.
332   *
333   * <p>No line feeds are added by default, as per <a
334   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
335   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
336   */
337  public static BaseEncoding base64() {
338    return BASE64;
339  }
340
341  private static final BaseEncoding BASE64_URL =
342      new Base64Encoding(
343          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');
344
345  /**
346   * The "base64url" encoding specified by <a
347   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
348   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
349   * is the same as the base 64 encoding with URL and filename safe alphabet from <a
350   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
351   *
352   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
353   * omitted} or {@linkplain #withPadChar(char) replaced}.
354   *
355   * <p>No line feeds are added by default, as per <a
356   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
357   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
358   */
359  public static BaseEncoding base64Url() {
360    return BASE64_URL;
361  }
362
363  private static final BaseEncoding BASE32 =
364      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
365
366  /**
367   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
368   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
369   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
370   *
371   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
372   * omitted} or {@linkplain #withPadChar(char) replaced}.
373   *
374   * <p>No line feeds are added by default, as per <a
375   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
376   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
377   */
378  public static BaseEncoding base32() {
379    return BASE32;
380  }
381
382  private static final BaseEncoding BASE32_HEX =
383      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');
384
385  /**
386   * The "base32hex" encoding specified by <a
387   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
388   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
389   *
390   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
391   * omitted} or {@linkplain #withPadChar(char) replaced}.
392   *
393   * <p>No line feeds are added by default, as per <a
394   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
395   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
396   */
397  public static BaseEncoding base32Hex() {
398    return BASE32_HEX;
399  }
400
401  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");
402
403  /**
404   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
405   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a
406   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
407   * "hexadecimal" format.
408   *
409   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
410   * have no effect.
411   *
412   * <p>No line feeds are added by default, as per <a
413   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
414   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
415   */
416  public static BaseEncoding base16() {
417    return BASE16;
418  }
419
420  private static final class Alphabet {
421    private final String name;
422    // this is meant to be immutable -- don't modify it!
423    private final char[] chars;
424    final int mask;
425    final int bitsPerChar;
426    final int charsPerChunk;
427    final int bytesPerChunk;
428    private final byte[] decodabet;
429    private final boolean[] validPadding;
430
431    Alphabet(String name, char[] chars) {
432      this.name = checkNotNull(name);
433      this.chars = checkNotNull(chars);
434      try {
435        this.bitsPerChar = log2(chars.length, UNNECESSARY);
436      } catch (ArithmeticException e) {
437        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
438      }
439
440      /*
441       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
442       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
443       */
444      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
445      try {
446        this.charsPerChunk = 8 / gcd;
447        this.bytesPerChunk = bitsPerChar / gcd;
448      } catch (ArithmeticException e) {
449        throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e);
450      }
451
452      this.mask = chars.length - 1;
453
454      byte[] decodabet = new byte[Ascii.MAX + 1];
455      Arrays.fill(decodabet, (byte) -1);
456      for (int i = 0; i < chars.length; i++) {
457        char c = chars[i];
458        checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
459        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
460        decodabet[c] = (byte) i;
461      }
462      this.decodabet = decodabet;
463
464      boolean[] validPadding = new boolean[charsPerChunk];
465      for (int i = 0; i < bytesPerChunk; i++) {
466        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
467      }
468      this.validPadding = validPadding;
469    }
470
471    char encode(int bits) {
472      return chars[bits];
473    }
474
475    boolean isValidPaddingStartPosition(int index) {
476      return validPadding[index % charsPerChunk];
477    }
478
479    boolean canDecode(char ch) {
480      return ch <= Ascii.MAX && decodabet[ch] != -1;
481    }
482
483    int decode(char ch) throws DecodingException {
484      if (ch > Ascii.MAX) {
485        throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
486      }
487      int result = decodabet[ch];
488      if (result == -1) {
489        if (ch <= 0x20 || ch == Ascii.MAX) {
490          throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
491        } else {
492          throw new DecodingException("Unrecognized character: " + ch);
493        }
494      }
495      return result;
496    }
497
498    private boolean hasLowerCase() {
499      for (char c : chars) {
500        if (Ascii.isLowerCase(c)) {
501          return true;
502        }
503      }
504      return false;
505    }
506
507    private boolean hasUpperCase() {
508      for (char c : chars) {
509        if (Ascii.isUpperCase(c)) {
510          return true;
511        }
512      }
513      return false;
514    }
515
516    Alphabet upperCase() {
517      if (!hasLowerCase()) {
518        return this;
519      } else {
520        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
521        char[] upperCased = new char[chars.length];
522        for (int i = 0; i < chars.length; i++) {
523          upperCased[i] = Ascii.toUpperCase(chars[i]);
524        }
525        return new Alphabet(name + ".upperCase()", upperCased);
526      }
527    }
528
529    Alphabet lowerCase() {
530      if (!hasUpperCase()) {
531        return this;
532      } else {
533        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
534        char[] lowerCased = new char[chars.length];
535        for (int i = 0; i < chars.length; i++) {
536          lowerCased[i] = Ascii.toLowerCase(chars[i]);
537        }
538        return new Alphabet(name + ".lowerCase()", lowerCased);
539      }
540    }
541
542    public boolean matches(char c) {
543      return c < decodabet.length && decodabet[c] != -1;
544    }
545
546    @Override
547    public String toString() {
548      return name;
549    }
550
551    @Override
552    public boolean equals(@NullableDecl Object other) {
553      if (other instanceof Alphabet) {
554        Alphabet that = (Alphabet) other;
555        return Arrays.equals(this.chars, that.chars);
556      }
557      return false;
558    }
559
560    @Override
561    public int hashCode() {
562      return Arrays.hashCode(chars);
563    }
564  }
565
566  static class StandardBaseEncoding extends BaseEncoding {
567    // TODO(lowasser): provide a useful toString
568    final Alphabet alphabet;
569
570    @NullableDecl final Character paddingChar;
571
572    StandardBaseEncoding(String name, String alphabetChars, @NullableDecl Character paddingChar) {
573      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
574    }
575
576    StandardBaseEncoding(Alphabet alphabet, @NullableDecl Character paddingChar) {
577      this.alphabet = checkNotNull(alphabet);
578      checkArgument(
579          paddingChar == null || !alphabet.matches(paddingChar),
580          "Padding character %s was already in alphabet",
581          paddingChar);
582      this.paddingChar = paddingChar;
583    }
584
585    @Override
586    int maxEncodedSize(int bytes) {
587      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
588    }
589
590    @GwtIncompatible // Writer,OutputStream
591    @Override
592    public OutputStream encodingStream(final Writer out) {
593      checkNotNull(out);
594      return new OutputStream() {
595        int bitBuffer = 0;
596        int bitBufferLength = 0;
597        int writtenChars = 0;
598
599        @Override
600        public void write(int b) throws IOException {
601          bitBuffer <<= 8;
602          bitBuffer |= b & 0xFF;
603          bitBufferLength += 8;
604          while (bitBufferLength >= alphabet.bitsPerChar) {
605            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
606            out.write(alphabet.encode(charIndex));
607            writtenChars++;
608            bitBufferLength -= alphabet.bitsPerChar;
609          }
610        }
611
612        @Override
613        public void flush() throws IOException {
614          out.flush();
615        }
616
617        @Override
618        public void close() throws IOException {
619          if (bitBufferLength > 0) {
620            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
621            out.write(alphabet.encode(charIndex));
622            writtenChars++;
623            if (paddingChar != null) {
624              while (writtenChars % alphabet.charsPerChunk != 0) {
625                out.write(paddingChar.charValue());
626                writtenChars++;
627              }
628            }
629          }
630          out.close();
631        }
632      };
633    }
634
635    @Override
636    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
637      checkNotNull(target);
638      checkPositionIndexes(off, off + len, bytes.length);
639      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
640        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
641      }
642    }
643
644    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
645      checkNotNull(target);
646      checkPositionIndexes(off, off + len, bytes.length);
647      checkArgument(len <= alphabet.bytesPerChunk);
648      long bitBuffer = 0;
649      for (int i = 0; i < len; ++i) {
650        bitBuffer |= bytes[off + i] & 0xFF;
651        bitBuffer <<= 8; // Add additional zero byte in the end.
652      }
653      // Position of first character is length of bitBuffer minus bitsPerChar.
654      final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
655      int bitsProcessed = 0;
656      while (bitsProcessed < len * 8) {
657        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
658        target.append(alphabet.encode(charIndex));
659        bitsProcessed += alphabet.bitsPerChar;
660      }
661      if (paddingChar != null) {
662        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
663          target.append(paddingChar.charValue());
664          bitsProcessed += alphabet.bitsPerChar;
665        }
666      }
667    }
668
669    @Override
670    int maxDecodedSize(int chars) {
671      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
672    }
673
674    @Override
675    CharSequence trimTrailingPadding(CharSequence chars) {
676      checkNotNull(chars);
677      if (paddingChar == null) {
678        return chars;
679      }
680      char padChar = paddingChar.charValue();
681      int l;
682      for (l = chars.length() - 1; l >= 0; l--) {
683        if (chars.charAt(l) != padChar) {
684          break;
685        }
686      }
687      return chars.subSequence(0, l + 1);
688    }
689
690    @Override
691    public boolean canDecode(CharSequence chars) {
692      checkNotNull(chars);
693      chars = trimTrailingPadding(chars);
694      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
695        return false;
696      }
697      for (int i = 0; i < chars.length(); i++) {
698        if (!alphabet.canDecode(chars.charAt(i))) {
699          return false;
700        }
701      }
702      return true;
703    }
704
705    @Override
706    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
707      checkNotNull(target);
708      chars = trimTrailingPadding(chars);
709      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
710        throw new DecodingException("Invalid input length " + chars.length());
711      }
712      int bytesWritten = 0;
713      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
714        long chunk = 0;
715        int charsProcessed = 0;
716        for (int i = 0; i < alphabet.charsPerChunk; i++) {
717          chunk <<= alphabet.bitsPerChar;
718          if (charIdx + i < chars.length()) {
719            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
720          }
721        }
722        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
723        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
724          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
725        }
726      }
727      return bytesWritten;
728    }
729
730    @Override
731    @GwtIncompatible // Reader,InputStream
732    public InputStream decodingStream(final Reader reader) {
733      checkNotNull(reader);
734      return new InputStream() {
735        int bitBuffer = 0;
736        int bitBufferLength = 0;
737        int readChars = 0;
738        boolean hitPadding = false;
739
740        @Override
741        public int read() throws IOException {
742          while (true) {
743            int readChar = reader.read();
744            if (readChar == -1) {
745              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
746                throw new DecodingException("Invalid input length " + readChars);
747              }
748              return -1;
749            }
750            readChars++;
751            char ch = (char) readChar;
752            if (paddingChar != null && paddingChar.charValue() == ch) {
753              if (!hitPadding
754                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
755                throw new DecodingException("Padding cannot start at index " + readChars);
756              }
757              hitPadding = true;
758            } else if (hitPadding) {
759              throw new DecodingException(
760                  "Expected padding character but found '" + ch + "' at index " + readChars);
761            } else {
762              bitBuffer <<= alphabet.bitsPerChar;
763              bitBuffer |= alphabet.decode(ch);
764              bitBufferLength += alphabet.bitsPerChar;
765
766              if (bitBufferLength >= 8) {
767                bitBufferLength -= 8;
768                return (bitBuffer >> bitBufferLength) & 0xFF;
769              }
770            }
771          }
772        }
773
774        @Override
775        public int read(byte[] buf, int off, int len) throws IOException {
776          // Overriding this to work around the fact that InputStream's default implementation of
777          // this method will silently swallow exceptions thrown by the single-byte read() method
778          // (other than on the first call to it), which in this case can cause invalid encoded
779          // strings to not throw an exception.
780          // See https://github.com/google/guava/issues/3542
781          checkPositionIndexes(off, off + len, buf.length);
782
783          int i = off;
784          for (; i < off + len; i++) {
785            int b = read();
786            if (b == -1) {
787              int read = i - off;
788              return read == 0 ? -1 : read;
789            }
790            buf[i] = (byte) b;
791          }
792          return i - off;
793        }
794
795        @Override
796        public void close() throws IOException {
797          reader.close();
798        }
799      };
800    }
801
802    @Override
803    public BaseEncoding omitPadding() {
804      return (paddingChar == null) ? this : newInstance(alphabet, null);
805    }
806
807    @Override
808    public BaseEncoding withPadChar(char padChar) {
809      if (8 % alphabet.bitsPerChar == 0
810          || (paddingChar != null && paddingChar.charValue() == padChar)) {
811        return this;
812      } else {
813        return newInstance(alphabet, padChar);
814      }
815    }
816
817    @Override
818    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
819      for (int i = 0; i < separator.length(); i++) {
820        checkArgument(
821            !alphabet.matches(separator.charAt(i)),
822            "Separator (%s) cannot contain alphabet characters",
823            separator);
824      }
825      if (paddingChar != null) {
826        checkArgument(
827            separator.indexOf(paddingChar.charValue()) < 0,
828            "Separator (%s) cannot contain padding character",
829            separator);
830      }
831      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
832    }
833
834    @MonotonicNonNullDecl private transient BaseEncoding upperCase;
835    @MonotonicNonNullDecl private transient BaseEncoding lowerCase;
836
837    @Override
838    public BaseEncoding upperCase() {
839      BaseEncoding result = upperCase;
840      if (result == null) {
841        Alphabet upper = alphabet.upperCase();
842        result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
843      }
844      return result;
845    }
846
847    @Override
848    public BaseEncoding lowerCase() {
849      BaseEncoding result = lowerCase;
850      if (result == null) {
851        Alphabet lower = alphabet.lowerCase();
852        result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
853      }
854      return result;
855    }
856
857    BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) {
858      return new StandardBaseEncoding(alphabet, paddingChar);
859    }
860
861    @Override
862    public String toString() {
863      StringBuilder builder = new StringBuilder("BaseEncoding.");
864      builder.append(alphabet.toString());
865      if (8 % alphabet.bitsPerChar != 0) {
866        if (paddingChar == null) {
867          builder.append(".omitPadding()");
868        } else {
869          builder.append(".withPadChar('").append(paddingChar).append("')");
870        }
871      }
872      return builder.toString();
873    }
874
875    @Override
876    public boolean equals(@NullableDecl Object other) {
877      if (other instanceof StandardBaseEncoding) {
878        StandardBaseEncoding that = (StandardBaseEncoding) other;
879        return this.alphabet.equals(that.alphabet)
880            && Objects.equal(this.paddingChar, that.paddingChar);
881      }
882      return false;
883    }
884
885    @Override
886    public int hashCode() {
887      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
888    }
889  }
890
891  static final class Base16Encoding extends StandardBaseEncoding {
892    final char[] encoding = new char[512];
893
894    Base16Encoding(String name, String alphabetChars) {
895      this(new Alphabet(name, alphabetChars.toCharArray()));
896    }
897
898    private Base16Encoding(Alphabet alphabet) {
899      super(alphabet, null);
900      checkArgument(alphabet.chars.length == 16);
901      for (int i = 0; i < 256; ++i) {
902        encoding[i] = alphabet.encode(i >>> 4);
903        encoding[i | 0x100] = alphabet.encode(i & 0xF);
904      }
905    }
906
907    @Override
908    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
909      checkNotNull(target);
910      checkPositionIndexes(off, off + len, bytes.length);
911      for (int i = 0; i < len; ++i) {
912        int b = bytes[off + i] & 0xFF;
913        target.append(encoding[b]);
914        target.append(encoding[b | 0x100]);
915      }
916    }
917
918    @Override
919    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
920      checkNotNull(target);
921      if (chars.length() % 2 == 1) {
922        throw new DecodingException("Invalid input length " + chars.length());
923      }
924      int bytesWritten = 0;
925      for (int i = 0; i < chars.length(); i += 2) {
926        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
927        target[bytesWritten++] = (byte) decoded;
928      }
929      return bytesWritten;
930    }
931
932    @Override
933    BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) {
934      return new Base16Encoding(alphabet);
935    }
936  }
937
938  static final class Base64Encoding extends StandardBaseEncoding {
939    Base64Encoding(String name, String alphabetChars, @NullableDecl Character paddingChar) {
940      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
941    }
942
943    private Base64Encoding(Alphabet alphabet, @NullableDecl Character paddingChar) {
944      super(alphabet, paddingChar);
945      checkArgument(alphabet.chars.length == 64);
946    }
947
948    @Override
949    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
950      checkNotNull(target);
951      checkPositionIndexes(off, off + len, bytes.length);
952      int i = off;
953      for (int remaining = len; remaining >= 3; remaining -= 3) {
954        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
955        target.append(alphabet.encode(chunk >>> 18));
956        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
957        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
958        target.append(alphabet.encode(chunk & 0x3F));
959      }
960      if (i < off + len) {
961        encodeChunkTo(target, bytes, i, off + len - i);
962      }
963    }
964
965    @Override
966    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
967      checkNotNull(target);
968      chars = trimTrailingPadding(chars);
969      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
970        throw new DecodingException("Invalid input length " + chars.length());
971      }
972      int bytesWritten = 0;
973      for (int i = 0; i < chars.length(); ) {
974        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
975        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
976        target[bytesWritten++] = (byte) (chunk >>> 16);
977        if (i < chars.length()) {
978          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
979          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
980          if (i < chars.length()) {
981            chunk |= alphabet.decode(chars.charAt(i++));
982            target[bytesWritten++] = (byte) (chunk & 0xFF);
983          }
984        }
985      }
986      return bytesWritten;
987    }
988
989    @Override
990    BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) {
991      return new Base64Encoding(alphabet, paddingChar);
992    }
993  }
994
995  @GwtIncompatible
996  static Reader ignoringReader(final Reader delegate, final String toIgnore) {
997    checkNotNull(delegate);
998    checkNotNull(toIgnore);
999    return new Reader() {
1000      @Override
1001      public int read() throws IOException {
1002        int readChar;
1003        do {
1004          readChar = delegate.read();
1005        } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
1006        return readChar;
1007      }
1008
1009      @Override
1010      public int read(char[] cbuf, int off, int len) throws IOException {
1011        throw new UnsupportedOperationException();
1012      }
1013
1014      @Override
1015      public void close() throws IOException {
1016        delegate.close();
1017      }
1018    };
1019  }
1020
1021  static Appendable separatingAppendable(
1022      final Appendable delegate, final String separator, final int afterEveryChars) {
1023    checkNotNull(delegate);
1024    checkNotNull(separator);
1025    checkArgument(afterEveryChars > 0);
1026    return new Appendable() {
1027      int charsUntilSeparator = afterEveryChars;
1028
1029      @Override
1030      public Appendable append(char c) throws IOException {
1031        if (charsUntilSeparator == 0) {
1032          delegate.append(separator);
1033          charsUntilSeparator = afterEveryChars;
1034        }
1035        delegate.append(c);
1036        charsUntilSeparator--;
1037        return this;
1038      }
1039
1040      @Override
1041      public Appendable append(@NullableDecl CharSequence chars, int off, int len)
1042          throws IOException {
1043        throw new UnsupportedOperationException();
1044      }
1045
1046      @Override
1047      public Appendable append(@NullableDecl CharSequence chars) throws IOException {
1048        throw new UnsupportedOperationException();
1049      }
1050    };
1051  }
1052
1053  @GwtIncompatible // Writer
1054  static Writer separatingWriter(
1055      final Writer delegate, final String separator, final int afterEveryChars) {
1056    final Appendable seperatingAppendable =
1057        separatingAppendable(delegate, separator, afterEveryChars);
1058    return new Writer() {
1059      @Override
1060      public void write(int c) throws IOException {
1061        seperatingAppendable.append((char) c);
1062      }
1063
1064      @Override
1065      public void write(char[] chars, int off, int len) throws IOException {
1066        throw new UnsupportedOperationException();
1067      }
1068
1069      @Override
1070      public void flush() throws IOException {
1071        delegate.flush();
1072      }
1073
1074      @Override
1075      public void close() throws IOException {
1076        delegate.close();
1077      }
1078    };
1079  }
1080
1081  static final class SeparatedBaseEncoding extends BaseEncoding {
1082    private final BaseEncoding delegate;
1083    private final String separator;
1084    private final int afterEveryChars;
1085
1086    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1087      this.delegate = checkNotNull(delegate);
1088      this.separator = checkNotNull(separator);
1089      this.afterEveryChars = afterEveryChars;
1090      checkArgument(
1091          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1092    }
1093
1094    @Override
1095    CharSequence trimTrailingPadding(CharSequence chars) {
1096      return delegate.trimTrailingPadding(chars);
1097    }
1098
1099    @Override
1100    int maxEncodedSize(int bytes) {
1101      int unseparatedSize = delegate.maxEncodedSize(bytes);
1102      return unseparatedSize
1103          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1104    }
1105
1106    @GwtIncompatible // Writer,OutputStream
1107    @Override
1108    public OutputStream encodingStream(final Writer output) {
1109      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1110    }
1111
1112    @Override
1113    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1114      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1115    }
1116
1117    @Override
1118    int maxDecodedSize(int chars) {
1119      return delegate.maxDecodedSize(chars);
1120    }
1121
1122    @Override
1123    public boolean canDecode(CharSequence chars) {
1124      StringBuilder builder = new StringBuilder();
1125      for (int i = 0; i < chars.length(); i++) {
1126        char c = chars.charAt(i);
1127        if (separator.indexOf(c) < 0) {
1128          builder.append(c);
1129        }
1130      }
1131      return delegate.canDecode(builder);
1132    }
1133
1134    @Override
1135    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1136      StringBuilder stripped = new StringBuilder(chars.length());
1137      for (int i = 0; i < chars.length(); i++) {
1138        char c = chars.charAt(i);
1139        if (separator.indexOf(c) < 0) {
1140          stripped.append(c);
1141        }
1142      }
1143      return delegate.decodeTo(target, stripped);
1144    }
1145
1146    @Override
1147    @GwtIncompatible // Reader,InputStream
1148    public InputStream decodingStream(final Reader reader) {
1149      return delegate.decodingStream(ignoringReader(reader, separator));
1150    }
1151
1152    @Override
1153    public BaseEncoding omitPadding() {
1154      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1155    }
1156
1157    @Override
1158    public BaseEncoding withPadChar(char padChar) {
1159      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1160    }
1161
1162    @Override
1163    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1164      throw new UnsupportedOperationException("Already have a separator");
1165    }
1166
1167    @Override
1168    public BaseEncoding upperCase() {
1169      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1170    }
1171
1172    @Override
1173    public BaseEncoding lowerCase() {
1174      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1175    }
1176
1177    @Override
1178    public String toString() {
1179      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1180    }
1181  }
1182}