Source code

001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.Ascii;
030import com.google.common.base.Objects;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.OutputStream;
034import java.io.Reader;
035import java.io.Writer;
036import java.util.Arrays;
037import org.checkerframework.checker.nullness.compatqual.NullableDecl;
038
039/**
040 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
041 * strings. This class includes several constants for encoding schemes specified by <a
042 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
043 *
044 * <pre>{@code
045 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))
046 * }</pre>
047 *
048 * <p>returns the string {@code "MZXW6==="}, and
049 *
050 * <pre>{@code
051 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
052 * }</pre>
053 *
054 * <p>...returns the ASCII bytes of the string {@code "foo"}.
055 *
056 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
057 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
058 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
059 * behavior:
060 *
061 * <pre>{@code
062 * BaseEncoding.base16().lowerCase().decode("deadbeef");
063 * }</pre>
064 *
065 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
066 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
067 *
068 * <pre>{@code
069 * // Do NOT do this
070 * BaseEncoding hex = BaseEncoding.base16();
071 * hex.lowerCase(); // does nothing!
072 * return hex.decode("deadbeef"); // throws an IllegalArgumentException
073 * }</pre>
074 *
075 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code
076 * x}, but the reverse does not necessarily hold.
077 *
078 * <table>
079 * <caption>Encodings</caption>
080 * <tr>
081 * <th>Encoding
082 * <th>Alphabet
083 * <th>{@code char:byte} ratio
084 * <th>Default padding
085 * <th>Comments
086 * <tr>
087 * <td>{@link #base16()}
088 * <td>0-9 A-F
089 * <td>2.00
090 * <td>N/A
091 * <td>Traditional hexadecimal. Defaults to upper case.
092 * <tr>
093 * <td>{@link #base32()}
094 * <td>A-Z 2-7
095 * <td>1.60
096 * <td>=
097 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
098 * <tr>
099 * <td>{@link #base32Hex()}
100 * <td>0-9 A-V
101 * <td>1.60
102 * <td>=
103 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
104 * <tr>
105 * <td>{@link #base64()}
106 * <td>A-Z a-z 0-9 + /
107 * <td>1.33
108 * <td>=
109 * <td>
110 * <tr>
111 * <td>{@link #base64Url()}
112 * <td>A-Z a-z 0-9 - _
113 * <td>1.33
114 * <td>=
115 * <td>Safe to use as filenames, or to pass in URLs without escaping
116 * </table>
117 *
118 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
119 *
120 * @author Louis Wasserman
121 * @since 14.0
122 */
123@GwtCompatible(emulated = true)
124public abstract class BaseEncoding {
125  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
126
  // Package-private constructor: only classes in this package can subclass BaseEncoding.
  BaseEncoding() {}
128
  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception whose detail message describes the invalid input. */
    DecodingException(String message) {
      super(message);
    }

    /** Creates an exception that wraps the given underlying {@code cause}. */
    DecodingException(Throwable cause) {
      super(cause);
    }
  }
144
145  /** Encodes the specified byte array, and returns the encoded {@code String}. */
146  public String encode(byte[] bytes) {
147    return encode(bytes, 0, bytes.length);
148  }
149
150  /**
151   * Encodes the specified range of the specified byte array, and returns the encoded {@code
152   * String}.
153   */
154  public final String encode(byte[] bytes, int off, int len) {
155    checkPositionIndexes(off, off + len, bytes.length);
156    StringBuilder result = new StringBuilder(maxEncodedSize(len));
157    try {
158      encodeTo(result, bytes, off, len);
159    } catch (IOException impossible) {
160      throw new AssertionError(impossible);
161    }
162    return result.toString();
163  }
164
  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code
   * Writer}.
   *
   * @param writer the destination that receives the encoded characters
   * @return a stream whose written bytes are encoded into {@code writer}
   */
  @GwtIncompatible // Writer,OutputStream
  public abstract OutputStream encodingStream(Writer writer);
172
173  /**
174   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
175   */
176  @GwtIncompatible // ByteSink,CharSink
177  public final ByteSink encodingSink(final CharSink encodedSink) {
178    checkNotNull(encodedSink);
179    return new ByteSink() {
180      @Override
181      public OutputStream openStream() throws IOException {
182        return encodingStream(encodedSink.openStream());
183      }
184    };
185  }
186
187  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
188
189  private static byte[] extract(byte[] result, int length) {
190    if (length == result.length) {
191      return result;
192    } else {
193      byte[] trunc = new byte[length];
194      System.arraycopy(result, 0, trunc, 0, length);
195      return trunc;
196    }
197  }
198
  /**
   * Determines whether the specified character sequence is a valid encoded string according to this
   * encoding.
   *
   * @param chars the character sequence to check
   * @return {@code true} if {@code chars} is a valid encoded string for this encoding
   * @since 20.0
   */
  public abstract boolean canDecode(CharSequence chars);
206
207  /**
208   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
209   * inverse operation to {@link #encode(byte[])}.
210   *
211   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
212   *     encoding.
213   */
214  public final byte[] decode(CharSequence chars) {
215    try {
216      return decodeChecked(chars);
217    } catch (DecodingException badInput) {
218      throw new IllegalArgumentException(badInput);
219    }
220  }
221
222  /**
223   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
224   * inverse operation to {@link #encode(byte[])}.
225   *
226   * @throws DecodingException if the input is not a valid encoded string according to this
227   *     encoding.
228   */ final byte[] decodeChecked(CharSequence chars)
229      throws DecodingException {
230    chars = trimTrailingPadding(chars);
231    byte[] tmp = new byte[maxDecodedSize(chars.length())];
232    int len = decodeTo(tmp, chars);
233    return extract(tmp, len);
234  }
235
  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code
   * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors.
   *
   * @param reader the source of encoded characters
   * @return a stream that yields the bytes decoded from {@code reader}
   */
  @GwtIncompatible // Reader,InputStream
  public abstract InputStream decodingStream(Reader reader);
242
243  /**
244   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code
245   * CharSource}.
246   */
247  @GwtIncompatible // ByteSource,CharSource
248  public final ByteSource decodingSource(final CharSource encodedSource) {
249    checkNotNull(encodedSource);
250    return new ByteSource() {
251      @Override
252      public InputStream openStream() throws IOException {
253        return decodingStream(encodedSource.openStream());
254      }
255    };
256  }
257
  // Implementations for encoding/decoding

  /**
   * Returns the number of characters to reserve when encoding {@code bytes} input bytes; {@link
   * #encode(byte[], int, int)} uses it as the initial capacity of its {@code StringBuilder}.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Encodes {@code len} bytes of {@code bytes}, starting at index {@code off}, writing the encoded
   * characters to {@code target}.
   */
  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;

  /**
   * Returns the size of the buffer to allocate when decoding {@code chars} characters; {@code
   * decodeChecked} uses it to size the array it passes to {@code decodeTo}.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Decodes {@code chars} into {@code target} and returns a length; {@code decodeChecked} keeps
   * only that many leading bytes of {@code target} as the decoded result.
   *
   * @throws DecodingException if {@code chars} is not valid input for this encoding
   */
  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
267
268  CharSequence trimTrailingPadding(CharSequence chars) {
269    return checkNotNull(chars);
270  }
271
  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * @return an encoding that does not emit padding characters
   */
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @param padChar the character to use for padding
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *     separator
   */
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @param separator the string to insert between groups of encoded characters
   * @param n the number of encoded characters between consecutive separators
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *     string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding lowerCase();
318
  // Single shared instance handed out by base64(); uses '=' as its padding character.
  private static final BaseEncoding BASE64 =
      new Base64Encoding(
          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared {@code base64} encoding instance
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
339
  // Single shared instance handed out by base64Url(); uses '=' as its padding character.
  private static final BaseEncoding BASE64_URL =
      new Base64Encoding(
          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
   * is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared {@code base64url} encoding instance
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
361
  // Single shared instance handed out by base32(); uses '=' as its padding character.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared {@code base32} encoding instance
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
380
  // Single shared instance handed out by base32Hex(); uses '=' as its padding character.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared {@code base32hex} encoding instance
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
399
  // Single shared instance handed out by base16(); constructed without a padding character.
  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");

  /**
   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
   * have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared {@code base16} encoding instance
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
418
419  private static final class Alphabet {
420    private final String name;
421    // this is meant to be immutable -- don't modify it!
422    private final char[] chars;
423    final int mask;
424    final int bitsPerChar;
425    final int charsPerChunk;
426    final int bytesPerChunk;
427    private final byte[] decodabet;
428    private final boolean[] validPadding;
429
430    Alphabet(String name, char[] chars) {
431      this.name = checkNotNull(name);
432      this.chars = checkNotNull(chars);
433      try {
434        this.bitsPerChar = log2(chars.length, UNNECESSARY);
435      } catch (ArithmeticException e) {
436        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
437      }
438
439      /*
440       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
441       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
442       */
443      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
444      try {
445        this.charsPerChunk = 8 / gcd;
446        this.bytesPerChunk = bitsPerChar / gcd;
447      } catch (ArithmeticException e) {
448        throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e);
449      }
450
451      this.mask = chars.length - 1;
452
453      byte[] decodabet = new byte[Ascii.MAX + 1];
454      Arrays.fill(decodabet, (byte) -1);
455      for (int i = 0; i < chars.length; i++) {
456        char c = chars[i];
457        checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
458        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
459        decodabet[c] = (byte) i;
460      }
461      this.decodabet = decodabet;
462
463      boolean[] validPadding = new boolean[charsPerChunk];
464      for (int i = 0; i < bytesPerChunk; i++) {
465        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
466      }
467      this.validPadding = validPadding;
468    }
469
470    char encode(int bits) {
471      return chars[bits];
472    }
473
474    boolean isValidPaddingStartPosition(int index) {
475      return validPadding[index % charsPerChunk];
476    }
477
478    boolean canDecode(char ch) {
479      return ch <= Ascii.MAX && decodabet[ch] != -1;
480    }
481
482    int decode(char ch) throws DecodingException {
483      if (ch > Ascii.MAX) {
484        throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
485      }
486      int result = decodabet[ch];
487      if (result == -1) {
488        if (ch <= 0x20 || ch == Ascii.MAX) {
489          throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
490        } else {
491          throw new DecodingException("Unrecognized character: " + ch);
492        }
493      }
494      return result;
495    }
496
497    private boolean hasLowerCase() {
498      for (char c : chars) {
499        if (Ascii.isLowerCase(c)) {
500          return true;
501        }
502      }
503      return false;
504    }
505
506    private boolean hasUpperCase() {
507      for (char c : chars) {
508        if (Ascii.isUpperCase(c)) {
509          return true;
510        }
511      }
512      return false;
513    }
514
515    Alphabet upperCase() {
516      if (!hasLowerCase()) {
517        return this;
518      } else {
519        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
520        char[] upperCased = new char[chars.length];
521        for (int i = 0; i < chars.length; i++) {
522          upperCased[i] = Ascii.toUpperCase(chars[i]);
523        }
524        return new Alphabet(name + ".upperCase()", upperCased);
525      }
526    }
527
528    Alphabet lowerCase() {
529      if (!hasUpperCase()) {
530        return this;
531      } else {
532        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
533        char[] lowerCased = new char[chars.length];
534        for (int i = 0; i < chars.length; i++) {
535          lowerCased[i] = Ascii.toLowerCase(chars[i]);
536        }
537        return new Alphabet(name + ".lowerCase()", lowerCased);
538      }
539    }
540
541    public boolean matches(char c) {
542      return c < decodabet.length && decodabet[c] != -1;
543    }
544
545    @Override
546    public String toString() {
547      return name;
548    }
549
550    @Override
551    public boolean equals(@NullableDecl Object other) {
552      if (other instanceof Alphabet) {
553        Alphabet that = (Alphabet) other;
554        return Arrays.equals(this.chars, that.chars);
555      }
556      return false;
557    }
558
559    @Override
560    public int hashCode() {
561      return Arrays.hashCode(chars);
562    }
563  }
564
565  static class StandardBaseEncoding extends BaseEncoding {
566    // TODO(lowasser): provide a useful toString
567    final Alphabet alphabet;
568
569    @NullableDecl final Character paddingChar;
570
    /**
     * Creates an encoding whose alphabet is built from the characters of {@code alphabetChars}
     * and labelled {@code name}.
     */
    StandardBaseEncoding(String name, String alphabetChars, @NullableDecl Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }

    /**
     * Creates an encoding over {@code alphabet}; a {@code null} {@code paddingChar} means no
     * padding is written.
     *
     * @throws IllegalArgumentException if {@code paddingChar} is also an alphabet character
     */
    StandardBaseEncoding(Alphabet alphabet, @NullableDecl Character paddingChar) {
      this.alphabet = checkNotNull(alphabet);
      // A padding character that is also a data character would make padding ambiguous.
      checkArgument(
          paddingChar == null || !alphabet.matches(paddingChar),
          "Padding character %s was already in alphabet",
          paddingChar);
      this.paddingChar = paddingChar;
    }
583
584    @Override
585    int maxEncodedSize(int bytes) {
586      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
587    }
588
589    @GwtIncompatible // Writer,OutputStream
590    @Override
591    public OutputStream encodingStream(final Writer out) {
592      checkNotNull(out);
593      return new OutputStream() {
594        int bitBuffer = 0;
595        int bitBufferLength = 0;
596        int writtenChars = 0;
597
598        @Override
599        public void write(int b) throws IOException {
600          bitBuffer <<= 8;
601          bitBuffer |= b & 0xFF;
602          bitBufferLength += 8;
603          while (bitBufferLength >= alphabet.bitsPerChar) {
604            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
605            out.write(alphabet.encode(charIndex));
606            writtenChars++;
607            bitBufferLength -= alphabet.bitsPerChar;
608          }
609        }
610
611        @Override
612        public void flush() throws IOException {
613          out.flush();
614        }
615
616        @Override
617        public void close() throws IOException {
618          if (bitBufferLength > 0) {
619            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
620            out.write(alphabet.encode(charIndex));
621            writtenChars++;
622            if (paddingChar != null) {
623              while (writtenChars % alphabet.charsPerChunk != 0) {
624                out.write(paddingChar.charValue());
625                writtenChars++;
626              }
627            }
628          }
629          out.close();
630        }
631      };
632    }
633
634    @Override
635    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
636      checkNotNull(target);
637      checkPositionIndexes(off, off + len, bytes.length);
638      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
639        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
640      }
641    }
642
643    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
644      checkNotNull(target);
645      checkPositionIndexes(off, off + len, bytes.length);
646      checkArgument(len <= alphabet.bytesPerChunk);
647      long bitBuffer = 0;
648      for (int i = 0; i < len; ++i) {
649        bitBuffer |= bytes[off + i] & 0xFF;
650        bitBuffer <<= 8; // Add additional zero byte in the end.
651      }
652      // Position of first character is length of bitBuffer minus bitsPerChar.
653      final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
654      int bitsProcessed = 0;
655      while (bitsProcessed < len * 8) {
656        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
657        target.append(alphabet.encode(charIndex));
658        bitsProcessed += alphabet.bitsPerChar;
659      }
660      if (paddingChar != null) {
661        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
662          target.append(paddingChar.charValue());
663          bitsProcessed += alphabet.bitsPerChar;
664        }
665      }
666    }
667
668    @Override
669    int maxDecodedSize(int chars) {
670      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
671    }
672
673    @Override
674    CharSequence trimTrailingPadding(CharSequence chars) {
675      checkNotNull(chars);
676      if (paddingChar == null) {
677        return chars;
678      }
679      char padChar = paddingChar.charValue();
680      int l;
681      for (l = chars.length() - 1; l >= 0; l--) {
682        if (chars.charAt(l) != padChar) {
683          break;
684        }
685      }
686      return chars.subSequence(0, l + 1);
687    }
688
689    @Override
690    public boolean canDecode(CharSequence chars) {
691      checkNotNull(chars);
692      chars = trimTrailingPadding(chars);
693      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
694        return false;
695      }
696      for (int i = 0; i < chars.length(); i++) {
697        if (!alphabet.canDecode(chars.charAt(i))) {
698          return false;
699        }
700      }
701      return true;
702    }
703
704    @Override
705    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
706      checkNotNull(target);
707      chars = trimTrailingPadding(chars);
708      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
709        throw new DecodingException("Invalid input length " + chars.length());
710      }
711      int bytesWritten = 0;
712      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
713        long chunk = 0;
714        int charsProcessed = 0;
715        for (int i = 0; i < alphabet.charsPerChunk; i++) {
716          chunk <<= alphabet.bitsPerChar;
717          if (charIdx + i < chars.length()) {
718            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
719          }
720        }
721        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
722        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
723          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
724        }
725      }
726      return bytesWritten;
727    }
728
729    @Override
730    @GwtIncompatible // Reader,InputStream
731    public InputStream decodingStream(final Reader reader) {
732      checkNotNull(reader);
733      return new InputStream() {
734        int bitBuffer = 0;
735        int bitBufferLength = 0;
736        int readChars = 0;
737        boolean hitPadding = false;
738
739        @Override
740        public int read() throws IOException {
741          while (true) {
742            int readChar = reader.read();
743            if (readChar == -1) {
744              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
745                throw new DecodingException("Invalid input length " + readChars);
746              }
747              return -1;
748            }
749            readChars++;
750            char ch = (char) readChar;
751            if (paddingChar != null && paddingChar.charValue() == ch) {
752              if (!hitPadding
753                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
754                throw new DecodingException("Padding cannot start at index " + readChars);
755              }
756              hitPadding = true;
757            } else if (hitPadding) {
758              throw new DecodingException(
759                  "Expected padding character but found '" + ch + "' at index " + readChars);
760            } else {
761              bitBuffer <<= alphabet.bitsPerChar;
762              bitBuffer |= alphabet.decode(ch);
763              bitBufferLength += alphabet.bitsPerChar;
764
765              if (bitBufferLength >= 8) {
766                bitBufferLength -= 8;
767                return (bitBuffer >> bitBufferLength) & 0xFF;
768              }
769            }
770          }
771        }
772
773        @Override
774        public void close() throws IOException {
775          reader.close();
776        }
777      };
778    }
779
780    @Override
781    public BaseEncoding omitPadding() {
782      return (paddingChar == null) ? this : newInstance(alphabet, null);
783    }
784
785    @Override
786    public BaseEncoding withPadChar(char padChar) {
787      if (8 % alphabet.bitsPerChar == 0
788          || (paddingChar != null && paddingChar.charValue() == padChar)) {
789        return this;
790      } else {
791        return newInstance(alphabet, padChar);
792      }
793    }
794
795    @Override
796    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
797      for (int i = 0; i < separator.length(); i++) {
798        checkArgument(
799            !alphabet.matches(separator.charAt(i)),
800            "Separator (%s) cannot contain alphabet characters",
801            separator);
802      }
803      if (paddingChar != null) {
804        checkArgument(
805            separator.indexOf(paddingChar.charValue()) < 0,
806            "Separator (%s) cannot contain padding character",
807            separator);
808      }
809      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
810    }
811
812    private transient BaseEncoding upperCase;
813    private transient BaseEncoding lowerCase;
814
815    @Override
816    public BaseEncoding upperCase() {
817      BaseEncoding result = upperCase;
818      if (result == null) {
819        Alphabet upper = alphabet.upperCase();
820        result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
821      }
822      return result;
823    }
824
825    @Override
826    public BaseEncoding lowerCase() {
827      BaseEncoding result = lowerCase;
828      if (result == null) {
829        Alphabet lower = alphabet.lowerCase();
830        result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
831      }
832      return result;
833    }
834
    /**
     * Creates the encoding returned by the configuration methods of this class. Subclasses
     * override this to return an instance of their own type.
     */
    BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) {
      return new StandardBaseEncoding(alphabet, paddingChar);
    }
838
839    @Override
840    public String toString() {
841      StringBuilder builder = new StringBuilder("BaseEncoding.");
842      builder.append(alphabet.toString());
843      if (8 % alphabet.bitsPerChar != 0) {
844        if (paddingChar == null) {
845          builder.append(".omitPadding()");
846        } else {
847          builder.append(".withPadChar('").append(paddingChar).append("')");
848        }
849      }
850      return builder.toString();
851    }
852
853    @Override
854    public boolean equals(@NullableDecl Object other) {
855      if (other instanceof StandardBaseEncoding) {
856        StandardBaseEncoding that = (StandardBaseEncoding) other;
857        return this.alphabet.equals(that.alphabet)
858            && Objects.equal(this.paddingChar, that.paddingChar);
859      }
860      return false;
861    }
862
863    @Override
864    public int hashCode() {
865      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
866    }
867  }
868
869  static final class Base16Encoding extends StandardBaseEncoding {
870    final char[] encoding = new char[512];
871
872    Base16Encoding(String name, String alphabetChars) {
873      this(new Alphabet(name, alphabetChars.toCharArray()));
874    }
875
876    private Base16Encoding(Alphabet alphabet) {
877      super(alphabet, null);
878      checkArgument(alphabet.chars.length == 16);
879      for (int i = 0; i < 256; ++i) {
880        encoding[i] = alphabet.encode(i >>> 4);
881        encoding[i | 0x100] = alphabet.encode(i & 0xF);
882      }
883    }
884
885    @Override
886    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
887      checkNotNull(target);
888      checkPositionIndexes(off, off + len, bytes.length);
889      for (int i = 0; i < len; ++i) {
890        int b = bytes[off + i] & 0xFF;
891        target.append(encoding[b]);
892        target.append(encoding[b | 0x100]);
893      }
894    }
895
896    @Override
897    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
898      checkNotNull(target);
899      if (chars.length() % 2 == 1) {
900        throw new DecodingException("Invalid input length " + chars.length());
901      }
902      int bytesWritten = 0;
903      for (int i = 0; i < chars.length(); i += 2) {
904        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
905        target[bytesWritten++] = (byte) decoded;
906      }
907      return bytesWritten;
908    }
909
910    @Override
911    BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) {
912      return new Base16Encoding(alphabet);
913    }
914  }
915
916  static final class Base64Encoding extends StandardBaseEncoding {
917    Base64Encoding(String name, String alphabetChars, @NullableDecl Character paddingChar) {
918      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
919    }
920
921    private Base64Encoding(Alphabet alphabet, @NullableDecl Character paddingChar) {
922      super(alphabet, paddingChar);
923      checkArgument(alphabet.chars.length == 64);
924    }
925
926    @Override
927    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
928      checkNotNull(target);
929      checkPositionIndexes(off, off + len, bytes.length);
930      int i = off;
931      for (int remaining = len; remaining >= 3; remaining -= 3) {
932        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
933        target.append(alphabet.encode(chunk >>> 18));
934        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
935        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
936        target.append(alphabet.encode(chunk & 0x3F));
937      }
938      if (i < off + len) {
939        encodeChunkTo(target, bytes, i, off + len - i);
940      }
941    }
942
943    @Override
944    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
945      checkNotNull(target);
946      chars = trimTrailingPadding(chars);
947      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
948        throw new DecodingException("Invalid input length " + chars.length());
949      }
950      int bytesWritten = 0;
951      for (int i = 0; i < chars.length(); ) {
952        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
953        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
954        target[bytesWritten++] = (byte) (chunk >>> 16);
955        if (i < chars.length()) {
956          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
957          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
958          if (i < chars.length()) {
959            chunk |= alphabet.decode(chars.charAt(i++));
960            target[bytesWritten++] = (byte) (chunk & 0xFF);
961          }
962        }
963      }
964      return bytesWritten;
965    }
966
967    @Override
968    BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) {
969      return new Base64Encoding(alphabet, paddingChar);
970    }
971  }
972
973  @GwtIncompatible
974  static Reader ignoringReader(final Reader delegate, final String toIgnore) {
975    checkNotNull(delegate);
976    checkNotNull(toIgnore);
977    return new Reader() {
978      @Override
979      public int read() throws IOException {
980        int readChar;
981        do {
982          readChar = delegate.read();
983        } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
984        return readChar;
985      }
986
987      @Override
988      public int read(char[] cbuf, int off, int len) throws IOException {
989        throw new UnsupportedOperationException();
990      }
991
992      @Override
993      public void close() throws IOException {
994        delegate.close();
995      }
996    };
997  }
998
999  static Appendable separatingAppendable(
1000      final Appendable delegate, final String separator, final int afterEveryChars) {
1001    checkNotNull(delegate);
1002    checkNotNull(separator);
1003    checkArgument(afterEveryChars > 0);
1004    return new Appendable() {
1005      int charsUntilSeparator = afterEveryChars;
1006
1007      @Override
1008      public Appendable append(char c) throws IOException {
1009        if (charsUntilSeparator == 0) {
1010          delegate.append(separator);
1011          charsUntilSeparator = afterEveryChars;
1012        }
1013        delegate.append(c);
1014        charsUntilSeparator--;
1015        return this;
1016      }
1017
1018      @Override
1019      public Appendable append(@NullableDecl CharSequence chars, int off, int len)
1020          throws IOException {
1021        throw new UnsupportedOperationException();
1022      }
1023
1024      @Override
1025      public Appendable append(@NullableDecl CharSequence chars) throws IOException {
1026        throw new UnsupportedOperationException();
1027      }
1028    };
1029  }
1030
1031  @GwtIncompatible // Writer
1032  static Writer separatingWriter(
1033      final Writer delegate, final String separator, final int afterEveryChars) {
1034    final Appendable seperatingAppendable =
1035        separatingAppendable(delegate, separator, afterEveryChars);
1036    return new Writer() {
1037      @Override
1038      public void write(int c) throws IOException {
1039        seperatingAppendable.append((char) c);
1040      }
1041
1042      @Override
1043      public void write(char[] chars, int off, int len) throws IOException {
1044        throw new UnsupportedOperationException();
1045      }
1046
1047      @Override
1048      public void flush() throws IOException {
1049        delegate.flush();
1050      }
1051
1052      @Override
1053      public void close() throws IOException {
1054        delegate.close();
1055      }
1056    };
1057  }
1058
1059  static final class SeparatedBaseEncoding extends BaseEncoding {
1060    private final BaseEncoding delegate;
1061    private final String separator;
1062    private final int afterEveryChars;
1063
1064    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1065      this.delegate = checkNotNull(delegate);
1066      this.separator = checkNotNull(separator);
1067      this.afterEveryChars = afterEveryChars;
1068      checkArgument(
1069          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1070    }
1071
1072    @Override
1073    CharSequence trimTrailingPadding(CharSequence chars) {
1074      return delegate.trimTrailingPadding(chars);
1075    }
1076
1077    @Override
1078    int maxEncodedSize(int bytes) {
1079      int unseparatedSize = delegate.maxEncodedSize(bytes);
1080      return unseparatedSize
1081          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1082    }
1083
1084    @GwtIncompatible // Writer,OutputStream
1085    @Override
1086    public OutputStream encodingStream(final Writer output) {
1087      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1088    }
1089
1090    @Override
1091    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1092      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1093    }
1094
1095    @Override
1096    int maxDecodedSize(int chars) {
1097      return delegate.maxDecodedSize(chars);
1098    }
1099
1100    @Override
1101    public boolean canDecode(CharSequence chars) {
1102      StringBuilder builder = new StringBuilder();
1103      for (int i = 0; i < chars.length(); i++) {
1104        char c = chars.charAt(i);
1105        if (separator.indexOf(c) < 0) {
1106          builder.append(c);
1107        }
1108      }
1109      return delegate.canDecode(builder);
1110    }
1111
1112    @Override
1113    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1114      StringBuilder stripped = new StringBuilder(chars.length());
1115      for (int i = 0; i < chars.length(); i++) {
1116        char c = chars.charAt(i);
1117        if (separator.indexOf(c) < 0) {
1118          stripped.append(c);
1119        }
1120      }
1121      return delegate.decodeTo(target, stripped);
1122    }
1123
1124    @Override
1125    @GwtIncompatible // Reader,InputStream
1126    public InputStream decodingStream(final Reader reader) {
1127      return delegate.decodingStream(ignoringReader(reader, separator));
1128    }
1129
1130    @Override
1131    public BaseEncoding omitPadding() {
1132      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1133    }
1134
1135    @Override
1136    public BaseEncoding withPadChar(char padChar) {
1137      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1138    }
1139
1140    @Override
1141    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1142      throw new UnsupportedOperationException("Already have a separator");
1143    }
1144
1145    @Override
1146    public BaseEncoding upperCase() {
1147      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1148    }
1149
1150    @Override
1151    public BaseEncoding lowerCase() {
1152      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1153    }
1154
1155    @Override
1156    public String toString() {
1157      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1158    }
1159  }
1160}