Source code

001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.Ascii;
030import com.google.common.base.CharMatcher;
031import com.google.common.base.Objects;
032import java.io.IOException;
033import java.io.InputStream;
034import java.io.OutputStream;
035import java.io.Reader;
036import java.io.Writer;
037import java.util.Arrays;
038import javax.annotation.Nullable;
039
040/**
041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
042 * strings. This class includes several constants for encoding schemes specified by
043 * <a href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
044 *
045 * <pre>   {@code
046 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
047 *
048 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
049 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
050 *
051 * <p>...returns the ASCII bytes of the string {@code "foo"}.
052 *
053 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
054 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
055 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
056 * behavior:
057 *
058 * <pre>   {@code
059 *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
060 *
061 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
062 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
063 *
064 * <pre>   {@code
065 *   // Do NOT do this
066 *   BaseEncoding hex = BaseEncoding.base16();
067 *   hex.lowerCase(); // does nothing!
068 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
069 *
070 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
071 * {@code x}, but the reverse does not necessarily hold.
072 *
073 * <table>
074 * <caption>Encodings</caption>
075 * <tr>
076 * <th>Encoding
077 * <th>Alphabet
078 * <th>{@code char:byte} ratio
079 * <th>Default padding
080 * <th>Comments
081 * <tr>
082 * <td>{@link #base16()}
083 * <td>0-9 A-F
084 * <td>2.00
085 * <td>N/A
086 * <td>Traditional hexadecimal. Defaults to upper case.
087 * <tr>
088 * <td>{@link #base32()}
089 * <td>A-Z 2-7
090 * <td>1.60
091 * <td>=
092 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
093 * <tr>
094 * <td>{@link #base32Hex()}
095 * <td>0-9 A-V
096 * <td>1.60
097 * <td>=
098 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
099 * <tr>
100 * <td>{@link #base64()}
101 * <td>A-Z a-z 0-9 + /
102 * <td>1.33
103 * <td>=
104 * <td>
105 * <tr>
106 * <td>{@link #base64Url()}
107 * <td>A-Z a-z 0-9 - _
108 * <td>1.33
109 * <td>=
110 * <td>Safe to use as filenames, or to pass in URLs without escaping
111 * </table>
112 *
113 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
114 *
115 * @author Louis Wasserman
116 * @since 14.0
117 */
118@GwtCompatible(emulated = true)
119public abstract class BaseEncoding {
120  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
121
  // Declared without an access modifier, so it is package-private: code outside this package
  // cannot invoke it. NOTE(review): presumably this keeps subclassing internal -- confirm no
  // wider constructor exists elsewhere in the file.
  BaseEncoding() {}
123
  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * <p>This is a checked {@link IOException}. {@link BaseEncoding#decode(CharSequence)} catches it
   * and rethrows it wrapped in an {@link IllegalArgumentException}.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception whose detail message describes the rejected input. */
    DecodingException(String message) {
      super(message);
    }

    /** Creates an exception that wraps the given underlying {@code cause}. */
    DecodingException(Throwable cause) {
      super(cause);
    }
  }
139
140  /**
141   * Encodes the specified byte array, and returns the encoded {@code String}.
142   */
143  public String encode(byte[] bytes) {
144    return encode(bytes, 0, bytes.length);
145  }
146
147  /**
148   * Encodes the specified range of the specified byte array, and returns the encoded
149   * {@code String}.
150   */
151  public final String encode(byte[] bytes, int off, int len) {
152    checkPositionIndexes(off, off + len, bytes.length);
153    StringBuilder result = new StringBuilder(maxEncodedSize(len));
154    try {
155      encodeTo(result, bytes, off, len);
156    } catch (IOException impossible) {
157      throw new AssertionError(impossible);
158    }
159    return result.toString();
160  }
161
  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing
   * {@code Writer}.
   *
   * @param writer the destination that receives the encoded characters
   * @return a stream that encodes the bytes written to it
   */
  @GwtIncompatible // Writer,OutputStream
  public abstract OutputStream encodingStream(Writer writer);
169
170  /**
171   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
172   */
173  @GwtIncompatible // ByteSink,CharSink
174  public final ByteSink encodingSink(final CharSink encodedSink) {
175    checkNotNull(encodedSink);
176    return new ByteSink() {
177      @Override
178      public OutputStream openStream() throws IOException {
179        return encodingStream(encodedSink.openStream());
180      }
181    };
182  }
183
184  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
185
186  private static byte[] extract(byte[] result, int length) {
187    if (length == result.length) {
188      return result;
189    } else {
190      byte[] trunc = new byte[length];
191      System.arraycopy(result, 0, trunc, 0, length);
192      return trunc;
193    }
194  }
195
  /**
   * Determines whether the specified character sequence is a valid encoded string according to this
   * encoding.
   *
   * @param chars the character sequence to test
   * @return {@code true} if {@code chars} is a valid encoded string for this encoding
   * @since 20.0
   */
  public abstract boolean canDecode(CharSequence chars);
203
204  /**
205   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
206   * inverse operation to {@link #encode(byte[])}.
207   *
208   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
209   *     encoding.
210   */
211  public final byte[] decode(CharSequence chars) {
212    try {
213      return decodeChecked(chars);
214    } catch (DecodingException badInput) {
215      throw new IllegalArgumentException(badInput);
216    }
217  }
218
219  /**
220   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
221   * inverse operation to {@link #encode(byte[])}.
222   *
223   * @throws DecodingException if the input is not a valid encoded string according to this
224   *     encoding.
225   */ final byte[] decodeChecked(CharSequence chars)
226      throws DecodingException {
227    chars = padding().trimTrailingFrom(chars);
228    byte[] tmp = new byte[maxDecodedSize(chars.length())];
229    int len = decodeTo(tmp, chars);
230    return extract(tmp, len);
231  }
232
  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified
   * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific
   * errors.
   *
   * @param reader the source of encoded characters
   * @return a stream that yields the decoded bytes
   */
  @GwtIncompatible // Reader,InputStream
  public abstract InputStream decodingStream(Reader reader);
240
241  /**
242   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
243   * {@code CharSource}.
244   */
245  @GwtIncompatible // ByteSource,CharSource
246  public final ByteSource decodingSource(final CharSource encodedSource) {
247    checkNotNull(encodedSource);
248    return new ByteSource() {
249      @Override
250      public InputStream openStream() throws IOException {
251        return decodingStream(encodedSource.openStream());
252      }
253    };
254  }
255
  // Implementations for encoding/decoding

  /**
   * Returns the capacity that {@link #encode(byte[], int, int)} reserves for its output when
   * encoding {@code bytes} input bytes. NOTE(review): the name suggests an upper bound on the
   * encoded length -- confirm against the implementations.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Encodes {@code len} bytes of {@code bytes}, starting at index {@code off}, and appends the
   * resulting characters to {@code target}.
   *
   * @throws IOException declared because {@code target} is an {@code Appendable}
   */
  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;

  /**
   * Returns the length of the scratch array that {@code decodeChecked} allocates before decoding
   * {@code chars} characters (counted after trailing padding has been trimmed).
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Decodes {@code chars} into {@code target} and returns the number of bytes written; callers
   * use that count to truncate {@code target}.
   *
   * @throws DecodingException if {@code chars} is not valid input for this encoding
   */
  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;

  /** Returns the matcher used to trim trailing padding characters from input before decoding. */
  abstract CharMatcher padding();
267
  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * @return an encoding that emits no padding characters
   */
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @param padChar the character to use for padding
   * @return an encoding that pads with {@code padChar}
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *     separator
   */
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @param separator the string inserted between groups of encoded characters
   * @param n the number of encoded characters between separators
   * @return an encoding that inserts {@code separator} after every {@code n} characters
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *     string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @return an encoding that uses the upper-case form of this encoding's alphabet
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @return an encoding that uses the lower-case form of this encoding's alphabet
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding lowerCase();
314
  // Shared instance returned by base64(): 64-character alphabet ending in '+' and '/', padded
  // with '='.
  private static final BaseEncoding BASE64 =
      new Base64Encoding(
          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64
   * Encoding. (This is the same as the base 64 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64 encoding instance
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
335
  // Shared instance returned by base64Url(): same as the base64 alphabet except that the last
  // two characters are '-' and '_'; padded with '='.
  private static final BaseEncoding BASE64_URL =
      new Base64Encoding(
          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
   * is the same as the base 64 encoding with URL and filename safe alphabet from
   * <a href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64url encoding instance
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
357
  // Shared instance returned by base32(): 32-character alphabet A-Z then 2-7, padded with '='.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32 encoding instance
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
376
  // Shared instance returned by base32Hex(): 32-character alphabet 0-9 then A-V, padded with
  // '='.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32hex encoding instance
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
395
  // Shared instance returned by base16(): upper-case hexadecimal digits; constructed without a
  // padding character argument.
  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");

  /**
   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
   * have no effect.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base16 encoding instance
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
414
415  private static final class Alphabet extends CharMatcher {
416    private final String name;
417    // this is meant to be immutable -- don't modify it!
418    private final char[] chars;
419    final int mask;
420    final int bitsPerChar;
421    final int charsPerChunk;
422    final int bytesPerChunk;
423    private final byte[] decodabet;
424    private final boolean[] validPadding;
425
426    Alphabet(String name, char[] chars) {
427      this.name = checkNotNull(name);
428      this.chars = checkNotNull(chars);
429      try {
430        this.bitsPerChar = log2(chars.length, UNNECESSARY);
431      } catch (ArithmeticException e) {
432        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
433      }
434
435      /*
436       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
437       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
438       */
439      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
440      try {
441        this.charsPerChunk = 8 / gcd;
442        this.bytesPerChunk = bitsPerChar / gcd;
443      } catch (ArithmeticException e) {
444        throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e);
445      }
446
447      this.mask = chars.length - 1;
448
449      byte[] decodabet = new byte[Ascii.MAX + 1];
450      Arrays.fill(decodabet, (byte) -1);
451      for (int i = 0; i < chars.length; i++) {
452        char c = chars[i];
453        checkArgument(CharMatcher.ascii().matches(c), "Non-ASCII character: %s", c);
454        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
455        decodabet[c] = (byte) i;
456      }
457      this.decodabet = decodabet;
458
459      boolean[] validPadding = new boolean[charsPerChunk];
460      for (int i = 0; i < bytesPerChunk; i++) {
461        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
462      }
463      this.validPadding = validPadding;
464    }
465
466    char encode(int bits) {
467      return chars[bits];
468    }
469
470    boolean isValidPaddingStartPosition(int index) {
471      return validPadding[index % charsPerChunk];
472    }
473
474    boolean canDecode(char ch) {
475      return ch <= Ascii.MAX && decodabet[ch] != -1;
476    }
477
478    int decode(char ch) throws DecodingException {
479      if (ch > Ascii.MAX || decodabet[ch] == -1) {
480        throw new DecodingException(
481            "Unrecognized character: "
482                + (CharMatcher.invisible().matches(ch) ? "0x" + Integer.toHexString(ch) : ch));
483      }
484      return decodabet[ch];
485    }
486
487    private boolean hasLowerCase() {
488      for (char c : chars) {
489        if (Ascii.isLowerCase(c)) {
490          return true;
491        }
492      }
493      return false;
494    }
495
496    private boolean hasUpperCase() {
497      for (char c : chars) {
498        if (Ascii.isUpperCase(c)) {
499          return true;
500        }
501      }
502      return false;
503    }
504
505    Alphabet upperCase() {
506      if (!hasLowerCase()) {
507        return this;
508      } else {
509        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
510        char[] upperCased = new char[chars.length];
511        for (int i = 0; i < chars.length; i++) {
512          upperCased[i] = Ascii.toUpperCase(chars[i]);
513        }
514        return new Alphabet(name + ".upperCase()", upperCased);
515      }
516    }
517
518    Alphabet lowerCase() {
519      if (!hasUpperCase()) {
520        return this;
521      } else {
522        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
523        char[] lowerCased = new char[chars.length];
524        for (int i = 0; i < chars.length; i++) {
525          lowerCased[i] = Ascii.toLowerCase(chars[i]);
526        }
527        return new Alphabet(name + ".lowerCase()", lowerCased);
528      }
529    }
530
531    @Override
532    public boolean matches(char c) {
533      return CharMatcher.ascii().matches(c) && decodabet[c] != -1;
534    }
535
536    @Override
537    public String toString() {
538      return name;
539    }
540
541    @Override
542    public boolean equals(@Nullable Object other) {
543      if (other instanceof Alphabet) {
544        Alphabet that = (Alphabet) other;
545        return Arrays.equals(this.chars, that.chars);
546      }
547      return false;
548    }
549
550    @Override
551    public int hashCode() {
552      return Arrays.hashCode(chars);
553    }
554  }
555
556  static class StandardBaseEncoding extends BaseEncoding {
557    // TODO(lowasser): provide a useful toString
558    final Alphabet alphabet;
559
560    @Nullable final Character paddingChar;
561
562    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
563      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
564    }
565
566    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
567      this.alphabet = checkNotNull(alphabet);
568      checkArgument(
569          paddingChar == null || !alphabet.matches(paddingChar),
570          "Padding character %s was already in alphabet",
571          paddingChar);
572      this.paddingChar = paddingChar;
573    }
574
575    @Override
576    CharMatcher padding() {
577      return (paddingChar == null) ? CharMatcher.none() : CharMatcher.is(paddingChar.charValue());
578    }
579
580    @Override
581    int maxEncodedSize(int bytes) {
582      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
583    }
584
585    @GwtIncompatible // Writer,OutputStream
586    @Override
587    public OutputStream encodingStream(final Writer out) {
588      checkNotNull(out);
589      return new OutputStream() {
590        int bitBuffer = 0;
591        int bitBufferLength = 0;
592        int writtenChars = 0;
593
594        @Override
595        public void write(int b) throws IOException {
596          bitBuffer <<= 8;
597          bitBuffer |= b & 0xFF;
598          bitBufferLength += 8;
599          while (bitBufferLength >= alphabet.bitsPerChar) {
600            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
601            out.write(alphabet.encode(charIndex));
602            writtenChars++;
603            bitBufferLength -= alphabet.bitsPerChar;
604          }
605        }
606
607        @Override
608        public void flush() throws IOException {
609          out.flush();
610        }
611
612        @Override
613        public void close() throws IOException {
614          if (bitBufferLength > 0) {
615            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
616            out.write(alphabet.encode(charIndex));
617            writtenChars++;
618            if (paddingChar != null) {
619              while (writtenChars % alphabet.charsPerChunk != 0) {
620                out.write(paddingChar.charValue());
621                writtenChars++;
622              }
623            }
624          }
625          out.close();
626        }
627      };
628    }
629
630    @Override
631    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
632      checkNotNull(target);
633      checkPositionIndexes(off, off + len, bytes.length);
634      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
635        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
636      }
637    }
638
639    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
640      checkNotNull(target);
641      checkPositionIndexes(off, off + len, bytes.length);
642      checkArgument(len <= alphabet.bytesPerChunk);
643      long bitBuffer = 0;
644      for (int i = 0; i < len; ++i) {
645        bitBuffer |= bytes[off + i] & 0xFF;
646        bitBuffer <<= 8; // Add additional zero byte in the end.
647      }
648      // Position of first character is length of bitBuffer minus bitsPerChar.
649      final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
650      int bitsProcessed = 0;
651      while (bitsProcessed < len * 8) {
652        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
653        target.append(alphabet.encode(charIndex));
654        bitsProcessed += alphabet.bitsPerChar;
655      }
656      if (paddingChar != null) {
657        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
658          target.append(paddingChar.charValue());
659          bitsProcessed += alphabet.bitsPerChar;
660        }
661      }
662    }
663
664    @Override
665    int maxDecodedSize(int chars) {
666      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
667    }
668
669    @Override
670    public boolean canDecode(CharSequence chars) {
671      chars = padding().trimTrailingFrom(chars);
672      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
673        return false;
674      }
675      for (int i = 0; i < chars.length(); i++) {
676        if (!alphabet.canDecode(chars.charAt(i))) {
677          return false;
678        }
679      }
680      return true;
681    }
682
683    @Override
684    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
685      checkNotNull(target);
686      chars = padding().trimTrailingFrom(chars);
687      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
688        throw new DecodingException("Invalid input length " + chars.length());
689      }
690      int bytesWritten = 0;
691      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
692        long chunk = 0;
693        int charsProcessed = 0;
694        for (int i = 0; i < alphabet.charsPerChunk; i++) {
695          chunk <<= alphabet.bitsPerChar;
696          if (charIdx + i < chars.length()) {
697            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
698          }
699        }
700        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
701        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
702          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
703        }
704      }
705      return bytesWritten;
706    }
707
708    @GwtIncompatible // Reader,InputStream
709    @Override
710    public InputStream decodingStream(final Reader reader) {
711      checkNotNull(reader);
712      return new InputStream() {
713        int bitBuffer = 0;
714        int bitBufferLength = 0;
715        int readChars = 0;
716        boolean hitPadding = false;
717        final CharMatcher paddingMatcher = padding();
718
719        @Override
720        public int read() throws IOException {
721          while (true) {
722            int readChar = reader.read();
723            if (readChar == -1) {
724              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
725                throw new DecodingException("Invalid input length " + readChars);
726              }
727              return -1;
728            }
729            readChars++;
730            char ch = (char) readChar;
731            if (paddingMatcher.matches(ch)) {
732              if (!hitPadding
733                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
734                throw new DecodingException("Padding cannot start at index " + readChars);
735              }
736              hitPadding = true;
737            } else if (hitPadding) {
738              throw new DecodingException(
739                  "Expected padding character but found '" + ch + "' at index " + readChars);
740            } else {
741              bitBuffer <<= alphabet.bitsPerChar;
742              bitBuffer |= alphabet.decode(ch);
743              bitBufferLength += alphabet.bitsPerChar;
744
745              if (bitBufferLength >= 8) {
746                bitBufferLength -= 8;
747                return (bitBuffer >> bitBufferLength) & 0xFF;
748              }
749            }
750          }
751        }
752
753        @Override
754        public void close() throws IOException {
755          reader.close();
756        }
757      };
758    }
759
760    @Override
761    public BaseEncoding omitPadding() {
762      return (paddingChar == null) ? this : newInstance(alphabet, null);
763    }
764
765    @Override
766    public BaseEncoding withPadChar(char padChar) {
767      if (8 % alphabet.bitsPerChar == 0
768          || (paddingChar != null && paddingChar.charValue() == padChar)) {
769        return this;
770      } else {
771        return newInstance(alphabet, padChar);
772      }
773    }
774
775    @Override
776    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
777      checkArgument(
778          padding().or(alphabet).matchesNoneOf(separator),
779          "Separator (%s) cannot contain alphabet or padding characters",
780          separator);
781      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
782    }
783
784    private transient BaseEncoding upperCase;
785    private transient BaseEncoding lowerCase;
786
787    @Override
788    public BaseEncoding upperCase() {
789      BaseEncoding result = upperCase;
790      if (result == null) {
791        Alphabet upper = alphabet.upperCase();
792        result = upperCase =
793            (upper == alphabet) ? this : newInstance(upper, paddingChar);
794      }
795      return result;
796    }
797
798    @Override
799    public BaseEncoding lowerCase() {
800      BaseEncoding result = lowerCase;
801      if (result == null) {
802        Alphabet lower = alphabet.lowerCase();
803        result = lowerCase =
804            (lower == alphabet) ? this : newInstance(lower, paddingChar);
805      }
806      return result;
807    }
808
809    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
810      return new StandardBaseEncoding(alphabet, paddingChar);
811    }
812
813    @Override
814    public String toString() {
815      StringBuilder builder = new StringBuilder("BaseEncoding.");
816      builder.append(alphabet.toString());
817      if (8 % alphabet.bitsPerChar != 0) {
818        if (paddingChar == null) {
819          builder.append(".omitPadding()");
820        } else {
821          builder.append(".withPadChar('").append(paddingChar).append("')");
822        }
823      }
824      return builder.toString();
825    }
826
827    @Override
828    public boolean equals(@Nullable Object other) {
829      if (other instanceof StandardBaseEncoding) {
830        StandardBaseEncoding that = (StandardBaseEncoding) other;
831        return this.alphabet.equals(that.alphabet)
832            && Objects.equal(this.paddingChar, that.paddingChar);
833      }
834      return false;
835    }
836
837    @Override
838    public int hashCode() {
839      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
840    }
841  }
842
843  static final class Base16Encoding extends StandardBaseEncoding {
844    final char[] encoding = new char[512];
845
846    Base16Encoding(String name, String alphabetChars) {
847      this(new Alphabet(name, alphabetChars.toCharArray()));
848    }
849
850    private Base16Encoding(Alphabet alphabet) {
851      super(alphabet, null);
852      checkArgument(alphabet.chars.length == 16);
853      for (int i = 0; i < 256; ++i) {
854        encoding[i] = alphabet.encode(i >>> 4);
855        encoding[i | 0x100] = alphabet.encode(i & 0xF);
856      }
857    }
858
859    @Override
860    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
861      checkNotNull(target);
862      checkPositionIndexes(off, off + len, bytes.length);
863      for (int i = 0; i < len; ++i) {
864        int b = bytes[off + i] & 0xFF;
865        target.append(encoding[b]);
866        target.append(encoding[b | 0x100]);
867      }
868    }
869
870    @Override
871    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
872      checkNotNull(target);
873      if (chars.length() % 2 == 1) {
874        throw new DecodingException("Invalid input length " + chars.length());
875      }
876      int bytesWritten = 0;
877      for (int i = 0; i < chars.length(); i += 2) {
878        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
879        target[bytesWritten++] = (byte) decoded;
880      }
881      return bytesWritten;
882    }
883
884    @Override
885    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
886      return new Base16Encoding(alphabet);
887    }
888  }
889
890  static final class Base64Encoding extends StandardBaseEncoding {
891    Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) {
892      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
893    }
894
895    private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) {
896      super(alphabet, paddingChar);
897      checkArgument(alphabet.chars.length == 64);
898    }
899
900    @Override
901    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
902      checkNotNull(target);
903      checkPositionIndexes(off, off + len, bytes.length);
904      int i = off;
905      for (int remaining = len; remaining >= 3; remaining -= 3) {
906        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
907        target.append(alphabet.encode(chunk >>> 18));
908        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
909        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
910        target.append(alphabet.encode(chunk & 0x3F));
911      }
912      if (i < off + len) {
913        encodeChunkTo(target, bytes, i, off + len - i);
914      }
915    }
916
917    @Override
918    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
919      checkNotNull(target);
920      chars = padding().trimTrailingFrom(chars);
921      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
922        throw new DecodingException("Invalid input length " + chars.length());
923      }
924      int bytesWritten = 0;
925      for (int i = 0; i < chars.length(); ) {
926        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
927        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
928        target[bytesWritten++] = (byte) (chunk >>> 16);
929        if (i < chars.length()) {
930          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
931          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
932          if (i < chars.length()) {
933            chunk |= alphabet.decode(chars.charAt(i++));
934            target[bytesWritten++] = (byte) (chunk & 0xFF);
935          }
936        }
937      }
938      return bytesWritten;
939    }
940
941    @Override
942    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
943      return new Base64Encoding(alphabet, paddingChar);
944    }
945  }
946
947  @GwtIncompatible // Reader
948  static Reader ignoringReader(final Reader delegate, final CharMatcher toIgnore) {
949    checkNotNull(delegate);
950    checkNotNull(toIgnore);
951    return new Reader() {
952      @Override
953      public int read() throws IOException {
954        int readChar;
955        do {
956          readChar = delegate.read();
957        } while (readChar != -1 && toIgnore.matches((char) readChar));
958        return readChar;
959      }
960
961      @Override
962      public int read(char[] cbuf, int off, int len) throws IOException {
963        throw new UnsupportedOperationException();
964      }
965
966      @Override
967      public void close() throws IOException {
968        delegate.close();
969      }
970    };
971  }
972
973  static Appendable separatingAppendable(
974      final Appendable delegate, final String separator, final int afterEveryChars) {
975    checkNotNull(delegate);
976    checkNotNull(separator);
977    checkArgument(afterEveryChars > 0);
978    return new Appendable() {
979      int charsUntilSeparator = afterEveryChars;
980
981      @Override
982      public Appendable append(char c) throws IOException {
983        if (charsUntilSeparator == 0) {
984          delegate.append(separator);
985          charsUntilSeparator = afterEveryChars;
986        }
987        delegate.append(c);
988        charsUntilSeparator--;
989        return this;
990      }
991
992      @Override
993      public Appendable append(CharSequence chars, int off, int len) throws IOException {
994        throw new UnsupportedOperationException();
995      }
996
997      @Override
998      public Appendable append(CharSequence chars) throws IOException {
999        throw new UnsupportedOperationException();
1000      }
1001    };
1002  }
1003
1004  @GwtIncompatible // Writer
1005  static Writer separatingWriter(
1006      final Writer delegate, final String separator, final int afterEveryChars) {
1007    final Appendable seperatingAppendable =
1008        separatingAppendable(delegate, separator, afterEveryChars);
1009    return new Writer() {
1010      @Override
1011      public void write(int c) throws IOException {
1012        seperatingAppendable.append((char) c);
1013      }
1014
1015      @Override
1016      public void write(char[] chars, int off, int len) throws IOException {
1017        throw new UnsupportedOperationException();
1018      }
1019
1020      @Override
1021      public void flush() throws IOException {
1022        delegate.flush();
1023      }
1024
1025      @Override
1026      public void close() throws IOException {
1027        delegate.close();
1028      }
1029    };
1030  }
1031
1032  static final class SeparatedBaseEncoding extends BaseEncoding {
1033    private final BaseEncoding delegate;
1034    private final String separator;
1035    private final int afterEveryChars;
1036    private final CharMatcher separatorChars;
1037
1038    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1039      this.delegate = checkNotNull(delegate);
1040      this.separator = checkNotNull(separator);
1041      this.afterEveryChars = afterEveryChars;
1042      checkArgument(
1043          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1044      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
1045    }
1046
1047    @Override
1048    CharMatcher padding() {
1049      return delegate.padding();
1050    }
1051
1052    @Override
1053    int maxEncodedSize(int bytes) {
1054      int unseparatedSize = delegate.maxEncodedSize(bytes);
1055      return unseparatedSize
1056          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1057    }
1058
1059    @GwtIncompatible // Writer,OutputStream
1060    @Override
1061    public OutputStream encodingStream(final Writer output) {
1062      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1063    }
1064
1065    @Override
1066    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1067      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1068    }
1069
1070    @Override
1071    int maxDecodedSize(int chars) {
1072      return delegate.maxDecodedSize(chars);
1073    }
1074
1075    @Override
1076    public boolean canDecode(CharSequence chars) {
1077      return delegate.canDecode(separatorChars.removeFrom(chars));
1078    }
1079
1080    @Override
1081    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1082      return delegate.decodeTo(target, separatorChars.removeFrom(chars));
1083    }
1084
1085    @GwtIncompatible // Reader,InputStream
1086    @Override
1087    public InputStream decodingStream(final Reader reader) {
1088      return delegate.decodingStream(ignoringReader(reader, separatorChars));
1089    }
1090
1091    @Override
1092    public BaseEncoding omitPadding() {
1093      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1094    }
1095
1096    @Override
1097    public BaseEncoding withPadChar(char padChar) {
1098      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1099    }
1100
1101    @Override
1102    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1103      throw new UnsupportedOperationException("Already have a separator");
1104    }
1105
1106    @Override
1107    public BaseEncoding upperCase() {
1108      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1109    }
1110
1111    @Override
1112    public BaseEncoding lowerCase() {
1113      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1114    }
1115
1116    @Override
1117    public String toString() {
1118      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1119    }
1120  }
1121}