001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.Ascii;
030import com.google.common.base.CharMatcher;
031import com.google.common.base.Objects;
032import java.io.IOException;
033import java.io.InputStream;
034import java.io.OutputStream;
035import java.io.Reader;
036import java.io.Writer;
037import java.util.Arrays;
038import javax.annotation.Nullable;
039
040/**
041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
042 * strings. This class includes several constants for encoding schemes specified by
043 * <a href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
044 *
045 * <pre>   {@code
046 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
047 *
048 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
049 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
050 *
051 * <p>...returns the ASCII bytes of the string {@code "foo"}.
052 *
053 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
054 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
055 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
056 * behavior:
057 *
058 * <pre>   {@code
059 *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
060 *
061 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
062 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
063 *
064 * <pre>   {@code
065 *   // Do NOT do this
066 *   BaseEncoding hex = BaseEncoding.base16();
067 *   hex.lowerCase(); // does nothing!
068 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
069 *
070 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
071 * {@code x}, but the reverse does not necessarily hold.
072 *
073 * <table>
074 * <tr>
075 * <th>Encoding
076 * <th>Alphabet
077 * <th>{@code char:byte} ratio
078 * <th>Default padding
079 * <th>Comments
080 * <tr>
081 * <td>{@link #base16()}
082 * <td>0-9 A-F
083 * <td>2.00
084 * <td>N/A
085 * <td>Traditional hexadecimal. Defaults to upper case.
086 * <tr>
087 * <td>{@link #base32()}
088 * <td>A-Z 2-7
089 * <td>1.60
090 * <td>=
091 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
092 * <tr>
093 * <td>{@link #base32Hex()}
094 * <td>0-9 A-V
095 * <td>1.60
096 * <td>=
097 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
098 * <tr>
099 * <td>{@link #base64()}
100 * <td>A-Z a-z 0-9 + /
101 * <td>1.33
102 * <td>=
103 * <td>
104 * <tr>
105 * <td>{@link #base64Url()}
106 * <td>A-Z a-z 0-9 - _
107 * <td>1.33
108 * <td>=
109 * <td>Safe to use as filenames, or to pass in URLs without escaping
110 * </table>
111 *
112 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
113 *
114 * @author Louis Wasserman
115 * @since 14.0
116 */
117@GwtCompatible(emulated = true)
118public abstract class BaseEncoding {
119  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
120
121  BaseEncoding() {}
122
123  /**
124   * Exception indicating invalid base-encoded input encountered while decoding.
125   *
126   * @author Louis Wasserman
127   * @since 15.0
128   */
129  public static final class DecodingException extends IOException {
130    DecodingException(String message) {
131      super(message);
132    }
133
134    DecodingException(Throwable cause) {
135      super(cause);
136    }
137  }
138
139  /**
140   * Encodes the specified byte array, and returns the encoded {@code String}.
141   */
142  public String encode(byte[] bytes) {
143    return encode(bytes, 0, bytes.length);
144  }
145
146  /**
147   * Encodes the specified range of the specified byte array, and returns the encoded
148   * {@code String}.
149   */
150  public final String encode(byte[] bytes, int off, int len) {
151    checkPositionIndexes(off, off + len, bytes.length);
152    StringBuilder result = new StringBuilder(maxEncodedSize(len));
153    try {
154      encodeTo(result, bytes, off, len);
155    } catch (IOException impossible) {
156      throw new AssertionError(impossible);
157    }
158    return result.toString();
159  }
160
161  /**
162   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
163   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing
164   * {@code Writer}.
165   */
166  @GwtIncompatible // Writer,OutputStream
167  public abstract OutputStream encodingStream(Writer writer);
168
169  /**
170   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
171   */
172  @GwtIncompatible // ByteSink,CharSink
173  public final ByteSink encodingSink(final CharSink encodedSink) {
174    checkNotNull(encodedSink);
175    return new ByteSink() {
176      @Override
177      public OutputStream openStream() throws IOException {
178        return encodingStream(encodedSink.openStream());
179      }
180    };
181  }
182
183  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
184
185  private static byte[] extract(byte[] result, int length) {
186    if (length == result.length) {
187      return result;
188    } else {
189      byte[] trunc = new byte[length];
190      System.arraycopy(result, 0, trunc, 0, length);
191      return trunc;
192    }
193  }
194
195  /**
196   * Determines whether the specified character sequence is a valid encoded string according to this
197   * encoding.
198   */
199  public abstract boolean canDecode(CharSequence chars);
200
201  /**
202   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
203   * inverse operation to {@link #encode(byte[])}.
204   *
205   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
206   *     encoding.
207   */
208  public final byte[] decode(CharSequence chars) {
209    try {
210      return decodeChecked(chars);
211    } catch (DecodingException badInput) {
212      throw new IllegalArgumentException(badInput);
213    }
214  }
215
216  /**
217   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
218   * inverse operation to {@link #encode(byte[])}.
219   *
220   * @throws DecodingException if the input is not a valid encoded string according to this
221   *     encoding.
222   */ final byte[] decodeChecked(CharSequence chars)
223      throws DecodingException {
224    chars = padding().trimTrailingFrom(chars);
225    byte[] tmp = new byte[maxDecodedSize(chars.length())];
226    int len = decodeTo(tmp, chars);
227    return extract(tmp, len);
228  }
229
230  /**
231   * Returns an {@code InputStream} that decodes base-encoded input from the specified
232   * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific
233   * errors.
234   */
235  @GwtIncompatible // Reader,InputStream
236  public abstract InputStream decodingStream(Reader reader);
237
238  /**
239   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
240   * {@code CharSource}.
241   */
242  @GwtIncompatible // ByteSource,CharSource
243  public final ByteSource decodingSource(final CharSource encodedSource) {
244    checkNotNull(encodedSource);
245    return new ByteSource() {
246      @Override
247      public InputStream openStream() throws IOException {
248        return decodingStream(encodedSource.openStream());
249      }
250    };
251  }
252
253  // Implementations for encoding/decoding
254
255  abstract int maxEncodedSize(int bytes);
256
257  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;
258
259  abstract int maxDecodedSize(int chars);
260
261  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
262
263  abstract CharMatcher padding();
264
265  // Modified encoding generators
266
267  /**
268   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
269   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
270   * section 3.2</a>, Padding of Encoded Data.
271   */
272  public abstract BaseEncoding omitPadding();
273
274  /**
275   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
276   * for padding.
277   *
278   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
279   *     separator
280   */
281  public abstract BaseEncoding withPadChar(char padChar);
282
283  /**
284   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
285   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
286   * are skipped over in decoding.
287   *
288   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
289   *     string, or if {@code n <= 0}
290   * @throws UnsupportedOperationException if this encoding already uses a separator
291   */
292  public abstract BaseEncoding withSeparator(String separator, int n);
293
294  /**
295   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
296   * uppercase letters. Padding and separator characters remain in their original case.
297   *
298   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
299   *     lower-case characters
300   */
301  public abstract BaseEncoding upperCase();
302
303  /**
304   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
305   * lowercase letters. Padding and separator characters remain in their original case.
306   *
307   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
308   *     lower-case characters
309   */
310  public abstract BaseEncoding lowerCase();
311
312  private static final BaseEncoding BASE64 =
313      new Base64Encoding(
314          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
315
316  /**
317   * The "base64" base encoding specified by
318   * <a href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64
319   * Encoding. (This is the same as the base 64 encoding from
320   * <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
321   *
322   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
323   * omitted} or {@linkplain #withPadChar(char) replaced}.
324   *
325   * <p>No line feeds are added by default, as per
326   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
327   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
328   */
329  public static BaseEncoding base64() {
330    return BASE64;
331  }
332
333  private static final BaseEncoding BASE64_URL =
334      new Base64Encoding(
335          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');
336
337  /**
338   * The "base64url" encoding specified by
339   * <a href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
340   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
341   * is the same as the base 64 encoding with URL and filename safe alphabet from
342   * <a href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
343   *
344   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
345   * omitted} or {@linkplain #withPadChar(char) replaced}.
346   *
347   * <p>No line feeds are added by default, as per
348   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
349   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
350   */
351  public static BaseEncoding base64Url() {
352    return BASE64_URL;
353  }
354
355  private static final BaseEncoding BASE32 =
356      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
357
358  /**
359   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
360   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from
361   * <a href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
362   *
363   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
364   * omitted} or {@linkplain #withPadChar(char) replaced}.
365   *
366   * <p>No line feeds are added by default, as per
367   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
368   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
369   */
370  public static BaseEncoding base32() {
371    return BASE32;
372  }
373
374  private static final BaseEncoding BASE32_HEX =
375      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');
376
377  /**
378   * The "base32hex" encoding specified by
379   * <a href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
380   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
381   *
382   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
383   * omitted} or {@linkplain #withPadChar(char) replaced}.
384   *
385   * <p>No line feeds are added by default, as per
386   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
387   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
388   */
389  public static BaseEncoding base32Hex() {
390    return BASE32_HEX;
391  }
392
393  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");
394
395  /**
396   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
397   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from
398   * <a href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
399   * "hexadecimal" format.
400   *
401   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
402   * have no effect.
403   *
404   * <p>No line feeds are added by default, as per
405   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
406   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
407   */
408  public static BaseEncoding base16() {
409    return BASE16;
410  }
411
412  private static final class Alphabet extends CharMatcher {
413    private final String name;
414    // this is meant to be immutable -- don't modify it!
415    private final char[] chars;
416    final int mask;
417    final int bitsPerChar;
418    final int charsPerChunk;
419    final int bytesPerChunk;
420    private final byte[] decodabet;
421    private final boolean[] validPadding;
422
423    Alphabet(String name, char[] chars) {
424      this.name = checkNotNull(name);
425      this.chars = checkNotNull(chars);
426      try {
427        this.bitsPerChar = log2(chars.length, UNNECESSARY);
428      } catch (ArithmeticException e) {
429        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
430      }
431
432      /*
433       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
434       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
435       */
436      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
437      try {
438        this.charsPerChunk = 8 / gcd;
439        this.bytesPerChunk = bitsPerChar / gcd;
440      } catch (ArithmeticException e) {
441        throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e);
442      }
443
444      this.mask = chars.length - 1;
445
446      byte[] decodabet = new byte[Ascii.MAX + 1];
447      Arrays.fill(decodabet, (byte) -1);
448      for (int i = 0; i < chars.length; i++) {
449        char c = chars[i];
450        checkArgument(CharMatcher.ascii().matches(c), "Non-ASCII character: %s", c);
451        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
452        decodabet[c] = (byte) i;
453      }
454      this.decodabet = decodabet;
455
456      boolean[] validPadding = new boolean[charsPerChunk];
457      for (int i = 0; i < bytesPerChunk; i++) {
458        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
459      }
460      this.validPadding = validPadding;
461    }
462
463    char encode(int bits) {
464      return chars[bits];
465    }
466
467    boolean isValidPaddingStartPosition(int index) {
468      return validPadding[index % charsPerChunk];
469    }
470
471    boolean canDecode(char ch) {
472      return ch <= Ascii.MAX && decodabet[ch] != -1;
473    }
474
475    int decode(char ch) throws DecodingException {
476      if (ch > Ascii.MAX || decodabet[ch] == -1) {
477        throw new DecodingException(
478            "Unrecognized character: "
479                + (CharMatcher.invisible().matches(ch) ? "0x" + Integer.toHexString(ch) : ch));
480      }
481      return decodabet[ch];
482    }
483
484    private boolean hasLowerCase() {
485      for (char c : chars) {
486        if (Ascii.isLowerCase(c)) {
487          return true;
488        }
489      }
490      return false;
491    }
492
493    private boolean hasUpperCase() {
494      for (char c : chars) {
495        if (Ascii.isUpperCase(c)) {
496          return true;
497        }
498      }
499      return false;
500    }
501
502    Alphabet upperCase() {
503      if (!hasLowerCase()) {
504        return this;
505      } else {
506        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
507        char[] upperCased = new char[chars.length];
508        for (int i = 0; i < chars.length; i++) {
509          upperCased[i] = Ascii.toUpperCase(chars[i]);
510        }
511        return new Alphabet(name + ".upperCase()", upperCased);
512      }
513    }
514
515    Alphabet lowerCase() {
516      if (!hasUpperCase()) {
517        return this;
518      } else {
519        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
520        char[] lowerCased = new char[chars.length];
521        for (int i = 0; i < chars.length; i++) {
522          lowerCased[i] = Ascii.toLowerCase(chars[i]);
523        }
524        return new Alphabet(name + ".lowerCase()", lowerCased);
525      }
526    }
527
528    @Override
529    public boolean matches(char c) {
530      return CharMatcher.ascii().matches(c) && decodabet[c] != -1;
531    }
532
533    @Override
534    public String toString() {
535      return name;
536    }
537
538    @Override
539    public boolean equals(@Nullable Object other) {
540      if (other instanceof Alphabet) {
541        Alphabet that = (Alphabet) other;
542        return Arrays.equals(this.chars, that.chars);
543      }
544      return false;
545    }
546
547    @Override
548    public int hashCode() {
549      return Arrays.hashCode(chars);
550    }
551  }
552
553  static class StandardBaseEncoding extends BaseEncoding {
554    // TODO(lowasser): provide a useful toString
555    final Alphabet alphabet;
556
557    @Nullable final Character paddingChar;
558
559    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
560      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
561    }
562
563    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
564      this.alphabet = checkNotNull(alphabet);
565      checkArgument(
566          paddingChar == null || !alphabet.matches(paddingChar),
567          "Padding character %s was already in alphabet",
568          paddingChar);
569      this.paddingChar = paddingChar;
570    }
571
572    @Override
573    CharMatcher padding() {
574      return (paddingChar == null) ? CharMatcher.none() : CharMatcher.is(paddingChar.charValue());
575    }
576
577    @Override
578    int maxEncodedSize(int bytes) {
579      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
580    }
581
582    @GwtIncompatible // Writer,OutputStream
583    @Override
584    public OutputStream encodingStream(final Writer out) {
585      checkNotNull(out);
586      return new OutputStream() {
587        int bitBuffer = 0;
588        int bitBufferLength = 0;
589        int writtenChars = 0;
590
591        @Override
592        public void write(int b) throws IOException {
593          bitBuffer <<= 8;
594          bitBuffer |= b & 0xFF;
595          bitBufferLength += 8;
596          while (bitBufferLength >= alphabet.bitsPerChar) {
597            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
598            out.write(alphabet.encode(charIndex));
599            writtenChars++;
600            bitBufferLength -= alphabet.bitsPerChar;
601          }
602        }
603
604        @Override
605        public void flush() throws IOException {
606          out.flush();
607        }
608
609        @Override
610        public void close() throws IOException {
611          if (bitBufferLength > 0) {
612            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
613            out.write(alphabet.encode(charIndex));
614            writtenChars++;
615            if (paddingChar != null) {
616              while (writtenChars % alphabet.charsPerChunk != 0) {
617                out.write(paddingChar.charValue());
618                writtenChars++;
619              }
620            }
621          }
622          out.close();
623        }
624      };
625    }
626
627    @Override
628    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
629      checkNotNull(target);
630      checkPositionIndexes(off, off + len, bytes.length);
631      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
632        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
633      }
634    }
635
636    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
637      checkNotNull(target);
638      checkPositionIndexes(off, off + len, bytes.length);
639      checkArgument(len <= alphabet.bytesPerChunk);
640      long bitBuffer = 0;
641      for (int i = 0; i < len; ++i) {
642        bitBuffer |= bytes[off + i] & 0xFF;
643        bitBuffer <<= 8; // Add additional zero byte in the end.
644      }
645      // Position of first character is length of bitBuffer minus bitsPerChar.
646      final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
647      int bitsProcessed = 0;
648      while (bitsProcessed < len * 8) {
649        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
650        target.append(alphabet.encode(charIndex));
651        bitsProcessed += alphabet.bitsPerChar;
652      }
653      if (paddingChar != null) {
654        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
655          target.append(paddingChar.charValue());
656          bitsProcessed += alphabet.bitsPerChar;
657        }
658      }
659    }
660
661    @Override
662    int maxDecodedSize(int chars) {
663      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
664    }
665
666    @Override
667    public boolean canDecode(CharSequence chars) {
668      chars = padding().trimTrailingFrom(chars);
669      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
670        return false;
671      }
672      for (int i = 0; i < chars.length(); i++) {
673        if (!alphabet.canDecode(chars.charAt(i))) {
674          return false;
675        }
676      }
677      return true;
678    }
679
680    @Override
681    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
682      checkNotNull(target);
683      chars = padding().trimTrailingFrom(chars);
684      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
685        throw new DecodingException("Invalid input length " + chars.length());
686      }
687      int bytesWritten = 0;
688      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
689        long chunk = 0;
690        int charsProcessed = 0;
691        for (int i = 0; i < alphabet.charsPerChunk; i++) {
692          chunk <<= alphabet.bitsPerChar;
693          if (charIdx + i < chars.length()) {
694            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
695          }
696        }
697        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
698        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
699          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
700        }
701      }
702      return bytesWritten;
703    }
704
705    @GwtIncompatible // Reader,InputStream
706    @Override
707    public InputStream decodingStream(final Reader reader) {
708      checkNotNull(reader);
709      return new InputStream() {
710        int bitBuffer = 0;
711        int bitBufferLength = 0;
712        int readChars = 0;
713        boolean hitPadding = false;
714        final CharMatcher paddingMatcher = padding();
715
716        @Override
717        public int read() throws IOException {
718          while (true) {
719            int readChar = reader.read();
720            if (readChar == -1) {
721              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
722                throw new DecodingException("Invalid input length " + readChars);
723              }
724              return -1;
725            }
726            readChars++;
727            char ch = (char) readChar;
728            if (paddingMatcher.matches(ch)) {
729              if (!hitPadding
730                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
731                throw new DecodingException("Padding cannot start at index " + readChars);
732              }
733              hitPadding = true;
734            } else if (hitPadding) {
735              throw new DecodingException(
736                  "Expected padding character but found '" + ch + "' at index " + readChars);
737            } else {
738              bitBuffer <<= alphabet.bitsPerChar;
739              bitBuffer |= alphabet.decode(ch);
740              bitBufferLength += alphabet.bitsPerChar;
741
742              if (bitBufferLength >= 8) {
743                bitBufferLength -= 8;
744                return (bitBuffer >> bitBufferLength) & 0xFF;
745              }
746            }
747          }
748        }
749
750        @Override
751        public void close() throws IOException {
752          reader.close();
753        }
754      };
755    }
756
757    @Override
758    public BaseEncoding omitPadding() {
759      return (paddingChar == null) ? this : newInstance(alphabet, null);
760    }
761
762    @Override
763    public BaseEncoding withPadChar(char padChar) {
764      if (8 % alphabet.bitsPerChar == 0
765          || (paddingChar != null && paddingChar.charValue() == padChar)) {
766        return this;
767      } else {
768        return newInstance(alphabet, padChar);
769      }
770    }
771
772    @Override
773    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
774      checkArgument(
775          padding().or(alphabet).matchesNoneOf(separator),
776          "Separator (%s) cannot contain alphabet or padding characters",
777          separator);
778      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
779    }
780
781    private transient BaseEncoding upperCase;
782    private transient BaseEncoding lowerCase;
783
784    @Override
785    public BaseEncoding upperCase() {
786      BaseEncoding result = upperCase;
787      if (result == null) {
788        Alphabet upper = alphabet.upperCase();
789        result = upperCase =
790            (upper == alphabet) ? this : newInstance(upper, paddingChar);
791      }
792      return result;
793    }
794
795    @Override
796    public BaseEncoding lowerCase() {
797      BaseEncoding result = lowerCase;
798      if (result == null) {
799        Alphabet lower = alphabet.lowerCase();
800        result = lowerCase =
801            (lower == alphabet) ? this : newInstance(lower, paddingChar);
802      }
803      return result;
804    }
805
806    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
807      return new StandardBaseEncoding(alphabet, paddingChar);
808    }
809
810    @Override
811    public String toString() {
812      StringBuilder builder = new StringBuilder("BaseEncoding.");
813      builder.append(alphabet.toString());
814      if (8 % alphabet.bitsPerChar != 0) {
815        if (paddingChar == null) {
816          builder.append(".omitPadding()");
817        } else {
818          builder.append(".withPadChar('").append(paddingChar).append("')");
819        }
820      }
821      return builder.toString();
822    }
823
824    @Override
825    public boolean equals(@Nullable Object other) {
826      if (other instanceof StandardBaseEncoding) {
827        StandardBaseEncoding that = (StandardBaseEncoding) other;
828        return this.alphabet.equals(that.alphabet)
829            && Objects.equal(this.paddingChar, that.paddingChar);
830      }
831      return false;
832    }
833
834    @Override
835    public int hashCode() {
836      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
837    }
838  }
839
840  static final class Base16Encoding extends StandardBaseEncoding {
841    final char[] encoding = new char[512];
842
843    Base16Encoding(String name, String alphabetChars) {
844      this(new Alphabet(name, alphabetChars.toCharArray()));
845    }
846
847    private Base16Encoding(Alphabet alphabet) {
848      super(alphabet, null);
849      checkArgument(alphabet.chars.length == 16);
850      for (int i = 0; i < 256; ++i) {
851        encoding[i] = alphabet.encode(i >>> 4);
852        encoding[i | 0x100] = alphabet.encode(i & 0xF);
853      }
854    }
855
856    @Override
857    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
858      checkNotNull(target);
859      checkPositionIndexes(off, off + len, bytes.length);
860      for (int i = 0; i < len; ++i) {
861        int b = bytes[off + i] & 0xFF;
862        target.append(encoding[b]);
863        target.append(encoding[b | 0x100]);
864      }
865    }
866
867    @Override
868    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
869      checkNotNull(target);
870      if (chars.length() % 2 == 1) {
871        throw new DecodingException("Invalid input length " + chars.length());
872      }
873      int bytesWritten = 0;
874      for (int i = 0; i < chars.length(); i += 2) {
875        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
876        target[bytesWritten++] = (byte) decoded;
877      }
878      return bytesWritten;
879    }
880
881    @Override
882    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
883      return new Base16Encoding(alphabet);
884    }
885  }
886
887  static final class Base64Encoding extends StandardBaseEncoding {
888    Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) {
889      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
890    }
891
892    private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) {
893      super(alphabet, paddingChar);
894      checkArgument(alphabet.chars.length == 64);
895    }
896
897    @Override
898    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
899      checkNotNull(target);
900      checkPositionIndexes(off, off + len, bytes.length);
901      int i = off;
902      for (int remaining = len; remaining >= 3; remaining -= 3) {
903        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
904        target.append(alphabet.encode(chunk >>> 18));
905        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
906        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
907        target.append(alphabet.encode(chunk & 0x3F));
908      }
909      if (i < off + len) {
910        encodeChunkTo(target, bytes, i, off + len - i);
911      }
912    }
913
914    @Override
915    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
916      checkNotNull(target);
917      chars = padding().trimTrailingFrom(chars);
918      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
919        throw new DecodingException("Invalid input length " + chars.length());
920      }
921      int bytesWritten = 0;
922      for (int i = 0; i < chars.length(); ) {
923        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
924        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
925        target[bytesWritten++] = (byte) (chunk >>> 16);
926        if (i < chars.length()) {
927          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
928          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
929          if (i < chars.length()) {
930            chunk |= alphabet.decode(chars.charAt(i++));
931            target[bytesWritten++] = (byte) (chunk & 0xFF);
932          }
933        }
934      }
935      return bytesWritten;
936    }
937
938    @Override
939    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
940      return new Base64Encoding(alphabet, paddingChar);
941    }
942  }
943
944  @GwtIncompatible // Reader
945  static Reader ignoringReader(final Reader delegate, final CharMatcher toIgnore) {
946    checkNotNull(delegate);
947    checkNotNull(toIgnore);
948    return new Reader() {
949      @Override
950      public int read() throws IOException {
951        int readChar;
952        do {
953          readChar = delegate.read();
954        } while (readChar != -1 && toIgnore.matches((char) readChar));
955        return readChar;
956      }
957
958      @Override
959      public int read(char[] cbuf, int off, int len) throws IOException {
960        throw new UnsupportedOperationException();
961      }
962
963      @Override
964      public void close() throws IOException {
965        delegate.close();
966      }
967    };
968  }
969
970  static Appendable separatingAppendable(
971      final Appendable delegate, final String separator, final int afterEveryChars) {
972    checkNotNull(delegate);
973    checkNotNull(separator);
974    checkArgument(afterEveryChars > 0);
975    return new Appendable() {
976      int charsUntilSeparator = afterEveryChars;
977
978      @Override
979      public Appendable append(char c) throws IOException {
980        if (charsUntilSeparator == 0) {
981          delegate.append(separator);
982          charsUntilSeparator = afterEveryChars;
983        }
984        delegate.append(c);
985        charsUntilSeparator--;
986        return this;
987      }
988
989      @Override
990      public Appendable append(CharSequence chars, int off, int len) throws IOException {
991        throw new UnsupportedOperationException();
992      }
993
994      @Override
995      public Appendable append(CharSequence chars) throws IOException {
996        throw new UnsupportedOperationException();
997      }
998    };
999  }
1000
1001  @GwtIncompatible // Writer
1002  static Writer separatingWriter(
1003      final Writer delegate, final String separator, final int afterEveryChars) {
1004    final Appendable seperatingAppendable =
1005        separatingAppendable(delegate, separator, afterEveryChars);
1006    return new Writer() {
1007      @Override
1008      public void write(int c) throws IOException {
1009        seperatingAppendable.append((char) c);
1010      }
1011
1012      @Override
1013      public void write(char[] chars, int off, int len) throws IOException {
1014        throw new UnsupportedOperationException();
1015      }
1016
1017      @Override
1018      public void flush() throws IOException {
1019        delegate.flush();
1020      }
1021
1022      @Override
1023      public void close() throws IOException {
1024        delegate.close();
1025      }
1026    };
1027  }
1028
1029  static final class SeparatedBaseEncoding extends BaseEncoding {
1030    private final BaseEncoding delegate;
1031    private final String separator;
1032    private final int afterEveryChars;
1033    private final CharMatcher separatorChars;
1034
1035    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1036      this.delegate = checkNotNull(delegate);
1037      this.separator = checkNotNull(separator);
1038      this.afterEveryChars = afterEveryChars;
1039      checkArgument(
1040          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1041      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
1042    }
1043
1044    @Override
1045    CharMatcher padding() {
1046      return delegate.padding();
1047    }
1048
1049    @Override
1050    int maxEncodedSize(int bytes) {
1051      int unseparatedSize = delegate.maxEncodedSize(bytes);
1052      return unseparatedSize
1053          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1054    }
1055
1056    @GwtIncompatible // Writer,OutputStream
1057    @Override
1058    public OutputStream encodingStream(final Writer output) {
1059      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1060    }
1061
1062    @Override
1063    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1064      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1065    }
1066
1067    @Override
1068    int maxDecodedSize(int chars) {
1069      return delegate.maxDecodedSize(chars);
1070    }
1071
1072    @Override
1073    public boolean canDecode(CharSequence chars) {
1074      return delegate.canDecode(separatorChars.removeFrom(chars));
1075    }
1076
1077    @Override
1078    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1079      return delegate.decodeTo(target, separatorChars.removeFrom(chars));
1080    }
1081
1082    @GwtIncompatible // Reader,InputStream
1083    @Override
1084    public InputStream decodingStream(final Reader reader) {
1085      return delegate.decodingStream(ignoringReader(reader, separatorChars));
1086    }
1087
1088    @Override
1089    public BaseEncoding omitPadding() {
1090      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1091    }
1092
1093    @Override
1094    public BaseEncoding withPadChar(char padChar) {
1095      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1096    }
1097
1098    @Override
1099    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1100      throw new UnsupportedOperationException("Already have a separator");
1101    }
1102
1103    @Override
1104    public BaseEncoding upperCase() {
1105      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1106    }
1107
1108    @Override
1109    public BaseEncoding lowerCase() {
1110      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1111    }
1112
1113    @Override
1114    public String toString() {
1115      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1116    }
1117  }
1118}