Source code

001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.Ascii;
030import com.google.common.base.Objects;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.OutputStream;
034import java.io.Reader;
035import java.io.Writer;
036import java.util.Arrays;
037import javax.annotation.Nullable;
038
039/**
040 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
041 * strings. This class includes several constants for encoding schemes specified by
042 * <a href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
043 *
044 * <pre>   {@code
045 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
046 *
047 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
048 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
049 *
050 * <p>...returns the ASCII bytes of the string {@code "foo"}.
051 *
052 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
053 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
054 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
055 * behavior:
056 *
057 * <pre>   {@code
058 *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
059 *
060 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
061 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
062 *
063 * <pre>   {@code
064 *   // Do NOT do this
065 *   BaseEncoding hex = BaseEncoding.base16();
066 *   hex.lowerCase(); // does nothing!
067 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
068 *
069 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
070 * {@code x}, but the reverse does not necessarily hold.
071 *
072 * <table>
073 * <caption>Encodings</caption>
074 * <tr>
075 * <th>Encoding
076 * <th>Alphabet
077 * <th>{@code char:byte} ratio
078 * <th>Default padding
079 * <th>Comments
080 * <tr>
081 * <td>{@link #base16()}
082 * <td>0-9 A-F
083 * <td>2.00
084 * <td>N/A
085 * <td>Traditional hexadecimal. Defaults to upper case.
086 * <tr>
087 * <td>{@link #base32()}
088 * <td>A-Z 2-7
089 * <td>1.60
090 * <td>=
091 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
092 * <tr>
093 * <td>{@link #base32Hex()}
094 * <td>0-9 A-V
095 * <td>1.60
096 * <td>=
097 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
098 * <tr>
099 * <td>{@link #base64()}
100 * <td>A-Z a-z 0-9 + /
101 * <td>1.33
102 * <td>=
103 * <td>
104 * <tr>
105 * <td>{@link #base64Url()}
106 * <td>A-Z a-z 0-9 - _
107 * <td>1.33
108 * <td>=
109 * <td>Safe to use as filenames, or to pass in URLs without escaping
110 * </table>
111 *
112 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
113 *
114 * @author Louis Wasserman
115 * @since 14.0
116 */
117@GwtCompatible(emulated = true)
118public abstract class BaseEncoding {
119  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
120
  // No access modifier: the constructor is package-private, so subclasses must live in this package.
  BaseEncoding() {}
122
123  /**
124   * Exception indicating invalid base-encoded input encountered while decoding.
125   *
126   * @author Louis Wasserman
127   * @since 15.0
128   */
  public static final class DecodingException extends IOException {
    // Both constructors are package-private; code outside this package can catch the exception
    // but cannot construct it.

    // Creates an exception carrying only a description of the invalid input.
    DecodingException(String message) {
      super(message);
    }

    // Creates an exception that wraps an underlying failure as its cause.
    DecodingException(Throwable cause) {
      super(cause);
    }
  }
138
139  /**
140   * Encodes the specified byte array, and returns the encoded {@code String}.
141   */
142  public String encode(byte[] bytes) {
143    return encode(bytes, 0, bytes.length);
144  }
145
146  /**
147   * Encodes the specified range of the specified byte array, and returns the encoded
148   * {@code String}.
149   */
150  public final String encode(byte[] bytes, int off, int len) {
151    checkPositionIndexes(off, off + len, bytes.length);
152    StringBuilder result = new StringBuilder(maxEncodedSize(len));
153    try {
154      encodeTo(result, bytes, off, len);
155    } catch (IOException impossible) {
156      throw new AssertionError(impossible);
157    }
158    return result.toString();
159  }
160
  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing
   * {@code Writer}.
   *
   * @param writer the destination that receives the encoded characters
   * @return a stream whose written bytes are encoded into {@code writer}
   */
  @GwtIncompatible // Writer,OutputStream
  public abstract OutputStream encodingStream(Writer writer);
168
169  /**
170   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
171   */
172  @GwtIncompatible // ByteSink,CharSink
173  public final ByteSink encodingSink(final CharSink encodedSink) {
174    checkNotNull(encodedSink);
175    return new ByteSink() {
176      @Override
177      public OutputStream openStream() throws IOException {
178        return encodingStream(encodedSink.openStream());
179      }
180    };
181  }
182
183  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
184
185  private static byte[] extract(byte[] result, int length) {
186    if (length == result.length) {
187      return result;
188    } else {
189      byte[] trunc = new byte[length];
190      System.arraycopy(result, 0, trunc, 0, length);
191      return trunc;
192    }
193  }
194
  /**
   * Determines whether the specified character sequence is a valid encoded string according to this
   * encoding.
   *
   * @param chars the candidate encoded text
   * @return {@code true} if {@code chars} is valid encoded input for this encoding
   * @since 20.0
   */
  public abstract boolean canDecode(CharSequence chars);
202
203  /**
204   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
205   * inverse operation to {@link #encode(byte[])}.
206   *
207   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
208   *     encoding.
209   */
210  public final byte[] decode(CharSequence chars) {
211    try {
212      return decodeChecked(chars);
213    } catch (DecodingException badInput) {
214      throw new IllegalArgumentException(badInput);
215    }
216  }
217
218  /**
219   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
220   * inverse operation to {@link #encode(byte[])}.
221   *
222   * @throws DecodingException if the input is not a valid encoded string according to this
223   *     encoding.
224   */ final byte[] decodeChecked(CharSequence chars)
225      throws DecodingException {
226    chars = trimTrailingPadding(chars);
227    byte[] tmp = new byte[maxDecodedSize(chars.length())];
228    int len = decodeTo(tmp, chars);
229    return extract(tmp, len);
230  }
231
  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified
   * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific
   * errors.
   *
   * @param reader the source of encoded characters
   * @return a stream yielding the bytes decoded from {@code reader}
   */
  @GwtIncompatible // Reader,InputStream
  public abstract InputStream decodingStream(Reader reader);
239
240  /**
241   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
242   * {@code CharSource}.
243   */
244  @GwtIncompatible // ByteSource,CharSource
245  public final ByteSource decodingSource(final CharSource encodedSource) {
246    checkNotNull(encodedSource);
247    return new ByteSource() {
248      @Override
249      public InputStream openStream() throws IOException {
250        return decodingStream(encodedSource.openStream());
251      }
252    };
253  }
254
255  // Implementations for encoding/decoding
256
  /**
   * Returns an upper bound on the number of characters produced when encoding {@code bytes} input
   * bytes. {@code encode(byte[], int, int)} uses it as the initial capacity of its
   * {@code StringBuilder}.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Appends the encoded form of {@code len} bytes of {@code bytes}, starting at {@code off}, to
   * {@code target}.
   *
   * @throws IOException declared so implementations can write to arbitrary {@code Appendable}s
   */
  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;

  /**
   * Returns an upper bound on the number of bytes produced when decoding {@code chars} characters.
   * {@code decodeChecked} uses it to size the temporary output buffer.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Decodes {@code chars} into {@code target} and returns a byte count; {@code decodeChecked}
   * treats the return value as the number of leading bytes of {@code target} that hold the result.
   *
   * @throws DecodingException if {@code chars} is not valid encoded input
   */
  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
264
265  CharSequence trimTrailingPadding(CharSequence chars) {
266    return checkNotNull(chars);
267  }
268
269  // Modified encoding generators
270
  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * @return an encoding that omits padding; this instance itself is never modified
   */
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @param padChar the character to use for padding in the returned encoding
   * @return an encoding that pads with {@code padChar}
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *     separator
   */
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @param separator the string inserted between groups of encoded characters
   * @param n the number of encoded characters between consecutive separators
   * @return an encoding that inserts {@code separator} after every {@code n} characters
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *     string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @return an encoding whose alphabet letters are upper case
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @return an encoding whose alphabet letters are lower case
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding lowerCase();
315
  // Single shared instance returned by base64(); created once when the class is initialized.
  private static final BaseEncoding BASE64 =
      new Base64Encoding(
          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64
   * Encoding. (This is the same as the base 64 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64 encoding instance (the same object on every call)
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
336
  // Single shared instance returned by base64Url(); differs from BASE64 only in its last two
  // alphabet characters ('-' and '_' instead of '+' and '/').
  private static final BaseEncoding BASE64_URL =
      new Base64Encoding(
          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
   * is the same as the base 64 encoding with URL and filename safe alphabet from
   * <a href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64url encoding instance (the same object on every call)
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
358
  // Single shared instance returned by base32(); created once when the class is initialized.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32 encoding instance (the same object on every call)
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
377
  // Single shared instance returned by base32Hex(); created once when the class is initialized.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32hex encoding instance (the same object on every call)
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
396
  // Single shared instance returned by base16(); unlike the other constants it is built without a
  // padding character argument.
  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");

  /**
   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
   * have no effect.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base16 encoding instance (the same object on every call)
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
415
416  private static final class Alphabet {
417    private final String name;
418    // this is meant to be immutable -- don't modify it!
419    private final char[] chars;
420    final int mask;
421    final int bitsPerChar;
422    final int charsPerChunk;
423    final int bytesPerChunk;
424    private final byte[] decodabet;
425    private final boolean[] validPadding;
426
427    Alphabet(String name, char[] chars) {
428      this.name = checkNotNull(name);
429      this.chars = checkNotNull(chars);
430      try {
431        this.bitsPerChar = log2(chars.length, UNNECESSARY);
432      } catch (ArithmeticException e) {
433        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
434      }
435
436      /*
437       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
438       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
439       */
440      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
441      try {
442        this.charsPerChunk = 8 / gcd;
443        this.bytesPerChunk = bitsPerChar / gcd;
444      } catch (ArithmeticException e) {
445        throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e);
446      }
447
448      this.mask = chars.length - 1;
449
450      byte[] decodabet = new byte[Ascii.MAX + 1];
451      Arrays.fill(decodabet, (byte) -1);
452      for (int i = 0; i < chars.length; i++) {
453        char c = chars[i];
454        checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
455        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
456        decodabet[c] = (byte) i;
457      }
458      this.decodabet = decodabet;
459
460      boolean[] validPadding = new boolean[charsPerChunk];
461      for (int i = 0; i < bytesPerChunk; i++) {
462        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
463      }
464      this.validPadding = validPadding;
465    }
466
467    char encode(int bits) {
468      return chars[bits];
469    }
470
471    boolean isValidPaddingStartPosition(int index) {
472      return validPadding[index % charsPerChunk];
473    }
474
475    boolean canDecode(char ch) {
476      return ch <= Ascii.MAX && decodabet[ch] != -1;
477    }
478
479    int decode(char ch) throws DecodingException {
480      if (ch > Ascii.MAX) {
481        throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
482      }
483      int result = decodabet[ch];
484      if (result == -1) {
485        if (ch <= 0x20 || ch == Ascii.MAX) {
486          throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
487        } else {
488          throw new DecodingException("Unrecognized character: " + ch);
489        }
490      }
491      return result;
492    }
493
494    private boolean hasLowerCase() {
495      for (char c : chars) {
496        if (Ascii.isLowerCase(c)) {
497          return true;
498        }
499      }
500      return false;
501    }
502
503    private boolean hasUpperCase() {
504      for (char c : chars) {
505        if (Ascii.isUpperCase(c)) {
506          return true;
507        }
508      }
509      return false;
510    }
511
512    Alphabet upperCase() {
513      if (!hasLowerCase()) {
514        return this;
515      } else {
516        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
517        char[] upperCased = new char[chars.length];
518        for (int i = 0; i < chars.length; i++) {
519          upperCased[i] = Ascii.toUpperCase(chars[i]);
520        }
521        return new Alphabet(name + ".upperCase()", upperCased);
522      }
523    }
524
525    Alphabet lowerCase() {
526      if (!hasUpperCase()) {
527        return this;
528      } else {
529        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
530        char[] lowerCased = new char[chars.length];
531        for (int i = 0; i < chars.length; i++) {
532          lowerCased[i] = Ascii.toLowerCase(chars[i]);
533        }
534        return new Alphabet(name + ".lowerCase()", lowerCased);
535      }
536    }
537
538    public boolean matches(char c) {
539      return c < decodabet.length && decodabet[c] != -1;
540    }
541
542    @Override
543    public String toString() {
544      return name;
545    }
546
547    @Override
548    public boolean equals(@Nullable Object other) {
549      if (other instanceof Alphabet) {
550        Alphabet that = (Alphabet) other;
551        return Arrays.equals(this.chars, that.chars);
552      }
553      return false;
554    }
555
556    @Override
557    public int hashCode() {
558      return Arrays.hashCode(chars);
559    }
560  }
561
562  static class StandardBaseEncoding extends BaseEncoding {
563    // TODO(lowasser): provide a useful toString
564    final Alphabet alphabet;
565
566    @Nullable final Character paddingChar;
567
568    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
569      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
570    }
571
572    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
573      this.alphabet = checkNotNull(alphabet);
574      checkArgument(
575          paddingChar == null || !alphabet.matches(paddingChar),
576          "Padding character %s was already in alphabet",
577          paddingChar);
578      this.paddingChar = paddingChar;
579    }
580
581    @Override
582    int maxEncodedSize(int bytes) {
583      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
584    }
585
586    @GwtIncompatible // Writer,OutputStream
587    @Override
588    public OutputStream encodingStream(final Writer out) {
589      checkNotNull(out);
590      return new OutputStream() {
591        int bitBuffer = 0;
592        int bitBufferLength = 0;
593        int writtenChars = 0;
594
595        @Override
596        public void write(int b) throws IOException {
597          bitBuffer <<= 8;
598          bitBuffer |= b & 0xFF;
599          bitBufferLength += 8;
600          while (bitBufferLength >= alphabet.bitsPerChar) {
601            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
602            out.write(alphabet.encode(charIndex));
603            writtenChars++;
604            bitBufferLength -= alphabet.bitsPerChar;
605          }
606        }
607
608        @Override
609        public void flush() throws IOException {
610          out.flush();
611        }
612
613        @Override
614        public void close() throws IOException {
615          if (bitBufferLength > 0) {
616            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
617            out.write(alphabet.encode(charIndex));
618            writtenChars++;
619            if (paddingChar != null) {
620              while (writtenChars % alphabet.charsPerChunk != 0) {
621                out.write(paddingChar.charValue());
622                writtenChars++;
623              }
624            }
625          }
626          out.close();
627        }
628      };
629    }
630
631    @Override
632    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
633      checkNotNull(target);
634      checkPositionIndexes(off, off + len, bytes.length);
635      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
636        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
637      }
638    }
639
640    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
641      checkNotNull(target);
642      checkPositionIndexes(off, off + len, bytes.length);
643      checkArgument(len <= alphabet.bytesPerChunk);
644      long bitBuffer = 0;
645      for (int i = 0; i < len; ++i) {
646        bitBuffer |= bytes[off + i] & 0xFF;
647        bitBuffer <<= 8; // Add additional zero byte in the end.
648      }
649      // Position of first character is length of bitBuffer minus bitsPerChar.
650      final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
651      int bitsProcessed = 0;
652      while (bitsProcessed < len * 8) {
653        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
654        target.append(alphabet.encode(charIndex));
655        bitsProcessed += alphabet.bitsPerChar;
656      }
657      if (paddingChar != null) {
658        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
659          target.append(paddingChar.charValue());
660          bitsProcessed += alphabet.bitsPerChar;
661        }
662      }
663    }
664
665    @Override
666    int maxDecodedSize(int chars) {
667      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
668    }
669
670    @Override
671    CharSequence trimTrailingPadding(CharSequence chars) {
672      checkNotNull(chars);
673      if (paddingChar == null) {
674        return chars;
675      }
676      char padChar = paddingChar.charValue();
677      int l;
678      for (l = chars.length() - 1; l >= 0; l--) {
679        if (chars.charAt(l) != padChar) {
680          break;
681        }
682      }
683      return chars.subSequence(0, l + 1);
684    }
685
686    @Override
687    public boolean canDecode(CharSequence chars) {
688      checkNotNull(chars);
689      chars = trimTrailingPadding(chars);
690      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
691        return false;
692      }
693      for (int i = 0; i < chars.length(); i++) {
694        if (!alphabet.canDecode(chars.charAt(i))) {
695          return false;
696        }
697      }
698      return true;
699    }
700
701    @Override
702    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
703      checkNotNull(target);
704      chars = trimTrailingPadding(chars);
705      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
706        throw new DecodingException("Invalid input length " + chars.length());
707      }
708      int bytesWritten = 0;
709      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
710        long chunk = 0;
711        int charsProcessed = 0;
712        for (int i = 0; i < alphabet.charsPerChunk; i++) {
713          chunk <<= alphabet.bitsPerChar;
714          if (charIdx + i < chars.length()) {
715            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
716          }
717        }
718        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
719        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
720          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
721        }
722      }
723      return bytesWritten;
724    }
725
726    @Override
727    @GwtIncompatible // Reader,InputStream
728    public InputStream decodingStream(final Reader reader) {
729      checkNotNull(reader);
730      return new InputStream() {
731        int bitBuffer = 0;
732        int bitBufferLength = 0;
733        int readChars = 0;
734        boolean hitPadding = false;
735
736        @Override
737        public int read() throws IOException {
738          while (true) {
739            int readChar = reader.read();
740            if (readChar == -1) {
741              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
742                throw new DecodingException("Invalid input length " + readChars);
743              }
744              return -1;
745            }
746            readChars++;
747            char ch = (char) readChar;
748            if (paddingChar != null && paddingChar.charValue() == ch) {
749              if (!hitPadding
750                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
751                throw new DecodingException("Padding cannot start at index " + readChars);
752              }
753              hitPadding = true;
754            } else if (hitPadding) {
755              throw new DecodingException(
756                  "Expected padding character but found '" + ch + "' at index " + readChars);
757            } else {
758              bitBuffer <<= alphabet.bitsPerChar;
759              bitBuffer |= alphabet.decode(ch);
760              bitBufferLength += alphabet.bitsPerChar;
761
762              if (bitBufferLength >= 8) {
763                bitBufferLength -= 8;
764                return (bitBuffer >> bitBufferLength) & 0xFF;
765              }
766            }
767          }
768        }
769
770        @Override
771        public void close() throws IOException {
772          reader.close();
773        }
774      };
775    }
776
777    @Override
778    public BaseEncoding omitPadding() {
779      return (paddingChar == null) ? this : newInstance(alphabet, null);
780    }
781
782    @Override
783    public BaseEncoding withPadChar(char padChar) {
784      if (8 % alphabet.bitsPerChar == 0
785          || (paddingChar != null && paddingChar.charValue() == padChar)) {
786        return this;
787      } else {
788        return newInstance(alphabet, padChar);
789      }
790    }
791
792    @Override
793    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
794      for (int i = 0; i < separator.length(); i++) {
795        checkArgument(
796            !alphabet.matches(separator.charAt(i)),
797            "Separator (%s) cannot contain alphabet characters",
798            separator);
799      }
800      if (paddingChar != null) {
801        checkArgument(
802            separator.indexOf(paddingChar.charValue()) < 0,
803            "Separator (%s) cannot contain padding character",
804            separator);
805      }
806      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
807    }
808
809    private transient BaseEncoding upperCase;
810    private transient BaseEncoding lowerCase;
811
812    @Override
813    public BaseEncoding upperCase() {
814      BaseEncoding result = upperCase;
815      if (result == null) {
816        Alphabet upper = alphabet.upperCase();
817        result = upperCase =
818            (upper == alphabet) ? this : newInstance(upper, paddingChar);
819      }
820      return result;
821    }
822
823    @Override
824    public BaseEncoding lowerCase() {
825      BaseEncoding result = lowerCase;
826      if (result == null) {
827        Alphabet lower = alphabet.lowerCase();
828        result = lowerCase =
829            (lower == alphabet) ? this : newInstance(lower, paddingChar);
830      }
831      return result;
832    }
833
834    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
835      return new StandardBaseEncoding(alphabet, paddingChar);
836    }
837
838    @Override
839    public String toString() {
840      StringBuilder builder = new StringBuilder("BaseEncoding.");
841      builder.append(alphabet.toString());
842      if (8 % alphabet.bitsPerChar != 0) {
843        if (paddingChar == null) {
844          builder.append(".omitPadding()");
845        } else {
846          builder.append(".withPadChar('").append(paddingChar).append("')");
847        }
848      }
849      return builder.toString();
850    }
851
852    @Override
853    public boolean equals(@Nullable Object other) {
854      if (other instanceof StandardBaseEncoding) {
855        StandardBaseEncoding that = (StandardBaseEncoding) other;
856        return this.alphabet.equals(that.alphabet)
857            && Objects.equal(this.paddingChar, that.paddingChar);
858      }
859      return false;
860    }
861
862    @Override
863    public int hashCode() {
864      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
865    }
866  }
867
868  static final class Base16Encoding extends StandardBaseEncoding {
869    final char[] encoding = new char[512];
870
871    Base16Encoding(String name, String alphabetChars) {
872      this(new Alphabet(name, alphabetChars.toCharArray()));
873    }
874
875    private Base16Encoding(Alphabet alphabet) {
876      super(alphabet, null);
877      checkArgument(alphabet.chars.length == 16);
878      for (int i = 0; i < 256; ++i) {
879        encoding[i] = alphabet.encode(i >>> 4);
880        encoding[i | 0x100] = alphabet.encode(i & 0xF);
881      }
882    }
883
884    @Override
885    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
886      checkNotNull(target);
887      checkPositionIndexes(off, off + len, bytes.length);
888      for (int i = 0; i < len; ++i) {
889        int b = bytes[off + i] & 0xFF;
890        target.append(encoding[b]);
891        target.append(encoding[b | 0x100]);
892      }
893    }
894
895    @Override
896    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
897      checkNotNull(target);
898      if (chars.length() % 2 == 1) {
899        throw new DecodingException("Invalid input length " + chars.length());
900      }
901      int bytesWritten = 0;
902      for (int i = 0; i < chars.length(); i += 2) {
903        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
904        target[bytesWritten++] = (byte) decoded;
905      }
906      return bytesWritten;
907    }
908
909    @Override
910    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
911      return new Base16Encoding(alphabet);
912    }
913  }
914
915  static final class Base64Encoding extends StandardBaseEncoding {
916    Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) {
917      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
918    }
919
920    private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) {
921      super(alphabet, paddingChar);
922      checkArgument(alphabet.chars.length == 64);
923    }
924
925    @Override
926    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
927      checkNotNull(target);
928      checkPositionIndexes(off, off + len, bytes.length);
929      int i = off;
930      for (int remaining = len; remaining >= 3; remaining -= 3) {
931        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
932        target.append(alphabet.encode(chunk >>> 18));
933        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
934        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
935        target.append(alphabet.encode(chunk & 0x3F));
936      }
937      if (i < off + len) {
938        encodeChunkTo(target, bytes, i, off + len - i);
939      }
940    }
941
942    @Override
943    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
944      checkNotNull(target);
945      chars = trimTrailingPadding(chars);
946      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
947        throw new DecodingException("Invalid input length " + chars.length());
948      }
949      int bytesWritten = 0;
950      for (int i = 0; i < chars.length(); ) {
951        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
952        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
953        target[bytesWritten++] = (byte) (chunk >>> 16);
954        if (i < chars.length()) {
955          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
956          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
957          if (i < chars.length()) {
958            chunk |= alphabet.decode(chars.charAt(i++));
959            target[bytesWritten++] = (byte) (chunk & 0xFF);
960          }
961        }
962      }
963      return bytesWritten;
964    }
965
966    @Override
967    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
968      return new Base64Encoding(alphabet, paddingChar);
969    }
970  }
971
972  @GwtIncompatible
973  static Reader ignoringReader(final Reader delegate, final String toIgnore) {
974    checkNotNull(delegate);
975    checkNotNull(toIgnore);
976    return new Reader() {
977      @Override
978      public int read() throws IOException {
979        int readChar;
980        do {
981          readChar = delegate.read();
982        } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
983        return readChar;
984      }
985
986      @Override
987      public int read(char[] cbuf, int off, int len) throws IOException {
988        throw new UnsupportedOperationException();
989      }
990
991      @Override
992      public void close() throws IOException {
993        delegate.close();
994      }
995    };
996  }
997
998  static Appendable separatingAppendable(
999      final Appendable delegate, final String separator, final int afterEveryChars) {
1000    checkNotNull(delegate);
1001    checkNotNull(separator);
1002    checkArgument(afterEveryChars > 0);
1003    return new Appendable() {
1004      int charsUntilSeparator = afterEveryChars;
1005
1006      @Override
1007      public Appendable append(char c) throws IOException {
1008        if (charsUntilSeparator == 0) {
1009          delegate.append(separator);
1010          charsUntilSeparator = afterEveryChars;
1011        }
1012        delegate.append(c);
1013        charsUntilSeparator--;
1014        return this;
1015      }
1016
1017      @Override
1018      public Appendable append(CharSequence chars, int off, int len) throws IOException {
1019        throw new UnsupportedOperationException();
1020      }
1021
1022      @Override
1023      public Appendable append(CharSequence chars) throws IOException {
1024        throw new UnsupportedOperationException();
1025      }
1026    };
1027  }
1028
1029  @GwtIncompatible // Writer
1030  static Writer separatingWriter(
1031      final Writer delegate, final String separator, final int afterEveryChars) {
1032    final Appendable seperatingAppendable =
1033        separatingAppendable(delegate, separator, afterEveryChars);
1034    return new Writer() {
1035      @Override
1036      public void write(int c) throws IOException {
1037        seperatingAppendable.append((char) c);
1038      }
1039
1040      @Override
1041      public void write(char[] chars, int off, int len) throws IOException {
1042        throw new UnsupportedOperationException();
1043      }
1044
1045      @Override
1046      public void flush() throws IOException {
1047        delegate.flush();
1048      }
1049
1050      @Override
1051      public void close() throws IOException {
1052        delegate.close();
1053      }
1054    };
1055  }
1056
1057  static final class SeparatedBaseEncoding extends BaseEncoding {
1058    private final BaseEncoding delegate;
1059    private final String separator;
1060    private final int afterEveryChars;
1061
1062    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1063      this.delegate = checkNotNull(delegate);
1064      this.separator = checkNotNull(separator);
1065      this.afterEveryChars = afterEveryChars;
1066      checkArgument(
1067          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1068    }
1069
1070    @Override
1071    CharSequence trimTrailingPadding(CharSequence chars) {
1072      return delegate.trimTrailingPadding(chars);
1073    }
1074
1075    @Override
1076    int maxEncodedSize(int bytes) {
1077      int unseparatedSize = delegate.maxEncodedSize(bytes);
1078      return unseparatedSize
1079          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1080    }
1081
1082    @GwtIncompatible // Writer,OutputStream
1083    @Override
1084    public OutputStream encodingStream(final Writer output) {
1085      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1086    }
1087
1088    @Override
1089    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1090      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1091    }
1092
1093    @Override
1094    int maxDecodedSize(int chars) {
1095      return delegate.maxDecodedSize(chars);
1096    }
1097
1098    @Override
1099    public boolean canDecode(CharSequence chars) {
1100      StringBuilder builder = new StringBuilder();
1101      for (int i = 0; i < chars.length(); i++) {
1102        char c = chars.charAt(i);
1103        if (separator.indexOf(c) < 0) {
1104          builder.append(c);
1105        }
1106      }
1107      return delegate.canDecode(builder);
1108    }
1109
1110    @Override
1111    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1112      StringBuilder stripped = new StringBuilder(chars.length());
1113      for (int i = 0; i < chars.length(); i++) {
1114        char c = chars.charAt(i);
1115        if (separator.indexOf(c) < 0) {
1116          stripped.append(c);
1117        }
1118      }
1119      return delegate.decodeTo(target, stripped);
1120    }
1121
1122    @Override
1123    @GwtIncompatible // Reader,InputStream
1124    public InputStream decodingStream(final Reader reader) {
1125      return delegate.decodingStream(ignoringReader(reader, separator));
1126    }
1127
1128    @Override
1129    public BaseEncoding omitPadding() {
1130      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1131    }
1132
1133    @Override
1134    public BaseEncoding withPadChar(char padChar) {
1135      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1136    }
1137
1138    @Override
1139    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1140      throw new UnsupportedOperationException("Already have a separator");
1141    }
1142
1143    @Override
1144    public BaseEncoding upperCase() {
1145      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1146    }
1147
1148    @Override
1149    public BaseEncoding lowerCase() {
1150      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1151    }
1152
1153    @Override
1154    public String toString() {
1155      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1156    }
1157  }
1158}