Source code

001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.Beta;
028import com.google.common.annotations.GwtCompatible;
029import com.google.common.annotations.GwtIncompatible;
030import com.google.common.base.Ascii;
031import com.google.common.base.CharMatcher;
032
033import java.io.IOException;
034import java.io.InputStream;
035import java.io.OutputStream;
036import java.io.Reader;
037import java.io.Writer;
038import java.util.Arrays;
039
040import javax.annotation.CheckReturnValue;
041import javax.annotation.Nullable;
042
043/**
044 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
045 * strings. This class includes several constants for encoding schemes specified by <a
046 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
047 *
048 * <pre>   {@code
049 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
050 *
051 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
052 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
053 *
054 * <p>...returns the ASCII bytes of the string {@code "foo"}.
055 *
056 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
057 * RFC 4648.  Decoding rejects characters in the wrong case, though padding is optional.
058 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
059 * with modified behavior:
060 *
061 * <pre>   {@code
062 *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
063 *
064 * <p>Warning: BaseEncoding instances are immutable.  Invoking a configuration method has no effect
065 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
066 *
067 * <pre>   {@code
068 *   // Do NOT do this
069 *   BaseEncoding hex = BaseEncoding.base16();
070 *   hex.lowerCase(); // does nothing!
071 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
072 *
073 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
074 * {@code x}, but the reverse does not necessarily hold.
075 *
076 * <p>
077 * <table>
078 * <tr>
079 * <th>Encoding
080 * <th>Alphabet
081 * <th>{@code char:byte} ratio
082 * <th>Default padding
083 * <th>Comments
084 * <tr>
085 * <td>{@link #base16()}
086 * <td>0-9 A-F
087 * <td>2.00
088 * <td>N/A
089 * <td>Traditional hexadecimal.  Defaults to upper case.
090 * <tr>
091 * <td>{@link #base32()}
092 * <td>A-Z 2-7
093 * <td>1.60
094 * <td>=
095 * <td>Human-readable; no possibility of mixing up 0/O or 1/I.  Defaults to upper case.
096 * <tr>
097 * <td>{@link #base32Hex()}
098 * <td>0-9 A-V
099 * <td>1.60
100 * <td>=
101 * <td>"Numerical" base 32; extended from the traditional hex alphabet.  Defaults to upper case.
102 * <tr>
103 * <td>{@link #base64()}
104 * <td>A-Z a-z 0-9 + /
105 * <td>1.33
106 * <td>=
107 * <td>
108 * <tr>
109 * <td>{@link #base64Url()}
110 * <td>A-Z a-z 0-9 - _
111 * <td>1.33
112 * <td>=
113 * <td>Safe to use as filenames, or to pass in URLs without escaping
114 * </table>
115 *
116 * <p>
117 * All instances of this class are immutable, so they may be stored safely as static constants.
118 *
119 * @author Louis Wasserman
120 * @since 14.0
121 */
122@Beta
123@GwtCompatible(emulated = true)
124public abstract class BaseEncoding {
125  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
126
  // Package-private constructor: code outside this package cannot invoke it, and therefore
  // cannot subclass BaseEncoding.
  BaseEncoding() {}
128
  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * <p>This is a checked exception (it extends {@link IOException});
   * {@link BaseEncoding#decode(CharSequence)} rewraps it as an {@link IllegalArgumentException}.
   * Both constructors are package-private.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception whose detail message is {@code message}. */
    DecodingException(String message) {
      super(message);
    }

    /** Creates an exception that wraps {@code cause}. */
    DecodingException(Throwable cause) {
      super(cause);
    }
  }
144
145  /**
146   * Encodes the specified byte array, and returns the encoded {@code String}.
147   */
148  public String encode(byte[] bytes) {
149    return encode(bytes, 0, bytes.length);
150  }
151
152  /**
153   * Encodes the specified range of the specified byte array, and returns the encoded
154   * {@code String}.
155   */
156  public final String encode(byte[] bytes, int off, int len) {
157    checkPositionIndexes(off, off + len, bytes.length);
158    StringBuilder result = new StringBuilder(maxEncodedSize(len));
159    try {
160      encodeTo(result, bytes, off, len);
161    } catch (IOException impossible) {
162      throw new AssertionError(impossible);
163    }
164    return result.toString();
165  }
166
  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}.  When the returned {@code OutputStream} is closed, so is the backing
   * {@code Writer}.
   *
   * @param writer the destination for the encoded characters
   * @return a stream that encodes the bytes written to it
   */
  @GwtIncompatible("Writer,OutputStream")
  public abstract OutputStream encodingStream(Writer writer);
174
175  /**
176   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
177   */
178  @GwtIncompatible("ByteSink,CharSink")
179  public final ByteSink encodingSink(final CharSink encodedSink) {
180    checkNotNull(encodedSink);
181    return new ByteSink() {
182      @Override
183      public OutputStream openStream() throws IOException {
184        return encodingStream(encodedSink.openStream());
185      }
186    };
187  }
188
189  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
190
191  private static byte[] extract(byte[] result, int length) {
192    if (length == result.length) {
193      return result;
194    } else {
195      byte[] trunc = new byte[length];
196      System.arraycopy(result, 0, trunc, 0, length);
197      return trunc;
198    }
199  }
200
201  /**
202   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
203   * This is the inverse operation to {@link #encode(byte[])}.
204   *
205   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
206   *         encoding.
207   */
208  public final byte[] decode(CharSequence chars) {
209    try {
210      return decodeChecked(chars);
211    } catch (DecodingException badInput) {
212      throw new IllegalArgumentException(badInput);
213    }
214  }
215
216  /**
217   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
218   * This is the inverse operation to {@link #encode(byte[])}.
219   *
220   * @throws DecodingException if the input is not a valid encoded string according to this
221   *         encoding.
222   */
223  final byte[] decodeChecked(CharSequence chars) throws DecodingException {
224    chars = padding().trimTrailingFrom(chars);
225    byte[] tmp = new byte[maxDecodedSize(chars.length())];
226    int len = decodeTo(tmp, chars);
227    return extract(tmp, len);
228  }
229
  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified
   * {@code Reader}.  The returned stream throws a {@link DecodingException} upon decoding-specific
   * errors.
   *
   * @param reader the source of the encoded characters
   * @return a stream that yields the decoded bytes
   */
  @GwtIncompatible("Reader,InputStream")
  public abstract InputStream decodingStream(Reader reader);
237
238  /**
239   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
240   * {@code CharSource}.
241   */
242  @GwtIncompatible("ByteSource,CharSource")
243  public final ByteSource decodingSource(final CharSource encodedSource) {
244    checkNotNull(encodedSource);
245    return new ByteSource() {
246      @Override
247      public InputStream openStream() throws IOException {
248        return decodingStream(encodedSource.openStream());
249      }
250    };
251  }
252
  // Implementations for encoding/decoding

  /**
   * Returns the capacity used to presize the buffer that receives the encoding of {@code bytes}
   * input bytes.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Appends the encoded form of {@code len} bytes of {@code bytes}, starting at index
   * {@code off}, to {@code target}.
   */
  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;

  /**
   * Returns the size of the scratch array allocated for decoding {@code chars} characters.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Decodes {@code chars} into {@code target} and returns the number of bytes written.
   *
   * @throws DecodingException if {@code chars} is not valid input for this encoding
   */
  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;

  /**
   * Returns a matcher for this encoding's padding; characters it matches are trimmed from the end
   * of the input before decoding.
   */
  abstract CharMatcher padding();
264
  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * @return an encoding without padding; this instance is left unchanged
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @param padChar the character to pad with
   * @return an encoding that pads with {@code padChar}; this instance is left unchanged
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *         separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @param separator the string to insert between groups of encoded characters
   * @param n the number of encoded characters between separators
   * @return an encoding that emits {@code separator}; this instance is left unchanged
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *         string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @return an upper-case variant of this encoding; this instance is left unchanged
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @return a lower-case variant of this encoding; this instance is left unchanged
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();
316
  // Single shared instance returned by base64(); '=' is its padding character.
  private static final BaseEncoding BASE64 = new Base64Encoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64 encoding instance
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
336
  // Single shared instance returned by base64Url(); differs from BASE64 only in the last two
  // alphabet characters ('-' and '_' instead of '+' and '/').
  private static final BaseEncoding BASE64_URL = new Base64Encoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
   * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64url encoding instance
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
357
  // Single shared instance returned by base32(); '=' is its padding character.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding.
   * (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32 encoding instance
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
377
  // Single shared instance returned by base32Hex(); '=' is its padding character.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet.  There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32hex encoding instance
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
396
  // Single shared instance returned by base16(); constructed without a padding character.
  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");

  /**
   * The "base16" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding.
   * (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base16 encoding instance
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
416
417  private static final class Alphabet extends CharMatcher {
418    private final String name;
419    // this is meant to be immutable -- don't modify it!
420    private final char[] chars;
421    final int mask;
422    final int bitsPerChar;
423    final int charsPerChunk;
424    final int bytesPerChunk;
425    private final byte[] decodabet;
426    private final boolean[] validPadding;
427
428    Alphabet(String name, char[] chars) {
429      this.name = checkNotNull(name);
430      this.chars = checkNotNull(chars);
431      try {
432        this.bitsPerChar = log2(chars.length, UNNECESSARY);
433      } catch (ArithmeticException e) {
434        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
435      }
436
437      /*
438       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
439       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
440       */
441      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
442      this.charsPerChunk = 8 / gcd;
443      this.bytesPerChunk = bitsPerChar / gcd;
444
445      this.mask = chars.length - 1;
446
447      byte[] decodabet = new byte[Ascii.MAX + 1];
448      Arrays.fill(decodabet, (byte) -1);
449      for (int i = 0; i < chars.length; i++) {
450        char c = chars[i];
451        checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
452        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
453        decodabet[c] = (byte) i;
454      }
455      this.decodabet = decodabet;
456
457      boolean[] validPadding = new boolean[charsPerChunk];
458      for (int i = 0; i < bytesPerChunk; i++) {
459        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
460      }
461      this.validPadding = validPadding;
462    }
463
464    char encode(int bits) {
465      return chars[bits];
466    }
467
468    boolean isValidPaddingStartPosition(int index) {
469      return validPadding[index % charsPerChunk];
470    }
471
472    int decode(char ch) throws DecodingException {
473      if (ch > Ascii.MAX || decodabet[ch] == -1) {
474        throw new DecodingException("Unrecognized character: "
475            + (CharMatcher.INVISIBLE.matches(ch) ? "0x" + Integer.toHexString(ch) : ch));
476      }
477      return decodabet[ch];
478    }
479
480    private boolean hasLowerCase() {
481      for (char c : chars) {
482        if (Ascii.isLowerCase(c)) {
483          return true;
484        }
485      }
486      return false;
487    }
488
489    private boolean hasUpperCase() {
490      for (char c : chars) {
491        if (Ascii.isUpperCase(c)) {
492          return true;
493        }
494      }
495      return false;
496    }
497
498    Alphabet upperCase() {
499      if (!hasLowerCase()) {
500        return this;
501      } else {
502        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
503        char[] upperCased = new char[chars.length];
504        for (int i = 0; i < chars.length; i++) {
505          upperCased[i] = Ascii.toUpperCase(chars[i]);
506        }
507        return new Alphabet(name + ".upperCase()", upperCased);
508      }
509    }
510
511    Alphabet lowerCase() {
512      if (!hasUpperCase()) {
513        return this;
514      } else {
515        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
516        char[] lowerCased = new char[chars.length];
517        for (int i = 0; i < chars.length; i++) {
518          lowerCased[i] = Ascii.toLowerCase(chars[i]);
519        }
520        return new Alphabet(name + ".lowerCase()", lowerCased);
521      }
522    }
523
524    @Override
525    public boolean matches(char c) {
526      return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
527    }
528
529    @Override
530    public String toString() {
531      return name;
532    }
533  }
534
535  static class StandardBaseEncoding extends BaseEncoding {
536    // TODO(lowasser): provide a useful toString
537    final Alphabet alphabet;
538
539    @Nullable
540    final Character paddingChar;
541
542    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
543      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
544    }
545
546    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
547      this.alphabet = checkNotNull(alphabet);
548      checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
549          "Padding character %s was already in alphabet", paddingChar);
550      this.paddingChar = paddingChar;
551    }
552
553    @Override
554    CharMatcher padding() {
555      return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
556    }
557
558    @Override
559    int maxEncodedSize(int bytes) {
560      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
561    }
562
563    @GwtIncompatible("Writer,OutputStream")
564    @Override
565    public OutputStream encodingStream(final Writer out) {
566      checkNotNull(out);
567      return new OutputStream() {
568        int bitBuffer = 0;
569        int bitBufferLength = 0;
570        int writtenChars = 0;
571
572        @Override
573        public void write(int b) throws IOException {
574          bitBuffer <<= 8;
575          bitBuffer |= b & 0xFF;
576          bitBufferLength += 8;
577          while (bitBufferLength >= alphabet.bitsPerChar) {
578            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
579                & alphabet.mask;
580            out.write(alphabet.encode(charIndex));
581            writtenChars++;
582            bitBufferLength -= alphabet.bitsPerChar;
583          }
584        }
585
586        @Override
587        public void flush() throws IOException {
588          out.flush();
589        }
590
591        @Override
592        public void close() throws IOException {
593          if (bitBufferLength > 0) {
594            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
595                & alphabet.mask;
596            out.write(alphabet.encode(charIndex));
597            writtenChars++;
598            if (paddingChar != null) {
599              while (writtenChars % alphabet.charsPerChunk != 0) {
600                out.write(paddingChar.charValue());
601                writtenChars++;
602              }
603            }
604          }
605          out.close();
606        }
607      };
608    }
609
610    @Override
611    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
612      checkNotNull(target);
613      checkPositionIndexes(off, off + len, bytes.length);
614      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
615        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
616      }
617    }
618
619    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len)
620        throws IOException {
621      checkNotNull(target);
622      checkPositionIndexes(off, off + len, bytes.length);
623      checkArgument(len <= alphabet.bytesPerChunk);
624      long bitBuffer = 0;
625      for (int i = 0; i < len; ++i) {
626        bitBuffer |= bytes[off + i] & 0xFF;
627        bitBuffer <<= 8; // Add additional zero byte in the end.
628      }
629      // Position of first character is length of bitBuffer minus bitsPerChar.
630      final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
631      int bitsProcessed = 0;
632      while (bitsProcessed < len * 8) {
633        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
634        target.append(alphabet.encode(charIndex));
635        bitsProcessed += alphabet.bitsPerChar;
636      }
637      if (paddingChar != null) {
638        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
639          target.append(paddingChar.charValue());
640          bitsProcessed += alphabet.bitsPerChar;
641        }
642      }
643    }
644
645    @Override
646    int maxDecodedSize(int chars) {
647      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
648    }
649
650    @Override
651    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
652      checkNotNull(target);
653      chars = padding().trimTrailingFrom(chars);
654      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
655        throw new DecodingException("Invalid input length " + chars.length());
656      }
657      int bytesWritten = 0;
658      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
659        long chunk = 0;
660        int charsProcessed = 0;
661        for (int i = 0; i < alphabet.charsPerChunk; i++) {
662          chunk <<= alphabet.bitsPerChar;
663          if (charIdx + i < chars.length()) {
664            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
665          }
666        }
667        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
668        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
669          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
670        }
671      }
672      return bytesWritten;
673    }
674
675    @GwtIncompatible("Reader,InputStream")
676    @Override
677    public InputStream decodingStream(final Reader reader) {
678      checkNotNull(reader);
679      return new InputStream() {
680        int bitBuffer = 0;
681        int bitBufferLength = 0;
682        int readChars = 0;
683        boolean hitPadding = false;
684        final CharMatcher paddingMatcher = padding();
685
686        @Override
687        public int read() throws IOException {
688          while (true) {
689            int readChar = reader.read();
690            if (readChar == -1) {
691              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
692                throw new DecodingException("Invalid input length " + readChars);
693              }
694              return -1;
695            }
696            readChars++;
697            char ch = (char) readChar;
698            if (paddingMatcher.matches(ch)) {
699              if (!hitPadding
700                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
701                throw new DecodingException("Padding cannot start at index " + readChars);
702              }
703              hitPadding = true;
704            } else if (hitPadding) {
705              throw new DecodingException(
706                  "Expected padding character but found '" + ch + "' at index " + readChars);
707            } else {
708              bitBuffer <<= alphabet.bitsPerChar;
709              bitBuffer |= alphabet.decode(ch);
710              bitBufferLength += alphabet.bitsPerChar;
711
712              if (bitBufferLength >= 8) {
713                bitBufferLength -= 8;
714                return (bitBuffer >> bitBufferLength) & 0xFF;
715              }
716            }
717          }
718        }
719
720        @Override
721        public void close() throws IOException {
722          reader.close();
723        }
724      };
725    }
726
727    @Override
728    public BaseEncoding omitPadding() {
729      return (paddingChar == null) ? this : newInstance(alphabet, null);
730    }
731
732    @Override
733    public BaseEncoding withPadChar(char padChar) {
734      if (8 % alphabet.bitsPerChar == 0 ||
735          (paddingChar != null && paddingChar.charValue() == padChar)) {
736        return this;
737      } else {
738        return newInstance(alphabet, padChar);
739      }
740    }
741
742    @Override
743    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
744      checkArgument(padding().or(alphabet).matchesNoneOf(separator),
745          "Separator (%s) cannot contain alphabet or padding characters", separator);
746      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
747    }
748
749    private transient BaseEncoding upperCase;
750    private transient BaseEncoding lowerCase;
751
752    @Override
753    public BaseEncoding upperCase() {
754      BaseEncoding result = upperCase;
755      if (result == null) {
756        Alphabet upper = alphabet.upperCase();
757        result = upperCase =
758            (upper == alphabet) ? this : newInstance(upper, paddingChar);
759      }
760      return result;
761    }
762
763    @Override
764    public BaseEncoding lowerCase() {
765      BaseEncoding result = lowerCase;
766      if (result == null) {
767        Alphabet lower = alphabet.lowerCase();
768        result = lowerCase =
769            (lower == alphabet) ? this : newInstance(lower, paddingChar);
770      }
771      return result;
772    }
773
774    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
775      return new StandardBaseEncoding(alphabet, paddingChar);
776    }
777
778    @Override
779    public String toString() {
780      StringBuilder builder = new StringBuilder("BaseEncoding.");
781      builder.append(alphabet.toString());
782      if (8 % alphabet.bitsPerChar != 0) {
783        if (paddingChar == null) {
784          builder.append(".omitPadding()");
785        } else {
786          builder.append(".withPadChar(").append(paddingChar).append(')');
787        }
788      }
789      return builder.toString();
790    }
791  }
792
793  static final class Base16Encoding extends StandardBaseEncoding {
794    final char[] encoding = new char[512];
795
796    Base16Encoding(String name, String alphabetChars) {
797      this(new Alphabet(name, alphabetChars.toCharArray()));
798    }
799
800    private Base16Encoding(Alphabet alphabet) {
801      super(alphabet, null);
802      checkArgument(alphabet.chars.length == 16);
803      for (int i = 0; i < 256; ++i) {
804        encoding[i] = alphabet.encode(i >>> 4);
805        encoding[i | 0x100] = alphabet.encode(i & 0xF);
806      }
807    }
808
809    @Override
810    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
811      checkNotNull(target);
812      checkPositionIndexes(off, off + len, bytes.length);
813      for (int i = 0; i < len; ++i) {
814        int b = bytes[off + i] & 0xFF;
815        target.append(encoding[b]);
816        target.append(encoding[b | 0x100]);
817      }
818    }
819
820    @Override
821    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
822      checkNotNull(target);
823      if (chars.length() % 2 == 1) {
824        throw new DecodingException("Invalid input length " + chars.length());
825      }
826      int bytesWritten = 0;
827      for (int i = 0; i < chars.length(); i += 2) {
828        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
829        target[bytesWritten++] = (byte) decoded;
830      }
831      return bytesWritten;
832    }
833
834    @Override
835    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
836      return new Base16Encoding(alphabet);
837    }
838  }
839
840  static final class Base64Encoding extends StandardBaseEncoding {
    /**
     * Creates a base 64 encoding whose alphabet consists of the characters of
     * {@code alphabetChars}; {@code paddingChar} may be {@code null} for no padding.
     */
    Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }

    /**
     * Creates a base 64 encoding over an existing alphabet.
     *
     * @throws IllegalArgumentException if the alphabet does not have exactly 64 characters
     */
    private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) {
      super(alphabet, paddingChar);
      checkArgument(alphabet.chars.length == 64);
    }
849
850    @Override
851    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
852      checkNotNull(target);
853      checkPositionIndexes(off, off + len, bytes.length);
854      int i = off;
855      for (int remaining = len; remaining >= 3; remaining -= 3) {
856        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
857        target.append(alphabet.encode(chunk >>> 18));
858        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
859        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
860        target.append(alphabet.encode(chunk & 0x3F));
861      }
862      if (i < off + len) {
863        encodeChunkTo(target, bytes, i, off + len - i);
864      }
865    }
866
867    @Override
868    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
869      checkNotNull(target);
870      chars = padding().trimTrailingFrom(chars);
871      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
872        throw new DecodingException("Invalid input length " + chars.length());
873      }
874      int bytesWritten = 0;
875      for (int i = 0; i < chars.length();) {
876        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
877        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
878        target[bytesWritten++] = (byte) (chunk >>> 16);
879        if (i < chars.length()) {
880          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
881          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
882          if (i < chars.length()) {
883            chunk |= alphabet.decode(chars.charAt(i++));
884            target[bytesWritten++] = (byte) (chunk & 0xFF);
885          }
886        }
887      }
888      return bytesWritten;
889    }
890
891    @Override
892    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
893      return new Base64Encoding(alphabet, paddingChar);
894    }
895  }
896
897  @GwtIncompatible("Reader")
898  static Reader ignoringReader(final Reader delegate, final CharMatcher toIgnore) {
899    checkNotNull(delegate);
900    checkNotNull(toIgnore);
901    return new Reader() {
902      @Override
903      public int read() throws IOException {
904        int readChar;
905        do {
906          readChar = delegate.read();
907        } while (readChar != -1 && toIgnore.matches((char) readChar));
908        return readChar;
909      }
910
911      @Override
912      public int read(char[] cbuf, int off, int len) throws IOException {
913        throw new UnsupportedOperationException();
914      }
915
916      @Override
917      public void close() throws IOException {
918        delegate.close();
919      }
920    };
921  }
922
923  static Appendable separatingAppendable(
924      final Appendable delegate, final String separator, final int afterEveryChars) {
925    checkNotNull(delegate);
926    checkNotNull(separator);
927    checkArgument(afterEveryChars > 0);
928    return new Appendable() {
929      int charsUntilSeparator = afterEveryChars;
930
931      @Override
932      public Appendable append(char c) throws IOException {
933        if (charsUntilSeparator == 0) {
934          delegate.append(separator);
935          charsUntilSeparator = afterEveryChars;
936        }
937        delegate.append(c);
938        charsUntilSeparator--;
939        return this;
940      }
941
942      @Override
943      public Appendable append(CharSequence chars, int off, int len) throws IOException {
944        throw new UnsupportedOperationException();
945      }
946
947      @Override
948      public Appendable append(CharSequence chars) throws IOException {
949        throw new UnsupportedOperationException();
950      }
951    };
952  }
953
954  @GwtIncompatible("Writer")
955  static Writer separatingWriter(
956      final Writer delegate, final String separator, final int afterEveryChars) {
957    final Appendable seperatingAppendable =
958        separatingAppendable(delegate, separator, afterEveryChars);
959    return new Writer() {
960      @Override
961      public void write(int c) throws IOException {
962        seperatingAppendable.append((char) c);
963      }
964
965      @Override
966      public void write(char[] chars, int off, int len) throws IOException {
967        throw new UnsupportedOperationException();
968      }
969
970      @Override
971      public void flush() throws IOException {
972        delegate.flush();
973      }
974
975      @Override
976      public void close() throws IOException {
977        delegate.close();
978      }
979    };
980  }
981
982  static final class SeparatedBaseEncoding extends BaseEncoding {
983    private final BaseEncoding delegate;
984    private final String separator;
985    private final int afterEveryChars;
986    private final CharMatcher separatorChars;
987
988    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
989      this.delegate = checkNotNull(delegate);
990      this.separator = checkNotNull(separator);
991      this.afterEveryChars = afterEveryChars;
992      checkArgument(
993          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
994      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
995    }
996
997    @Override
998    CharMatcher padding() {
999      return delegate.padding();
1000    }
1001
1002    @Override
1003    int maxEncodedSize(int bytes) {
1004      int unseparatedSize = delegate.maxEncodedSize(bytes);
1005      return unseparatedSize + separator.length()
1006          * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1007    }
1008
1009    @GwtIncompatible("Writer,OutputStream")
1010    @Override
1011    public OutputStream encodingStream(final Writer output) {
1012      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1013    }
1014
1015    @Override
1016    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1017      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1018    }
1019
1020    @Override
1021    int maxDecodedSize(int chars) {
1022      return delegate.maxDecodedSize(chars);
1023    }
1024
1025    @Override
1026    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1027      return delegate.decodeTo(target, separatorChars.removeFrom(chars));
1028    }
1029
1030    @GwtIncompatible("Reader,InputStream")
1031    @Override
1032    public InputStream decodingStream(final Reader reader) {
1033      return delegate.decodingStream(ignoringReader(reader, separatorChars));
1034    }
1035
1036    @Override
1037    public BaseEncoding omitPadding() {
1038      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1039    }
1040
1041    @Override
1042    public BaseEncoding withPadChar(char padChar) {
1043      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1044    }
1045
1046    @Override
1047    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1048      throw new UnsupportedOperationException("Already have a separator");
1049    }
1050
1051    @Override
1052    public BaseEncoding upperCase() {
1053      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1054    }
1055
1056    @Override
1057    public BaseEncoding lowerCase() {
1058      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1059    }
1060
1061    @Override
1062    public String toString() {
1063      return delegate.toString() +
1064          ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1065    }
1066  }
1067}