Source code

001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.io.GwtWorkarounds.asCharInput;
022import static com.google.common.io.GwtWorkarounds.asCharOutput;
023import static com.google.common.io.GwtWorkarounds.asInputStream;
024import static com.google.common.io.GwtWorkarounds.asOutputStream;
025import static com.google.common.io.GwtWorkarounds.stringBuilderOutput;
026import static com.google.common.math.IntMath.divide;
027import static com.google.common.math.IntMath.log2;
028import static java.math.RoundingMode.CEILING;
029import static java.math.RoundingMode.FLOOR;
030import static java.math.RoundingMode.UNNECESSARY;
031
032import com.google.common.annotations.Beta;
033import com.google.common.annotations.GwtCompatible;
034import com.google.common.annotations.GwtIncompatible;
035import com.google.common.base.Ascii;
036import com.google.common.base.CharMatcher;
037import com.google.common.io.GwtWorkarounds.ByteInput;
038import com.google.common.io.GwtWorkarounds.ByteOutput;
039import com.google.common.io.GwtWorkarounds.CharInput;
040import com.google.common.io.GwtWorkarounds.CharOutput;
041
042import java.io.IOException;
043import java.io.InputStream;
044import java.io.OutputStream;
045import java.io.Reader;
046import java.io.Writer;
047import java.util.Arrays;
048
049import javax.annotation.CheckReturnValue;
050import javax.annotation.Nullable;
051
052/**
053 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
054 * strings. This class includes several constants for encoding schemes specified by <a
055 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
056 *
057 * <pre>   {@code
058 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
059 *
060 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
061 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
062 *
063 * <p>...returns the ASCII bytes of the string {@code "foo"}.
064 *
065 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
066 * RFC 4648.  Decoding rejects characters in the wrong case, though padding is optional.
067 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
068 * with modified behavior:
069 *
070 * <pre>   {@code
071 *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
072 *
073 * <p>Warning: BaseEncoding instances are immutable.  Invoking a configuration method has no effect
074 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
075 *
076 * <pre>   {@code
077 *   // Do NOT do this
078 *   BaseEncoding hex = BaseEncoding.base16();
079 *   hex.lowerCase(); // does nothing!
080 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
081 *
082 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
083 * {@code x}, but the reverse does not necessarily hold.
084 *
085 * <p>
086 * <table>
087 * <tr>
088 * <th>Encoding
089 * <th>Alphabet
090 * <th>{@code char:byte} ratio
091 * <th>Default padding
092 * <th>Comments
093 * <tr>
094 * <td>{@link #base16()}
095 * <td>0-9 A-F
096 * <td>2.00
097 * <td>N/A
098 * <td>Traditional hexadecimal.  Defaults to upper case.
099 * <tr>
100 * <td>{@link #base32()}
101 * <td>A-Z 2-7
102 * <td>1.60
103 * <td>=
104 * <td>Human-readable; no possibility of mixing up 0/O or 1/I.  Defaults to upper case.
105 * <tr>
106 * <td>{@link #base32Hex()}
107 * <td>0-9 A-V
108 * <td>1.60
109 * <td>=
110 * <td>"Numerical" base 32; extended from the traditional hex alphabet.  Defaults to upper case.
111 * <tr>
112 * <td>{@link #base64()}
113 * <td>A-Z a-z 0-9 + /
114 * <td>1.33
115 * <td>=
116 * <td>
117 * <tr>
118 * <td>{@link #base64Url()}
119 * <td>A-Z a-z 0-9 - _
120 * <td>1.33
121 * <td>=
122 * <td>Safe to use as filenames, or to pass in URLs without escaping
123 * </table>
124 *
125 * <p>
126 * All instances of this class are immutable, so they may be stored safely as static constants.
127 *
128 * @author Louis Wasserman
129 * @since 14.0
130 */
131@Beta
132@GwtCompatible(emulated = true)
133public abstract class BaseEncoding {
134  // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int])
135
  // No access modifier: the constructor is package-private, so subclasses must live in this
  // package (or be nested in this class, like the implementations below).
  BaseEncoding() {}
137
  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * <p>This is an {@link IOException}, so the decoding streams can throw it from their
   * {@code read} methods. {@link #decode(CharSequence)} rethrows it wrapped in an
   * {@link IllegalArgumentException}.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception whose message describes the invalid input. */
    DecodingException(String message) {
      super(message);
    }

    /** Creates an exception that wraps the given underlying cause. */
    DecodingException(Throwable cause) {
      super(cause);
    }
  }
153
154  /**
155   * Encodes the specified byte array, and returns the encoded {@code String}.
156   */
157  public String encode(byte[] bytes) {
158    return encode(checkNotNull(bytes), 0, bytes.length);
159  }
160
161  /**
162   * Encodes the specified range of the specified byte array, and returns the encoded
163   * {@code String}.
164   */
165  public final String encode(byte[] bytes, int off, int len) {
166    checkNotNull(bytes);
167    checkPositionIndexes(off, off + len, bytes.length);
168    CharOutput result = stringBuilderOutput(maxEncodedSize(len));
169    ByteOutput byteOutput = encodingStream(result);
170    try {
171      for (int i = 0; i < len; i++) {
172        byteOutput.write(bytes[off + i]);
173      }
174      byteOutput.close();
175    } catch (IOException impossible) {
176      throw new AssertionError("impossible");
177    }
178    return result.toString();
179  }
180
181  /**
182   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
183   * {@code Writer}.  When the returned {@code OutputStream} is closed, so is the backing
184   * {@code Writer}.
185   */
186  @GwtIncompatible("Writer,OutputStream")
187  public final OutputStream encodingStream(Writer writer) {
188    return asOutputStream(encodingStream(asCharOutput(writer)));
189  }
190
191  /**
192   * Returns an {@code OutputSupplier} that supplies streams that encode bytes using this encoding
193   * into writers from the specified {@code OutputSupplier}.
194   *
195   * @deprecated Use {@link #encodingSink(CharSink)} instead. This method is scheduled to be
196   *     removed in Guava 16.0.
197   */
198  @Deprecated
199  @GwtIncompatible("Writer,OutputStream")
200  public final OutputSupplier<OutputStream> encodingStream(
201      final OutputSupplier<? extends Writer> writerSupplier) {
202    checkNotNull(writerSupplier);
203    return new OutputSupplier<OutputStream>() {
204      @Override
205      public OutputStream getOutput() throws IOException {
206        return encodingStream(writerSupplier.getOutput());
207      }
208    };
209  }
210
211  /**
212   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
213   */
214  @GwtIncompatible("ByteSink,CharSink")
215  public final ByteSink encodingSink(final CharSink encodedSink) {
216    checkNotNull(encodedSink);
217    return new ByteSink() {
218      @Override
219      public OutputStream openStream() throws IOException {
220        return encodingStream(encodedSink.openStream());
221      }
222    };
223  }
224
225  // TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher)
226
227  private static byte[] extract(byte[] result, int length) {
228    if (length == result.length) {
229      return result;
230    } else {
231      byte[] trunc = new byte[length];
232      System.arraycopy(result, 0, trunc, 0, length);
233      return trunc;
234    }
235  }
236
237  /**
238   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
239   * This is the inverse operation to {@link #encode(byte[])}.
240   *
241   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
242   *         encoding.
243   */
244  public final byte[] decode(CharSequence chars) {
245    try {
246      return decodeChecked(chars);
247    } catch (DecodingException badInput) {
248      throw new IllegalArgumentException(badInput);
249    }
250  }
251
252  /**
253   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
254   * This is the inverse operation to {@link #encode(byte[])}.
255   *
256   * @throws DecodingException if the input is not a valid encoded string according to this
257   *         encoding.
258   */
259  final byte[] decodeChecked(CharSequence chars) throws DecodingException {
260    chars = padding().trimTrailingFrom(chars);
261    ByteInput decodedInput = decodingStream(asCharInput(chars));
262    byte[] tmp = new byte[maxDecodedSize(chars.length())];
263    int index = 0;
264    try {
265      for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
266        tmp[index++] = (byte) i;
267      }
268    } catch (DecodingException badInput) {
269      throw badInput;
270    } catch (IOException impossible) {
271      throw new AssertionError(impossible);
272    }
273    return extract(tmp, index);
274  }
275
276  /**
277   * Returns an {@code InputStream} that decodes base-encoded input from the specified
278   * {@code Reader}.  The returned stream throws a {@link DecodingException} upon decoding-specific
279   * errors.
280   */
281  @GwtIncompatible("Reader,InputStream")
282  public final InputStream decodingStream(Reader reader) {
283    return asInputStream(decodingStream(asCharInput(reader)));
284  }
285
286  /**
287   * Returns an {@code InputSupplier} that supplies input streams that decode base-encoded input
288   * from readers from the specified supplier.
289   *
290   * @deprecated Use {@link #decodingSource(CharSource)} instead. This method is scheduled to be
291   *     removed in Guava 16.0.
292   */
293  @Deprecated
294  @GwtIncompatible("Reader,InputStream")
295  public final InputSupplier<InputStream> decodingStream(
296      final InputSupplier<? extends Reader> readerSupplier) {
297    checkNotNull(readerSupplier);
298    return new InputSupplier<InputStream>() {
299      @Override
300      public InputStream getInput() throws IOException {
301        return decodingStream(readerSupplier.getInput());
302      }
303    };
304  }
305
306  /**
307   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
308   * {@code CharSource}.
309   */
310  @GwtIncompatible("ByteSource,CharSource")
311  public final ByteSource decodingSource(final CharSource encodedSource) {
312    checkNotNull(encodedSource);
313    return new ByteSource() {
314      @Override
315      public InputStream openStream() throws IOException {
316        return decodingStream(encodedSource.openStream());
317      }
318    };
319  }
320
321  // Implementations for encoding/decoding
322
  /**
   * Returns the size used to presize the character buffer when encoding {@code bytes} bytes
   * (see {@code encode(byte[], int, int)}).
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Returns a {@code ByteOutput} that encodes the bytes written to it and sends the resulting
   * characters to {@code charOutput}.
   */
  abstract ByteOutput encodingStream(CharOutput charOutput);

  /**
   * Returns the size of the byte buffer allocated when decoding {@code chars} characters.
   * {@code decodeChecked} stores every decoded byte into an array of this length, so the value
   * must be at least the number of bytes the input can decode to.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Returns a {@code ByteInput} that reads encoded characters from {@code charInput} and yields
   * the decoded bytes.
   */
  abstract ByteInput decodingStream(CharInput charInput);

  /**
   * Returns a matcher for this encoding's padding characters. {@code decodeChecked} uses it to
   * trim trailing padding from the input before decoding.
   */
  abstract CharMatcher padding();
332
333  // Modified encoding generators
334
  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * <p>This encoding itself is not modified; use the returned instance.
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * <p>This encoding itself is not modified; use the returned instance.
   *
   * @param padChar the character to use for padding
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *         separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * <p>This encoding itself is not modified; use the returned instance.
   *
   * @param separator the string to insert between groups of encoded characters
   * @param n the number of encoded characters in each group
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *         string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * <p>This encoding itself is not modified; use the returned instance.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * <p>This encoding itself is not modified; use the returned instance.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();
384
  // Shared instance returned by base64(): 64-character alphabet, '=' as the padding character.
  private static final BaseEncoding BASE64 = new StandardBaseEncoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
404
  // Shared instance returned by base64Url(): same as BASE64 except '-' and '_' replace '+' and '/'.
  private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
   * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
425
  // Shared instance returned by base32(): alphabet A-Z then 2-7, '=' as the padding character.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding.
   * (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
445
  // Shared instance returned by base32Hex(): alphabet 0-9 then A-V, '=' as the padding character.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet.  There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
464
  // Shared instance returned by base16(). The null padding character means no padding is used.
  private static final BaseEncoding BASE16 =
      new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);

  /**
   * The "base16" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding.
   * (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
485
486  private static final class Alphabet extends CharMatcher {
487    private final String name;
488    // this is meant to be immutable -- don't modify it!
489    private final char[] chars;
490    final int mask;
491    final int bitsPerChar;
492    final int charsPerChunk;
493    final int bytesPerChunk;
494    private final byte[] decodabet;
495    private final boolean[] validPadding;
496
497    Alphabet(String name, char[] chars) {
498      this.name = checkNotNull(name);
499      this.chars = checkNotNull(chars);
500      try {
501        this.bitsPerChar = log2(chars.length, UNNECESSARY);
502      } catch (ArithmeticException e) {
503        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
504      }
505
506      /*
507       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
508       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
509       */
510      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
511      this.charsPerChunk = 8 / gcd;
512      this.bytesPerChunk = bitsPerChar / gcd;
513
514      this.mask = chars.length - 1;
515
516      byte[] decodabet = new byte[Ascii.MAX + 1];
517      Arrays.fill(decodabet, (byte) -1);
518      for (int i = 0; i < chars.length; i++) {
519        char c = chars[i];
520        checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
521        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
522        decodabet[c] = (byte) i;
523      }
524      this.decodabet = decodabet;
525
526      boolean[] validPadding = new boolean[charsPerChunk];
527      for (int i = 0; i < bytesPerChunk; i++) {
528        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
529      }
530      this.validPadding = validPadding;
531    }
532
533    char encode(int bits) {
534      return chars[bits];
535    }
536
537    boolean isValidPaddingStartPosition(int index) {
538      return validPadding[index % charsPerChunk];
539    }
540
541    int decode(char ch) throws IOException {
542      if (ch > Ascii.MAX || decodabet[ch] == -1) {
543        throw new DecodingException("Unrecognized character: " + ch);
544      }
545      return decodabet[ch];
546    }
547
548    private boolean hasLowerCase() {
549      for (char c : chars) {
550        if (Ascii.isLowerCase(c)) {
551          return true;
552        }
553      }
554      return false;
555    }
556
557    private boolean hasUpperCase() {
558      for (char c : chars) {
559        if (Ascii.isUpperCase(c)) {
560          return true;
561        }
562      }
563      return false;
564    }
565
566    Alphabet upperCase() {
567      if (!hasLowerCase()) {
568        return this;
569      } else {
570        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
571        char[] upperCased = new char[chars.length];
572        for (int i = 0; i < chars.length; i++) {
573          upperCased[i] = Ascii.toUpperCase(chars[i]);
574        }
575        return new Alphabet(name + ".upperCase()", upperCased);
576      }
577    }
578
579    Alphabet lowerCase() {
580      if (!hasUpperCase()) {
581        return this;
582      } else {
583        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
584        char[] lowerCased = new char[chars.length];
585        for (int i = 0; i < chars.length; i++) {
586          lowerCased[i] = Ascii.toLowerCase(chars[i]);
587        }
588        return new Alphabet(name + ".lowerCase()", lowerCased);
589      }
590    }
591
592    @Override
593    public boolean matches(char c) {
594      return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
595    }
596
597    @Override
598    public String toString() {
599      return name;
600    }
601  }
602
603  static final class StandardBaseEncoding extends BaseEncoding {
604    // TODO(user): provide a useful toString
605    private final Alphabet alphabet;
606
607    @Nullable
608    private final Character paddingChar;
609
610    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
611      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
612    }
613
614    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
615      this.alphabet = checkNotNull(alphabet);
616      checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
617          "Padding character %s was already in alphabet", paddingChar);
618      this.paddingChar = paddingChar;
619    }
620
621    @Override
622    CharMatcher padding() {
623      return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
624    }
625
626    @Override
627    int maxEncodedSize(int bytes) {
628      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
629    }
630
    /**
     * Returns a {@code ByteOutput} that encodes each byte written to it with this encoding's
     * alphabet and writes the resulting characters to {@code out}. Closing the returned output
     * emits any partial trailing character, then padding if a padding character is configured,
     * and finally closes {@code out}.
     */
    @Override
    ByteOutput encodingStream(final CharOutput out) {
      checkNotNull(out);
      return new ByteOutput() {
        // Bits received but not yet emitted; only the low bitBufferLength bits are pending.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        // Characters written so far; close() uses it to work out how much padding to add.
        int writtenChars = 0;

        @Override
        public void write(byte b) throws IOException {
          // Append the new byte below the pending bits. Older bits above the pending ones are
          // never cleared, but every value taken from bitBuffer is masked with alphabet.mask.
          bitBuffer <<= 8;
          bitBuffer |= b & 0xFF;
          bitBufferLength += 8;
          // Emit one character per complete group of bitsPerChar bits, highest bits first.
          while (bitBufferLength >= alphabet.bitsPerChar) {
            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            bitBufferLength -= alphabet.bitsPerChar;
          }
        }

        @Override
        public void flush() throws IOException {
          out.flush();
        }

        @Override
        public void close() throws IOException {
          if (bitBufferLength > 0) {
            // Fewer than bitsPerChar bits are left: shift them up so that zero bits fill the
            // low end of the final character.
            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            if (paddingChar != null) {
              // Pad the output up to a whole number of chunks.
              while (writtenChars % alphabet.charsPerChunk != 0) {
                out.write(paddingChar.charValue());
                writtenChars++;
              }
            }
          }
          out.close();
        }
      };
    }
676
677    @Override
678    int maxDecodedSize(int chars) {
679      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
680    }
681
    /**
     * Returns a {@code ByteInput} that reads characters from {@code reader} and yields the bytes
     * they encode. Invalid input is reported by throwing a {@link DecodingException} from
     * {@code read()}. Closing the returned input closes {@code reader}.
     */
    @Override
    ByteInput decodingStream(final CharInput reader) {
      checkNotNull(reader);
      return new ByteInput() {
        // Decoded bits not yet returned; only the low bitBufferLength bits are pending.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        // Every character consumed from the reader so far, padding included.
        int readChars = 0;
        // Set once the first padding character is seen; only padding may follow it.
        boolean hitPadding = false;
        final CharMatcher paddingMatcher = padding();

        @Override
        public int read() throws IOException {
          // Keep consuming characters until a whole byte is available or the input ends.
          while (true) {
            int readChar = reader.read();
            if (readChar == -1) {
              // Input without padding must end where padding could have started; any other
              // length is rejected.
              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
                throw new DecodingException("Invalid input length " + readChars);
              }
              return -1;
            }
            readChars++;
            char ch = (char) readChar;
            if (paddingMatcher.matches(ch)) {
              // The first padding character may not be the very first character, and its
              // zero-based position (readChars - 1) must be a valid place for padding to begin.
              // readChars was already incremented, so the index in the message is one-based.
              if (!hitPadding
                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
                throw new DecodingException("Padding cannot start at index " + readChars);
              }
              hitPadding = true;
            } else if (hitPadding) {
              // Data characters are not allowed once padding has begun.
              throw new DecodingException(
                  "Expected padding character but found '" + ch + "' at index " + readChars);
            } else {
              // Append this character's bits; alphabet.decode throws for unknown characters.
              bitBuffer <<= alphabet.bitsPerChar;
              bitBuffer |= alphabet.decode(ch);
              bitBufferLength += alphabet.bitsPerChar;

              if (bitBufferLength >= 8) {
                // Return the oldest eight pending bits and keep the remainder for the next call.
                bitBufferLength -= 8;
                return (bitBuffer >> bitBufferLength) & 0xFF;
              }
            }
          }
        }

        @Override
        public void close() throws IOException {
          reader.close();
        }
      };
    }
732
733    @Override
734    public BaseEncoding omitPadding() {
735      return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
736    }
737
738    @Override
739    public BaseEncoding withPadChar(char padChar) {
740      if (8 % alphabet.bitsPerChar == 0 ||
741          (paddingChar != null && paddingChar.charValue() == padChar)) {
742        return this;
743      } else {
744        return new StandardBaseEncoding(alphabet, padChar);
745      }
746    }
747
748    @Override
749    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
750      checkNotNull(separator);
751      checkArgument(padding().or(alphabet).matchesNoneOf(separator),
752          "Separator cannot contain alphabet or padding characters");
753      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
754    }
755
756    private transient BaseEncoding upperCase;
757    private transient BaseEncoding lowerCase;
758
759    @Override
760    public BaseEncoding upperCase() {
761      BaseEncoding result = upperCase;
762      if (result == null) {
763        Alphabet upper = alphabet.upperCase();
764        result = upperCase =
765            (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
766      }
767      return result;
768    }
769
770    @Override
771    public BaseEncoding lowerCase() {
772      BaseEncoding result = lowerCase;
773      if (result == null) {
774        Alphabet lower = alphabet.lowerCase();
775        result = lowerCase =
776            (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
777      }
778      return result;
779    }
780
781    @Override
782    public String toString() {
783      StringBuilder builder = new StringBuilder("BaseEncoding.");
784      builder.append(alphabet.toString());
785      if (8 % alphabet.bitsPerChar != 0) {
786        if (paddingChar == null) {
787          builder.append(".omitPadding()");
788        } else {
789          builder.append(".withPadChar(").append(paddingChar).append(')');
790        }
791      }
792      return builder.toString();
793    }
794  }
795
796  static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
797    checkNotNull(delegate);
798    checkNotNull(toIgnore);
799    return new CharInput() {
800      @Override
801      public int read() throws IOException {
802        int readChar;
803        do {
804          readChar = delegate.read();
805        } while (readChar != -1 && toIgnore.matches((char) readChar));
806        return readChar;
807      }
808
809      @Override
810      public void close() throws IOException {
811        delegate.close();
812      }
813    };
814  }
815
816  static CharOutput separatingOutput(
817      final CharOutput delegate, final String separator, final int afterEveryChars) {
818    checkNotNull(delegate);
819    checkNotNull(separator);
820    checkArgument(afterEveryChars > 0);
821    return new CharOutput() {
822      int charsUntilSeparator = afterEveryChars;
823
824      @Override
825      public void write(char c) throws IOException {
826        if (charsUntilSeparator == 0) {
827          for (int i = 0; i < separator.length(); i++) {
828            delegate.write(separator.charAt(i));
829          }
830          charsUntilSeparator = afterEveryChars;
831        }
832        delegate.write(c);
833        charsUntilSeparator--;
834      }
835
836      @Override
837      public void flush() throws IOException {
838        delegate.flush();
839      }
840
841      @Override
842      public void close() throws IOException {
843        delegate.close();
844      }
845    };
846  }
847
848  static final class SeparatedBaseEncoding extends BaseEncoding {
849    private final BaseEncoding delegate;
850    private final String separator;
851    private final int afterEveryChars;
852    private final CharMatcher separatorChars;
853
854    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
855      this.delegate = checkNotNull(delegate);
856      this.separator = checkNotNull(separator);
857      this.afterEveryChars = afterEveryChars;
858      checkArgument(
859          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
860      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
861    }
862
863    @Override
864    CharMatcher padding() {
865      return delegate.padding();
866    }
867
868    @Override
869    int maxEncodedSize(int bytes) {
870      int unseparatedSize = delegate.maxEncodedSize(bytes);
871      return unseparatedSize + separator.length()
872          * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
873    }
874
875    @Override
876    ByteOutput encodingStream(final CharOutput output) {
877      return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
878    }
879
880    @Override
881    int maxDecodedSize(int chars) {
882      return delegate.maxDecodedSize(chars);
883    }
884
885    @Override
886    ByteInput decodingStream(final CharInput input) {
887      return delegate.decodingStream(ignoringInput(input, separatorChars));
888    }
889
890    @Override
891    public BaseEncoding omitPadding() {
892      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
893    }
894
895    @Override
896    public BaseEncoding withPadChar(char padChar) {
897      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
898    }
899
900    @Override
901    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
902      throw new UnsupportedOperationException("Already have a separator");
903    }
904
905    @Override
906    public BaseEncoding upperCase() {
907      return delegate.upperCase().withSeparator(separator, afterEveryChars);
908    }
909
910    @Override
911    public BaseEncoding lowerCase() {
912      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
913    }
914
915    @Override
916    public String toString() {
917      return delegate.toString() +
918          ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
919    }
920  }
921}