001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.io.GwtWorkarounds.asCharInput;
022import static com.google.common.io.GwtWorkarounds.asCharOutput;
023import static com.google.common.io.GwtWorkarounds.asInputStream;
024import static com.google.common.io.GwtWorkarounds.asOutputStream;
025import static com.google.common.io.GwtWorkarounds.stringBuilderOutput;
026import static com.google.common.math.IntMath.divide;
027import static com.google.common.math.IntMath.log2;
028import static java.math.RoundingMode.CEILING;
029import static java.math.RoundingMode.FLOOR;
030import static java.math.RoundingMode.UNNECESSARY;
031
032import com.google.common.annotations.Beta;
033import com.google.common.annotations.GwtCompatible;
034import com.google.common.annotations.GwtIncompatible;
035import com.google.common.base.Ascii;
036import com.google.common.base.CharMatcher;
037import com.google.common.io.GwtWorkarounds.ByteInput;
038import com.google.common.io.GwtWorkarounds.ByteOutput;
039import com.google.common.io.GwtWorkarounds.CharInput;
040import com.google.common.io.GwtWorkarounds.CharOutput;
041
042import java.io.IOException;
043import java.io.InputStream;
044import java.io.OutputStream;
045import java.io.Reader;
046import java.io.Writer;
047import java.util.Arrays;
048
049import javax.annotation.CheckReturnValue;
050import javax.annotation.Nullable;
051
052/**
053 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
054 * strings. This class includes several constants for encoding schemes specified by <a
055 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
056 * <pre>   {@code
057 *
058 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))
059 * }</pre>
060 * returns the string {@code "MZXW6==="}, and <pre>   {@code
061 *
062 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
063 * }</pre>
064 *
065 * ...returns the ASCII bytes of the string {@code "foo"}.
066 *
067 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
068 * RFC 4648.  Decoding rejects characters in the wrong case, though padding is optional.
069 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
070 * with modified behavior: <pre>   {@code
071 *
072 *  BaseEncoding.base16().lowerCase().decode("deadbeef");
073 * }</pre>
074 *
075 * <p>Warning: BaseEncoding instances are immutable.  Invoking a configuration method has no effect
076 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
077 * <pre>   {@code
078 *
079 *   // Do NOT do this
080 *   BaseEncoding hex = BaseEncoding.base16();
081 *   hex.lowerCase(); // does nothing!
082 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException
083 * }</pre>
084 *
085 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
086 * {@code x}, but the reverse does not necessarily hold.
087 *
088 * <p>
089 * <table>
090 * <tr>
091 * <th>Encoding
092 * <th>Alphabet
093 * <th>{@code char:byte} ratio
094 * <th>Default padding
095 * <th>Comments
096 * <tr>
097 * <td>{@link #base16()}
098 * <td>0-9 A-F
099 * <td>2.00
100 * <td>N/A
101 * <td>Traditional hexadecimal.  Defaults to upper case.
102 * <tr>
103 * <td>{@link #base32()}
104 * <td>A-Z 2-7
105 * <td>1.60
106 * <td>=
107 * <td>Human-readable; no possibility of mixing up 0/O or 1/I.  Defaults to upper case.
108 * <tr>
109 * <td>{@link #base32Hex()}
110 * <td>0-9 A-V
111 * <td>1.60
112 * <td>=
113 * <td>"Numerical" base 32; extended from the traditional hex alphabet.  Defaults to upper case.
114 * <tr>
115 * <td>{@link #base64()}
116 * <td>A-Z a-z 0-9 + /
117 * <td>1.33
118 * <td>=
119 * <td>
120 * <tr>
121 * <td>{@link #base64Url()}
122 * <td>A-Z a-z 0-9 - _
123 * <td>1.33
124 * <td>=
125 * <td>Safe to use as filenames, or to pass in URLs without escaping
126 * </table>
127 *
128 * <p>
129 * All instances of this class are immutable, so they may be stored safely as static constants.
130 *
131 * @author Louis Wasserman
132 * @since 14.0
133 */
134@Beta
135@GwtCompatible(emulated = true)
136public abstract class BaseEncoding {
137  // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int])
138
  // No access modifier: the constructor is package-private, so only classes in this package
  // (including the nested implementations below) can extend BaseEncoding.
  BaseEncoding() {}
140
141  /**
142   * Encodes the specified byte array, and returns the encoded {@code String}.
143   */
144  public String encode(byte[] bytes) {
145    return encode(checkNotNull(bytes), 0, bytes.length);
146  }
147
148  /**
149   * Encodes the specified range of the specified byte array, and returns the encoded
150   * {@code String}.
151   */
152  public final String encode(byte[] bytes, int off, int len) {
153    checkNotNull(bytes);
154    checkPositionIndexes(off, off + len, bytes.length);
155    CharOutput result = stringBuilderOutput(maxEncodedSize(len));
156    ByteOutput byteOutput = encodingStream(result);
157    try {
158      for (int i = 0; i < len; i++) {
159        byteOutput.write(bytes[off + i]);
160      }
161      byteOutput.close();
162    } catch (IOException impossible) {
163      throw new AssertionError("impossible");
164    }
165    return result.toString();
166  }
167
168  /**
169   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
170   * {@code Writer}.  When the returned {@code OutputStream} is closed, so is the backing
171   * {@code Writer}.
172   */
173  @GwtIncompatible("Writer,OutputStream")
174  public final OutputStream encodingStream(Writer writer) {
175    return asOutputStream(encodingStream(asCharOutput(writer)));
176  }
177
178  /**
179   * Returns an {@code OutputSupplier} that supplies streams that encode bytes using this encoding
180   * into writers from the specified {@code OutputSupplier}.
181   */
182  @GwtIncompatible("Writer,OutputStream")
183  public final OutputSupplier<OutputStream> encodingStream(
184      final OutputSupplier<? extends Writer> writerSupplier) {
185    checkNotNull(writerSupplier);
186    return new OutputSupplier<OutputStream>() {
187      @Override
188      public OutputStream getOutput() throws IOException {
189        return encodingStream(writerSupplier.getOutput());
190      }
191    };
192  }
193
194  /**
195   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
196   */
197  @GwtIncompatible("ByteSink,CharSink")
198  public final ByteSink encodingSink(final CharSink encodedSink) {
199    checkNotNull(encodedSink);
200    return new ByteSink() {
201      @Override
202      public OutputStream openStream() throws IOException {
203        return encodingStream(encodedSink.openStream());
204      }
205    };
206  }
207
208  // TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher)
209
210  private static byte[] extract(byte[] result, int length) {
211    if (length == result.length) {
212      return result;
213    } else {
214      byte[] trunc = new byte[length];
215      System.arraycopy(result, 0, trunc, 0, length);
216      return trunc;
217    }
218  }
219
220  /**
221   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
222   * This is the inverse operation to {@link #encode(byte[])}.
223   *
224   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
225   *         encoding.
226   */
227  public final byte[] decode(CharSequence chars) {
228    chars = padding().trimTrailingFrom(chars);
229    ByteInput decodedInput = decodingStream(asCharInput(chars));
230    byte[] tmp = new byte[maxDecodedSize(chars.length())];
231    int index = 0;
232    try {
233      for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
234        tmp[index++] = (byte) i;
235      }
236    } catch (IOException badInput) {
237      throw new IllegalArgumentException(badInput);
238    }
239    return extract(tmp, index);
240  }
241
242  /**
243   * Returns an {@code InputStream} that decodes base-encoded input from the specified
244   * {@code Reader}.
245   */
246  @GwtIncompatible("Reader,InputStream")
247  public final InputStream decodingStream(Reader reader) {
248    return asInputStream(decodingStream(asCharInput(reader)));
249  }
250
251  /**
252   * Returns an {@code InputSupplier} that supplies input streams that decode base-encoded input
253   * from readers from the specified supplier.
254   */
255  @GwtIncompatible("Reader,InputStream")
256  public final InputSupplier<InputStream> decodingStream(
257      final InputSupplier<? extends Reader> readerSupplier) {
258    checkNotNull(readerSupplier);
259    return new InputSupplier<InputStream>() {
260      @Override
261      public InputStream getInput() throws IOException {
262        return decodingStream(readerSupplier.getInput());
263      }
264    };
265  }
266
267  /**
268   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
269   * {@code CharSource}.
270   */
271  @GwtIncompatible("ByteSource,CharSource")
272  public final ByteSource decodingSource(final CharSource encodedSource) {
273    checkNotNull(encodedSource);
274    return new ByteSource() {
275      @Override
276      public InputStream openStream() throws IOException {
277        return decodingStream(encodedSource.openStream());
278      }
279    };
280  }
281
  // Implementations for encoding/decoding

  /**
   * Returns the capacity used to pre-size the output when encoding {@code bytes} bytes;
   * {@code encode(byte[], int, int)} passes this value to {@code stringBuilderOutput}.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Returns a {@code ByteOutput} that encodes the bytes written to it and writes the resulting
   * characters to {@code charOutput}.
   */
  abstract ByteOutput encodingStream(CharOutput charOutput);

  /**
   * Returns the size of the temporary buffer that {@code decode(CharSequence)} allocates for an
   * input of {@code chars} characters; it must be at least the number of bytes actually decoded.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Returns a {@code ByteInput} that yields the bytes decoded from the characters read from
   * {@code charInput}.
   */
  abstract ByteInput decodingStream(CharInput charInput);

  /**
   * Returns a matcher for this encoding's padding character(s); {@code decode(CharSequence)}
   * uses it to trim trailing padding before decoding.
   */
  abstract CharMatcher padding();
293
  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *         separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. The separator is only written between groups of encoded
   * characters, never after the final character. Any occurrences of any characters that occur in
   * the separator are skipped over in decoding.
   *
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *         string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();
345
  // Shared instance returned by base64(). The first argument is the alphabet's name, which is
  // what toString() reports after the "BaseEncoding." prefix.
  private static final BaseEncoding BASE64 = new StandardBaseEncoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
365
  // Shared instance returned by base64Url(); differs from BASE64 only in the last two alphabet
  // characters ('-' and '_' instead of '+' and '/').
  private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
   * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
386
  // Shared instance returned by base32(): 32-character alphabet with '=' padding.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding.
   * (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
406
  // Shared instance returned by base32Hex(): digits first, then 'A' through 'V', '=' padding.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet.  There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
425
  // Shared instance returned by base16(). The null padding character means no padding is
  // written on encode and no character is treated as padding on decode.
  private static final BaseEncoding BASE16 =
      new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);

  /**
   * The "base16" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding.
   * (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
446
447  private static final class Alphabet extends CharMatcher {
448    private final String name;
449    // this is meant to be immutable -- don't modify it!
450    private final char[] chars;
451    final int mask;
452    final int bitsPerChar;
453    final int charsPerChunk;
454    final int bytesPerChunk;
455    private final byte[] decodabet;
456    private final boolean[] validPadding;
457
458    Alphabet(String name, char[] chars) {
459      this.name = checkNotNull(name);
460      this.chars = checkNotNull(chars);
461      try {
462        this.bitsPerChar = log2(chars.length, UNNECESSARY);
463      } catch (ArithmeticException e) {
464        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
465      }
466
467      /*
468       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
469       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
470       */
471      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
472      this.charsPerChunk = 8 / gcd;
473      this.bytesPerChunk = bitsPerChar / gcd;
474
475      this.mask = chars.length - 1;
476
477      byte[] decodabet = new byte[Ascii.MAX + 1];
478      Arrays.fill(decodabet, (byte) -1);
479      for (int i = 0; i < chars.length; i++) {
480        char c = chars[i];
481        checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
482        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
483        decodabet[c] = (byte) i;
484      }
485      this.decodabet = decodabet;
486
487      boolean[] validPadding = new boolean[charsPerChunk];
488      for (int i = 0; i < bytesPerChunk; i++) {
489        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
490      }
491      this.validPadding = validPadding;
492    }
493
494    char encode(int bits) {
495      return chars[bits];
496    }
497
498    boolean isValidPaddingStartPosition(int index) {
499      return validPadding[index % charsPerChunk];
500    }
501
502    int decode(char ch) throws IOException {
503      if (ch > Ascii.MAX || decodabet[ch] == -1) {
504        throw new IOException("Unrecognized character: " + ch);
505      }
506      return decodabet[ch];
507    }
508
509    private boolean hasLowerCase() {
510      for (char c : chars) {
511        if (Ascii.isLowerCase(c)) {
512          return true;
513        }
514      }
515      return false;
516    }
517
518    private boolean hasUpperCase() {
519      for (char c : chars) {
520        if (Ascii.isUpperCase(c)) {
521          return true;
522        }
523      }
524      return false;
525    }
526
527    Alphabet upperCase() {
528      if (!hasLowerCase()) {
529        return this;
530      } else {
531        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
532        char[] upperCased = new char[chars.length];
533        for (int i = 0; i < chars.length; i++) {
534          upperCased[i] = Ascii.toUpperCase(chars[i]);
535        }
536        return new Alphabet(name + ".upperCase()", upperCased);
537      }
538    }
539
540    Alphabet lowerCase() {
541      if (!hasUpperCase()) {
542        return this;
543      } else {
544        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
545        char[] lowerCased = new char[chars.length];
546        for (int i = 0; i < chars.length; i++) {
547          lowerCased[i] = Ascii.toLowerCase(chars[i]);
548        }
549        return new Alphabet(name + ".lowerCase()", lowerCased);
550      }
551    }
552
553    @Override
554    public boolean matches(char c) {
555      return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
556    }
557
558    @Override
559    public String toString() {
560      return name;
561    }
562  }
563
564  static final class StandardBaseEncoding extends BaseEncoding {
565    // TODO(user): provide a useful toString
566    private final Alphabet alphabet;
567
568    @Nullable
569    private final Character paddingChar;
570
571    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
572      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
573    }
574
575    StandardBaseEncoding(Alphabet alphabet, Character paddingChar) {
576      this.alphabet = checkNotNull(alphabet);
577      checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
578          "Padding character %s was already in alphabet", paddingChar);
579      this.paddingChar = paddingChar;
580    }
581
582    @Override
583    CharMatcher padding() {
584      return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
585    }
586
587    @Override
588    int maxEncodedSize(int bytes) {
589      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
590    }
591
592    @Override
593    ByteOutput encodingStream(final CharOutput out) {
594      checkNotNull(out);
595      return new ByteOutput() {
596        int bitBuffer = 0;
597        int bitBufferLength = 0;
598        int writtenChars = 0;
599
600        @Override
601        public void write(byte b) throws IOException {
602          bitBuffer <<= 8;
603          bitBuffer |= b & 0xFF;
604          bitBufferLength += 8;
605          while (bitBufferLength >= alphabet.bitsPerChar) {
606            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
607                & alphabet.mask;
608            out.write(alphabet.encode(charIndex));
609            writtenChars++;
610            bitBufferLength -= alphabet.bitsPerChar;
611          }
612        }
613
614        @Override
615        public void flush() throws IOException {
616          out.flush();
617        }
618
619        @Override
620        public void close() throws IOException {
621          if (bitBufferLength > 0) {
622            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
623                & alphabet.mask;
624            out.write(alphabet.encode(charIndex));
625            writtenChars++;
626            if (paddingChar != null) {
627              while (writtenChars % alphabet.charsPerChunk != 0) {
628                out.write(paddingChar.charValue());
629                writtenChars++;
630              }
631            }
632          }
633          out.close();
634        }
635      };
636    }
637
638    @Override
639    int maxDecodedSize(int chars) {
640      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
641    }
642
643    @Override
644    ByteInput decodingStream(final CharInput reader) {
645      checkNotNull(reader);
646      return new ByteInput() {
647        int bitBuffer = 0;
648        int bitBufferLength = 0;
649        int readChars = 0;
650        boolean hitPadding = false;
651        final CharMatcher paddingMatcher = padding();
652
653        @Override
654        public int read() throws IOException {
655          while (true) {
656            int readChar = reader.read();
657            if (readChar == -1) {
658              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
659                throw new IOException("Invalid input length " + readChars);
660              }
661              return -1;
662            }
663            readChars++;
664            char ch = (char) readChar;
665            if (paddingMatcher.matches(ch)) {
666              if (!hitPadding
667                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
668                throw new IOException("Padding cannot start at index " + readChars);
669              }
670              hitPadding = true;
671            } else if (hitPadding) {
672              throw new IOException(
673                  "Expected padding character but found '" + ch + "' at index " + readChars);
674            } else {
675              bitBuffer <<= alphabet.bitsPerChar;
676              bitBuffer |= alphabet.decode(ch);
677              bitBufferLength += alphabet.bitsPerChar;
678
679              if (bitBufferLength >= 8) {
680                bitBufferLength -= 8;
681                return (bitBuffer >> bitBufferLength) & 0xFF;
682              }
683            }
684          }
685        }
686
687        @Override
688        public void close() throws IOException {
689          reader.close();
690        }
691      };
692    }
693
694    @Override
695    public BaseEncoding omitPadding() {
696      return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
697    }
698
699    @Override
700    public BaseEncoding withPadChar(char padChar) {
701      if (8 % alphabet.bitsPerChar == 0 ||
702          (paddingChar != null && paddingChar.charValue() == padChar)) {
703        return this;
704      } else {
705        return new StandardBaseEncoding(alphabet, padChar);
706      }
707    }
708
709    @Override
710    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
711      checkNotNull(separator);
712      checkArgument(padding().or(alphabet).matchesNoneOf(separator),
713          "Separator cannot contain alphabet or padding characters");
714      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
715    }
716
717    private transient BaseEncoding upperCase;
718    private transient BaseEncoding lowerCase;
719
720    @Override
721    public BaseEncoding upperCase() {
722      BaseEncoding result = upperCase;
723      if (result == null) {
724        Alphabet upper = alphabet.upperCase();
725        result = upperCase =
726            (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
727      }
728      return result;
729    }
730
731    @Override
732    public BaseEncoding lowerCase() {
733      BaseEncoding result = lowerCase;
734      if (result == null) {
735        Alphabet lower = alphabet.lowerCase();
736        result = lowerCase =
737            (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
738      }
739      return result;
740    }
741
742    @Override
743    public String toString() {
744      StringBuilder builder = new StringBuilder("BaseEncoding.");
745      builder.append(alphabet.toString());
746      if (8 % alphabet.bitsPerChar != 0) {
747        if (paddingChar == null) {
748          builder.append(".omitPadding()");
749        } else {
750          builder.append(".withPadChar(").append(paddingChar).append(')');
751        }
752      }
753      return builder.toString();
754    }
755  }
756
757  static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
758    checkNotNull(delegate);
759    checkNotNull(toIgnore);
760    return new CharInput() {
761      @Override
762      public int read() throws IOException {
763        int readChar;
764        do {
765          readChar = delegate.read();
766        } while (readChar != -1 && toIgnore.matches((char) readChar));
767        return readChar;
768      }
769
770      @Override
771      public void close() throws IOException {
772        delegate.close();
773      }
774    };
775  }
776
777  static CharOutput separatingOutput(
778      final CharOutput delegate, final String separator, final int afterEveryChars) {
779    checkNotNull(delegate);
780    checkNotNull(separator);
781    checkArgument(afterEveryChars > 0);
782    return new CharOutput() {
783      int charsUntilSeparator = afterEveryChars;
784
785      @Override
786      public void write(char c) throws IOException {
787        if (charsUntilSeparator == 0) {
788          for (int i = 0; i < separator.length(); i++) {
789            delegate.write(separator.charAt(i));
790          }
791          charsUntilSeparator = afterEveryChars;
792        }
793        delegate.write(c);
794        charsUntilSeparator--;
795      }
796
797      @Override
798      public void flush() throws IOException {
799        delegate.flush();
800      }
801
802      @Override
803      public void close() throws IOException {
804        delegate.close();
805      }
806    };
807  }
808
809  static final class SeparatedBaseEncoding extends BaseEncoding {
810    private final BaseEncoding delegate;
811    private final String separator;
812    private final int afterEveryChars;
813    private final CharMatcher separatorChars;
814
815    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
816      this.delegate = checkNotNull(delegate);
817      this.separator = checkNotNull(separator);
818      this.afterEveryChars = afterEveryChars;
819      checkArgument(
820          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
821      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
822    }
823
824    @Override
825    CharMatcher padding() {
826      return delegate.padding();
827    }
828
829    @Override
830    int maxEncodedSize(int bytes) {
831      int unseparatedSize = delegate.maxEncodedSize(bytes);
832      return unseparatedSize + separator.length()
833          * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
834    }
835
836    @Override
837    ByteOutput encodingStream(final CharOutput output) {
838      return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
839    }
840
841    @Override
842    int maxDecodedSize(int chars) {
843      return delegate.maxDecodedSize(chars);
844    }
845
846    @Override
847    ByteInput decodingStream(final CharInput input) {
848      return delegate.decodingStream(ignoringInput(input, separatorChars));
849    }
850
851    @Override
852    public BaseEncoding omitPadding() {
853      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
854    }
855
856    @Override
857    public BaseEncoding withPadChar(char padChar) {
858      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
859    }
860
861    @Override
862    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
863      throw new UnsupportedOperationException("Already have a separator");
864    }
865
866    @Override
867    public BaseEncoding upperCase() {
868      return delegate.upperCase().withSeparator(separator, afterEveryChars);
869    }
870
871    @Override
872    public BaseEncoding lowerCase() {
873      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
874    }
875
876    @Override
877    public String toString() {
878      return delegate.toString() +
879          ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
880    }
881  }
882}