Source code

001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.Ascii;
030import com.google.common.base.Objects;
031import com.google.errorprone.annotations.concurrent.LazyInit;
032import java.io.IOException;
033import java.io.InputStream;
034import java.io.OutputStream;
035import java.io.Reader;
036import java.io.Writer;
037import java.util.Arrays;
038import javax.annotation.CheckForNull;
039
040/**
041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
042 * strings. This class includes several constants for encoding schemes specified by <a
043 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
044 *
045 * <pre>{@code
046 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))
047 * }</pre>
048 *
049 * <p>returns the string {@code "MZXW6==="}, and
050 *
051 * <pre>{@code
052 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
053 * }</pre>
054 *
055 * <p>...returns the ASCII bytes of the string {@code "foo"}.
056 *
057 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
058 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
059 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
060 * behavior:
061 *
062 * <pre>{@code
063 * BaseEncoding.base16().lowerCase().decode("deadbeef");
064 * }</pre>
065 *
066 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
067 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
068 *
069 * <pre>{@code
070 * // Do NOT do this
071 * BaseEncoding hex = BaseEncoding.base16();
072 * hex.lowerCase(); // does nothing!
073 * return hex.decode("deadbeef"); // throws an IllegalArgumentException
074 * }</pre>
075 *
076 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code
077 * x}, but the reverse does not necessarily hold.
078 *
079 * <table>
080 * <caption>Encodings</caption>
081 * <tr>
082 * <th>Encoding
083 * <th>Alphabet
084 * <th>{@code char:byte} ratio
085 * <th>Default padding
086 * <th>Comments
087 * <tr>
088 * <td>{@link #base16()}
089 * <td>0-9 A-F
090 * <td>2.00
091 * <td>N/A
092 * <td>Traditional hexadecimal. Defaults to upper case.
093 * <tr>
094 * <td>{@link #base32()}
095 * <td>A-Z 2-7
096 * <td>1.60
097 * <td>=
098 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
099 * <tr>
100 * <td>{@link #base32Hex()}
101 * <td>0-9 A-V
102 * <td>1.60
103 * <td>=
104 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
105 * <tr>
106 * <td>{@link #base64()}
107 * <td>A-Z a-z 0-9 + /
108 * <td>1.33
109 * <td>=
110 * <td>
111 * <tr>
112 * <td>{@link #base64Url()}
113 * <td>A-Z a-z 0-9 - _
114 * <td>1.33
115 * <td>=
116 * <td>Safe to use as filenames, or to pass in URLs without escaping
117 * </table>
118 *
119 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
120 *
121 * @author Louis Wasserman
122 * @since 14.0
123 */
124@GwtCompatible(emulated = true)
125@ElementTypesAreNonnullByDefault
126public abstract class BaseEncoding {
127  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
128
/** Package-private constructor: it has no access modifier, so only this package can call it. */
BaseEncoding() {}
130
131  /**
132   * Exception indicating invalid base-encoded input encountered while decoding.
133   *
134   * @author Louis Wasserman
135   * @since 15.0
136   */
137  public static final class DecodingException extends IOException {
138    DecodingException(String message) {
139      super(message);
140    }
141
142    DecodingException(Throwable cause) {
143      super(cause);
144    }
145  }
146
147  /** Encodes the specified byte array, and returns the encoded {@code String}. */
148  public String encode(byte[] bytes) {
149    return encode(bytes, 0, bytes.length);
150  }
151
152  /**
153   * Encodes the specified range of the specified byte array, and returns the encoded {@code
154   * String}.
155   */
156  public final String encode(byte[] bytes, int off, int len) {
157    checkPositionIndexes(off, off + len, bytes.length);
158    StringBuilder result = new StringBuilder(maxEncodedSize(len));
159    try {
160      encodeTo(result, bytes, off, len);
161    } catch (IOException impossible) {
162      throw new AssertionError(impossible);
163    }
164    return result.toString();
165  }
166
167  /**
168   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
169   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code
170   * Writer}.
171   */
172  @GwtIncompatible // Writer,OutputStream
173  public abstract OutputStream encodingStream(Writer writer);
174
175  /**
176   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
177   */
178  @GwtIncompatible // ByteSink,CharSink
179  public final ByteSink encodingSink(final CharSink encodedSink) {
180    checkNotNull(encodedSink);
181    return new ByteSink() {
182      @Override
183      public OutputStream openStream() throws IOException {
184        return encodingStream(encodedSink.openStream());
185      }
186    };
187  }
188
189  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
190
191  private static byte[] extract(byte[] result, int length) {
192    if (length == result.length) {
193      return result;
194    }
195    byte[] trunc = new byte[length];
196    System.arraycopy(result, 0, trunc, 0, length);
197    return trunc;
198  }
199
200  /**
201   * Determines whether the specified character sequence is a valid encoded string according to this
202   * encoding.
203   *
204   * @since 20.0
205   */
206  public abstract boolean canDecode(CharSequence chars);
207
208  /**
209   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
210   * inverse operation to {@link #encode(byte[])}.
211   *
212   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
213   *     encoding.
214   */
215  public final byte[] decode(CharSequence chars) {
216    try {
217      return decodeChecked(chars);
218    } catch (DecodingException badInput) {
219      throw new IllegalArgumentException(badInput);
220    }
221  }
222
223  /**
224   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
225   * inverse operation to {@link #encode(byte[])}.
226   *
227   * @throws DecodingException if the input is not a valid encoded string according to this
228   *     encoding.
229   */
230  final byte[] decodeChecked(CharSequence chars)
231      throws DecodingException {
232    chars = trimTrailingPadding(chars);
233    byte[] tmp = new byte[maxDecodedSize(chars.length())];
234    int len = decodeTo(tmp, chars);
235    return extract(tmp, len);
236  }
237
238  /**
239   * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code
240   * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors.
241   */
242  @GwtIncompatible // Reader,InputStream
243  public abstract InputStream decodingStream(Reader reader);
244
245  /**
246   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code
247   * CharSource}.
248   */
249  @GwtIncompatible // ByteSource,CharSource
250  public final ByteSource decodingSource(final CharSource encodedSource) {
251    checkNotNull(encodedSource);
252    return new ByteSource() {
253      @Override
254      public InputStream openStream() throws IOException {
255        return decodingStream(encodedSource.openStream());
256      }
257    };
258  }
259
260  // Implementations for encoding/decoding
261
262  abstract int maxEncodedSize(int bytes);
263
264  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;
265
266  abstract int maxDecodedSize(int chars);
267
268  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
269
270  CharSequence trimTrailingPadding(CharSequence chars) {
271    return checkNotNull(chars);
272  }
273
274  // Modified encoding generators
275
276  /**
277   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
278   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
279   * section 3.2</a>, Padding of Encoded Data.
280   */
281  public abstract BaseEncoding omitPadding();
282
283  /**
284   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
285   * for padding.
286   *
287   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
288   *     separator
289   */
290  public abstract BaseEncoding withPadChar(char padChar);
291
292  /**
293   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
294   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
295   * are skipped over in decoding.
296   *
297   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
298   *     string, or if {@code n <= 0}
299   * @throws UnsupportedOperationException if this encoding already uses a separator
300   */
301  public abstract BaseEncoding withSeparator(String separator, int n);
302
303  /**
304   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
305   * uppercase letters. Padding and separator characters remain in their original case.
306   *
307   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
308   *     lower-case characters
309   */
310  public abstract BaseEncoding upperCase();
311
312  /**
313   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
314   * lowercase letters. Padding and separator characters remain in their original case.
315   *
316   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
317   *     lower-case characters
318   */
319  public abstract BaseEncoding lowerCase();
320
321  private static final BaseEncoding BASE64 =
322      new Base64Encoding(
323          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
324
325  /**
326   * The "base64" base encoding specified by <a
327   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
328   * (This is the same as the base 64 encoding from <a
329   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
330   *
331   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
332   * omitted} or {@linkplain #withPadChar(char) replaced}.
333   *
334   * <p>No line feeds are added by default, as per <a
335   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
336   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
337   */
338  public static BaseEncoding base64() {
339    return BASE64;
340  }
341
342  private static final BaseEncoding BASE64_URL =
343      new Base64Encoding(
344          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');
345
346  /**
347   * The "base64url" encoding specified by <a
348   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
349   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
350   * is the same as the base 64 encoding with URL and filename safe alphabet from <a
351   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
352   *
353   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
354   * omitted} or {@linkplain #withPadChar(char) replaced}.
355   *
356   * <p>No line feeds are added by default, as per <a
357   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
358   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
359   */
360  public static BaseEncoding base64Url() {
361    return BASE64_URL;
362  }
363
364  private static final BaseEncoding BASE32 =
365      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
366
367  /**
368   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
369   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
370   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
371   *
372   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
373   * omitted} or {@linkplain #withPadChar(char) replaced}.
374   *
375   * <p>No line feeds are added by default, as per <a
376   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
377   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
378   */
379  public static BaseEncoding base32() {
380    return BASE32;
381  }
382
383  private static final BaseEncoding BASE32_HEX =
384      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');
385
386  /**
387   * The "base32hex" encoding specified by <a
388   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
389   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
390   *
391   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
392   * omitted} or {@linkplain #withPadChar(char) replaced}.
393   *
394   * <p>No line feeds are added by default, as per <a
395   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
396   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
397   */
398  public static BaseEncoding base32Hex() {
399    return BASE32_HEX;
400  }
401
402  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");
403
404  /**
405   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
406   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a
407   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
408   * "hexadecimal" format.
409   *
410   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
411   * have no effect.
412   *
413   * <p>No line feeds are added by default, as per <a
414   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
415   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
416   */
417  public static BaseEncoding base16() {
418    return BASE16;
419  }
420
421  private static final class Alphabet {
422    private final String name;
423    // this is meant to be immutable -- don't modify it!
424    private final char[] chars;
425    final int mask;
426    final int bitsPerChar;
427    final int charsPerChunk;
428    final int bytesPerChunk;
429    private final byte[] decodabet;
430    private final boolean[] validPadding;
431
432    Alphabet(String name, char[] chars) {
433      this.name = checkNotNull(name);
434      this.chars = checkNotNull(chars);
435      try {
436        this.bitsPerChar = log2(chars.length, UNNECESSARY);
437      } catch (ArithmeticException e) {
438        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
439      }
440
441      /*
442       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
443       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
444       */
445      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
446      try {
447        this.charsPerChunk = 8 / gcd;
448        this.bytesPerChunk = bitsPerChar / gcd;
449      } catch (ArithmeticException e) {
450        throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e);
451      }
452
453      this.mask = chars.length - 1;
454
455      byte[] decodabet = new byte[Ascii.MAX + 1];
456      Arrays.fill(decodabet, (byte) -1);
457      for (int i = 0; i < chars.length; i++) {
458        char c = chars[i];
459        checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
460        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
461        decodabet[c] = (byte) i;
462      }
463      this.decodabet = decodabet;
464
465      boolean[] validPadding = new boolean[charsPerChunk];
466      for (int i = 0; i < bytesPerChunk; i++) {
467        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
468      }
469      this.validPadding = validPadding;
470    }
471
472    char encode(int bits) {
473      return chars[bits];
474    }
475
476    boolean isValidPaddingStartPosition(int index) {
477      return validPadding[index % charsPerChunk];
478    }
479
480    boolean canDecode(char ch) {
481      return ch <= Ascii.MAX && decodabet[ch] != -1;
482    }
483
484    int decode(char ch) throws DecodingException {
485      if (ch > Ascii.MAX) {
486        throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
487      }
488      int result = decodabet[ch];
489      if (result == -1) {
490        if (ch <= 0x20 || ch == Ascii.MAX) {
491          throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
492        } else {
493          throw new DecodingException("Unrecognized character: " + ch);
494        }
495      }
496      return result;
497    }
498
499    private boolean hasLowerCase() {
500      for (char c : chars) {
501        if (Ascii.isLowerCase(c)) {
502          return true;
503        }
504      }
505      return false;
506    }
507
508    private boolean hasUpperCase() {
509      for (char c : chars) {
510        if (Ascii.isUpperCase(c)) {
511          return true;
512        }
513      }
514      return false;
515    }
516
517    Alphabet upperCase() {
518      if (!hasLowerCase()) {
519        return this;
520      }
521      checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
522      char[] upperCased = new char[chars.length];
523      for (int i = 0; i < chars.length; i++) {
524        upperCased[i] = Ascii.toUpperCase(chars[i]);
525      }
526      return new Alphabet(name + ".upperCase()", upperCased);
527    }
528
529    Alphabet lowerCase() {
530      if (!hasUpperCase()) {
531        return this;
532      }
533      checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
534      char[] lowerCased = new char[chars.length];
535      for (int i = 0; i < chars.length; i++) {
536        lowerCased[i] = Ascii.toLowerCase(chars[i]);
537      }
538      return new Alphabet(name + ".lowerCase()", lowerCased);
539    }
540
541    public boolean matches(char c) {
542      return c < decodabet.length && decodabet[c] != -1;
543    }
544
545    @Override
546    public String toString() {
547      return name;
548    }
549
550    @Override
551    public boolean equals(@CheckForNull Object other) {
552      if (other instanceof Alphabet) {
553        Alphabet that = (Alphabet) other;
554        return Arrays.equals(this.chars, that.chars);
555      }
556      return false;
557    }
558
559    @Override
560    public int hashCode() {
561      return Arrays.hashCode(chars);
562    }
563  }
564
565  static class StandardBaseEncoding extends BaseEncoding {
566    // TODO(lowasser): provide a useful toString
567    final Alphabet alphabet;
568
569    @CheckForNull final Character paddingChar;
570
571    StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
572      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
573    }
574
575    StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
576      this.alphabet = checkNotNull(alphabet);
577      checkArgument(
578          paddingChar == null || !alphabet.matches(paddingChar),
579          "Padding character %s was already in alphabet",
580          paddingChar);
581      this.paddingChar = paddingChar;
582    }
583
584    @Override
585    int maxEncodedSize(int bytes) {
586      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
587    }
588
589    @GwtIncompatible // Writer,OutputStream
590    @Override
591    public OutputStream encodingStream(final Writer out) {
592      checkNotNull(out);
593      return new OutputStream() {
594        int bitBuffer = 0;
595        int bitBufferLength = 0;
596        int writtenChars = 0;
597
598        @Override
599        public void write(int b) throws IOException {
600          bitBuffer <<= 8;
601          bitBuffer |= b & 0xFF;
602          bitBufferLength += 8;
603          while (bitBufferLength >= alphabet.bitsPerChar) {
604            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
605            out.write(alphabet.encode(charIndex));
606            writtenChars++;
607            bitBufferLength -= alphabet.bitsPerChar;
608          }
609        }
610
611        @Override
612        public void flush() throws IOException {
613          out.flush();
614        }
615
616        @Override
617        public void close() throws IOException {
618          if (bitBufferLength > 0) {
619            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
620            out.write(alphabet.encode(charIndex));
621            writtenChars++;
622            if (paddingChar != null) {
623              while (writtenChars % alphabet.charsPerChunk != 0) {
624                out.write(paddingChar.charValue());
625                writtenChars++;
626              }
627            }
628          }
629          out.close();
630        }
631      };
632    }
633
634    @Override
635    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
636      checkNotNull(target);
637      checkPositionIndexes(off, off + len, bytes.length);
638      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
639        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
640      }
641    }
642
643    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
644      checkNotNull(target);
645      checkPositionIndexes(off, off + len, bytes.length);
646      checkArgument(len <= alphabet.bytesPerChunk);
647      long bitBuffer = 0;
648      for (int i = 0; i < len; ++i) {
649        bitBuffer |= bytes[off + i] & 0xFF;
650        bitBuffer <<= 8; // Add additional zero byte in the end.
651      }
652      // Position of first character is length of bitBuffer minus bitsPerChar.
653      final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
654      int bitsProcessed = 0;
655      while (bitsProcessed < len * 8) {
656        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
657        target.append(alphabet.encode(charIndex));
658        bitsProcessed += alphabet.bitsPerChar;
659      }
660      if (paddingChar != null) {
661        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
662          target.append(paddingChar.charValue());
663          bitsProcessed += alphabet.bitsPerChar;
664        }
665      }
666    }
667
668    @Override
669    int maxDecodedSize(int chars) {
670      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
671    }
672
673    @Override
674    CharSequence trimTrailingPadding(CharSequence chars) {
675      checkNotNull(chars);
676      if (paddingChar == null) {
677        return chars;
678      }
679      char padChar = paddingChar.charValue();
680      int l;
681      for (l = chars.length() - 1; l >= 0; l--) {
682        if (chars.charAt(l) != padChar) {
683          break;
684        }
685      }
686      return chars.subSequence(0, l + 1);
687    }
688
689    @Override
690    public boolean canDecode(CharSequence chars) {
691      checkNotNull(chars);
692      chars = trimTrailingPadding(chars);
693      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
694        return false;
695      }
696      for (int i = 0; i < chars.length(); i++) {
697        if (!alphabet.canDecode(chars.charAt(i))) {
698          return false;
699        }
700      }
701      return true;
702    }
703
704    @Override
705    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
706      checkNotNull(target);
707      chars = trimTrailingPadding(chars);
708      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
709        throw new DecodingException("Invalid input length " + chars.length());
710      }
711      int bytesWritten = 0;
712      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
713        long chunk = 0;
714        int charsProcessed = 0;
715        for (int i = 0; i < alphabet.charsPerChunk; i++) {
716          chunk <<= alphabet.bitsPerChar;
717          if (charIdx + i < chars.length()) {
718            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
719          }
720        }
721        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
722        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
723          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
724        }
725      }
726      return bytesWritten;
727    }
728
729    @Override
730    @GwtIncompatible // Reader,InputStream
731    public InputStream decodingStream(final Reader reader) {
732      checkNotNull(reader);
733      return new InputStream() {
734        int bitBuffer = 0;
735        int bitBufferLength = 0;
736        int readChars = 0;
737        boolean hitPadding = false;
738
739        @Override
740        public int read() throws IOException {
741          while (true) {
742            int readChar = reader.read();
743            if (readChar == -1) {
744              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
745                throw new DecodingException("Invalid input length " + readChars);
746              }
747              return -1;
748            }
749            readChars++;
750            char ch = (char) readChar;
751            if (paddingChar != null && paddingChar.charValue() == ch) {
752              if (!hitPadding
753                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
754                throw new DecodingException("Padding cannot start at index " + readChars);
755              }
756              hitPadding = true;
757            } else if (hitPadding) {
758              throw new DecodingException(
759                  "Expected padding character but found '" + ch + "' at index " + readChars);
760            } else {
761              bitBuffer <<= alphabet.bitsPerChar;
762              bitBuffer |= alphabet.decode(ch);
763              bitBufferLength += alphabet.bitsPerChar;
764
765              if (bitBufferLength >= 8) {
766                bitBufferLength -= 8;
767                return (bitBuffer >> bitBufferLength) & 0xFF;
768              }
769            }
770          }
771        }
772
773        @Override
774        public int read(byte[] buf, int off, int len) throws IOException {
775          // Overriding this to work around the fact that InputStream's default implementation of
776          // this method will silently swallow exceptions thrown by the single-byte read() method
777          // (other than on the first call to it), which in this case can cause invalid encoded
778          // strings to not throw an exception.
779          // See https://github.com/google/guava/issues/3542
780          checkPositionIndexes(off, off + len, buf.length);
781
782          int i = off;
783          for (; i < off + len; i++) {
784            int b = read();
785            if (b == -1) {
786              int read = i - off;
787              return read == 0 ? -1 : read;
788            }
789            buf[i] = (byte) b;
790          }
791          return i - off;
792        }
793
794        @Override
795        public void close() throws IOException {
796          reader.close();
797        }
798      };
799    }
800
801    @Override
802    public BaseEncoding omitPadding() {
803      return (paddingChar == null) ? this : newInstance(alphabet, null);
804    }
805
806    @Override
807    public BaseEncoding withPadChar(char padChar) {
808      if (8 % alphabet.bitsPerChar == 0
809          || (paddingChar != null && paddingChar.charValue() == padChar)) {
810        return this;
811      } else {
812        return newInstance(alphabet, padChar);
813      }
814    }
815
816    @Override
817    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
818      for (int i = 0; i < separator.length(); i++) {
819        checkArgument(
820            !alphabet.matches(separator.charAt(i)),
821            "Separator (%s) cannot contain alphabet characters",
822            separator);
823      }
824      if (paddingChar != null) {
825        checkArgument(
826            separator.indexOf(paddingChar.charValue()) < 0,
827            "Separator (%s) cannot contain padding character",
828            separator);
829      }
830      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
831    }
832
833    @LazyInit @CheckForNull private transient BaseEncoding upperCase;
834    @LazyInit @CheckForNull private transient BaseEncoding lowerCase;
835
836    @Override
837    public BaseEncoding upperCase() {
838      BaseEncoding result = upperCase;
839      if (result == null) {
840        Alphabet upper = alphabet.upperCase();
841        result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
842      }
843      return result;
844    }
845
846    @Override
847    public BaseEncoding lowerCase() {
848      BaseEncoding result = lowerCase;
849      if (result == null) {
850        Alphabet lower = alphabet.lowerCase();
851        result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
852      }
853      return result;
854    }
855
856    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
857      return new StandardBaseEncoding(alphabet, paddingChar);
858    }
859
860    @Override
861    public String toString() {
862      StringBuilder builder = new StringBuilder("BaseEncoding.");
863      builder.append(alphabet.toString());
864      if (8 % alphabet.bitsPerChar != 0) {
865        if (paddingChar == null) {
866          builder.append(".omitPadding()");
867        } else {
868          builder.append(".withPadChar('").append(paddingChar).append("')");
869        }
870      }
871      return builder.toString();
872    }
873
874    @Override
875    public boolean equals(@CheckForNull Object other) {
876      if (other instanceof StandardBaseEncoding) {
877        StandardBaseEncoding that = (StandardBaseEncoding) other;
878        return this.alphabet.equals(that.alphabet)
879            && Objects.equal(this.paddingChar, that.paddingChar);
880      }
881      return false;
882    }
883
884    @Override
885    public int hashCode() {
886      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
887    }
888  }
889
890  static final class Base16Encoding extends StandardBaseEncoding {
891    final char[] encoding = new char[512];
892
893    Base16Encoding(String name, String alphabetChars) {
894      this(new Alphabet(name, alphabetChars.toCharArray()));
895    }
896
897    private Base16Encoding(Alphabet alphabet) {
898      super(alphabet, null);
899      checkArgument(alphabet.chars.length == 16);
900      for (int i = 0; i < 256; ++i) {
901        encoding[i] = alphabet.encode(i >>> 4);
902        encoding[i | 0x100] = alphabet.encode(i & 0xF);
903      }
904    }
905
906    @Override
907    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
908      checkNotNull(target);
909      checkPositionIndexes(off, off + len, bytes.length);
910      for (int i = 0; i < len; ++i) {
911        int b = bytes[off + i] & 0xFF;
912        target.append(encoding[b]);
913        target.append(encoding[b | 0x100]);
914      }
915    }
916
917    @Override
918    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
919      checkNotNull(target);
920      if (chars.length() % 2 == 1) {
921        throw new DecodingException("Invalid input length " + chars.length());
922      }
923      int bytesWritten = 0;
924      for (int i = 0; i < chars.length(); i += 2) {
925        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
926        target[bytesWritten++] = (byte) decoded;
927      }
928      return bytesWritten;
929    }
930
931    @Override
932    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
933      return new Base16Encoding(alphabet);
934    }
935  }
936
937  static final class Base64Encoding extends StandardBaseEncoding {
938    Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
939      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
940    }
941
942    private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
943      super(alphabet, paddingChar);
944      checkArgument(alphabet.chars.length == 64);
945    }
946
947    @Override
948    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
949      checkNotNull(target);
950      checkPositionIndexes(off, off + len, bytes.length);
951      int i = off;
952      for (int remaining = len; remaining >= 3; remaining -= 3) {
953        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
954        target.append(alphabet.encode(chunk >>> 18));
955        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
956        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
957        target.append(alphabet.encode(chunk & 0x3F));
958      }
959      if (i < off + len) {
960        encodeChunkTo(target, bytes, i, off + len - i);
961      }
962    }
963
964    @Override
965    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
966      checkNotNull(target);
967      chars = trimTrailingPadding(chars);
968      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
969        throw new DecodingException("Invalid input length " + chars.length());
970      }
971      int bytesWritten = 0;
972      for (int i = 0; i < chars.length(); ) {
973        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
974        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
975        target[bytesWritten++] = (byte) (chunk >>> 16);
976        if (i < chars.length()) {
977          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
978          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
979          if (i < chars.length()) {
980            chunk |= alphabet.decode(chars.charAt(i++));
981            target[bytesWritten++] = (byte) (chunk & 0xFF);
982          }
983        }
984      }
985      return bytesWritten;
986    }
987
988    @Override
989    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
990      return new Base64Encoding(alphabet, paddingChar);
991    }
992  }
993
994  @GwtIncompatible
995  static Reader ignoringReader(final Reader delegate, final String toIgnore) {
996    checkNotNull(delegate);
997    checkNotNull(toIgnore);
998    return new Reader() {
999      @Override
1000      public int read() throws IOException {
1001        int readChar;
1002        do {
1003          readChar = delegate.read();
1004        } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
1005        return readChar;
1006      }
1007
1008      @Override
1009      public int read(char[] cbuf, int off, int len) throws IOException {
1010        throw new UnsupportedOperationException();
1011      }
1012
1013      @Override
1014      public void close() throws IOException {
1015        delegate.close();
1016      }
1017    };
1018  }
1019
1020  static Appendable separatingAppendable(
1021      final Appendable delegate, final String separator, final int afterEveryChars) {
1022    checkNotNull(delegate);
1023    checkNotNull(separator);
1024    checkArgument(afterEveryChars > 0);
1025    return new Appendable() {
1026      int charsUntilSeparator = afterEveryChars;
1027
1028      @Override
1029      public Appendable append(char c) throws IOException {
1030        if (charsUntilSeparator == 0) {
1031          delegate.append(separator);
1032          charsUntilSeparator = afterEveryChars;
1033        }
1034        delegate.append(c);
1035        charsUntilSeparator--;
1036        return this;
1037      }
1038
1039      @Override
1040      public Appendable append(@CheckForNull CharSequence chars, int off, int len) {
1041        throw new UnsupportedOperationException();
1042      }
1043
1044      @Override
1045      public Appendable append(@CheckForNull CharSequence chars) {
1046        throw new UnsupportedOperationException();
1047      }
1048    };
1049  }
1050
1051  @GwtIncompatible // Writer
1052  static Writer separatingWriter(
1053      final Writer delegate, final String separator, final int afterEveryChars) {
1054    final Appendable separatingAppendable =
1055        separatingAppendable(delegate, separator, afterEveryChars);
1056    return new Writer() {
1057      @Override
1058      public void write(int c) throws IOException {
1059        separatingAppendable.append((char) c);
1060      }
1061
1062      @Override
1063      public void write(char[] chars, int off, int len) throws IOException {
1064        throw new UnsupportedOperationException();
1065      }
1066
1067      @Override
1068      public void flush() throws IOException {
1069        delegate.flush();
1070      }
1071
1072      @Override
1073      public void close() throws IOException {
1074        delegate.close();
1075      }
1076    };
1077  }
1078
1079  static final class SeparatedBaseEncoding extends BaseEncoding {
1080    private final BaseEncoding delegate;
1081    private final String separator;
1082    private final int afterEveryChars;
1083
1084    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1085      this.delegate = checkNotNull(delegate);
1086      this.separator = checkNotNull(separator);
1087      this.afterEveryChars = afterEveryChars;
1088      checkArgument(
1089          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1090    }
1091
1092    @Override
1093    CharSequence trimTrailingPadding(CharSequence chars) {
1094      return delegate.trimTrailingPadding(chars);
1095    }
1096
1097    @Override
1098    int maxEncodedSize(int bytes) {
1099      int unseparatedSize = delegate.maxEncodedSize(bytes);
1100      return unseparatedSize
1101          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1102    }
1103
1104    @GwtIncompatible // Writer,OutputStream
1105    @Override
1106    public OutputStream encodingStream(final Writer output) {
1107      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1108    }
1109
1110    @Override
1111    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1112      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1113    }
1114
1115    @Override
1116    int maxDecodedSize(int chars) {
1117      return delegate.maxDecodedSize(chars);
1118    }
1119
1120    @Override
1121    public boolean canDecode(CharSequence chars) {
1122      StringBuilder builder = new StringBuilder();
1123      for (int i = 0; i < chars.length(); i++) {
1124        char c = chars.charAt(i);
1125        if (separator.indexOf(c) < 0) {
1126          builder.append(c);
1127        }
1128      }
1129      return delegate.canDecode(builder);
1130    }
1131
1132    @Override
1133    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1134      StringBuilder stripped = new StringBuilder(chars.length());
1135      for (int i = 0; i < chars.length(); i++) {
1136        char c = chars.charAt(i);
1137        if (separator.indexOf(c) < 0) {
1138          stripped.append(c);
1139        }
1140      }
1141      return delegate.decodeTo(target, stripped);
1142    }
1143
1144    @Override
1145    @GwtIncompatible // Reader,InputStream
1146    public InputStream decodingStream(final Reader reader) {
1147      return delegate.decodingStream(ignoringReader(reader, separator));
1148    }
1149
1150    @Override
1151    public BaseEncoding omitPadding() {
1152      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1153    }
1154
1155    @Override
1156    public BaseEncoding withPadChar(char padChar) {
1157      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1158    }
1159
1160    @Override
1161    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1162      throw new UnsupportedOperationException("Already have a separator");
1163    }
1164
1165    @Override
1166    public BaseEncoding upperCase() {
1167      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1168    }
1169
1170    @Override
1171    public BaseEncoding lowerCase() {
1172      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1173    }
1174
1175    @Override
1176    public String toString() {
1177      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1178    }
1179  }
1180}