Source code

001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.Ascii;
030import com.google.common.base.CharMatcher;
031import com.google.common.base.Objects;
032import java.io.IOException;
033import java.io.InputStream;
034import java.io.OutputStream;
035import java.io.Reader;
036import java.io.Writer;
037import java.util.Arrays;
038import javax.annotation.Nullable;
039
040/**
041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
042 * strings. This class includes several constants for encoding schemes specified by
043 * <a href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
044 *
045 * <pre>   {@code
046 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
047 *
048 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
049 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
050 *
051 * <p>...returns the ASCII bytes of the string {@code "foo"}.
052 *
053 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
054 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
055 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
056 * behavior:
057 *
058 * <pre>   {@code
059 *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
060 *
061 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
062 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
063 *
064 * <pre>   {@code
065 *   // Do NOT do this
066 *   BaseEncoding hex = BaseEncoding.base16();
067 *   hex.lowerCase(); // does nothing!
068 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
069 *
070 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
071 * {@code x}, but the reverse does not necessarily hold.
072 *
073 * <table>
074 * <tr>
075 * <th>Encoding
076 * <th>Alphabet
077 * <th>{@code char:byte} ratio
078 * <th>Default padding
079 * <th>Comments
080 * <tr>
081 * <td>{@link #base16()}
082 * <td>0-9 A-F
083 * <td>2.00
084 * <td>N/A
085 * <td>Traditional hexadecimal. Defaults to upper case.
086 * <tr>
087 * <td>{@link #base32()}
088 * <td>A-Z 2-7
089 * <td>1.60
090 * <td>=
091 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
092 * <tr>
093 * <td>{@link #base32Hex()}
094 * <td>0-9 A-V
095 * <td>1.60
096 * <td>=
097 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
098 * <tr>
099 * <td>{@link #base64()}
100 * <td>A-Z a-z 0-9 + /
101 * <td>1.33
102 * <td>=
103 * <td>
104 * <tr>
105 * <td>{@link #base64Url()}
106 * <td>A-Z a-z 0-9 - _
107 * <td>1.33
108 * <td>=
109 * <td>Safe to use as filenames, or to pass in URLs without escaping
110 * </table>
111 *
112 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
113 *
114 * @author Louis Wasserman
115 * @since 14.0
116 */
117@GwtCompatible(emulated = true)
118public abstract class BaseEncoding {
  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.

  /**
   * Package-private constructor: although the class is public and abstract, only classes in this
   * package can extend it.
   */
  BaseEncoding() {}
122
  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * <p>This is a checked exception (it extends {@link IOException});
   * {@link BaseEncoding#decode(CharSequence)} rethrows it wrapped in an
   * {@link IllegalArgumentException}.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception whose message describes the invalid input. */
    DecodingException(String message) {
      super(message);
    }

    /** Creates an exception that wraps the given underlying {@code cause}. */
    DecodingException(Throwable cause) {
      super(cause);
    }
  }
138
139  /**
140   * Encodes the specified byte array, and returns the encoded {@code String}.
141   */
142  public String encode(byte[] bytes) {
143    return encode(bytes, 0, bytes.length);
144  }
145
146  /**
147   * Encodes the specified range of the specified byte array, and returns the encoded
148   * {@code String}.
149   */
150  public final String encode(byte[] bytes, int off, int len) {
151    checkPositionIndexes(off, off + len, bytes.length);
152    StringBuilder result = new StringBuilder(maxEncodedSize(len));
153    try {
154      encodeTo(result, bytes, off, len);
155    } catch (IOException impossible) {
156      throw new AssertionError(impossible);
157    }
158    return result.toString();
159  }
160
  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing
   * {@code Writer}.
   *
   * @param writer the destination for the encoded characters
   * @return a stream whose written bytes appear, encoded, on {@code writer}
   */
  @GwtIncompatible // Writer,OutputStream
  public abstract OutputStream encodingStream(Writer writer);
168
169  /**
170   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
171   */
172  @GwtIncompatible // ByteSink,CharSink
173  public final ByteSink encodingSink(final CharSink encodedSink) {
174    checkNotNull(encodedSink);
175    return new ByteSink() {
176      @Override
177      public OutputStream openStream() throws IOException {
178        return encodingStream(encodedSink.openStream());
179      }
180    };
181  }
182
183  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
184
185  private static byte[] extract(byte[] result, int length) {
186    if (length == result.length) {
187      return result;
188    } else {
189      byte[] trunc = new byte[length];
190      System.arraycopy(result, 0, trunc, 0, length);
191      return trunc;
192    }
193  }
194
  /**
   * Determines whether the specified character sequence is a valid encoded string according to this
   * encoding.
   *
   * @param chars the candidate encoded text
   * @return {@code true} if {@code chars} is validly encoded for this encoding
   * @since 20.0
   */
  public abstract boolean canDecode(CharSequence chars);
202
203  /**
204   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
205   * inverse operation to {@link #encode(byte[])}.
206   *
207   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
208   *     encoding.
209   */
210  public final byte[] decode(CharSequence chars) {
211    try {
212      return decodeChecked(chars);
213    } catch (DecodingException badInput) {
214      throw new IllegalArgumentException(badInput);
215    }
216  }
217
218  /**
219   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
220   * inverse operation to {@link #encode(byte[])}.
221   *
222   * @throws DecodingException if the input is not a valid encoded string according to this
223   *     encoding.
224   */ final byte[] decodeChecked(CharSequence chars)
225      throws DecodingException {
226    chars = padding().trimTrailingFrom(chars);
227    byte[] tmp = new byte[maxDecodedSize(chars.length())];
228    int len = decodeTo(tmp, chars);
229    return extract(tmp, len);
230  }
231
  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified
   * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific
   * errors.
   *
   * @param reader the source of encoded characters
   * @return a stream yielding the bytes decoded from {@code reader}
   */
  @GwtIncompatible // Reader,InputStream
  public abstract InputStream decodingStream(Reader reader);
239
240  /**
241   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
242   * {@code CharSource}.
243   */
244  @GwtIncompatible // ByteSource,CharSource
245  public final ByteSource decodingSource(final CharSource encodedSource) {
246    checkNotNull(encodedSource);
247    return new ByteSource() {
248      @Override
249      public InputStream openStream() throws IOException {
250        return decodingStream(encodedSource.openStream());
251      }
252    };
253  }
254
  // Implementations for encoding/decoding

  /**
   * Returns the capacity to reserve for the encoded form of {@code bytes} input bytes;
   * {@link #encode(byte[], int, int)} uses it to presize its {@code StringBuilder}.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Appends the encoded form of the {@code len} bytes of {@code bytes} starting at index
   * {@code off} to {@code target}.
   *
   * @throws IOException declared because {@code Appendable} operations may throw it
   */
  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;

  /**
   * Returns the size of the scratch array {@code decodeChecked} allocates when decoding
   * {@code chars} characters (after trailing padding has been stripped). Any excess over the number
   * of bytes actually written is trimmed off afterwards.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Decodes {@code chars} into {@code target}.
   *
   * @return the number of bytes written; callers keep that many leading bytes of {@code target}
   * @throws DecodingException if {@code chars} is not validly encoded
   */
  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;

  /** Returns a matcher for the characters that are stripped from the end of input before decoding. */
  abstract CharMatcher padding();
266
  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * @return an encoding without padding; this instance itself is not modified
   */
  public abstract BaseEncoding omitPadding();
275
  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @param padChar the character to pad with
   * @return an encoding padded with {@code padChar}; this instance itself is not modified
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *     separator
   */
  public abstract BaseEncoding withPadChar(char padChar);
284
  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @param separator the string to insert between groups of encoded characters
   * @param n the number of encoded characters between separators
   * @return an encoding that emits separators; this instance itself is not modified
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *     string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  public abstract BaseEncoding withSeparator(String separator, int n);
295
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @return an upper-case variant of this encoding; this instance itself is not modified
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding upperCase();
304
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @return a lower-case variant of this encoding; this instance itself is not modified
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding lowerCase();
313
  // Shared instance returned by base64(): the 64-character "+/" alphabet, padded with '='.
  private static final BaseEncoding BASE64 =
      new Base64Encoding(
          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64
   * Encoding. (This is the same as the base 64 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64 encoding instance
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
334
  // Shared instance returned by base64Url(): same as BASE64 except '-' and '_' replace '+' and '/'.
  private static final BaseEncoding BASE64_URL =
      new Base64Encoding(
          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
   * is the same as the base 64 encoding with URL and filename safe alphabet from
   * <a href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64url encoding instance
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
356
  // Shared instance returned by base32(): alphabet A-Z followed by 2-7, padded with '='.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32 encoding instance
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
375
  // Shared instance returned by base32Hex(): alphabet 0-9 followed by A-V, padded with '='.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by
   * <a href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32hex encoding instance
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
394
  // Shared instance returned by base16(): upper-case hexadecimal digits, constructed without a
  // padding character.
  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");

  /**
   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from
   * <a href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
   * have no effect.
   *
   * <p>No line feeds are added by default, as per
   * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
   * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base16 encoding instance
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
413
414  private static final class Alphabet extends CharMatcher {
415    private final String name;
416    // this is meant to be immutable -- don't modify it!
417    private final char[] chars;
418    final int mask;
419    final int bitsPerChar;
420    final int charsPerChunk;
421    final int bytesPerChunk;
422    private final byte[] decodabet;
423    private final boolean[] validPadding;
424
425    Alphabet(String name, char[] chars) {
426      this.name = checkNotNull(name);
427      this.chars = checkNotNull(chars);
428      try {
429        this.bitsPerChar = log2(chars.length, UNNECESSARY);
430      } catch (ArithmeticException e) {
431        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
432      }
433
434      /*
435       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
436       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
437       */
438      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
439      try {
440        this.charsPerChunk = 8 / gcd;
441        this.bytesPerChunk = bitsPerChar / gcd;
442      } catch (ArithmeticException e) {
443        throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e);
444      }
445
446      this.mask = chars.length - 1;
447
448      byte[] decodabet = new byte[Ascii.MAX + 1];
449      Arrays.fill(decodabet, (byte) -1);
450      for (int i = 0; i < chars.length; i++) {
451        char c = chars[i];
452        checkArgument(CharMatcher.ascii().matches(c), "Non-ASCII character: %s", c);
453        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
454        decodabet[c] = (byte) i;
455      }
456      this.decodabet = decodabet;
457
458      boolean[] validPadding = new boolean[charsPerChunk];
459      for (int i = 0; i < bytesPerChunk; i++) {
460        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
461      }
462      this.validPadding = validPadding;
463    }
464
465    char encode(int bits) {
466      return chars[bits];
467    }
468
469    boolean isValidPaddingStartPosition(int index) {
470      return validPadding[index % charsPerChunk];
471    }
472
473    boolean canDecode(char ch) {
474      return ch <= Ascii.MAX && decodabet[ch] != -1;
475    }
476
477    int decode(char ch) throws DecodingException {
478      if (ch > Ascii.MAX || decodabet[ch] == -1) {
479        throw new DecodingException(
480            "Unrecognized character: "
481                + (CharMatcher.invisible().matches(ch) ? "0x" + Integer.toHexString(ch) : ch));
482      }
483      return decodabet[ch];
484    }
485
486    private boolean hasLowerCase() {
487      for (char c : chars) {
488        if (Ascii.isLowerCase(c)) {
489          return true;
490        }
491      }
492      return false;
493    }
494
495    private boolean hasUpperCase() {
496      for (char c : chars) {
497        if (Ascii.isUpperCase(c)) {
498          return true;
499        }
500      }
501      return false;
502    }
503
504    Alphabet upperCase() {
505      if (!hasLowerCase()) {
506        return this;
507      } else {
508        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
509        char[] upperCased = new char[chars.length];
510        for (int i = 0; i < chars.length; i++) {
511          upperCased[i] = Ascii.toUpperCase(chars[i]);
512        }
513        return new Alphabet(name + ".upperCase()", upperCased);
514      }
515    }
516
517    Alphabet lowerCase() {
518      if (!hasUpperCase()) {
519        return this;
520      } else {
521        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
522        char[] lowerCased = new char[chars.length];
523        for (int i = 0; i < chars.length; i++) {
524          lowerCased[i] = Ascii.toLowerCase(chars[i]);
525        }
526        return new Alphabet(name + ".lowerCase()", lowerCased);
527      }
528    }
529
530    @Override
531    public boolean matches(char c) {
532      return CharMatcher.ascii().matches(c) && decodabet[c] != -1;
533    }
534
535    @Override
536    public String toString() {
537      return name;
538    }
539
540    @Override
541    public boolean equals(@Nullable Object other) {
542      if (other instanceof Alphabet) {
543        Alphabet that = (Alphabet) other;
544        return Arrays.equals(this.chars, that.chars);
545      }
546      return false;
547    }
548
549    @Override
550    public int hashCode() {
551      return Arrays.hashCode(chars);
552    }
553  }
554
  /**
   * Encoding driven by an {@link Alphabet} and an optional padding character. Bytes are processed
   * in chunks of {@code alphabet.bytesPerChunk} bytes, each of which corresponds to
   * {@code alphabet.charsPerChunk} characters.
   */
  static class StandardBaseEncoding extends BaseEncoding {
    final Alphabet alphabet;

    // null means this encoding neither writes nor strips padding.
    @Nullable final Character paddingChar;

    /** Creates an encoding over the characters of {@code alphabetChars}, named {@code name}. */
    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }

    /**
     * Creates an encoding over the given alphabet.
     *
     * @throws IllegalArgumentException if {@code paddingChar} is one of the alphabet's characters
     */
    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
      this.alphabet = checkNotNull(alphabet);
      checkArgument(
          paddingChar == null || !alphabet.matches(paddingChar),
          "Padding character %s was already in alphabet",
          paddingChar);
      this.paddingChar = paddingChar;
    }

    @Override
    CharMatcher padding() {
      return (paddingChar == null) ? CharMatcher.none() : CharMatcher.is(paddingChar.charValue());
    }

    /** Returns the character count of the whole chunks needed to hold {@code bytes} bytes. */
    @Override
    int maxEncodedSize(int bytes) {
      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
    }

    @GwtIncompatible // Writer,OutputStream
    @Override
    public OutputStream encodingStream(final Writer out) {
      checkNotNull(out);
      return new OutputStream() {
        // Incoming bytes are shifted into the low end of bitBuffer; bitBufferLength counts how many
        // of its low bits have not yet been written out as characters.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        int writtenChars = 0;

        @Override
        public void write(int b) throws IOException {
          bitBuffer <<= 8;
          bitBuffer |= b & 0xFF;
          bitBufferLength += 8;
          // Emit a character for every complete group of bitsPerChar pending bits.
          while (bitBufferLength >= alphabet.bitsPerChar) {
            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            bitBufferLength -= alphabet.bitsPerChar;
          }
        }

        @Override
        public void flush() throws IOException {
          out.flush();
        }

        @Override
        public void close() throws IOException {
          if (bitBufferLength > 0) {
            // Left-align the leftover bits within one character (low bits zero) and emit it.
            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            if (paddingChar != null) {
              // Pad until the output length is a whole number of chunks.
              while (writtenChars % alphabet.charsPerChunk != 0) {
                out.write(paddingChar.charValue());
                writtenChars++;
              }
            }
          }
          out.close();
        }
      };
    }

    /** Encodes the range one chunk at a time; only the last chunk may be shorter than a full one. */
    @Override
    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
      checkNotNull(target);
      checkPositionIndexes(off, off + len, bytes.length);
      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
      }
    }

    /**
     * Appends the encoding of a single chunk of at most {@code alphabet.bytesPerChunk} bytes,
     * followed by padding up to a full chunk when a padding character is configured.
     */
    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
      checkNotNull(target);
      checkPositionIndexes(off, off + len, bytes.length);
      checkArgument(len <= alphabet.bytesPerChunk);
      long bitBuffer = 0;
      for (int i = 0; i < len; ++i) {
        bitBuffer |= bytes[off + i] & 0xFF;
        bitBuffer <<= 8; // Add additional zero byte in the end.
      }
      // Position of first character is length of bitBuffer minus bitsPerChar.
      final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
      int bitsProcessed = 0;
      // Emit characters until all len * 8 data bits are covered; the final character may extend
      // into the trailing zero byte.
      while (bitsProcessed < len * 8) {
        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
        target.append(alphabet.encode(charIndex));
        bitsProcessed += alphabet.bitsPerChar;
      }
      if (paddingChar != null) {
        // Fill the rest of the chunk with padding characters.
        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
          target.append(paddingChar.charValue());
          bitsProcessed += alphabet.bitsPerChar;
        }
      }
    }

    /** Returns the number of bits in {@code chars} characters divided by 8, rounded up. */
    @Override
    int maxDecodedSize(int chars) {
      // long arithmetic keeps bitsPerChar * chars from overflowing int.
      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
    }

    @Override
    public boolean canDecode(CharSequence chars) {
      chars = padding().trimTrailingFrom(chars);
      // After stripping padding, the length must be one at which padding may begin.
      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
        return false;
      }
      for (int i = 0; i < chars.length(); i++) {
        if (!alphabet.canDecode(chars.charAt(i))) {
          return false;
        }
      }
      return true;
    }

    @Override
    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
      checkNotNull(target);
      chars = padding().trimTrailingFrom(chars);
      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
        throw new DecodingException("Invalid input length " + chars.length());
      }
      int bytesWritten = 0;
      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
        long chunk = 0;
        int charsProcessed = 0;
        // Always shift in a full chunk's worth of character slots so the decoded bits end up
        // left-aligned; slots past the end of the input stay zero.
        for (int i = 0; i < alphabet.charsPerChunk; i++) {
          chunk <<= alphabet.bitsPerChar;
          if (charIdx + i < chars.length()) {
            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
          }
        }
        // Only emit the bytes that lie entirely within the bits supplied by decoded characters.
        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
        }
      }
      return bytesWritten;
    }

    @GwtIncompatible // Reader,InputStream
    @Override
    public InputStream decodingStream(final Reader reader) {
      checkNotNull(reader);
      return new InputStream() {
        // Decoded bit groups accumulate in the low end of bitBuffer; bitBufferLength counts the low
        // bits that have not yet been returned as bytes.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        int readChars = 0;
        boolean hitPadding = false;
        final CharMatcher paddingMatcher = padding();

        @Override
        public int read() throws IOException {
          // Keep consuming characters until a whole byte is available or the input ends.
          while (true) {
            int readChar = reader.read();
            if (readChar == -1) {
              // Without padding, the input must end at a position where padding could have begun.
              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
                throw new DecodingException("Invalid input length " + readChars);
              }
              return -1;
            }
            readChars++;
            char ch = (char) readChar;
            if (paddingMatcher.matches(ch)) {
              // The first padding character must not open the input and must sit at a valid
              // padding start position.
              if (!hitPadding
                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
                throw new DecodingException("Padding cannot start at index " + readChars);
              }
              hitPadding = true;
            } else if (hitPadding) {
              throw new DecodingException(
                  "Expected padding character but found '" + ch + "' at index " + readChars);
            } else {
              bitBuffer <<= alphabet.bitsPerChar;
              bitBuffer |= alphabet.decode(ch);
              bitBufferLength += alphabet.bitsPerChar;

              if (bitBufferLength >= 8) {
                bitBufferLength -= 8;
                return (bitBuffer >> bitBufferLength) & 0xFF;
              }
            }
          }
        }

        @Override
        public void close() throws IOException {
          reader.close();
        }
      };
    }

    @Override
    public BaseEncoding omitPadding() {
      return (paddingChar == null) ? this : newInstance(alphabet, null);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      // When bitsPerChar divides 8, every byte maps to a whole number of characters, so padding
      // never occurs; likewise nothing changes if padChar is already the padding character.
      if (8 % alphabet.bitsPerChar == 0
          || (paddingChar != null && paddingChar.charValue() == padChar)) {
        return this;
      } else {
        return newInstance(alphabet, padChar);
      }
    }

    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      checkArgument(
          padding().or(alphabet).matchesNoneOf(separator),
          "Separator (%s) cannot contain alphabet or padding characters",
          separator);
      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
    }

    // Results of upperCase() / lowerCase(), computed on first use and stored in these plain
    // (unsynchronized) fields.
    private transient BaseEncoding upperCase;
    private transient BaseEncoding lowerCase;

    @Override
    public BaseEncoding upperCase() {
      BaseEncoding result = upperCase;
      if (result == null) {
        Alphabet upper = alphabet.upperCase();
        // alphabet.upperCase() returns the same Alphabet when there is nothing to convert.
        result = upperCase =
            (upper == alphabet) ? this : newInstance(upper, paddingChar);
      }
      return result;
    }

    @Override
    public BaseEncoding lowerCase() {
      BaseEncoding result = lowerCase;
      if (result == null) {
        Alphabet lower = alphabet.lowerCase();
        // alphabet.lowerCase() returns the same Alphabet when there is nothing to convert.
        result = lowerCase =
            (lower == alphabet) ? this : newInstance(lower, paddingChar);
      }
      return result;
    }

    /**
     * Factory used by the configuration methods to create derived encodings; subclasses override it
     * so that derived encodings keep the subclass type.
     */
    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
      return new StandardBaseEncoding(alphabet, paddingChar);
    }

    /**
     * Returns {@code "BaseEncoding."} followed by the alphabet's name and, for alphabets where
     * padding can occur, the padding configuration.
     */
    @Override
    public String toString() {
      StringBuilder builder = new StringBuilder("BaseEncoding.");
      builder.append(alphabet.toString());
      if (8 % alphabet.bitsPerChar != 0) {
        if (paddingChar == null) {
          builder.append(".omitPadding()");
        } else {
          builder.append(".withPadChar('").append(paddingChar).append("')");
        }
      }
      return builder.toString();
    }

    /** Two standard encodings are equal when both their alphabets and padding characters are. */
    @Override
    public boolean equals(@Nullable Object other) {
      if (other instanceof StandardBaseEncoding) {
        StandardBaseEncoding that = (StandardBaseEncoding) other;
        return this.alphabet.equals(that.alphabet)
            && Objects.equal(this.paddingChar, that.paddingChar);
      }
      return false;
    }

    @Override
    public int hashCode() {
      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
    }
  }
841
842  static final class Base16Encoding extends StandardBaseEncoding {
843    final char[] encoding = new char[512];
844
845    Base16Encoding(String name, String alphabetChars) {
846      this(new Alphabet(name, alphabetChars.toCharArray()));
847    }
848
849    private Base16Encoding(Alphabet alphabet) {
850      super(alphabet, null);
851      checkArgument(alphabet.chars.length == 16);
852      for (int i = 0; i < 256; ++i) {
853        encoding[i] = alphabet.encode(i >>> 4);
854        encoding[i | 0x100] = alphabet.encode(i & 0xF);
855      }
856    }
857
858    @Override
859    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
860      checkNotNull(target);
861      checkPositionIndexes(off, off + len, bytes.length);
862      for (int i = 0; i < len; ++i) {
863        int b = bytes[off + i] & 0xFF;
864        target.append(encoding[b]);
865        target.append(encoding[b | 0x100]);
866      }
867    }
868
869    @Override
870    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
871      checkNotNull(target);
872      if (chars.length() % 2 == 1) {
873        throw new DecodingException("Invalid input length " + chars.length());
874      }
875      int bytesWritten = 0;
876      for (int i = 0; i < chars.length(); i += 2) {
877        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
878        target[bytesWritten++] = (byte) decoded;
879      }
880      return bytesWritten;
881    }
882
883    @Override
884    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
885      return new Base16Encoding(alphabet);
886    }
887  }
888
889  static final class Base64Encoding extends StandardBaseEncoding {
890    Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) {
891      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
892    }
893
894    private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) {
895      super(alphabet, paddingChar);
896      checkArgument(alphabet.chars.length == 64);
897    }
898
899    @Override
900    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
901      checkNotNull(target);
902      checkPositionIndexes(off, off + len, bytes.length);
903      int i = off;
904      for (int remaining = len; remaining >= 3; remaining -= 3) {
905        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
906        target.append(alphabet.encode(chunk >>> 18));
907        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
908        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
909        target.append(alphabet.encode(chunk & 0x3F));
910      }
911      if (i < off + len) {
912        encodeChunkTo(target, bytes, i, off + len - i);
913      }
914    }
915
916    @Override
917    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
918      checkNotNull(target);
919      chars = padding().trimTrailingFrom(chars);
920      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
921        throw new DecodingException("Invalid input length " + chars.length());
922      }
923      int bytesWritten = 0;
924      for (int i = 0; i < chars.length(); ) {
925        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
926        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
927        target[bytesWritten++] = (byte) (chunk >>> 16);
928        if (i < chars.length()) {
929          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
930          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
931          if (i < chars.length()) {
932            chunk |= alphabet.decode(chars.charAt(i++));
933            target[bytesWritten++] = (byte) (chunk & 0xFF);
934          }
935        }
936      }
937      return bytesWritten;
938    }
939
940    @Override
941    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
942      return new Base64Encoding(alphabet, paddingChar);
943    }
944  }
945
946  @GwtIncompatible // Reader
947  static Reader ignoringReader(final Reader delegate, final CharMatcher toIgnore) {
948    checkNotNull(delegate);
949    checkNotNull(toIgnore);
950    return new Reader() {
951      @Override
952      public int read() throws IOException {
953        int readChar;
954        do {
955          readChar = delegate.read();
956        } while (readChar != -1 && toIgnore.matches((char) readChar));
957        return readChar;
958      }
959
960      @Override
961      public int read(char[] cbuf, int off, int len) throws IOException {
962        throw new UnsupportedOperationException();
963      }
964
965      @Override
966      public void close() throws IOException {
967        delegate.close();
968      }
969    };
970  }
971
972  static Appendable separatingAppendable(
973      final Appendable delegate, final String separator, final int afterEveryChars) {
974    checkNotNull(delegate);
975    checkNotNull(separator);
976    checkArgument(afterEveryChars > 0);
977    return new Appendable() {
978      int charsUntilSeparator = afterEveryChars;
979
980      @Override
981      public Appendable append(char c) throws IOException {
982        if (charsUntilSeparator == 0) {
983          delegate.append(separator);
984          charsUntilSeparator = afterEveryChars;
985        }
986        delegate.append(c);
987        charsUntilSeparator--;
988        return this;
989      }
990
991      @Override
992      public Appendable append(CharSequence chars, int off, int len) throws IOException {
993        throw new UnsupportedOperationException();
994      }
995
996      @Override
997      public Appendable append(CharSequence chars) throws IOException {
998        throw new UnsupportedOperationException();
999      }
1000    };
1001  }
1002
1003  @GwtIncompatible // Writer
1004  static Writer separatingWriter(
1005      final Writer delegate, final String separator, final int afterEveryChars) {
1006    final Appendable seperatingAppendable =
1007        separatingAppendable(delegate, separator, afterEveryChars);
1008    return new Writer() {
1009      @Override
1010      public void write(int c) throws IOException {
1011        seperatingAppendable.append((char) c);
1012      }
1013
1014      @Override
1015      public void write(char[] chars, int off, int len) throws IOException {
1016        throw new UnsupportedOperationException();
1017      }
1018
1019      @Override
1020      public void flush() throws IOException {
1021        delegate.flush();
1022      }
1023
1024      @Override
1025      public void close() throws IOException {
1026        delegate.close();
1027      }
1028    };
1029  }
1030
1031  static final class SeparatedBaseEncoding extends BaseEncoding {
1032    private final BaseEncoding delegate;
1033    private final String separator;
1034    private final int afterEveryChars;
1035    private final CharMatcher separatorChars;
1036
1037    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1038      this.delegate = checkNotNull(delegate);
1039      this.separator = checkNotNull(separator);
1040      this.afterEveryChars = afterEveryChars;
1041      checkArgument(
1042          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1043      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
1044    }
1045
1046    @Override
1047    CharMatcher padding() {
1048      return delegate.padding();
1049    }
1050
1051    @Override
1052    int maxEncodedSize(int bytes) {
1053      int unseparatedSize = delegate.maxEncodedSize(bytes);
1054      return unseparatedSize
1055          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1056    }
1057
1058    @GwtIncompatible // Writer,OutputStream
1059    @Override
1060    public OutputStream encodingStream(final Writer output) {
1061      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1062    }
1063
1064    @Override
1065    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1066      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1067    }
1068
1069    @Override
1070    int maxDecodedSize(int chars) {
1071      return delegate.maxDecodedSize(chars);
1072    }
1073
1074    @Override
1075    public boolean canDecode(CharSequence chars) {
1076      return delegate.canDecode(separatorChars.removeFrom(chars));
1077    }
1078
1079    @Override
1080    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1081      return delegate.decodeTo(target, separatorChars.removeFrom(chars));
1082    }
1083
1084    @GwtIncompatible // Reader,InputStream
1085    @Override
1086    public InputStream decodingStream(final Reader reader) {
1087      return delegate.decodingStream(ignoringReader(reader, separatorChars));
1088    }
1089
1090    @Override
1091    public BaseEncoding omitPadding() {
1092      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1093    }
1094
1095    @Override
1096    public BaseEncoding withPadChar(char padChar) {
1097      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1098    }
1099
1100    @Override
1101    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1102      throw new UnsupportedOperationException("Already have a separator");
1103    }
1104
1105    @Override
1106    public BaseEncoding upperCase() {
1107      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1108    }
1109
1110    @Override
1111    public BaseEncoding lowerCase() {
1112      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1113    }
1114
1115    @Override
1116    public String toString() {
1117      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1118    }
1119  }
1120}