001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.lang.Math.max;
024import static java.lang.Math.min;
025import static java.math.RoundingMode.CEILING;
026import static java.math.RoundingMode.FLOOR;
027import static java.math.RoundingMode.UNNECESSARY;
028
029import com.google.common.annotations.GwtCompatible;
030import com.google.common.annotations.GwtIncompatible;
031import com.google.common.annotations.J2ktIncompatible;
032import com.google.common.base.Ascii;
033import com.google.errorprone.annotations.concurrent.LazyInit;
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.OutputStream;
037import java.io.Reader;
038import java.io.Writer;
039import java.util.Arrays;
040import java.util.Objects;
041import javax.annotation.CheckForNull;
042import org.checkerframework.checker.nullness.qual.Nullable;
043
044/**
045 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
046 * strings. This class includes several constants for encoding schemes specified by <a
047 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
048 *
049 * <pre>{@code
050 * BaseEncoding.base32().encode("foo".getBytes(US_ASCII))
051 * }</pre>
052 *
053 * <p>returns the string {@code "MZXW6==="}, and
054 *
055 * <pre>{@code
056 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
057 * }</pre>
058 *
059 * <p>...returns the ASCII bytes of the string {@code "foo"}.
060 *
061 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
062 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
063 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
064 * behavior:
065 *
066 * <pre>{@code
067 * BaseEncoding.base16().lowerCase().decode("deadbeef");
068 * }</pre>
069 *
070 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
071 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
072 *
073 * <pre>{@code
074 * // Do NOT do this
075 * BaseEncoding hex = BaseEncoding.base16();
076 * hex.lowerCase(); // does nothing!
077 * return hex.decode("deadbeef"); // throws an IllegalArgumentException
078 * }</pre>
079 *
080 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code
081 * x}, but the reverse does not necessarily hold.
082 *
083 * <table>
084 * <caption>Encodings</caption>
085 * <tr>
086 * <th>Encoding
087 * <th>Alphabet
088 * <th>{@code char:byte} ratio
089 * <th>Default padding
090 * <th>Comments
091 * <tr>
092 * <td>{@link #base16()}
093 * <td>0-9 A-F
094 * <td>2.00
095 * <td>N/A
096 * <td>Traditional hexadecimal. Defaults to upper case.
097 * <tr>
098 * <td>{@link #base32()}
099 * <td>A-Z 2-7
100 * <td>1.60
101 * <td>=
102 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
103 * <tr>
104 * <td>{@link #base32Hex()}
105 * <td>0-9 A-V
106 * <td>1.60
107 * <td>=
108 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
109 * <tr>
110 * <td>{@link #base64()}
111 * <td>A-Z a-z 0-9 + /
112 * <td>1.33
113 * <td>=
114 * <td>
115 * <tr>
116 * <td>{@link #base64Url()}
117 * <td>A-Z a-z 0-9 - _
118 * <td>1.33
119 * <td>=
120 * <td>Safe to use as filenames, or to pass in URLs without escaping
121 * </table>
122 *
123 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
124 *
125 * @author Louis Wasserman
126 * @since 14.0
127 */
128@GwtCompatible(emulated = true)
129@ElementTypesAreNonnullByDefault
130public abstract class BaseEncoding {
131  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
132
  // Package-private: subclasses must live in this package.
  BaseEncoding() {}
134
  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception with the given detail message, which may be {@code null}. */
    DecodingException(@Nullable String message) {
      super(message);
    }
  }
146
147  /** Encodes the specified byte array, and returns the encoded {@code String}. */
148  public String encode(byte[] bytes) {
149    return encode(bytes, 0, bytes.length);
150  }
151
152  /**
153   * Encodes the specified range of the specified byte array, and returns the encoded {@code
154   * String}.
155   */
156  public final String encode(byte[] bytes, int off, int len) {
157    checkPositionIndexes(off, off + len, bytes.length);
158    StringBuilder result = new StringBuilder(maxEncodedSize(len));
159    try {
160      encodeTo(result, bytes, off, len);
161    } catch (IOException impossible) {
162      throw new AssertionError(impossible);
163    }
164    return result.toString();
165  }
166
  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code
   * Writer}.
   *
   * @param writer the destination for the encoded characters
   * @return a stream whose written bytes are encoded and forwarded to {@code writer}
   */
  @J2ktIncompatible
  @GwtIncompatible // Writer,OutputStream
  public abstract OutputStream encodingStream(Writer writer);
175
176  /**
177   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
178   */
179  @J2ktIncompatible
180  @GwtIncompatible // ByteSink,CharSink
181  public final ByteSink encodingSink(CharSink encodedSink) {
182    checkNotNull(encodedSink);
183    return new ByteSink() {
184      @Override
185      public OutputStream openStream() throws IOException {
186        return encodingStream(encodedSink.openStream());
187      }
188    };
189  }
190
191  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
192
193  private static byte[] extract(byte[] result, int length) {
194    if (length == result.length) {
195      return result;
196    }
197    byte[] trunc = new byte[length];
198    System.arraycopy(result, 0, trunc, 0, length);
199    return trunc;
200  }
201
  /**
   * Determines whether the specified character sequence is a valid encoded string according to this
   * encoding.
   *
   * @param chars the candidate encoded text
   * @return {@code true} if {@code chars} is valid input for this encoding
   * @since 20.0
   */
  public abstract boolean canDecode(CharSequence chars);
209
210  /**
211   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
212   * inverse operation to {@link #encode(byte[])}.
213   *
214   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
215   *     encoding.
216   */
217  public final byte[] decode(CharSequence chars) {
218    try {
219      return decodeChecked(chars);
220    } catch (DecodingException badInput) {
221      throw new IllegalArgumentException(badInput);
222    }
223  }
224
225  /**
226   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
227   * inverse operation to {@link #encode(byte[])}.
228   *
229   * @throws DecodingException if the input is not a valid encoded string according to this
230   *     encoding.
231   */
232  final byte[] decodeChecked(CharSequence chars)
233      throws DecodingException {
234    chars = trimTrailingPadding(chars);
235    byte[] tmp = new byte[maxDecodedSize(chars.length())];
236    int len = decodeTo(tmp, chars);
237    return extract(tmp, len);
238  }
239
  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code
   * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors.
   *
   * @param reader the source of encoded characters
   * @return a stream that yields the decoded bytes
   */
  @J2ktIncompatible
  @GwtIncompatible // Reader,InputStream
  public abstract InputStream decodingStream(Reader reader);
247
248  /**
249   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code
250   * CharSource}.
251   */
252  @J2ktIncompatible
253  @GwtIncompatible // ByteSource,CharSource
254  public final ByteSource decodingSource(CharSource encodedSource) {
255    checkNotNull(encodedSource);
256    return new ByteSource() {
257      @Override
258      public InputStream openStream() throws IOException {
259        return decodingStream(encodedSource.openStream());
260      }
261    };
262  }
263
264  // Implementations for encoding/decoding
265
  /**
   * Returns the number of characters to reserve for the encoded form of {@code bytes} input bytes;
   * {@code encode} uses it as the initial capacity of its result buffer.
   */
  abstract int maxEncodedSize(int bytes);
267
  /**
   * Appends to {@code target} the encoded form of the {@code len} bytes of {@code bytes} that start
   * at index {@code off}.
   *
   * @throws IOException if appending to {@code target} fails
   */
  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;
269
  /**
   * Returns the size of the byte buffer to allocate for decoding {@code chars} characters; {@code
   * decodeChecked} trims the buffer to the length actually decoded.
   */
  abstract int maxDecodedSize(int chars);
271
  /**
   * Decodes {@code chars} into {@code target} and returns the number of bytes written.
   *
   * @throws DecodingException if {@code chars} is not valid input for this encoding
   */
  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
273
274  CharSequence trimTrailingPadding(CharSequence chars) {
275    return checkNotNull(chars);
276  }
277
278  // Modified encoding generators
279
  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * @return an encoding that produces no padding characters
   */
  public abstract BaseEncoding omitPadding();
286
  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @param padChar the character to use for padding
   * @return an encoding that pads with {@code padChar}
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *     separator
   */
  public abstract BaseEncoding withPadChar(char padChar);
295
  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @param separator the string to insert between groups of encoded characters
   * @param n the number of encoded characters in each group
   * @return an encoding that inserts {@code separator} after every {@code n} characters
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *     string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  public abstract BaseEncoding withSeparator(String separator, int n);
306
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @return an encoding whose alphabet letters are uppercase
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding upperCase();
315
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @return an encoding whose alphabet letters are lowercase
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding lowerCase();
324
  /**
   * Returns an encoding that behaves equivalently to this encoding, but decodes letters without
   * regard to case.
   *
   * @return an encoding that accepts both cases of each alphabet letter when decoding
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   * @since 32.0.0
   */
  public abstract BaseEncoding ignoreCase();
334
335  private static final BaseEncoding BASE64 =
336      new Base64Encoding(
337          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
338
339  /**
340   * The "base64" base encoding specified by <a
341   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
342   * (This is the same as the base 64 encoding from <a
343   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
344   *
345   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
346   * omitted} or {@linkplain #withPadChar(char) replaced}.
347   *
348   * <p>No line feeds are added by default, as per <a
349   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
350   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
351   */
  public static BaseEncoding base64() {
    // Every call hands out the single instance held in BASE64.
    return BASE64;
  }
355
356  private static final BaseEncoding BASE64_URL =
357      new Base64Encoding(
358          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');
359
360  /**
361   * The "base64url" encoding specified by <a
362   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
363   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
364   * is the same as the base 64 encoding with URL and filename safe alphabet from <a
365   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
366   *
367   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
368   * omitted} or {@linkplain #withPadChar(char) replaced}.
369   *
370   * <p>No line feeds are added by default, as per <a
371   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
372   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
373   */
  public static BaseEncoding base64Url() {
    // Every call hands out the single instance held in BASE64_URL.
    return BASE64_URL;
  }
377
378  private static final BaseEncoding BASE32 =
379      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
380
381  /**
382   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
383   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
384   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
385   *
386   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
387   * omitted} or {@linkplain #withPadChar(char) replaced}.
388   *
389   * <p>No line feeds are added by default, as per <a
390   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
391   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
392   */
  public static BaseEncoding base32() {
    // Every call hands out the single instance held in BASE32.
    return BASE32;
  }
396
397  private static final BaseEncoding BASE32_HEX =
398      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');
399
400  /**
401   * The "base32hex" encoding specified by <a
402   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
403   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
404   *
405   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
406   * omitted} or {@linkplain #withPadChar(char) replaced}.
407   *
408   * <p>No line feeds are added by default, as per <a
409   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
410   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
411   */
  public static BaseEncoding base32Hex() {
    // Every call hands out the single instance held in BASE32_HEX.
    return BASE32_HEX;
  }
415
416  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");
417
418  /**
419   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
420   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a
421   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
422   * "hexadecimal" format.
423   *
424   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
425   * have no effect.
426   *
427   * <p>No line feeds are added by default, as per <a
428   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
429   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
430   */
  public static BaseEncoding base16() {
    // Every call hands out the single instance held in BASE16.
    return BASE16;
  }
434
435  static final class Alphabet {
436    private final String name;
437    // this is meant to be immutable -- don't modify it!
438    private final char[] chars;
439    final int mask;
440    final int bitsPerChar;
441    final int charsPerChunk;
442    final int bytesPerChunk;
443    private final byte[] decodabet;
444    private final boolean[] validPadding;
445    private final boolean ignoreCase;
446
447    Alphabet(String name, char[] chars) {
448      this(name, chars, decodabetFor(chars), /* ignoreCase= */ false);
449    }
450
451    private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) {
452      this.name = checkNotNull(name);
453      this.chars = checkNotNull(chars);
454      try {
455        this.bitsPerChar = log2(chars.length, UNNECESSARY);
456      } catch (ArithmeticException e) {
457        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
458      }
459
460      // Compute how input bytes are chunked. For example, with base64 we chunk every 3 bytes into
461      // 4 characters. We have bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3.
462      // We're looking for the smallest charsPerChunk such that bitsPerChar * charsPerChunk is a
463      // multiple of 8. A multiple of 8 has 3 low zero bits, so we just need to figure out how many
464      // extra zero bits we need to add to the end of bitsPerChar to get 3 in total.
465      // The logic here would be wrong for bitsPerChar > 8, but since we require distinct ASCII
466      // characters that can't happen.
467      int zeroesInBitsPerChar = Integer.numberOfTrailingZeros(bitsPerChar);
468      this.charsPerChunk = 1 << (3 - zeroesInBitsPerChar);
469      this.bytesPerChunk = bitsPerChar >> zeroesInBitsPerChar;
470
471      this.mask = chars.length - 1;
472
473      this.decodabet = decodabet;
474
475      boolean[] validPadding = new boolean[charsPerChunk];
476      for (int i = 0; i < bytesPerChunk; i++) {
477        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
478      }
479      this.validPadding = validPadding;
480      this.ignoreCase = ignoreCase;
481    }
482
483    private static byte[] decodabetFor(char[] chars) {
484      byte[] decodabet = new byte[Ascii.MAX + 1];
485      Arrays.fill(decodabet, (byte) -1);
486      for (int i = 0; i < chars.length; i++) {
487        char c = chars[i];
488        checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
489        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
490        decodabet[c] = (byte) i;
491      }
492      return decodabet;
493    }
494
495    /** Returns an equivalent {@code Alphabet} except it ignores case. */
496    Alphabet ignoreCase() {
497      if (ignoreCase) {
498        return this;
499      }
500
501      // We can't use .clone() because of GWT.
502      byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length);
503      for (int upper = 'A'; upper <= 'Z'; upper++) {
504        int lower = upper | 0x20;
505        byte decodeUpper = decodabet[upper];
506        byte decodeLower = decodabet[lower];
507        if (decodeUpper == -1) {
508          newDecodabet[upper] = decodeLower;
509        } else {
510          checkState(
511              decodeLower == -1,
512              "Can't ignoreCase() since '%s' and '%s' encode different values",
513              (char) upper,
514              (char) lower);
515          newDecodabet[lower] = decodeUpper;
516        }
517      }
518      return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true);
519    }
520
521    char encode(int bits) {
522      return chars[bits];
523    }
524
525    boolean isValidPaddingStartPosition(int index) {
526      return validPadding[index % charsPerChunk];
527    }
528
529    boolean canDecode(char ch) {
530      return ch <= Ascii.MAX && decodabet[ch] != -1;
531    }
532
533    int decode(char ch) throws DecodingException {
534      if (ch > Ascii.MAX) {
535        throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
536      }
537      int result = decodabet[ch];
538      if (result == -1) {
539        if (ch <= 0x20 || ch == Ascii.MAX) {
540          throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
541        } else {
542          throw new DecodingException("Unrecognized character: " + ch);
543        }
544      }
545      return result;
546    }
547
548    private boolean hasLowerCase() {
549      for (char c : chars) {
550        if (Ascii.isLowerCase(c)) {
551          return true;
552        }
553      }
554      return false;
555    }
556
557    private boolean hasUpperCase() {
558      for (char c : chars) {
559        if (Ascii.isUpperCase(c)) {
560          return true;
561        }
562      }
563      return false;
564    }
565
566    Alphabet upperCase() {
567      if (!hasLowerCase()) {
568        return this;
569      }
570      checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
571      char[] upperCased = new char[chars.length];
572      for (int i = 0; i < chars.length; i++) {
573        upperCased[i] = Ascii.toUpperCase(chars[i]);
574      }
575      Alphabet upperCase = new Alphabet(name + ".upperCase()", upperCased);
576      return ignoreCase ? upperCase.ignoreCase() : upperCase;
577    }
578
579    Alphabet lowerCase() {
580      if (!hasUpperCase()) {
581        return this;
582      }
583      checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
584      char[] lowerCased = new char[chars.length];
585      for (int i = 0; i < chars.length; i++) {
586        lowerCased[i] = Ascii.toLowerCase(chars[i]);
587      }
588      Alphabet lowerCase = new Alphabet(name + ".lowerCase()", lowerCased);
589      return ignoreCase ? lowerCase.ignoreCase() : lowerCase;
590    }
591
592    public boolean matches(char c) {
593      return c < decodabet.length && decodabet[c] != -1;
594    }
595
596    @Override
597    public String toString() {
598      return name;
599    }
600
601    @Override
602    public boolean equals(@CheckForNull Object other) {
603      if (other instanceof Alphabet) {
604        Alphabet that = (Alphabet) other;
605        return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars);
606      }
607      return false;
608    }
609
610    @Override
611    public int hashCode() {
612      return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237);
613    }
614  }
615
616  private static class StandardBaseEncoding extends BaseEncoding {
617    final Alphabet alphabet;
618
619    @CheckForNull final Character paddingChar;
620
    /**
     * Creates an encoding over a new {@code Alphabet} built from the characters of {@code
     * alphabetChars}, using {@code paddingChar} for padding or no padding when it is null.
     */
    StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }
624
625    StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
626      this.alphabet = checkNotNull(alphabet);
627      checkArgument(
628          paddingChar == null || !alphabet.matches(paddingChar),
629          "Padding character %s was already in alphabet",
630          paddingChar);
631      this.paddingChar = paddingChar;
632    }
633
634    @Override
635    int maxEncodedSize(int bytes) {
636      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
637    }
638
639    @J2ktIncompatible
640    @GwtIncompatible // Writer,OutputStream
641    @Override
642    public OutputStream encodingStream(Writer out) {
643      checkNotNull(out);
644      return new OutputStream() {
645        int bitBuffer = 0;
646        int bitBufferLength = 0;
647        int writtenChars = 0;
648
649        @Override
650        public void write(int b) throws IOException {
651          bitBuffer <<= 8;
652          bitBuffer |= b & 0xFF;
653          bitBufferLength += 8;
654          while (bitBufferLength >= alphabet.bitsPerChar) {
655            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
656            out.write(alphabet.encode(charIndex));
657            writtenChars++;
658            bitBufferLength -= alphabet.bitsPerChar;
659          }
660        }
661
662        @Override
663        public void flush() throws IOException {
664          out.flush();
665        }
666
667        @Override
668        public void close() throws IOException {
669          if (bitBufferLength > 0) {
670            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
671            out.write(alphabet.encode(charIndex));
672            writtenChars++;
673            if (paddingChar != null) {
674              while (writtenChars % alphabet.charsPerChunk != 0) {
675                out.write(paddingChar.charValue());
676                writtenChars++;
677              }
678            }
679          }
680          out.close();
681        }
682      };
683    }
684
685    @Override
686    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
687      checkNotNull(target);
688      checkPositionIndexes(off, off + len, bytes.length);
689      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
690        encodeChunkTo(target, bytes, off + i, min(alphabet.bytesPerChunk, len - i));
691      }
692    }
693
694    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
695      checkNotNull(target);
696      checkPositionIndexes(off, off + len, bytes.length);
697      checkArgument(len <= alphabet.bytesPerChunk);
698      long bitBuffer = 0;
699      for (int i = 0; i < len; ++i) {
700        bitBuffer |= bytes[off + i] & 0xFF;
701        bitBuffer <<= 8; // Add additional zero byte in the end.
702      }
703      // Position of first character is length of bitBuffer minus bitsPerChar.
704      int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
705      int bitsProcessed = 0;
706      while (bitsProcessed < len * 8) {
707        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
708        target.append(alphabet.encode(charIndex));
709        bitsProcessed += alphabet.bitsPerChar;
710      }
711      if (paddingChar != null) {
712        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
713          target.append(paddingChar.charValue());
714          bitsProcessed += alphabet.bitsPerChar;
715        }
716      }
717    }
718
719    @Override
720    int maxDecodedSize(int chars) {
721      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
722    }
723
724    @Override
725    CharSequence trimTrailingPadding(CharSequence chars) {
726      checkNotNull(chars);
727      if (paddingChar == null) {
728        return chars;
729      }
730      char padChar = paddingChar.charValue();
731      int l;
732      for (l = chars.length() - 1; l >= 0; l--) {
733        if (chars.charAt(l) != padChar) {
734          break;
735        }
736      }
737      return chars.subSequence(0, l + 1);
738    }
739
740    @Override
741    public boolean canDecode(CharSequence chars) {
742      checkNotNull(chars);
743      chars = trimTrailingPadding(chars);
744      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
745        return false;
746      }
747      for (int i = 0; i < chars.length(); i++) {
748        if (!alphabet.canDecode(chars.charAt(i))) {
749          return false;
750        }
751      }
752      return true;
753    }
754
755    @Override
756    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
757      checkNotNull(target);
758      chars = trimTrailingPadding(chars);
759      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
760        throw new DecodingException("Invalid input length " + chars.length());
761      }
762      int bytesWritten = 0;
763      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
764        long chunk = 0;
765        int charsProcessed = 0;
766        for (int i = 0; i < alphabet.charsPerChunk; i++) {
767          chunk <<= alphabet.bitsPerChar;
768          if (charIdx + i < chars.length()) {
769            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
770          }
771        }
772        int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
773        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
774          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
775        }
776      }
777      return bytesWritten;
778    }
779
    /**
     * Returns a stream that reads characters from {@code reader} and yields the bytes they decode
     * to. Closing the returned stream closes {@code reader}.
     */
    @Override
    @J2ktIncompatible
    @GwtIncompatible // Reader,InputStream
    public InputStream decodingStream(Reader reader) {
      checkNotNull(reader);
      return new InputStream() {
        // Decoded bits accumulated so far; the low bitBufferLength bits are not yet returned.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        // Number of characters consumed from the reader, padding characters included.
        int readChars = 0;
        // Set once the first padding character has been seen.
        boolean hitPadding = false;

        /**
         * Reads characters until a whole byte is available and returns it, or returns -1 at end
         * of input.
         *
         * @throws DecodingException if input ends at an invalid length, padding starts at an
         *     invalid position, a non-padding character follows padding, or a character is not
         *     in the alphabet
         */
        @Override
        public int read() throws IOException {
          while (true) {
            int readChar = reader.read();
            if (readChar == -1) {
              // Without padding, the input must end where padding would have been allowed.
              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
                throw new DecodingException("Invalid input length " + readChars);
              }
              return -1;
            }
            readChars++;
            char ch = (char) readChar;
            if (paddingChar != null && paddingChar.charValue() == ch) {
              // Only the first padding character is position-checked; it may not open the input.
              if (!hitPadding
                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
                throw new DecodingException("Padding cannot start at index " + readChars);
              }
              hitPadding = true;
            } else if (hitPadding) {
              throw new DecodingException(
                  "Expected padding character but found '" + ch + "' at index " + readChars);
            } else {
              bitBuffer <<= alphabet.bitsPerChar;
              bitBuffer |= alphabet.decode(ch);
              bitBufferLength += alphabet.bitsPerChar;

              // Return a byte as soon as eight bits are buffered; leftover bits stay buffered.
              if (bitBufferLength >= 8) {
                bitBufferLength -= 8;
                return (bitBuffer >> bitBufferLength) & 0xFF;
              }
            }
          }
        }

        /**
         * Fills {@code buf[off, off + len)} by calling {@link #read()} repeatedly and returns the
         * number of bytes stored, or -1 if end of input is reached before any byte is stored.
         */
        @Override
        public int read(byte[] buf, int off, int len) throws IOException {
          // Overriding this to work around the fact that InputStream's default implementation of
          // this method will silently swallow exceptions thrown by the single-byte read() method
          // (other than on the first call to it), which in this case can cause invalid encoded
          // strings to not throw an exception.
          // See https://github.com/google/guava/issues/3542
          checkPositionIndexes(off, off + len, buf.length);

          int i = off;
          for (; i < off + len; i++) {
            int b = read();
            if (b == -1) {
              int read = i - off;
              return read == 0 ? -1 : read;
            }
            buf[i] = (byte) b;
          }
          return i - off;
        }

        @Override
        public void close() throws IOException {
          reader.close();
        }
      };
    }
852
853    @Override
854    public BaseEncoding omitPadding() {
855      return (paddingChar == null) ? this : newInstance(alphabet, null);
856    }
857
858    @Override
859    public BaseEncoding withPadChar(char padChar) {
860      if (8 % alphabet.bitsPerChar == 0
861          || (paddingChar != null && paddingChar.charValue() == padChar)) {
862        return this;
863      } else {
864        return newInstance(alphabet, padChar);
865      }
866    }
867
868    @Override
869    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
870      for (int i = 0; i < separator.length(); i++) {
871        checkArgument(
872            !alphabet.matches(separator.charAt(i)),
873            "Separator (%s) cannot contain alphabet characters",
874            separator);
875      }
876      if (paddingChar != null) {
877        checkArgument(
878            separator.indexOf(paddingChar.charValue()) < 0,
879            "Separator (%s) cannot contain padding character",
880            separator);
881      }
882      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
883    }
884
885    @LazyInit @CheckForNull private volatile BaseEncoding upperCase;
886    @LazyInit @CheckForNull private volatile BaseEncoding lowerCase;
887    @LazyInit @CheckForNull private volatile BaseEncoding ignoreCase;
888
889    @Override
890    public BaseEncoding upperCase() {
891      BaseEncoding result = upperCase;
892      if (result == null) {
893        Alphabet upper = alphabet.upperCase();
894        result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
895      }
896      return result;
897    }
898
899    @Override
900    public BaseEncoding lowerCase() {
901      BaseEncoding result = lowerCase;
902      if (result == null) {
903        Alphabet lower = alphabet.lowerCase();
904        result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
905      }
906      return result;
907    }
908
909    @Override
910    public BaseEncoding ignoreCase() {
911      BaseEncoding result = ignoreCase;
912      if (result == null) {
913        Alphabet ignore = alphabet.ignoreCase();
914        result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar);
915      }
916      return result;
917    }
918
919    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
920      return new StandardBaseEncoding(alphabet, paddingChar);
921    }
922
923    @Override
924    public String toString() {
925      StringBuilder builder = new StringBuilder("BaseEncoding.");
926      builder.append(alphabet);
927      if (8 % alphabet.bitsPerChar != 0) {
928        if (paddingChar == null) {
929          builder.append(".omitPadding()");
930        } else {
931          builder.append(".withPadChar('").append(paddingChar).append("')");
932        }
933      }
934      return builder.toString();
935    }
936
937    @Override
938    public boolean equals(@CheckForNull Object other) {
939      if (other instanceof StandardBaseEncoding) {
940        StandardBaseEncoding that = (StandardBaseEncoding) other;
941        return this.alphabet.equals(that.alphabet)
942            && Objects.equals(this.paddingChar, that.paddingChar);
943      }
944      return false;
945    }
946
947    @Override
948    public int hashCode() {
949      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
950    }
951  }
952
953  private static final class Base16Encoding extends StandardBaseEncoding {
954    final char[] encoding = new char[512];
955
956    Base16Encoding(String name, String alphabetChars) {
957      this(new Alphabet(name, alphabetChars.toCharArray()));
958    }
959
960    private Base16Encoding(Alphabet alphabet) {
961      super(alphabet, null);
962      checkArgument(alphabet.chars.length == 16);
963      for (int i = 0; i < 256; ++i) {
964        encoding[i] = alphabet.encode(i >>> 4);
965        encoding[i | 0x100] = alphabet.encode(i & 0xF);
966      }
967    }
968
969    @Override
970    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
971      checkNotNull(target);
972      checkPositionIndexes(off, off + len, bytes.length);
973      for (int i = 0; i < len; ++i) {
974        int b = bytes[off + i] & 0xFF;
975        target.append(encoding[b]);
976        target.append(encoding[b | 0x100]);
977      }
978    }
979
980    @Override
981    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
982      checkNotNull(target);
983      if (chars.length() % 2 == 1) {
984        throw new DecodingException("Invalid input length " + chars.length());
985      }
986      int bytesWritten = 0;
987      for (int i = 0; i < chars.length(); i += 2) {
988        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
989        target[bytesWritten++] = (byte) decoded;
990      }
991      return bytesWritten;
992    }
993
994    @Override
995    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
996      return new Base16Encoding(alphabet);
997    }
998  }
999
1000  private static final class Base64Encoding extends StandardBaseEncoding {
1001    Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
1002      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
1003    }
1004
1005    private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
1006      super(alphabet, paddingChar);
1007      checkArgument(alphabet.chars.length == 64);
1008    }
1009
1010    @Override
1011    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1012      checkNotNull(target);
1013      checkPositionIndexes(off, off + len, bytes.length);
1014      int i = off;
1015      for (int remaining = len; remaining >= 3; remaining -= 3) {
1016        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
1017        target.append(alphabet.encode(chunk >>> 18));
1018        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
1019        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
1020        target.append(alphabet.encode(chunk & 0x3F));
1021      }
1022      if (i < off + len) {
1023        encodeChunkTo(target, bytes, i, off + len - i);
1024      }
1025    }
1026
1027    @Override
1028    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1029      checkNotNull(target);
1030      chars = trimTrailingPadding(chars);
1031      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
1032        throw new DecodingException("Invalid input length " + chars.length());
1033      }
1034      int bytesWritten = 0;
1035      for (int i = 0; i < chars.length(); ) {
1036        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
1037        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
1038        target[bytesWritten++] = (byte) (chunk >>> 16);
1039        if (i < chars.length()) {
1040          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
1041          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
1042          if (i < chars.length()) {
1043            chunk |= alphabet.decode(chars.charAt(i++));
1044            target[bytesWritten++] = (byte) (chunk & 0xFF);
1045          }
1046        }
1047      }
1048      return bytesWritten;
1049    }
1050
1051    @Override
1052    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
1053      return new Base64Encoding(alphabet, paddingChar);
1054    }
1055  }
1056
1057  @J2ktIncompatible
1058  @GwtIncompatible
1059  static Reader ignoringReader(Reader delegate, String toIgnore) {
1060    checkNotNull(delegate);
1061    checkNotNull(toIgnore);
1062    return new Reader() {
1063      @Override
1064      public int read() throws IOException {
1065        int readChar;
1066        do {
1067          readChar = delegate.read();
1068        } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
1069        return readChar;
1070      }
1071
1072      @Override
1073      public int read(char[] cbuf, int off, int len) throws IOException {
1074        throw new UnsupportedOperationException();
1075      }
1076
1077      @Override
1078      public void close() throws IOException {
1079        delegate.close();
1080      }
1081    };
1082  }
1083
1084  static Appendable separatingAppendable(
1085      Appendable delegate, String separator, int afterEveryChars) {
1086    checkNotNull(delegate);
1087    checkNotNull(separator);
1088    checkArgument(afterEveryChars > 0);
1089    return new Appendable() {
1090      int charsUntilSeparator = afterEveryChars;
1091
1092      @Override
1093      public Appendable append(char c) throws IOException {
1094        if (charsUntilSeparator == 0) {
1095          delegate.append(separator);
1096          charsUntilSeparator = afterEveryChars;
1097        }
1098        delegate.append(c);
1099        charsUntilSeparator--;
1100        return this;
1101      }
1102
1103      @Override
1104      public Appendable append(@CheckForNull CharSequence chars, int off, int len) {
1105        throw new UnsupportedOperationException();
1106      }
1107
1108      @Override
1109      public Appendable append(@CheckForNull CharSequence chars) {
1110        throw new UnsupportedOperationException();
1111      }
1112    };
1113  }
1114
1115  @J2ktIncompatible
1116  @GwtIncompatible // Writer
1117  static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) {
1118    Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars);
1119    return new Writer() {
1120      @Override
1121      public void write(int c) throws IOException {
1122        separatingAppendable.append((char) c);
1123      }
1124
1125      @Override
1126      public void write(char[] chars, int off, int len) throws IOException {
1127        throw new UnsupportedOperationException();
1128      }
1129
1130      @Override
1131      public void flush() throws IOException {
1132        delegate.flush();
1133      }
1134
1135      @Override
1136      public void close() throws IOException {
1137        delegate.close();
1138      }
1139    };
1140  }
1141
1142  static final class SeparatedBaseEncoding extends BaseEncoding {
1143    private final BaseEncoding delegate;
1144    private final String separator;
1145    private final int afterEveryChars;
1146
1147    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1148      this.delegate = checkNotNull(delegate);
1149      this.separator = checkNotNull(separator);
1150      this.afterEveryChars = afterEveryChars;
1151      checkArgument(
1152          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1153    }
1154
1155    @Override
1156    CharSequence trimTrailingPadding(CharSequence chars) {
1157      return delegate.trimTrailingPadding(chars);
1158    }
1159
1160    @Override
1161    int maxEncodedSize(int bytes) {
1162      int unseparatedSize = delegate.maxEncodedSize(bytes);
1163      return unseparatedSize
1164          + separator.length() * divide(max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1165    }
1166
1167    @J2ktIncompatible
1168    @GwtIncompatible // Writer,OutputStream
1169    @Override
1170    public OutputStream encodingStream(Writer output) {
1171      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1172    }
1173
1174    @Override
1175    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1176      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1177    }
1178
1179    @Override
1180    int maxDecodedSize(int chars) {
1181      return delegate.maxDecodedSize(chars);
1182    }
1183
1184    @Override
1185    public boolean canDecode(CharSequence chars) {
1186      StringBuilder builder = new StringBuilder();
1187      for (int i = 0; i < chars.length(); i++) {
1188        char c = chars.charAt(i);
1189        if (separator.indexOf(c) < 0) {
1190          builder.append(c);
1191        }
1192      }
1193      return delegate.canDecode(builder);
1194    }
1195
1196    @Override
1197    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1198      StringBuilder stripped = new StringBuilder(chars.length());
1199      for (int i = 0; i < chars.length(); i++) {
1200        char c = chars.charAt(i);
1201        if (separator.indexOf(c) < 0) {
1202          stripped.append(c);
1203        }
1204      }
1205      return delegate.decodeTo(target, stripped);
1206    }
1207
1208    @Override
1209    @J2ktIncompatible
1210    @GwtIncompatible // Reader,InputStream
1211    public InputStream decodingStream(Reader reader) {
1212      return delegate.decodingStream(ignoringReader(reader, separator));
1213    }
1214
1215    @Override
1216    public BaseEncoding omitPadding() {
1217      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1218    }
1219
1220    @Override
1221    public BaseEncoding withPadChar(char padChar) {
1222      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1223    }
1224
1225    @Override
1226    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1227      throw new UnsupportedOperationException("Already have a separator");
1228    }
1229
1230    @Override
1231    public BaseEncoding upperCase() {
1232      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1233    }
1234
1235    @Override
1236    public BaseEncoding lowerCase() {
1237      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1238    }
1239
1240    @Override
1241    public BaseEncoding ignoreCase() {
1242      return delegate.ignoreCase().withSeparator(separator, afterEveryChars);
1243    }
1244
1245    @Override
1246    public String toString() {
1247      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1248    }
1249  }
1250}