Source code

001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkPositionIndexes;
020import static com.google.common.base.Preconditions.checkState;
021import static com.google.common.math.IntMath.divide;
022import static com.google.common.math.IntMath.log2;
023import static java.math.RoundingMode.CEILING;
024import static java.math.RoundingMode.FLOOR;
025import static java.math.RoundingMode.UNNECESSARY;
026
027import com.google.common.annotations.GwtCompatible;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.annotations.J2ktIncompatible;
030import com.google.common.base.Ascii;
031import com.google.errorprone.annotations.concurrent.LazyInit;
032import java.io.IOException;
033import java.io.InputStream;
034import java.io.OutputStream;
035import java.io.Reader;
036import java.io.Writer;
037import java.util.Arrays;
038import java.util.Objects;
039import javax.annotation.CheckForNull;
040import org.checkerframework.checker.nullness.qual.Nullable;
041
042/**
043 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
044 * strings. This class includes several constants for encoding schemes specified by <a
045 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
046 *
047 * <pre>{@code
048 * BaseEncoding.base32().encode("foo".getBytes(US_ASCII))
049 * }</pre>
050 *
051 * <p>returns the string {@code "MZXW6==="}, and
052 *
053 * <pre>{@code
054 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
055 * }</pre>
056 *
057 * <p>...returns the ASCII bytes of the string {@code "foo"}.
058 *
059 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
060 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
061 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
062 * behavior:
063 *
064 * <pre>{@code
065 * BaseEncoding.base16().lowerCase().decode("deadbeef");
066 * }</pre>
067 *
068 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
069 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
070 *
071 * <pre>{@code
072 * // Do NOT do this
073 * BaseEncoding hex = BaseEncoding.base16();
074 * hex.lowerCase(); // does nothing!
075 * return hex.decode("deadbeef"); // throws an IllegalArgumentException
076 * }</pre>
077 *
078 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code
079 * x}, but the reverse does not necessarily hold.
080 *
081 * <table>
082 * <caption>Encodings</caption>
083 * <tr>
084 * <th>Encoding
085 * <th>Alphabet
086 * <th>{@code char:byte} ratio
087 * <th>Default padding
088 * <th>Comments
089 * <tr>
090 * <td>{@link #base16()}
091 * <td>0-9 A-F
092 * <td>2.00
093 * <td>N/A
094 * <td>Traditional hexadecimal. Defaults to upper case.
095 * <tr>
096 * <td>{@link #base32()}
097 * <td>A-Z 2-7
098 * <td>1.60
099 * <td>=
100 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
101 * <tr>
102 * <td>{@link #base32Hex()}
103 * <td>0-9 A-V
104 * <td>1.60
105 * <td>=
106 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
107 * <tr>
108 * <td>{@link #base64()}
109 * <td>A-Z a-z 0-9 + /
110 * <td>1.33
111 * <td>=
112 * <td>
113 * <tr>
114 * <td>{@link #base64Url()}
115 * <td>A-Z a-z 0-9 - _
116 * <td>1.33
117 * <td>=
118 * <td>Safe to use as filenames, or to pass in URLs without escaping
119 * </table>
120 *
121 * <p>All instances of this class are immutable, so they may be stored safely as static constants.
122 *
123 * @author Louis Wasserman
124 * @since 14.0
125 */
126@GwtCompatible(emulated = true)
127@ElementTypesAreNonnullByDefault
128public abstract class BaseEncoding {
129  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.
130
  // No access modifier: only classes in this package can extend BaseEncoding.
  BaseEncoding() {}
132
  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * <p>This is a checked exception (it extends {@link IOException}). {@link
   * BaseEncoding#decode(CharSequence)} catches it and rethrows it wrapped in an {@link
   * IllegalArgumentException}.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception with the given detail message, which may be {@code null}. */
    DecodingException(@Nullable String message) {
      super(message);
    }
  }
144
145  /** Encodes the specified byte array, and returns the encoded {@code String}. */
146  public String encode(byte[] bytes) {
147    return encode(bytes, 0, bytes.length);
148  }
149
150  /**
151   * Encodes the specified range of the specified byte array, and returns the encoded {@code
152   * String}.
153   */
154  public final String encode(byte[] bytes, int off, int len) {
155    checkPositionIndexes(off, off + len, bytes.length);
156    StringBuilder result = new StringBuilder(maxEncodedSize(len));
157    try {
158      encodeTo(result, bytes, off, len);
159    } catch (IOException impossible) {
160      throw new AssertionError(impossible);
161    }
162    return result.toString();
163  }
164
  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code
   * Writer}.
   *
   * @param writer the destination that receives the encoded characters
   * @return a stream that encodes the bytes written to it and forwards the resulting characters
   *     to {@code writer}
   */
  @J2ktIncompatible
  @GwtIncompatible // Writer,OutputStream
  public abstract OutputStream encodingStream(Writer writer);
173
174  /**
175   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
176   */
177  @J2ktIncompatible
178  @GwtIncompatible // ByteSink,CharSink
179  public final ByteSink encodingSink(CharSink encodedSink) {
180    checkNotNull(encodedSink);
181    return new ByteSink() {
182      @Override
183      public OutputStream openStream() throws IOException {
184        return encodingStream(encodedSink.openStream());
185      }
186    };
187  }
188
189  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)
190
191  private static byte[] extract(byte[] result, int length) {
192    if (length == result.length) {
193      return result;
194    }
195    byte[] trunc = new byte[length];
196    System.arraycopy(result, 0, trunc, 0, length);
197    return trunc;
198  }
199
  /**
   * Determines whether the specified character sequence is a valid encoded string according to this
   * encoding.
   *
   * @param chars the character sequence to check
   * @return {@code true} if {@code chars} is valid encoded input for this encoding
   * @since 20.0
   */
  public abstract boolean canDecode(CharSequence chars);
207
208  /**
209   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
210   * inverse operation to {@link #encode(byte[])}.
211   *
212   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
213   *     encoding.
214   */
215  public final byte[] decode(CharSequence chars) {
216    try {
217      return decodeChecked(chars);
218    } catch (DecodingException badInput) {
219      throw new IllegalArgumentException(badInput);
220    }
221  }
222
223  /**
224   * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the
225   * inverse operation to {@link #encode(byte[])}.
226   *
227   * @throws DecodingException if the input is not a valid encoded string according to this
228   *     encoding.
229   */
230  final byte[] decodeChecked(CharSequence chars)
231      throws DecodingException {
232    chars = trimTrailingPadding(chars);
233    byte[] tmp = new byte[maxDecodedSize(chars.length())];
234    int len = decodeTo(tmp, chars);
235    return extract(tmp, len);
236  }
237
  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code
   * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors.
   *
   * @param reader the source of encoded characters
   * @return a stream yielding the bytes decoded from {@code reader}
   */
  @J2ktIncompatible
  @GwtIncompatible // Reader,InputStream
  public abstract InputStream decodingStream(Reader reader);
245
246  /**
247   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code
248   * CharSource}.
249   */
250  @J2ktIncompatible
251  @GwtIncompatible // ByteSource,CharSource
252  public final ByteSource decodingSource(CharSource encodedSource) {
253    checkNotNull(encodedSource);
254    return new ByteSource() {
255      @Override
256      public InputStream openStream() throws IOException {
257        return decodingStream(encodedSource.openStream());
258      }
259    };
260  }
261
262  // Implementations for encoding/decoding
263
  /**
   * Returns the number of characters to reserve for encoding {@code bytes} bytes. {@code encode}
   * uses it as the initial capacity of its output buffer; presumably an upper bound on the encoded
   * length.
   */
  abstract int maxEncodedSize(int bytes);
265
  /**
   * Encodes {@code len} bytes of {@code bytes}, starting at index {@code off}, appending the
   * encoded characters to {@code target}.
   *
   * @throws IOException if appending to {@code target} fails
   */
  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;
267
  /**
   * Returns the size of the byte buffer to allocate for decoding {@code chars} characters. {@code
   * decodeChecked} decodes into a buffer of this size and then truncates it to the number of bytes
   * actually written.
   */
  abstract int maxDecodedSize(int chars);
269
  /**
   * Decodes {@code chars} into {@code target}, starting at index 0 of {@code target}.
   *
   * @return the number of bytes written to {@code target}
   * @throws DecodingException if {@code chars} is not valid input for this encoding
   */
  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;
271
272  CharSequence trimTrailingPadding(CharSequence chars) {
273    return checkNotNull(chars);
274  }
275
276  // Modified encoding generators
277
  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * <p>As with every configuration method, this encoding itself is not modified.
   *
   * @return an encoding that writes no padding characters
   */
  public abstract BaseEncoding omitPadding();
284
  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * <p>As with every configuration method, this encoding itself is not modified.
   *
   * @param padChar the character to use for padding
   * @return an encoding that pads with {@code padChar}
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *     separator
   */
  public abstract BaseEncoding withPadChar(char padChar);
293
  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * <p>As with every configuration method, this encoding itself is not modified.
   *
   * @param separator the string to insert between groups of encoded characters
   * @param n the number of encoded characters between consecutive separators
   * @return an encoding that inserts {@code separator} after every {@code n} characters
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *     string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  public abstract BaseEncoding withSeparator(String separator, int n);
304
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * <p>As with every configuration method, this encoding itself is not modified.
   *
   * @return an encoding that uses uppercase letters
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding upperCase();
313
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * <p>As with every configuration method, this encoding itself is not modified.
   *
   * @return an encoding that uses lowercase letters
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   */
  public abstract BaseEncoding lowerCase();
322
  /**
   * Returns an encoding that behaves equivalently to this encoding, but decodes letters without
   * regard to case.
   *
   * <p>As with every configuration method, this encoding itself is not modified.
   *
   * @return an encoding that accepts both upper- and lower-case letters when decoding
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *     lower-case characters
   * @since 32.0.0
   */
  public abstract BaseEncoding ignoreCase();
332
  // Shared instance returned by base64(); constructed with '=' as its padding character.
  private static final BaseEncoding BASE64 =
      new Base64Encoding(
          "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64 encoding instance
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
353
  // Shared instance returned by base64Url(); same as BASE64 except that '-' and '_' replace
  // '+' and '/' as the last two alphabet characters.
  private static final BaseEncoding BASE64_URL =
      new Base64Encoding(
          "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
   * is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base64url encoding instance
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
375
  // Shared instance returned by base32(); constructed with '=' as its padding character.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
   * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32 encoding instance
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
394
  // Shared instance returned by base32Hex(); constructed with '=' as its padding character.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base32hex encoding instance
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
413
  // Shared instance returned by base16(); constructed without a padding character.
  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");

  /**
   * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
   * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
   * have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * @return the shared base16 encoding instance
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
432
433  static final class Alphabet {
    // Display name of this alphabet; returned by toString().
    private final String name;
    // Encoding table: chars[i] is the character that represents the value i.
    // this is meant to be immutable -- don't modify it!
    private final char[] chars;
    // chars.length - 1. Since chars.length is a power of two, this masks a value down to the
    // low bitsPerChar bits.
    final int mask;
    // log2(chars.length): how many bits a single character encodes.
    final int bitsPerChar;
    // Characters per chunk: the smallest count for which bitsPerChar * charsPerChunk is a
    // multiple of 8.
    final int charsPerChunk;
    // Number of whole bytes represented by one chunk of charsPerChunk characters.
    final int bytesPerChunk;
    // Decoding table indexed by ASCII code: the value of that character, or -1 if not decodable.
    private final byte[] decodabet;
    // validPadding[k] is true when k characters (within a chunk) are exactly the number needed
    // to hold a whole number of bytes.
    private final boolean[] validPadding;
    // Whether decoding treats upper- and lower-case letters as equivalent.
    private final boolean ignoreCase;
444
    /**
     * Creates a case-sensitive alphabet in which {@code chars[i]} is the character for the value
     * {@code i}; the decoding table is derived from {@code chars}.
     */
    Alphabet(String name, char[] chars) {
      this(name, chars, decodabetFor(chars), /* ignoreCase= */ false);
    }
448
449    private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) {
450      this.name = checkNotNull(name);
451      this.chars = checkNotNull(chars);
452      try {
453        this.bitsPerChar = log2(chars.length, UNNECESSARY);
454      } catch (ArithmeticException e) {
455        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
456      }
457
458      // Compute how input bytes are chunked. For example, with base64 we chunk every 3 bytes into
459      // 4 characters. We have bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3.
460      // We're looking for the smallest charsPerChunk such that bitsPerChar * charsPerChunk is a
461      // multiple of 8. A multiple of 8 has 3 low zero bits, so we just need to figure out how many
462      // extra zero bits we need to add to the end of bitsPerChar to get 3 in total.
463      // The logic here would be wrong for bitsPerChar > 8, but since we require distinct ASCII
464      // characters that can't happen.
465      int zeroesInBitsPerChar = Integer.numberOfTrailingZeros(bitsPerChar);
466      this.charsPerChunk = 1 << (3 - zeroesInBitsPerChar);
467      this.bytesPerChunk = bitsPerChar >> zeroesInBitsPerChar;
468
469      this.mask = chars.length - 1;
470
471      this.decodabet = decodabet;
472
473      boolean[] validPadding = new boolean[charsPerChunk];
474      for (int i = 0; i < bytesPerChunk; i++) {
475        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
476      }
477      this.validPadding = validPadding;
478      this.ignoreCase = ignoreCase;
479    }
480
481    private static byte[] decodabetFor(char[] chars) {
482      byte[] decodabet = new byte[Ascii.MAX + 1];
483      Arrays.fill(decodabet, (byte) -1);
484      for (int i = 0; i < chars.length; i++) {
485        char c = chars[i];
486        checkArgument(c < decodabet.length, "Non-ASCII character: %s", c);
487        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
488        decodabet[c] = (byte) i;
489      }
490      return decodabet;
491    }
492
493    /** Returns an equivalent {@code Alphabet} except it ignores case. */
494    Alphabet ignoreCase() {
495      if (ignoreCase) {
496        return this;
497      }
498
499      // We can't use .clone() because of GWT.
500      byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length);
501      for (int upper = 'A'; upper <= 'Z'; upper++) {
502        int lower = upper | 0x20;
503        byte decodeUpper = decodabet[upper];
504        byte decodeLower = decodabet[lower];
505        if (decodeUpper == -1) {
506          newDecodabet[upper] = decodeLower;
507        } else {
508          checkState(
509              decodeLower == -1,
510              "Can't ignoreCase() since '%s' and '%s' encode different values",
511              (char) upper,
512              (char) lower);
513          newDecodabet[lower] = decodeUpper;
514        }
515      }
516      return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true);
517    }
518
    /** Returns the alphabet character that represents the value {@code bits}. */
    char encode(int bits) {
      return chars[bits];
    }
522
523    boolean isValidPaddingStartPosition(int index) {
524      return validPadding[index % charsPerChunk];
525    }
526
527    boolean canDecode(char ch) {
528      return ch <= Ascii.MAX && decodabet[ch] != -1;
529    }
530
531    int decode(char ch) throws DecodingException {
532      if (ch > Ascii.MAX) {
533        throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
534      }
535      int result = decodabet[ch];
536      if (result == -1) {
537        if (ch <= 0x20 || ch == Ascii.MAX) {
538          throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch));
539        } else {
540          throw new DecodingException("Unrecognized character: " + ch);
541        }
542      }
543      return result;
544    }
545
546    private boolean hasLowerCase() {
547      for (char c : chars) {
548        if (Ascii.isLowerCase(c)) {
549          return true;
550        }
551      }
552      return false;
553    }
554
555    private boolean hasUpperCase() {
556      for (char c : chars) {
557        if (Ascii.isUpperCase(c)) {
558          return true;
559        }
560      }
561      return false;
562    }
563
564    Alphabet upperCase() {
565      if (!hasLowerCase()) {
566        return this;
567      }
568      checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
569      char[] upperCased = new char[chars.length];
570      for (int i = 0; i < chars.length; i++) {
571        upperCased[i] = Ascii.toUpperCase(chars[i]);
572      }
573      Alphabet upperCase = new Alphabet(name + ".upperCase()", upperCased);
574      return ignoreCase ? upperCase.ignoreCase() : upperCase;
575    }
576
577    Alphabet lowerCase() {
578      if (!hasUpperCase()) {
579        return this;
580      }
581      checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
582      char[] lowerCased = new char[chars.length];
583      for (int i = 0; i < chars.length; i++) {
584        lowerCased[i] = Ascii.toLowerCase(chars[i]);
585      }
586      Alphabet lowerCase = new Alphabet(name + ".lowerCase()", lowerCased);
587      return ignoreCase ? lowerCase.ignoreCase() : lowerCase;
588    }
589
590    public boolean matches(char c) {
591      return c < decodabet.length && decodabet[c] != -1;
592    }
593
    /** Returns the name this alphabet was constructed with. */
    @Override
    public String toString() {
      return name;
    }
598
599    @Override
600    public boolean equals(@CheckForNull Object other) {
601      if (other instanceof Alphabet) {
602        Alphabet that = (Alphabet) other;
603        return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars);
604      }
605      return false;
606    }
607
608    @Override
609    public int hashCode() {
610      return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237);
611    }
612  }
613
614  private static class StandardBaseEncoding extends BaseEncoding {
    // Character table and chunking parameters used for both encoding and decoding.
    final Alphabet alphabet;

    // Padding character, or null when this encoding writes no padding.
    @CheckForNull final Character paddingChar;
618
    /**
     * Creates an encoding over the alphabet formed by the characters of {@code alphabetChars}, in
     * order, with the given padding character ({@code null} for no padding).
     */
    StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }
622
623    StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
624      this.alphabet = checkNotNull(alphabet);
625      checkArgument(
626          paddingChar == null || !alphabet.matches(paddingChar),
627          "Padding character %s was already in alphabet",
628          paddingChar);
629      this.paddingChar = paddingChar;
630    }
631
632    @Override
633    int maxEncodedSize(int bytes) {
634      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
635    }
636
637    @J2ktIncompatible
638    @GwtIncompatible // Writer,OutputStream
639    @Override
640    public OutputStream encodingStream(Writer out) {
641      checkNotNull(out);
642      return new OutputStream() {
643        int bitBuffer = 0;
644        int bitBufferLength = 0;
645        int writtenChars = 0;
646
647        @Override
648        public void write(int b) throws IOException {
649          bitBuffer <<= 8;
650          bitBuffer |= b & 0xFF;
651          bitBufferLength += 8;
652          while (bitBufferLength >= alphabet.bitsPerChar) {
653            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
654            out.write(alphabet.encode(charIndex));
655            writtenChars++;
656            bitBufferLength -= alphabet.bitsPerChar;
657          }
658        }
659
660        @Override
661        public void flush() throws IOException {
662          out.flush();
663        }
664
665        @Override
666        public void close() throws IOException {
667          if (bitBufferLength > 0) {
668            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
669            out.write(alphabet.encode(charIndex));
670            writtenChars++;
671            if (paddingChar != null) {
672              while (writtenChars % alphabet.charsPerChunk != 0) {
673                out.write(paddingChar.charValue());
674                writtenChars++;
675              }
676            }
677          }
678          out.close();
679        }
680      };
681    }
682
683    @Override
684    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
685      checkNotNull(target);
686      checkPositionIndexes(off, off + len, bytes.length);
687      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
688        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
689      }
690    }
691
692    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
693      checkNotNull(target);
694      checkPositionIndexes(off, off + len, bytes.length);
695      checkArgument(len <= alphabet.bytesPerChunk);
696      long bitBuffer = 0;
697      for (int i = 0; i < len; ++i) {
698        bitBuffer |= bytes[off + i] & 0xFF;
699        bitBuffer <<= 8; // Add additional zero byte in the end.
700      }
701      // Position of first character is length of bitBuffer minus bitsPerChar.
702      int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
703      int bitsProcessed = 0;
704      while (bitsProcessed < len * 8) {
705        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
706        target.append(alphabet.encode(charIndex));
707        bitsProcessed += alphabet.bitsPerChar;
708      }
709      if (paddingChar != null) {
710        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
711          target.append(paddingChar.charValue());
712          bitsProcessed += alphabet.bitsPerChar;
713        }
714      }
715    }
716
717    @Override
718    int maxDecodedSize(int chars) {
719      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
720    }
721
722    @Override
723    CharSequence trimTrailingPadding(CharSequence chars) {
724      checkNotNull(chars);
725      if (paddingChar == null) {
726        return chars;
727      }
728      char padChar = paddingChar.charValue();
729      int l;
730      for (l = chars.length() - 1; l >= 0; l--) {
731        if (chars.charAt(l) != padChar) {
732          break;
733        }
734      }
735      return chars.subSequence(0, l + 1);
736    }
737
738    @Override
739    public boolean canDecode(CharSequence chars) {
740      checkNotNull(chars);
741      chars = trimTrailingPadding(chars);
742      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
743        return false;
744      }
745      for (int i = 0; i < chars.length(); i++) {
746        if (!alphabet.canDecode(chars.charAt(i))) {
747          return false;
748        }
749      }
750      return true;
751    }
752
    /**
     * Decodes {@code chars} (after stripping trailing padding) into {@code target}, one chunk of
     * {@code alphabet.charsPerChunk} characters at a time.
     *
     * @return the number of bytes written to {@code target}
     * @throws DecodingException if the unpadded length cannot hold a whole number of bytes, or if a
     *     character is not part of the alphabet
     */
    @Override
    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
      checkNotNull(target);
      chars = trimTrailingPadding(chars);
      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
        throw new DecodingException("Invalid input length " + chars.length());
      }
      int bytesWritten = 0;
      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
        // Accumulate the chunk's bits; characters missing from a short final chunk count as zero.
        long chunk = 0;
        int charsProcessed = 0;
        for (int i = 0; i < alphabet.charsPerChunk; i++) {
          chunk <<= alphabet.bitsPerChar;
          if (charIdx + i < chars.length()) {
            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
          }
        }
        // Bits below minOffset are zero fill, so only bytes lying entirely at or above it are
        // emitted, most significant byte first.
        int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
        }
      }
      return bytesWritten;
    }
777
    /**
     * Returns a stream that decodes the characters supplied by {@code reader}. Decoding errors are
     * reported as {@link DecodingException}s, and closing the stream closes {@code reader}.
     */
    @Override
    @J2ktIncompatible
    @GwtIncompatible // Reader,InputStream
    public InputStream decodingStream(Reader reader) {
      checkNotNull(reader);
      return new InputStream() {
        // Decoded bits not yet returned occupy the low bitBufferLength bits of bitBuffer.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        // Number of characters consumed from the reader so far, padding included.
        int readChars = 0;
        // Set once a padding character has been seen; after that only padding may follow.
        boolean hitPadding = false;

        @Override
        public int read() throws IOException {
          // Keep consuming characters until a full byte is available or the input ends.
          while (true) {
            int readChar = reader.read();
            if (readChar == -1) {
              // Without padding, the input must end where a whole number of bytes is complete.
              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
                throw new DecodingException("Invalid input length " + readChars);
              }
              return -1;
            }
            readChars++;
            char ch = (char) readChar;
            if (paddingChar != null && paddingChar.charValue() == ch) {
              // The first padding character may not open the input and must follow a character
              // count that holds a whole number of bytes.
              if (!hitPadding
                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
                throw new DecodingException("Padding cannot start at index " + readChars);
              }
              hitPadding = true;
            } else if (hitPadding) {
              throw new DecodingException(
                  "Expected padding character but found '" + ch + "' at index " + readChars);
            } else {
              bitBuffer <<= alphabet.bitsPerChar;
              bitBuffer |= alphabet.decode(ch);
              bitBufferLength += alphabet.bitsPerChar;

              // Return the oldest eight buffered bits as soon as a full byte has accumulated.
              if (bitBufferLength >= 8) {
                bitBufferLength -= 8;
                return (bitBuffer >> bitBufferLength) & 0xFF;
              }
            }
          }
        }

        @Override
        public int read(byte[] buf, int off, int len) throws IOException {
          // Overriding this to work around the fact that InputStream's default implementation of
          // this method will silently swallow exceptions thrown by the single-byte read() method
          // (other than on the first call to it), which in this case can cause invalid encoded
          // strings to not throw an exception.
          // See https://github.com/google/guava/issues/3542
          checkPositionIndexes(off, off + len, buf.length);

          int i = off;
          for (; i < off + len; i++) {
            int b = read();
            if (b == -1) {
              // End of input: report the bytes copied so far, or -1 if there were none.
              int read = i - off;
              return read == 0 ? -1 : read;
            }
            buf[i] = (byte) b;
          }
          return i - off;
        }

        @Override
        public void close() throws IOException {
          reader.close();
        }
      };
    }
850
851    @Override
852    public BaseEncoding omitPadding() {
853      return (paddingChar == null) ? this : newInstance(alphabet, null);
854    }
855
856    @Override
857    public BaseEncoding withPadChar(char padChar) {
858      if (8 % alphabet.bitsPerChar == 0
859          || (paddingChar != null && paddingChar.charValue() == padChar)) {
860        return this;
861      } else {
862        return newInstance(alphabet, padChar);
863      }
864    }
865
866    @Override
867    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
868      for (int i = 0; i < separator.length(); i++) {
869        checkArgument(
870            !alphabet.matches(separator.charAt(i)),
871            "Separator (%s) cannot contain alphabet characters",
872            separator);
873      }
874      if (paddingChar != null) {
875        checkArgument(
876            separator.indexOf(paddingChar.charValue()) < 0,
877            "Separator (%s) cannot contain padding character",
878            separator);
879      }
880      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
881    }
882
883    @LazyInit @CheckForNull private volatile BaseEncoding upperCase;
884    @LazyInit @CheckForNull private volatile BaseEncoding lowerCase;
885    @LazyInit @CheckForNull private volatile BaseEncoding ignoreCase;
886
887    @Override
888    public BaseEncoding upperCase() {
889      BaseEncoding result = upperCase;
890      if (result == null) {
891        Alphabet upper = alphabet.upperCase();
892        result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
893      }
894      return result;
895    }
896
897    @Override
898    public BaseEncoding lowerCase() {
899      BaseEncoding result = lowerCase;
900      if (result == null) {
901        Alphabet lower = alphabet.lowerCase();
902        result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
903      }
904      return result;
905    }
906
907    @Override
908    public BaseEncoding ignoreCase() {
909      BaseEncoding result = ignoreCase;
910      if (result == null) {
911        Alphabet ignore = alphabet.ignoreCase();
912        result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar);
913      }
914      return result;
915    }
916
917    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
918      return new StandardBaseEncoding(alphabet, paddingChar);
919    }
920
921    @Override
922    public String toString() {
923      StringBuilder builder = new StringBuilder("BaseEncoding.");
924      builder.append(alphabet);
925      if (8 % alphabet.bitsPerChar != 0) {
926        if (paddingChar == null) {
927          builder.append(".omitPadding()");
928        } else {
929          builder.append(".withPadChar('").append(paddingChar).append("')");
930        }
931      }
932      return builder.toString();
933    }
934
935    @Override
936    public boolean equals(@CheckForNull Object other) {
937      if (other instanceof StandardBaseEncoding) {
938        StandardBaseEncoding that = (StandardBaseEncoding) other;
939        return this.alphabet.equals(that.alphabet)
940            && Objects.equals(this.paddingChar, that.paddingChar);
941      }
942      return false;
943    }
944
945    @Override
946    public int hashCode() {
947      return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
948    }
949  }
950
951  private static final class Base16Encoding extends StandardBaseEncoding {
952    final char[] encoding = new char[512];
953
954    Base16Encoding(String name, String alphabetChars) {
955      this(new Alphabet(name, alphabetChars.toCharArray()));
956    }
957
958    private Base16Encoding(Alphabet alphabet) {
959      super(alphabet, null);
960      checkArgument(alphabet.chars.length == 16);
961      for (int i = 0; i < 256; ++i) {
962        encoding[i] = alphabet.encode(i >>> 4);
963        encoding[i | 0x100] = alphabet.encode(i & 0xF);
964      }
965    }
966
967    @Override
968    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
969      checkNotNull(target);
970      checkPositionIndexes(off, off + len, bytes.length);
971      for (int i = 0; i < len; ++i) {
972        int b = bytes[off + i] & 0xFF;
973        target.append(encoding[b]);
974        target.append(encoding[b | 0x100]);
975      }
976    }
977
978    @Override
979    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
980      checkNotNull(target);
981      if (chars.length() % 2 == 1) {
982        throw new DecodingException("Invalid input length " + chars.length());
983      }
984      int bytesWritten = 0;
985      for (int i = 0; i < chars.length(); i += 2) {
986        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
987        target[bytesWritten++] = (byte) decoded;
988      }
989      return bytesWritten;
990    }
991
992    @Override
993    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
994      return new Base16Encoding(alphabet);
995    }
996  }
997
998  private static final class Base64Encoding extends StandardBaseEncoding {
999    Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
1000      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
1001    }
1002
1003    private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
1004      super(alphabet, paddingChar);
1005      checkArgument(alphabet.chars.length == 64);
1006    }
1007
1008    @Override
1009    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1010      checkNotNull(target);
1011      checkPositionIndexes(off, off + len, bytes.length);
1012      int i = off;
1013      for (int remaining = len; remaining >= 3; remaining -= 3) {
1014        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
1015        target.append(alphabet.encode(chunk >>> 18));
1016        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
1017        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
1018        target.append(alphabet.encode(chunk & 0x3F));
1019      }
1020      if (i < off + len) {
1021        encodeChunkTo(target, bytes, i, off + len - i);
1022      }
1023    }
1024
1025    @Override
1026    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1027      checkNotNull(target);
1028      chars = trimTrailingPadding(chars);
1029      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
1030        throw new DecodingException("Invalid input length " + chars.length());
1031      }
1032      int bytesWritten = 0;
1033      for (int i = 0; i < chars.length(); ) {
1034        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
1035        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
1036        target[bytesWritten++] = (byte) (chunk >>> 16);
1037        if (i < chars.length()) {
1038          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
1039          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
1040          if (i < chars.length()) {
1041            chunk |= alphabet.decode(chars.charAt(i++));
1042            target[bytesWritten++] = (byte) (chunk & 0xFF);
1043          }
1044        }
1045      }
1046      return bytesWritten;
1047    }
1048
1049    @Override
1050    BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
1051      return new Base64Encoding(alphabet, paddingChar);
1052    }
1053  }
1054
1055  @J2ktIncompatible
1056  @GwtIncompatible
1057  static Reader ignoringReader(Reader delegate, String toIgnore) {
1058    checkNotNull(delegate);
1059    checkNotNull(toIgnore);
1060    return new Reader() {
1061      @Override
1062      public int read() throws IOException {
1063        int readChar;
1064        do {
1065          readChar = delegate.read();
1066        } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0);
1067        return readChar;
1068      }
1069
1070      @Override
1071      public int read(char[] cbuf, int off, int len) throws IOException {
1072        throw new UnsupportedOperationException();
1073      }
1074
1075      @Override
1076      public void close() throws IOException {
1077        delegate.close();
1078      }
1079    };
1080  }
1081
1082  static Appendable separatingAppendable(
1083      Appendable delegate, String separator, int afterEveryChars) {
1084    checkNotNull(delegate);
1085    checkNotNull(separator);
1086    checkArgument(afterEveryChars > 0);
1087    return new Appendable() {
1088      int charsUntilSeparator = afterEveryChars;
1089
1090      @Override
1091      public Appendable append(char c) throws IOException {
1092        if (charsUntilSeparator == 0) {
1093          delegate.append(separator);
1094          charsUntilSeparator = afterEveryChars;
1095        }
1096        delegate.append(c);
1097        charsUntilSeparator--;
1098        return this;
1099      }
1100
1101      @Override
1102      public Appendable append(@CheckForNull CharSequence chars, int off, int len) {
1103        throw new UnsupportedOperationException();
1104      }
1105
1106      @Override
1107      public Appendable append(@CheckForNull CharSequence chars) {
1108        throw new UnsupportedOperationException();
1109      }
1110    };
1111  }
1112
1113  @J2ktIncompatible
1114  @GwtIncompatible // Writer
1115  static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) {
1116    Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars);
1117    return new Writer() {
1118      @Override
1119      public void write(int c) throws IOException {
1120        separatingAppendable.append((char) c);
1121      }
1122
1123      @Override
1124      public void write(char[] chars, int off, int len) throws IOException {
1125        throw new UnsupportedOperationException();
1126      }
1127
1128      @Override
1129      public void flush() throws IOException {
1130        delegate.flush();
1131      }
1132
1133      @Override
1134      public void close() throws IOException {
1135        delegate.close();
1136      }
1137    };
1138  }
1139
1140  static final class SeparatedBaseEncoding extends BaseEncoding {
1141    private final BaseEncoding delegate;
1142    private final String separator;
1143    private final int afterEveryChars;
1144
1145    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
1146      this.delegate = checkNotNull(delegate);
1147      this.separator = checkNotNull(separator);
1148      this.afterEveryChars = afterEveryChars;
1149      checkArgument(
1150          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
1151    }
1152
1153    @Override
1154    CharSequence trimTrailingPadding(CharSequence chars) {
1155      return delegate.trimTrailingPadding(chars);
1156    }
1157
1158    @Override
1159    int maxEncodedSize(int bytes) {
1160      int unseparatedSize = delegate.maxEncodedSize(bytes);
1161      return unseparatedSize
1162          + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
1163    }
1164
1165    @J2ktIncompatible
1166    @GwtIncompatible // Writer,OutputStream
1167    @Override
1168    public OutputStream encodingStream(Writer output) {
1169      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
1170    }
1171
1172    @Override
1173    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
1174      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
1175    }
1176
1177    @Override
1178    int maxDecodedSize(int chars) {
1179      return delegate.maxDecodedSize(chars);
1180    }
1181
1182    @Override
1183    public boolean canDecode(CharSequence chars) {
1184      StringBuilder builder = new StringBuilder();
1185      for (int i = 0; i < chars.length(); i++) {
1186        char c = chars.charAt(i);
1187        if (separator.indexOf(c) < 0) {
1188          builder.append(c);
1189        }
1190      }
1191      return delegate.canDecode(builder);
1192    }
1193
1194    @Override
1195    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
1196      StringBuilder stripped = new StringBuilder(chars.length());
1197      for (int i = 0; i < chars.length(); i++) {
1198        char c = chars.charAt(i);
1199        if (separator.indexOf(c) < 0) {
1200          stripped.append(c);
1201        }
1202      }
1203      return delegate.decodeTo(target, stripped);
1204    }
1205
1206    @Override
1207    @J2ktIncompatible
1208    @GwtIncompatible // Reader,InputStream
1209    public InputStream decodingStream(Reader reader) {
1210      return delegate.decodingStream(ignoringReader(reader, separator));
1211    }
1212
1213    @Override
1214    public BaseEncoding omitPadding() {
1215      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
1216    }
1217
1218    @Override
1219    public BaseEncoding withPadChar(char padChar) {
1220      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
1221    }
1222
1223    @Override
1224    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
1225      throw new UnsupportedOperationException("Already have a separator");
1226    }
1227
1228    @Override
1229    public BaseEncoding upperCase() {
1230      return delegate.upperCase().withSeparator(separator, afterEveryChars);
1231    }
1232
1233    @Override
1234    public BaseEncoding lowerCase() {
1235      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
1236    }
1237
1238    @Override
1239    public BaseEncoding ignoreCase() {
1240      return delegate.ignoreCase().withSeparator(separator, afterEveryChars);
1241    }
1242
1243    @Override
1244    public String toString() {
1245      return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
1246    }
1247  }
1248}