001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.lang.Math.max; 024import static java.lang.Math.min; 025import static java.math.RoundingMode.CEILING; 026import static java.math.RoundingMode.FLOOR; 027import static java.math.RoundingMode.UNNECESSARY; 028 029import com.google.common.annotations.GwtCompatible; 030import com.google.common.annotations.GwtIncompatible; 031import com.google.common.annotations.J2ktIncompatible; 032import com.google.common.base.Ascii; 033import com.google.errorprone.annotations.concurrent.LazyInit; 034import java.io.IOException; 035import java.io.InputStream; 036import java.io.OutputStream; 037import java.io.Reader; 038import java.io.Writer; 039import java.util.Arrays; 040import java.util.Objects; 041import javax.annotation.CheckForNull; 042import org.checkerframework.checker.nullness.qual.Nullable; 043 044/** 045 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 046 * strings. This class includes several constants for encoding schemes specified by <a 047 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression: 048 * 049 * <pre>{@code 050 * BaseEncoding.base32().encode("foo".getBytes(US_ASCII)) 051 * }</pre> 052 * 053 * <p>returns the string {@code "MZXW6==="}, and 054 * 055 * <pre>{@code 056 * byte[] decoded = BaseEncoding.base32().decode("MZXW6==="); 057 * }</pre> 058 * 059 * <p>...returns the ASCII bytes of the string {@code "foo"}. 060 * 061 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 062 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 063 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 064 * behavior: 065 * 066 * <pre>{@code 067 * BaseEncoding.base16().lowerCase().decode("deadbeef"); 068 * }</pre> 069 * 070 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 071 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 072 * 073 * <pre>{@code 074 * // Do NOT do this 075 * BaseEncoding hex = BaseEncoding.base16(); 076 * hex.lowerCase(); // does nothing! 077 * return hex.decode("deadbeef"); // throws an IllegalArgumentException 078 * }</pre> 079 * 080 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code 081 * x}, but the reverse does not necessarily hold. 082 * 083 * <table> 084 * <caption>Encodings</caption> 085 * <tr> 086 * <th>Encoding 087 * <th>Alphabet 088 * <th>{@code char:byte} ratio 089 * <th>Default padding 090 * <th>Comments 091 * <tr> 092 * <td>{@link #base16()} 093 * <td>0-9 A-F 094 * <td>2.00 095 * <td>N/A 096 * <td>Traditional hexadecimal. Defaults to upper case. 097 * <tr> 098 * <td>{@link #base32()} 099 * <td>A-Z 2-7 100 * <td>1.60 101 * <td>= 102 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 103 * <tr> 104 * <td>{@link #base32Hex()} 105 * <td>0-9 A-V 106 * <td>1.60 107 * <td>= 108 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 109 * <tr> 110 * <td>{@link #base64()} 111 * <td>A-Z a-z 0-9 + / 112 * <td>1.33 113 * <td>= 114 * <td> 115 * <tr> 116 * <td>{@link #base64Url()} 117 * <td>A-Z a-z 0-9 - _ 118 * <td>1.33 119 * <td>= 120 * <td>Safe to use as filenames, or to pass in URLs without escaping 121 * </table> 122 * 123 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 124 * 125 * @author Louis Wasserman 126 * @since 14.0 127 */ 128@GwtCompatible(emulated = true) 129@ElementTypesAreNonnullByDefault 130public abstract class BaseEncoding { 131 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 132 133 BaseEncoding() {} 134 135 /** 136 * Exception indicating invalid base-encoded input encountered while decoding. 137 * 138 * @author Louis Wasserman 139 * @since 15.0 140 */ 141 public static final class DecodingException extends IOException { 142 DecodingException(@Nullable String message) { 143 super(message); 144 } 145 } 146 147 /** Encodes the specified byte array, and returns the encoded {@code String}. */ 148 public String encode(byte[] bytes) { 149 return encode(bytes, 0, bytes.length); 150 } 151 152 /** 153 * Encodes the specified range of the specified byte array, and returns the encoded {@code 154 * String}. 155 */ 156 public final String encode(byte[] bytes, int off, int len) { 157 checkPositionIndexes(off, off + len, bytes.length); 158 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 159 try { 160 encodeTo(result, bytes, off, len); 161 } catch (IOException impossible) { 162 throw new AssertionError(impossible); 163 } 164 return result.toString(); 165 } 166 167 /** 168 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 169 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code 170 * Writer}. 171 */ 172 @J2ktIncompatible 173 @GwtIncompatible // Writer,OutputStream 174 public abstract OutputStream encodingStream(Writer writer); 175 176 /** 177 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 178 */ 179 @J2ktIncompatible 180 @GwtIncompatible // ByteSink,CharSink 181 public final ByteSink encodingSink(CharSink encodedSink) { 182 checkNotNull(encodedSink); 183 return new ByteSink() { 184 @Override 185 public OutputStream openStream() throws IOException { 186 return encodingStream(encodedSink.openStream()); 187 } 188 }; 189 } 190 191 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 192 193 private static byte[] extract(byte[] result, int length) { 194 if (length == result.length) { 195 return result; 196 } 197 byte[] trunc = new byte[length]; 198 System.arraycopy(result, 0, trunc, 0, length); 199 return trunc; 200 } 201 202 /** 203 * Determines whether the specified character sequence is a valid encoded string according to this 204 * encoding. 205 * 206 * @since 20.0 207 */ 208 public abstract boolean canDecode(CharSequence chars); 209 210 /** 211 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 212 * inverse operation to {@link #encode(byte[])}. 213 * 214 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 215 * encoding. 216 */ 217 public final byte[] decode(CharSequence chars) { 218 try { 219 return decodeChecked(chars); 220 } catch (DecodingException badInput) { 221 throw new IllegalArgumentException(badInput); 222 } 223 } 224 225 /** 226 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 227 * inverse operation to {@link #encode(byte[])}. 228 * 229 * @throws DecodingException if the input is not a valid encoded string according to this 230 * encoding. 231 */ 232 final byte[] decodeChecked(CharSequence chars) 233 throws DecodingException { 234 chars = trimTrailingPadding(chars); 235 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 236 int len = decodeTo(tmp, chars); 237 return extract(tmp, len); 238 } 239 240 /** 241 * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code 242 * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors. 243 */ 244 @J2ktIncompatible 245 @GwtIncompatible // Reader,InputStream 246 public abstract InputStream decodingStream(Reader reader); 247 248 /** 249 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code 250 * CharSource}. 251 */ 252 @J2ktIncompatible 253 @GwtIncompatible // ByteSource,CharSource 254 public final ByteSource decodingSource(CharSource encodedSource) { 255 checkNotNull(encodedSource); 256 return new ByteSource() { 257 @Override 258 public InputStream openStream() throws IOException { 259 return decodingStream(encodedSource.openStream()); 260 } 261 }; 262 } 263 264 // Implementations for encoding/decoding 265 266 abstract int maxEncodedSize(int bytes); 267 268 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 269 270 abstract int maxDecodedSize(int chars); 271 272 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 273 274 CharSequence trimTrailingPadding(CharSequence chars) { 275 return checkNotNull(chars); 276 } 277 278 // Modified encoding generators 279 280 /** 281 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 282 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 283 * section 3.2</a>, Padding of Encoded Data. 284 */ 285 public abstract BaseEncoding omitPadding(); 286 287 /** 288 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 289 * for padding. 290 * 291 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 292 * separator 293 */ 294 public abstract BaseEncoding withPadChar(char padChar); 295 296 /** 297 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 298 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 299 * are skipped over in decoding. 300 * 301 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 302 * string, or if {@code n <= 0} 303 * @throws UnsupportedOperationException if this encoding already uses a separator 304 */ 305 public abstract BaseEncoding withSeparator(String separator, int n); 306 307 /** 308 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 309 * uppercase letters. Padding and separator characters remain in their original case. 310 * 311 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 312 * lower-case characters 313 */ 314 public abstract BaseEncoding upperCase(); 315 316 /** 317 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 318 * lowercase letters. Padding and separator characters remain in their original case. 319 * 320 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 321 * lower-case characters 322 */ 323 public abstract BaseEncoding lowerCase(); 324 325 /** 326 * Returns an encoding that behaves equivalently to this encoding, but decodes letters without 327 * regard to case. 328 * 329 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 330 * lower-case characters 331 * @since 32.0.0 332 */ 333 public abstract BaseEncoding ignoreCase(); 334 335 private static final BaseEncoding BASE64 = 336 new Base64Encoding( 337 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 338 339 /** 340 * The "base64" base encoding specified by <a 341 * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding. 342 * (This is the same as the base 64 encoding from <a 343 * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 344 * 345 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 346 * omitted} or {@linkplain #withPadChar(char) replaced}. 347 * 348 * <p>No line feeds are added by default, as per <a 349 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 350 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 351 */ 352 public static BaseEncoding base64() { 353 return BASE64; 354 } 355 356 private static final BaseEncoding BASE64_URL = 357 new Base64Encoding( 358 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 359 360 /** 361 * The "base64url" encoding specified by <a 362 * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 363 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This 364 * is the same as the base 64 encoding with URL and filename safe alphabet from <a 365 * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 366 * 367 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 368 * omitted} or {@linkplain #withPadChar(char) replaced}. 369 * 370 * <p>No line feeds are added by default, as per <a 371 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 372 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 373 */ 374 public static BaseEncoding base64Url() { 375 return BASE64_URL; 376 } 377 378 private static final BaseEncoding BASE32 = 379 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 380 381 /** 382 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC 383 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a 384 * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 385 * 386 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 387 * omitted} or {@linkplain #withPadChar(char) replaced}. 388 * 389 * <p>No line feeds are added by default, as per <a 390 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 391 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 392 */ 393 public static BaseEncoding base32() { 394 return BASE32; 395 } 396 397 private static final BaseEncoding BASE32_HEX = 398 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 399 400 /** 401 * The "base32hex" encoding specified by <a 402 * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 403 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 404 * 405 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 406 * omitted} or {@linkplain #withPadChar(char) replaced}. 407 * 408 * <p>No line feeds are added by default, as per <a 409 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 410 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 411 */ 412 public static BaseEncoding base32Hex() { 413 return BASE32_HEX; 414 } 415 416 private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF"); 417 418 /** 419 * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC 420 * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a 421 * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 422 * "hexadecimal" format. 423 * 424 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()} 425 * have no effect. 426 * 427 * <p>No line feeds are added by default, as per <a 428 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 429 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 430 */ 431 public static BaseEncoding base16() { 432 return BASE16; 433 } 434 435 static final class Alphabet { 436 private final String name; 437 // this is meant to be immutable -- don't modify it! 438 private final char[] chars; 439 final int mask; 440 final int bitsPerChar; 441 final int charsPerChunk; 442 final int bytesPerChunk; 443 private final byte[] decodabet; 444 private final boolean[] validPadding; 445 private final boolean ignoreCase; 446 447 Alphabet(String name, char[] chars) { 448 this(name, chars, decodabetFor(chars), /* ignoreCase= */ false); 449 } 450 451 private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) { 452 this.name = checkNotNull(name); 453 this.chars = checkNotNull(chars); 454 try { 455 this.bitsPerChar = log2(chars.length, UNNECESSARY); 456 } catch (ArithmeticException e) { 457 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 458 } 459 460 // Compute how input bytes are chunked. For example, with base64 we chunk every 3 bytes into 461 // 4 characters. We have bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. 462 // We're looking for the smallest charsPerChunk such that bitsPerChar * charsPerChunk is a 463 // multiple of 8. A multiple of 8 has 3 low zero bits, so we just need to figure out how many 464 // extra zero bits we need to add to the end of bitsPerChar to get 3 in total. 465 // The logic here would be wrong for bitsPerChar > 8, but since we require distinct ASCII 466 // characters that can't happen. 467 int zeroesInBitsPerChar = Integer.numberOfTrailingZeros(bitsPerChar); 468 this.charsPerChunk = 1 << (3 - zeroesInBitsPerChar); 469 this.bytesPerChunk = bitsPerChar >> zeroesInBitsPerChar; 470 471 this.mask = chars.length - 1; 472 473 this.decodabet = decodabet; 474 475 boolean[] validPadding = new boolean[charsPerChunk]; 476 for (int i = 0; i < bytesPerChunk; i++) { 477 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 478 } 479 this.validPadding = validPadding; 480 this.ignoreCase = ignoreCase; 481 } 482 483 private static byte[] decodabetFor(char[] chars) { 484 byte[] decodabet = new byte[Ascii.MAX + 1]; 485 Arrays.fill(decodabet, (byte) -1); 486 for (int i = 0; i < chars.length; i++) { 487 char c = chars[i]; 488 checkArgument(c < decodabet.length, "Non-ASCII character: %s", c); 489 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 490 decodabet[c] = (byte) i; 491 } 492 return decodabet; 493 } 494 495 /** Returns an equivalent {@code Alphabet} except it ignores case. */ 496 Alphabet ignoreCase() { 497 if (ignoreCase) { 498 return this; 499 } 500 501 // We can't use .clone() because of GWT. 502 byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length); 503 for (int upper = 'A'; upper <= 'Z'; upper++) { 504 int lower = upper | 0x20; 505 byte decodeUpper = decodabet[upper]; 506 byte decodeLower = decodabet[lower]; 507 if (decodeUpper == -1) { 508 newDecodabet[upper] = decodeLower; 509 } else { 510 checkState( 511 decodeLower == -1, 512 "Can't ignoreCase() since '%s' and '%s' encode different values", 513 (char) upper, 514 (char) lower); 515 newDecodabet[lower] = decodeUpper; 516 } 517 } 518 return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true); 519 } 520 521 char encode(int bits) { 522 return chars[bits]; 523 } 524 525 boolean isValidPaddingStartPosition(int index) { 526 return validPadding[index % charsPerChunk]; 527 } 528 529 boolean canDecode(char ch) { 530 return ch <= Ascii.MAX && decodabet[ch] != -1; 531 } 532 533 int decode(char ch) throws DecodingException { 534 if (ch > Ascii.MAX) { 535 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 536 } 537 int result = decodabet[ch]; 538 if (result == -1) { 539 if (ch <= 0x20 || ch == Ascii.MAX) { 540 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 541 } else { 542 throw new DecodingException("Unrecognized character: " + ch); 543 } 544 } 545 return result; 546 } 547 548 private boolean hasLowerCase() { 549 for (char c : chars) { 550 if (Ascii.isLowerCase(c)) { 551 return true; 552 } 553 } 554 return false; 555 } 556 557 private boolean hasUpperCase() { 558 for (char c : chars) { 559 if (Ascii.isUpperCase(c)) { 560 return true; 561 } 562 } 563 return false; 564 } 565 566 Alphabet upperCase() { 567 if (!hasLowerCase()) { 568 return this; 569 } 570 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 571 char[] upperCased = new char[chars.length]; 572 for (int i = 0; i < chars.length; i++) { 573 upperCased[i] = Ascii.toUpperCase(chars[i]); 574 } 575 Alphabet upperCase = new Alphabet(name + ".upperCase()", upperCased); 576 return ignoreCase ? upperCase.ignoreCase() : upperCase; 577 } 578 579 Alphabet lowerCase() { 580 if (!hasUpperCase()) { 581 return this; 582 } 583 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 584 char[] lowerCased = new char[chars.length]; 585 for (int i = 0; i < chars.length; i++) { 586 lowerCased[i] = Ascii.toLowerCase(chars[i]); 587 } 588 Alphabet lowerCase = new Alphabet(name + ".lowerCase()", lowerCased); 589 return ignoreCase ? lowerCase.ignoreCase() : lowerCase; 590 } 591 592 public boolean matches(char c) { 593 return c < decodabet.length && decodabet[c] != -1; 594 } 595 596 @Override 597 public String toString() { 598 return name; 599 } 600 601 @Override 602 public boolean equals(@CheckForNull Object other) { 603 if (other instanceof Alphabet) { 604 Alphabet that = (Alphabet) other; 605 return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars); 606 } 607 return false; 608 } 609 610 @Override 611 public int hashCode() { 612 return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237); 613 } 614 } 615 616 private static class StandardBaseEncoding extends BaseEncoding { 617 final Alphabet alphabet; 618 619 @CheckForNull final Character paddingChar; 620 621 StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) { 622 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 623 } 624 625 StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) { 626 this.alphabet = checkNotNull(alphabet); 627 checkArgument( 628 paddingChar == null || !alphabet.matches(paddingChar), 629 "Padding character %s was already in alphabet", 630 paddingChar); 631 this.paddingChar = paddingChar; 632 } 633 634 @Override 635 int maxEncodedSize(int bytes) { 636 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 637 } 638 639 @J2ktIncompatible 640 @GwtIncompatible // Writer,OutputStream 641 @Override 642 public OutputStream encodingStream(Writer out) { 643 checkNotNull(out); 644 return new OutputStream() { 645 int bitBuffer = 0; 646 int bitBufferLength = 0; 647 int writtenChars = 0; 648 649 @Override 650 public void write(int b) throws IOException { 651 bitBuffer <<= 8; 652 bitBuffer |= b & 0xFF; 653 bitBufferLength += 8; 654 while (bitBufferLength >= alphabet.bitsPerChar) { 655 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 656 out.write(alphabet.encode(charIndex)); 657 writtenChars++; 658 bitBufferLength -= alphabet.bitsPerChar; 659 } 660 } 661 662 @Override 663 public void flush() throws IOException { 664 out.flush(); 665 } 666 667 @Override 668 public void close() throws IOException { 669 if (bitBufferLength > 0) { 670 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 671 out.write(alphabet.encode(charIndex)); 672 writtenChars++; 673 if (paddingChar != null) { 674 while (writtenChars % alphabet.charsPerChunk != 0) { 675 out.write(paddingChar.charValue()); 676 writtenChars++; 677 } 678 } 679 } 680 out.close(); 681 } 682 }; 683 } 684 685 @Override 686 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 687 checkNotNull(target); 688 checkPositionIndexes(off, off + len, bytes.length); 689 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 690 encodeChunkTo(target, bytes, off + i, min(alphabet.bytesPerChunk, len - i)); 691 } 692 } 693 694 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 695 checkNotNull(target); 696 checkPositionIndexes(off, off + len, bytes.length); 697 checkArgument(len <= alphabet.bytesPerChunk); 698 long bitBuffer = 0; 699 for (int i = 0; i < len; ++i) { 700 bitBuffer |= bytes[off + i] & 0xFF; 701 bitBuffer <<= 8; // Add additional zero byte in the end. 702 } 703 // Position of first character is length of bitBuffer minus bitsPerChar. 704 int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 705 int bitsProcessed = 0; 706 while (bitsProcessed < len * 8) { 707 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 708 target.append(alphabet.encode(charIndex)); 709 bitsProcessed += alphabet.bitsPerChar; 710 } 711 if (paddingChar != null) { 712 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 713 target.append(paddingChar.charValue()); 714 bitsProcessed += alphabet.bitsPerChar; 715 } 716 } 717 } 718 719 @Override 720 int maxDecodedSize(int chars) { 721 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 722 } 723 724 @Override 725 CharSequence trimTrailingPadding(CharSequence chars) { 726 checkNotNull(chars); 727 if (paddingChar == null) { 728 return chars; 729 } 730 char padChar = paddingChar.charValue(); 731 int l; 732 for (l = chars.length() - 1; l >= 0; l--) { 733 if (chars.charAt(l) != padChar) { 734 break; 735 } 736 } 737 return chars.subSequence(0, l + 1); 738 } 739 740 @Override 741 public boolean canDecode(CharSequence chars) { 742 checkNotNull(chars); 743 chars = trimTrailingPadding(chars); 744 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 745 return false; 746 } 747 for (int i = 0; i < chars.length(); i++) { 748 if (!alphabet.canDecode(chars.charAt(i))) { 749 return false; 750 } 751 } 752 return true; 753 } 754 755 @Override 756 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 757 checkNotNull(target); 758 chars = trimTrailingPadding(chars); 759 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 760 throw new DecodingException("Invalid input length " + chars.length()); 761 } 762 int bytesWritten = 0; 763 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 764 long chunk = 0; 765 int charsProcessed = 0; 766 for (int i = 0; i < alphabet.charsPerChunk; i++) { 767 chunk <<= alphabet.bitsPerChar; 768 if (charIdx + i < chars.length()) { 769 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 770 } 771 } 772 int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 773 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 774 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 775 } 776 } 777 return bytesWritten; 778 } 779 780 @Override 781 @J2ktIncompatible 782 @GwtIncompatible // Reader,InputStream 783 public InputStream decodingStream(Reader reader) { 784 checkNotNull(reader); 785 return new InputStream() { 786 int bitBuffer = 0; 787 int bitBufferLength = 0; 788 int readChars = 0; 789 boolean hitPadding = false; 790 791 @Override 792 public int read() throws IOException { 793 while (true) { 794 int readChar = reader.read(); 795 if (readChar == -1) { 796 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 797 throw new DecodingException("Invalid input length " + readChars); 798 } 799 return -1; 800 } 801 readChars++; 802 char ch = (char) readChar; 803 if (paddingChar != null && paddingChar.charValue() == ch) { 804 if (!hitPadding 805 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 806 throw new DecodingException("Padding cannot start at index " + readChars); 807 } 808 hitPadding = true; 809 } else if (hitPadding) { 810 throw new DecodingException( 811 "Expected padding character but found '" + ch + "' at index " + readChars); 812 } else { 813 bitBuffer <<= alphabet.bitsPerChar; 814 bitBuffer |= alphabet.decode(ch); 815 bitBufferLength += alphabet.bitsPerChar; 816 817 if (bitBufferLength >= 8) { 818 bitBufferLength -= 8; 819 return (bitBuffer >> bitBufferLength) & 0xFF; 820 } 821 } 822 } 823 } 824 825 @Override 826 public int read(byte[] buf, int off, int len) throws IOException { 827 // Overriding this to work around the fact that InputStream's default implementation of 828 // this method will silently swallow exceptions thrown by the single-byte read() method 829 // (other than on the first call to it), which in this case can cause invalid encoded 830 // strings to not throw an exception. 831 // See https://github.com/google/guava/issues/3542 832 checkPositionIndexes(off, off + len, buf.length); 833 834 int i = off; 835 for (; i < off + len; i++) { 836 int b = read(); 837 if (b == -1) { 838 int read = i - off; 839 return read == 0 ? -1 : read; 840 } 841 buf[i] = (byte) b; 842 } 843 return i - off; 844 } 845 846 @Override 847 public void close() throws IOException { 848 reader.close(); 849 } 850 }; 851 } 852 853 @Override 854 public BaseEncoding omitPadding() { 855 return (paddingChar == null) ? this : newInstance(alphabet, null); 856 } 857 858 @Override 859 public BaseEncoding withPadChar(char padChar) { 860 if (8 % alphabet.bitsPerChar == 0 861 || (paddingChar != null && paddingChar.charValue() == padChar)) { 862 return this; 863 } else { 864 return newInstance(alphabet, padChar); 865 } 866 } 867 868 @Override 869 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 870 for (int i = 0; i < separator.length(); i++) { 871 checkArgument( 872 !alphabet.matches(separator.charAt(i)), 873 "Separator (%s) cannot contain alphabet characters", 874 separator); 875 } 876 if (paddingChar != null) { 877 checkArgument( 878 separator.indexOf(paddingChar.charValue()) < 0, 879 "Separator (%s) cannot contain padding character", 880 separator); 881 } 882 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 883 } 884 885 @LazyInit @CheckForNull private volatile BaseEncoding upperCase; 886 @LazyInit @CheckForNull private volatile BaseEncoding lowerCase; 887 @LazyInit @CheckForNull private volatile BaseEncoding ignoreCase; 888 889 @Override 890 public BaseEncoding upperCase() { 891 BaseEncoding result = upperCase; 892 if (result == null) { 893 Alphabet upper = alphabet.upperCase(); 894 result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar); 895 } 896 return result; 897 } 898 899 @Override 900 public BaseEncoding lowerCase() { 901 BaseEncoding result = lowerCase; 902 if (result == null) { 903 Alphabet lower = alphabet.lowerCase(); 904 result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar); 905 } 906 return result; 907 } 908 909 @Override 910 public BaseEncoding ignoreCase() { 911 BaseEncoding result = ignoreCase; 912 if (result == null) { 913 Alphabet ignore = alphabet.ignoreCase(); 914 result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar); 915 } 916 return result; 917 } 918 919 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 920 return new StandardBaseEncoding(alphabet, paddingChar); 921 } 922 923 @Override 924 public String toString() { 925 StringBuilder builder = new StringBuilder("BaseEncoding."); 926 builder.append(alphabet); 927 if (8 % alphabet.bitsPerChar != 0) { 928 if (paddingChar == null) { 929 builder.append(".omitPadding()"); 930 } else { 931 builder.append(".withPadChar('").append(paddingChar).append("')"); 932 } 933 } 934 return builder.toString(); 935 } 936 937 @Override 938 public boolean equals(@CheckForNull Object other) { 939 if (other instanceof StandardBaseEncoding) { 940 StandardBaseEncoding that = (StandardBaseEncoding) other; 941 return this.alphabet.equals(that.alphabet) 942 && Objects.equals(this.paddingChar, that.paddingChar); 943 } 944 return false; 945 } 946 947 @Override 948 public int hashCode() { 949 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 950 } 951 } 952 953 private static final class Base16Encoding extends StandardBaseEncoding { 954 final char[] encoding = new char[512]; 955 956 Base16Encoding(String name, String alphabetChars) { 957 this(new Alphabet(name, alphabetChars.toCharArray())); 958 } 959 960 private Base16Encoding(Alphabet alphabet) { 961 super(alphabet, null); 962 checkArgument(alphabet.chars.length == 16); 963 for (int i = 0; i < 256; ++i) { 964 encoding[i] = alphabet.encode(i >>> 4); 965 encoding[i | 0x100] = alphabet.encode(i & 0xF); 966 } 967 } 968 969 @Override 970 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 971 checkNotNull(target); 972 checkPositionIndexes(off, off + len, bytes.length); 973 for (int i = 0; i < len; ++i) { 974 int b = bytes[off + i] & 0xFF; 975 target.append(encoding[b]); 976 target.append(encoding[b | 0x100]); 977 } 978 } 979 980 @Override 981 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 982 checkNotNull(target); 983 if (chars.length() % 2 == 1) { 984 throw new DecodingException("Invalid input length " + chars.length()); 985 } 986 int bytesWritten = 0; 987 for (int i = 0; i < chars.length(); i += 2) { 988 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 989 target[bytesWritten++] = (byte) decoded; 990 } 991 return bytesWritten; 992 } 993 994 @Override 995 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 996 return new Base16Encoding(alphabet); 997 } 998 } 999 1000 private static final class Base64Encoding extends StandardBaseEncoding { 1001 Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) { 1002 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 1003 } 1004 1005 private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) { 1006 super(alphabet, paddingChar); 1007 checkArgument(alphabet.chars.length == 64); 1008 } 1009 1010 @Override 1011 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1012 checkNotNull(target); 1013 checkPositionIndexes(off, off + len, bytes.length); 1014 int i = off; 1015 for (int remaining = len; remaining >= 3; remaining -= 3) { 1016 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 1017 target.append(alphabet.encode(chunk >>> 18)); 1018 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 1019 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 1020 target.append(alphabet.encode(chunk & 0x3F)); 1021 } 1022 if (i < off + len) { 1023 encodeChunkTo(target, bytes, i, off + len - i); 1024 } 1025 } 1026 1027 @Override 1028 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1029 checkNotNull(target); 1030 chars = trimTrailingPadding(chars); 1031 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 1032 throw new DecodingException("Invalid input length " + chars.length()); 1033 } 1034 int bytesWritten = 0; 1035 for (int i = 0; i < chars.length(); ) { 1036 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 1037 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 1038 target[bytesWritten++] = (byte) (chunk >>> 16); 1039 if (i < chars.length()) { 1040 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 1041 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 1042 if (i < chars.length()) { 1043 chunk |= alphabet.decode(chars.charAt(i++)); 1044 target[bytesWritten++] = (byte) (chunk & 0xFF); 1045 } 1046 } 1047 } 1048 return bytesWritten; 1049 } 1050 1051 @Override 1052 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 1053 return new Base64Encoding(alphabet, paddingChar); 1054 } 1055 } 1056 1057 @J2ktIncompatible 1058 @GwtIncompatible 1059 static Reader ignoringReader(Reader delegate, String toIgnore) { 1060 checkNotNull(delegate); 1061 checkNotNull(toIgnore); 1062 return new Reader() { 1063 @Override 1064 public int read() throws IOException { 1065 int readChar; 1066 do { 1067 readChar = delegate.read(); 1068 } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0); 1069 return readChar; 1070 } 1071 1072 @Override 1073 public int read(char[] cbuf, int off, int len) throws IOException { 1074 throw new UnsupportedOperationException(); 1075 } 1076 1077 @Override 1078 public void close() throws IOException { 1079 delegate.close(); 1080 } 1081 }; 1082 } 1083 1084 static Appendable separatingAppendable( 1085 Appendable delegate, String separator, int afterEveryChars) { 1086 checkNotNull(delegate); 1087 checkNotNull(separator); 1088 checkArgument(afterEveryChars > 0); 1089 return new Appendable() { 1090 int charsUntilSeparator = afterEveryChars; 1091 1092 @Override 1093 public Appendable append(char c) throws IOException { 1094 if (charsUntilSeparator == 0) { 1095 delegate.append(separator); 1096 charsUntilSeparator = afterEveryChars; 1097 } 1098 delegate.append(c); 1099 charsUntilSeparator--; 1100 return this; 1101 } 1102 1103 @Override 1104 public Appendable append(@CheckForNull CharSequence chars, int off, int len) { 1105 throw new UnsupportedOperationException(); 1106 } 1107 1108 @Override 1109 public Appendable append(@CheckForNull CharSequence chars) { 1110 throw new UnsupportedOperationException(); 1111 } 1112 }; 1113 } 1114 1115 @J2ktIncompatible 1116 @GwtIncompatible // Writer 1117 static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) { 1118 Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars); 1119 return new Writer() { 1120 @Override 1121 public void write(int c) throws IOException { 1122 separatingAppendable.append((char) c); 1123 } 1124 1125 @Override 1126 public void write(char[] chars, int off, int len) throws IOException { 1127 throw new UnsupportedOperationException(); 1128 } 1129 1130 @Override 1131 public void flush() throws IOException { 1132 delegate.flush(); 1133 } 1134 1135 @Override 1136 public void close() throws IOException { 1137 delegate.close(); 1138 } 1139 }; 1140 } 1141 1142 static final class SeparatedBaseEncoding extends BaseEncoding { 1143 private final BaseEncoding delegate; 1144 private final String separator; 1145 private final int afterEveryChars; 1146 1147 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1148 this.delegate = checkNotNull(delegate); 1149 this.separator = checkNotNull(separator); 1150 this.afterEveryChars = afterEveryChars; 1151 checkArgument( 1152 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1153 } 1154 1155 @Override 1156 CharSequence trimTrailingPadding(CharSequence chars) { 1157 return delegate.trimTrailingPadding(chars); 1158 } 1159 1160 @Override 1161 int maxEncodedSize(int bytes) { 1162 int unseparatedSize = delegate.maxEncodedSize(bytes); 1163 return unseparatedSize 1164 + separator.length() * divide(max(0, unseparatedSize - 1), afterEveryChars, FLOOR); 1165 } 1166 1167 @J2ktIncompatible 1168 @GwtIncompatible // Writer,OutputStream 1169 @Override 1170 public OutputStream encodingStream(Writer output) { 1171 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1172 } 1173 1174 @Override 1175 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1176 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1177 } 1178 1179 @Override 1180 int maxDecodedSize(int chars) { 1181 return delegate.maxDecodedSize(chars); 1182 } 1183 1184 @Override 1185 public boolean canDecode(CharSequence chars) { 1186 StringBuilder builder = new StringBuilder(); 1187 for (int i = 0; i < chars.length(); i++) { 1188 char c = chars.charAt(i); 1189 if (separator.indexOf(c) < 0) { 1190 builder.append(c); 1191 } 1192 } 1193 return delegate.canDecode(builder); 1194 } 1195 1196 @Override 1197 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1198 StringBuilder stripped = new StringBuilder(chars.length()); 1199 for (int i = 0; i < chars.length(); i++) { 1200 char c = chars.charAt(i); 1201 if (separator.indexOf(c) < 0) { 1202 stripped.append(c); 1203 } 1204 } 1205 return delegate.decodeTo(target, stripped); 1206 } 1207 1208 @Override 1209 @J2ktIncompatible 1210 @GwtIncompatible // Reader,InputStream 1211 public InputStream decodingStream(Reader reader) { 1212 return delegate.decodingStream(ignoringReader(reader, separator)); 1213 } 1214 1215 @Override 1216 public BaseEncoding omitPadding() { 1217 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1218 } 1219 1220 @Override 1221 public BaseEncoding withPadChar(char padChar) { 1222 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1223 } 1224 1225 @Override 1226 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1227 throw new UnsupportedOperationException("Already have a separator"); 1228 } 1229 1230 @Override 1231 public BaseEncoding upperCase() { 1232 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1233 } 1234 1235 @Override 1236 public BaseEncoding lowerCase() { 1237 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1238 } 1239 1240 @Override 1241 public BaseEncoding ignoreCase() { 1242 return delegate.ignoreCase().withSeparator(separator, afterEveryChars); 1243 } 1244 1245 @Override 1246 public String toString() { 1247 return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 1248 } 1249 } 1250}