001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.lang.Math.max; 024import static java.lang.Math.min; 025import static java.math.RoundingMode.CEILING; 026import static java.math.RoundingMode.FLOOR; 027import static java.math.RoundingMode.UNNECESSARY; 028 029import com.google.common.annotations.GwtCompatible; 030import com.google.common.annotations.GwtIncompatible; 031import com.google.common.annotations.J2ktIncompatible; 032import com.google.common.base.Ascii; 033import com.google.errorprone.annotations.concurrent.LazyInit; 034import java.io.IOException; 035import java.io.InputStream; 036import java.io.OutputStream; 037import java.io.Reader; 038import java.io.Writer; 039import java.util.Arrays; 040import java.util.Objects; 041import javax.annotation.CheckForNull; 042import org.checkerframework.checker.nullness.qual.Nullable; 043 044/** 045 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 046 * strings. This class includes several constants for encoding schemes specified by <a 047 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression: 048 * 049 * <pre>{@code 050 * BaseEncoding.base32().encode("foo".getBytes(US_ASCII)) 051 * }</pre> 052 * 053 * <p>returns the string {@code "MZXW6==="}, and 054 * 055 * <pre>{@code 056 * byte[] decoded = BaseEncoding.base32().decode("MZXW6==="); 057 * }</pre> 058 * 059 * <p>...returns the ASCII bytes of the string {@code "foo"}. 060 * 061 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 062 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 063 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 064 * behavior: 065 * 066 * <pre>{@code 067 * BaseEncoding.base16().lowerCase().decode("deadbeef"); 068 * }</pre> 069 * 070 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 071 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 072 * 073 * <pre>{@code 074 * // Do NOT do this 075 * BaseEncoding hex = BaseEncoding.base16(); 076 * hex.lowerCase(); // does nothing! 077 * return hex.decode("deadbeef"); // throws an IllegalArgumentException 078 * }</pre> 079 * 080 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code 081 * x}, but the reverse does not necessarily hold. 082 * 083 * <table> 084 * <caption>Encodings</caption> 085 * <tr> 086 * <th>Encoding 087 * <th>Alphabet 088 * <th>{@code char:byte} ratio 089 * <th>Default padding 090 * <th>Comments 091 * <tr> 092 * <td>{@link #base16()} 093 * <td>0-9 A-F 094 * <td>2.00 095 * <td>N/A 096 * <td>Traditional hexadecimal. Defaults to upper case. 097 * <tr> 098 * <td>{@link #base32()} 099 * <td>A-Z 2-7 100 * <td>1.60 101 * <td>= 102 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 103 * <tr> 104 * <td>{@link #base32Hex()} 105 * <td>0-9 A-V 106 * <td>1.60 107 * <td>= 108 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 109 * <tr> 110 * <td>{@link #base64()} 111 * <td>A-Z a-z 0-9 + / 112 * <td>1.33 113 * <td>= 114 * <td> 115 * <tr> 116 * <td>{@link #base64Url()} 117 * <td>A-Z a-z 0-9 - _ 118 * <td>1.33 119 * <td>= 120 * <td>Safe to use as filenames, or to pass in URLs without escaping 121 * </table> 122 * 123 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 124 * 125 * @author Louis Wasserman 126 * @since 14.0 127 */ 128@GwtCompatible(emulated = true) 129public abstract class BaseEncoding { 130 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 131 132 BaseEncoding() {} 133 134 /** 135 * Exception indicating invalid base-encoded input encountered while decoding. 136 * 137 * @author Louis Wasserman 138 * @since 15.0 139 */ 140 public static final class DecodingException extends IOException { 141 DecodingException(@Nullable String message) { 142 super(message); 143 } 144 } 145 146 /** Encodes the specified byte array, and returns the encoded {@code String}. */ 147 public String encode(byte[] bytes) { 148 return encode(bytes, 0, bytes.length); 149 } 150 151 /** 152 * Encodes the specified range of the specified byte array, and returns the encoded {@code 153 * String}. 154 */ 155 public final String encode(byte[] bytes, int off, int len) { 156 checkPositionIndexes(off, off + len, bytes.length); 157 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 158 try { 159 encodeTo(result, bytes, off, len); 160 } catch (IOException impossible) { 161 throw new AssertionError(impossible); 162 } 163 return result.toString(); 164 } 165 166 /** 167 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 168 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code 169 * Writer}. 170 */ 171 @J2ktIncompatible 172 @GwtIncompatible // Writer,OutputStream 173 public abstract OutputStream encodingStream(Writer writer); 174 175 /** 176 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 177 */ 178 @J2ktIncompatible 179 @GwtIncompatible // ByteSink,CharSink 180 public final ByteSink encodingSink(CharSink encodedSink) { 181 checkNotNull(encodedSink); 182 return new ByteSink() { 183 @Override 184 public OutputStream openStream() throws IOException { 185 return encodingStream(encodedSink.openStream()); 186 } 187 }; 188 } 189 190 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 191 192 private static byte[] extract(byte[] result, int length) { 193 if (length == result.length) { 194 return result; 195 } 196 byte[] trunc = new byte[length]; 197 System.arraycopy(result, 0, trunc, 0, length); 198 return trunc; 199 } 200 201 /** 202 * Determines whether the specified character sequence is a valid encoded string according to this 203 * encoding. 204 * 205 * @since 20.0 206 */ 207 public abstract boolean canDecode(CharSequence chars); 208 209 /** 210 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 211 * inverse operation to {@link #encode(byte[])}. 212 * 213 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 214 * encoding. 215 */ 216 public final byte[] decode(CharSequence chars) { 217 try { 218 return decodeChecked(chars); 219 } catch (DecodingException badInput) { 220 throw new IllegalArgumentException(badInput); 221 } 222 } 223 224 /** 225 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 226 * inverse operation to {@link #encode(byte[])}. 227 * 228 * @throws DecodingException if the input is not a valid encoded string according to this 229 * encoding. 230 */ 231 final byte[] decodeChecked(CharSequence chars) 232 throws DecodingException { 233 chars = trimTrailingPadding(chars); 234 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 235 int len = decodeTo(tmp, chars); 236 return extract(tmp, len); 237 } 238 239 /** 240 * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code 241 * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors. 242 */ 243 @J2ktIncompatible 244 @GwtIncompatible // Reader,InputStream 245 public abstract InputStream decodingStream(Reader reader); 246 247 /** 248 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code 249 * CharSource}. 250 */ 251 @J2ktIncompatible 252 @GwtIncompatible // ByteSource,CharSource 253 public final ByteSource decodingSource(CharSource encodedSource) { 254 checkNotNull(encodedSource); 255 return new ByteSource() { 256 @Override 257 public InputStream openStream() throws IOException { 258 return decodingStream(encodedSource.openStream()); 259 } 260 }; 261 } 262 263 // Implementations for encoding/decoding 264 265 abstract int maxEncodedSize(int bytes); 266 267 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 268 269 abstract int maxDecodedSize(int chars); 270 271 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 272 273 CharSequence trimTrailingPadding(CharSequence chars) { 274 return checkNotNull(chars); 275 } 276 277 // Modified encoding generators 278 279 /** 280 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 281 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 282 * section 3.2</a>, Padding of Encoded Data. 283 */ 284 public abstract BaseEncoding omitPadding(); 285 286 /** 287 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 288 * for padding. 289 * 290 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 291 * separator 292 */ 293 public abstract BaseEncoding withPadChar(char padChar); 294 295 /** 296 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 297 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 298 * are skipped over in decoding. 299 * 300 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 301 * string, or if {@code n <= 0} 302 * @throws UnsupportedOperationException if this encoding already uses a separator 303 */ 304 public abstract BaseEncoding withSeparator(String separator, int n); 305 306 /** 307 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 308 * uppercase letters. Padding and separator characters remain in their original case. 309 * 310 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 311 * lower-case characters 312 */ 313 public abstract BaseEncoding upperCase(); 314 315 /** 316 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 317 * lowercase letters. Padding and separator characters remain in their original case. 318 * 319 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 320 * lower-case characters 321 */ 322 public abstract BaseEncoding lowerCase(); 323 324 /** 325 * Returns an encoding that behaves equivalently to this encoding, but decodes letters without 326 * regard to case. 327 * 328 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 329 * lower-case characters 330 * @since 32.0.0 331 */ 332 public abstract BaseEncoding ignoreCase(); 333 334 private static final BaseEncoding BASE64 = 335 new Base64Encoding( 336 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 337 338 /** 339 * The "base64" base encoding specified by <a 340 * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding. 341 * (This is the same as the base 64 encoding from <a 342 * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 343 * 344 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 345 * omitted} or {@linkplain #withPadChar(char) replaced}. 346 * 347 * <p>No line feeds are added by default, as per <a 348 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 349 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 350 */ 351 public static BaseEncoding base64() { 352 return BASE64; 353 } 354 355 private static final BaseEncoding BASE64_URL = 356 new Base64Encoding( 357 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 358 359 /** 360 * The "base64url" encoding specified by <a 361 * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 362 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This 363 * is the same as the base 64 encoding with URL and filename safe alphabet from <a 364 * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 365 * 366 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 367 * omitted} or {@linkplain #withPadChar(char) replaced}. 368 * 369 * <p>No line feeds are added by default, as per <a 370 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 371 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 372 */ 373 public static BaseEncoding base64Url() { 374 return BASE64_URL; 375 } 376 377 private static final BaseEncoding BASE32 = 378 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 379 380 /** 381 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC 382 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a 383 * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 384 * 385 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 386 * omitted} or {@linkplain #withPadChar(char) replaced}. 387 * 388 * <p>No line feeds are added by default, as per <a 389 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 390 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 391 */ 392 public static BaseEncoding base32() { 393 return BASE32; 394 } 395 396 private static final BaseEncoding BASE32_HEX = 397 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 398 399 /** 400 * The "base32hex" encoding specified by <a 401 * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 402 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 403 * 404 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 405 * omitted} or {@linkplain #withPadChar(char) replaced}. 406 * 407 * <p>No line feeds are added by default, as per <a 408 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 409 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 410 */ 411 public static BaseEncoding base32Hex() { 412 return BASE32_HEX; 413 } 414 415 private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF"); 416 417 /** 418 * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC 419 * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a 420 * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 421 * "hexadecimal" format. 422 * 423 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()} 424 * have no effect. 425 * 426 * <p>No line feeds are added by default, as per <a 427 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 428 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 429 */ 430 public static BaseEncoding base16() { 431 return BASE16; 432 } 433 434 static final class Alphabet { 435 private final String name; 436 // this is meant to be immutable -- don't modify it! 437 private final char[] chars; 438 final int mask; 439 final int bitsPerChar; 440 final int charsPerChunk; 441 final int bytesPerChunk; 442 private final byte[] decodabet; 443 private final boolean[] validPadding; 444 private final boolean ignoreCase; 445 446 Alphabet(String name, char[] chars) { 447 this(name, chars, decodabetFor(chars), /* ignoreCase= */ false); 448 } 449 450 private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) { 451 this.name = checkNotNull(name); 452 this.chars = checkNotNull(chars); 453 try { 454 this.bitsPerChar = log2(chars.length, UNNECESSARY); 455 } catch (ArithmeticException e) { 456 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 457 } 458 459 // Compute how input bytes are chunked. For example, with base64 we chunk every 3 bytes into 460 // 4 characters. We have bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. 461 // We're looking for the smallest charsPerChunk such that bitsPerChar * charsPerChunk is a 462 // multiple of 8. A multiple of 8 has 3 low zero bits, so we just need to figure out how many 463 // extra zero bits we need to add to the end of bitsPerChar to get 3 in total. 464 // The logic here would be wrong for bitsPerChar > 8, but since we require distinct ASCII 465 // characters that can't happen. 466 int zeroesInBitsPerChar = Integer.numberOfTrailingZeros(bitsPerChar); 467 this.charsPerChunk = 1 << (3 - zeroesInBitsPerChar); 468 this.bytesPerChunk = bitsPerChar >> zeroesInBitsPerChar; 469 470 this.mask = chars.length - 1; 471 472 this.decodabet = decodabet; 473 474 boolean[] validPadding = new boolean[charsPerChunk]; 475 for (int i = 0; i < bytesPerChunk; i++) { 476 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 477 } 478 this.validPadding = validPadding; 479 this.ignoreCase = ignoreCase; 480 } 481 482 private static byte[] decodabetFor(char[] chars) { 483 byte[] decodabet = new byte[Ascii.MAX + 1]; 484 Arrays.fill(decodabet, (byte) -1); 485 for (int i = 0; i < chars.length; i++) { 486 char c = chars[i]; 487 checkArgument(c < decodabet.length, "Non-ASCII character: %s", c); 488 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 489 decodabet[c] = (byte) i; 490 } 491 return decodabet; 492 } 493 494 /** Returns an equivalent {@code Alphabet} except it ignores case. */ 495 Alphabet ignoreCase() { 496 if (ignoreCase) { 497 return this; 498 } 499 500 // We can't use .clone() because of GWT. 501 byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length); 502 for (int upper = 'A'; upper <= 'Z'; upper++) { 503 int lower = upper | 0x20; 504 byte decodeUpper = decodabet[upper]; 505 byte decodeLower = decodabet[lower]; 506 if (decodeUpper == -1) { 507 newDecodabet[upper] = decodeLower; 508 } else { 509 checkState( 510 decodeLower == -1, 511 "Can't ignoreCase() since '%s' and '%s' encode different values", 512 (char) upper, 513 (char) lower); 514 newDecodabet[lower] = decodeUpper; 515 } 516 } 517 return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true); 518 } 519 520 char encode(int bits) { 521 return chars[bits]; 522 } 523 524 boolean isValidPaddingStartPosition(int index) { 525 return validPadding[index % charsPerChunk]; 526 } 527 528 boolean canDecode(char ch) { 529 return ch <= Ascii.MAX && decodabet[ch] != -1; 530 } 531 532 int decode(char ch) throws DecodingException { 533 if (ch > Ascii.MAX) { 534 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 535 } 536 int result = decodabet[ch]; 537 if (result == -1) { 538 if (ch <= 0x20 || ch == Ascii.MAX) { 539 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 540 } else { 541 throw new DecodingException("Unrecognized character: " + ch); 542 } 543 } 544 return result; 545 } 546 547 private boolean hasLowerCase() { 548 for (char c : chars) { 549 if (Ascii.isLowerCase(c)) { 550 return true; 551 } 552 } 553 return false; 554 } 555 556 private boolean hasUpperCase() { 557 for (char c : chars) { 558 if (Ascii.isUpperCase(c)) { 559 return true; 560 } 561 } 562 return false; 563 } 564 565 Alphabet upperCase() { 566 if (!hasLowerCase()) { 567 return this; 568 } 569 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 570 char[] upperCased = new char[chars.length]; 571 for (int i = 0; i < chars.length; i++) { 572 upperCased[i] = Ascii.toUpperCase(chars[i]); 573 } 574 Alphabet upperCase = new Alphabet(name + ".upperCase()", upperCased); 575 return ignoreCase ? upperCase.ignoreCase() : upperCase; 576 } 577 578 Alphabet lowerCase() { 579 if (!hasUpperCase()) { 580 return this; 581 } 582 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 583 char[] lowerCased = new char[chars.length]; 584 for (int i = 0; i < chars.length; i++) { 585 lowerCased[i] = Ascii.toLowerCase(chars[i]); 586 } 587 Alphabet lowerCase = new Alphabet(name + ".lowerCase()", lowerCased); 588 return ignoreCase ? lowerCase.ignoreCase() : lowerCase; 589 } 590 591 public boolean matches(char c) { 592 return c < decodabet.length && decodabet[c] != -1; 593 } 594 595 @Override 596 public String toString() { 597 return name; 598 } 599 600 @Override 601 public boolean equals(@CheckForNull Object other) { 602 if (other instanceof Alphabet) { 603 Alphabet that = (Alphabet) other; 604 return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars); 605 } 606 return false; 607 } 608 609 @Override 610 public int hashCode() { 611 return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237); 612 } 613 } 614 615 private static class StandardBaseEncoding extends BaseEncoding { 616 final Alphabet alphabet; 617 618 @CheckForNull final Character paddingChar; 619 620 StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) { 621 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 622 } 623 624 StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) { 625 this.alphabet = checkNotNull(alphabet); 626 checkArgument( 627 paddingChar == null || !alphabet.matches(paddingChar), 628 "Padding character %s was already in alphabet", 629 paddingChar); 630 this.paddingChar = paddingChar; 631 } 632 633 @Override 634 int maxEncodedSize(int bytes) { 635 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 636 } 637 638 @J2ktIncompatible 639 @GwtIncompatible // Writer,OutputStream 640 @Override 641 public OutputStream encodingStream(Writer out) { 642 checkNotNull(out); 643 return new OutputStream() { 644 int bitBuffer = 0; 645 int bitBufferLength = 0; 646 int writtenChars = 0; 647 648 @Override 649 public void write(int b) throws IOException { 650 bitBuffer <<= 8; 651 bitBuffer |= b & 0xFF; 652 bitBufferLength += 8; 653 while (bitBufferLength >= alphabet.bitsPerChar) { 654 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 655 out.write(alphabet.encode(charIndex)); 656 writtenChars++; 657 bitBufferLength -= alphabet.bitsPerChar; 658 } 659 } 660 661 @Override 662 public void flush() throws IOException { 663 out.flush(); 664 } 665 666 @Override 667 public void close() throws IOException { 668 if (bitBufferLength > 0) { 669 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 670 out.write(alphabet.encode(charIndex)); 671 writtenChars++; 672 if (paddingChar != null) { 673 while (writtenChars % alphabet.charsPerChunk != 0) { 674 out.write(paddingChar.charValue()); 675 writtenChars++; 676 } 677 } 678 } 679 out.close(); 680 } 681 }; 682 } 683 684 @Override 685 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 686 checkNotNull(target); 687 checkPositionIndexes(off, off + len, bytes.length); 688 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 689 encodeChunkTo(target, bytes, off + i, min(alphabet.bytesPerChunk, len - i)); 690 } 691 } 692 693 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 694 checkNotNull(target); 695 checkPositionIndexes(off, off + len, bytes.length); 696 checkArgument(len <= alphabet.bytesPerChunk); 697 long bitBuffer = 0; 698 for (int i = 0; i < len; ++i) { 699 bitBuffer |= bytes[off + i] & 0xFF; 700 bitBuffer <<= 8; // Add additional zero byte in the end. 701 } 702 // Position of first character is length of bitBuffer minus bitsPerChar. 703 int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 704 int bitsProcessed = 0; 705 while (bitsProcessed < len * 8) { 706 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 707 target.append(alphabet.encode(charIndex)); 708 bitsProcessed += alphabet.bitsPerChar; 709 } 710 if (paddingChar != null) { 711 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 712 target.append(paddingChar.charValue()); 713 bitsProcessed += alphabet.bitsPerChar; 714 } 715 } 716 } 717 718 @Override 719 int maxDecodedSize(int chars) { 720 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 721 } 722 723 @Override 724 CharSequence trimTrailingPadding(CharSequence chars) { 725 checkNotNull(chars); 726 if (paddingChar == null) { 727 return chars; 728 } 729 char padChar = paddingChar.charValue(); 730 int l; 731 for (l = chars.length() - 1; l >= 0; l--) { 732 if (chars.charAt(l) != padChar) { 733 break; 734 } 735 } 736 return chars.subSequence(0, l + 1); 737 } 738 739 @Override 740 public boolean canDecode(CharSequence chars) { 741 checkNotNull(chars); 742 chars = trimTrailingPadding(chars); 743 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 744 return false; 745 } 746 for (int i = 0; i < chars.length(); i++) { 747 if (!alphabet.canDecode(chars.charAt(i))) { 748 return false; 749 } 750 } 751 return true; 752 } 753 754 @Override 755 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 756 checkNotNull(target); 757 chars = trimTrailingPadding(chars); 758 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 759 throw new DecodingException("Invalid input length " + chars.length()); 760 } 761 int bytesWritten = 0; 762 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 763 long chunk = 0; 764 int charsProcessed = 0; 765 for (int i = 0; i < alphabet.charsPerChunk; i++) { 766 chunk <<= alphabet.bitsPerChar; 767 if (charIdx + i < chars.length()) { 768 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 769 } 770 } 771 int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 772 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 773 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 774 } 775 } 776 return bytesWritten; 777 } 778 779 @Override 780 @J2ktIncompatible 781 @GwtIncompatible // Reader,InputStream 782 public InputStream decodingStream(Reader reader) { 783 checkNotNull(reader); 784 return new InputStream() { 785 int bitBuffer = 0; 786 int bitBufferLength = 0; 787 int readChars = 0; 788 boolean hitPadding = false; 789 790 @Override 791 public int read() throws IOException { 792 while (true) { 793 int readChar = reader.read(); 794 if (readChar == -1) { 795 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 796 throw new DecodingException("Invalid input length " + readChars); 797 } 798 return -1; 799 } 800 readChars++; 801 char ch = (char) readChar; 802 if (paddingChar != null && paddingChar.charValue() == ch) { 803 if (!hitPadding 804 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 805 throw new DecodingException("Padding cannot start at index " + readChars); 806 } 807 hitPadding = true; 808 } else if (hitPadding) { 809 throw new DecodingException( 810 "Expected padding character but found '" + ch + "' at index " + readChars); 811 } else { 812 bitBuffer <<= alphabet.bitsPerChar; 813 bitBuffer |= alphabet.decode(ch); 814 bitBufferLength += alphabet.bitsPerChar; 815 816 if (bitBufferLength >= 8) { 817 bitBufferLength -= 8; 818 return (bitBuffer >> bitBufferLength) & 0xFF; 819 } 820 } 821 } 822 } 823 824 @Override 825 public int read(byte[] buf, int off, int len) throws IOException { 826 // Overriding this to work around the fact that InputStream's default implementation of 827 // this method will silently swallow exceptions thrown by the single-byte read() method 828 // (other than on the first call to it), which in this case can cause invalid encoded 829 // strings to not throw an exception. 830 // See https://github.com/google/guava/issues/3542 831 checkPositionIndexes(off, off + len, buf.length); 832 833 int i = off; 834 for (; i < off + len; i++) { 835 int b = read(); 836 if (b == -1) { 837 int read = i - off; 838 return read == 0 ? -1 : read; 839 } 840 buf[i] = (byte) b; 841 } 842 return i - off; 843 } 844 845 @Override 846 public void close() throws IOException { 847 reader.close(); 848 } 849 }; 850 } 851 852 @Override 853 public BaseEncoding omitPadding() { 854 return (paddingChar == null) ? this : newInstance(alphabet, null); 855 } 856 857 @Override 858 public BaseEncoding withPadChar(char padChar) { 859 if (8 % alphabet.bitsPerChar == 0 860 || (paddingChar != null && paddingChar.charValue() == padChar)) { 861 return this; 862 } else { 863 return newInstance(alphabet, padChar); 864 } 865 } 866 867 @Override 868 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 869 for (int i = 0; i < separator.length(); i++) { 870 checkArgument( 871 !alphabet.matches(separator.charAt(i)), 872 "Separator (%s) cannot contain alphabet characters", 873 separator); 874 } 875 if (paddingChar != null) { 876 checkArgument( 877 separator.indexOf(paddingChar.charValue()) < 0, 878 "Separator (%s) cannot contain padding character", 879 separator); 880 } 881 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 882 } 883 884 @LazyInit @CheckForNull private volatile BaseEncoding upperCase; 885 @LazyInit @CheckForNull private volatile BaseEncoding lowerCase; 886 @LazyInit @CheckForNull private volatile BaseEncoding ignoreCase; 887 888 @Override 889 public BaseEncoding upperCase() { 890 BaseEncoding result = upperCase; 891 if (result == null) { 892 Alphabet upper = alphabet.upperCase(); 893 result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar); 894 } 895 return result; 896 } 897 898 @Override 899 public BaseEncoding lowerCase() { 900 BaseEncoding result = lowerCase; 901 if (result == null) { 902 Alphabet lower = alphabet.lowerCase(); 903 result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar); 904 } 905 return result; 906 } 907 908 @Override 909 public BaseEncoding ignoreCase() { 910 BaseEncoding result = ignoreCase; 911 if (result == null) { 912 Alphabet ignore = alphabet.ignoreCase(); 913 result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar); 914 } 915 return result; 916 } 917 918 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 919 return new StandardBaseEncoding(alphabet, paddingChar); 920 } 921 922 @Override 923 public String toString() { 924 StringBuilder builder = new StringBuilder("BaseEncoding."); 925 builder.append(alphabet); 926 if (8 % alphabet.bitsPerChar != 0) { 927 if (paddingChar == null) { 928 builder.append(".omitPadding()"); 929 } else { 930 builder.append(".withPadChar('").append(paddingChar).append("')"); 931 } 932 } 933 return builder.toString(); 934 } 935 936 @Override 937 public boolean equals(@CheckForNull Object other) { 938 if (other instanceof StandardBaseEncoding) { 939 StandardBaseEncoding that = (StandardBaseEncoding) other; 940 return this.alphabet.equals(that.alphabet) 941 && Objects.equals(this.paddingChar, that.paddingChar); 942 } 943 return false; 944 } 945 946 @Override 947 public int hashCode() { 948 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 949 } 950 } 951 952 private static final class Base16Encoding extends StandardBaseEncoding { 953 final char[] encoding = new char[512]; 954 955 Base16Encoding(String name, String alphabetChars) { 956 this(new Alphabet(name, alphabetChars.toCharArray())); 957 } 958 959 private Base16Encoding(Alphabet alphabet) { 960 super(alphabet, null); 961 checkArgument(alphabet.chars.length == 16); 962 for (int i = 0; i < 256; ++i) { 963 encoding[i] = alphabet.encode(i >>> 4); 964 encoding[i | 0x100] = alphabet.encode(i & 0xF); 965 } 966 } 967 968 @Override 969 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 970 checkNotNull(target); 971 checkPositionIndexes(off, off + len, bytes.length); 972 for (int i = 0; i < len; ++i) { 973 int b = bytes[off + i] & 0xFF; 974 target.append(encoding[b]); 975 target.append(encoding[b | 0x100]); 976 } 977 } 978 979 @Override 980 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 981 checkNotNull(target); 982 if (chars.length() % 2 == 1) { 983 throw new DecodingException("Invalid input length " + chars.length()); 984 } 985 int bytesWritten = 0; 986 for (int i = 0; i < chars.length(); i += 2) { 987 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 988 target[bytesWritten++] = (byte) decoded; 989 } 990 return bytesWritten; 991 } 992 993 @Override 994 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 995 return new Base16Encoding(alphabet); 996 } 997 } 998 999 private static final class Base64Encoding extends StandardBaseEncoding { 1000 Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) { 1001 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 1002 } 1003 1004 private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) { 1005 super(alphabet, paddingChar); 1006 checkArgument(alphabet.chars.length == 64); 1007 } 1008 1009 @Override 1010 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1011 checkNotNull(target); 1012 checkPositionIndexes(off, off + len, bytes.length); 1013 int i = off; 1014 for (int remaining = len; remaining >= 3; remaining -= 3) { 1015 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 1016 target.append(alphabet.encode(chunk >>> 18)); 1017 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 1018 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 1019 target.append(alphabet.encode(chunk & 0x3F)); 1020 } 1021 if (i < off + len) { 1022 encodeChunkTo(target, bytes, i, off + len - i); 1023 } 1024 } 1025 1026 @Override 1027 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1028 checkNotNull(target); 1029 chars = trimTrailingPadding(chars); 1030 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 1031 throw new DecodingException("Invalid input length " + chars.length()); 1032 } 1033 int bytesWritten = 0; 1034 for (int i = 0; i < chars.length(); ) { 1035 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 1036 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 1037 target[bytesWritten++] = (byte) (chunk >>> 16); 1038 if (i < chars.length()) { 1039 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 1040 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 1041 if (i < chars.length()) { 1042 chunk |= alphabet.decode(chars.charAt(i++)); 1043 target[bytesWritten++] = (byte) (chunk & 0xFF); 1044 } 1045 } 1046 } 1047 return bytesWritten; 1048 } 1049 1050 @Override 1051 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 1052 return new Base64Encoding(alphabet, paddingChar); 1053 } 1054 } 1055 1056 @J2ktIncompatible 1057 @GwtIncompatible 1058 static Reader ignoringReader(Reader delegate, String toIgnore) { 1059 checkNotNull(delegate); 1060 checkNotNull(toIgnore); 1061 return new Reader() { 1062 @Override 1063 public int read() throws IOException { 1064 int readChar; 1065 do { 1066 readChar = delegate.read(); 1067 } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0); 1068 return readChar; 1069 } 1070 1071 @Override 1072 public int read(char[] cbuf, int off, int len) throws IOException { 1073 throw new UnsupportedOperationException(); 1074 } 1075 1076 @Override 1077 public void close() throws IOException { 1078 delegate.close(); 1079 } 1080 }; 1081 } 1082 1083 static Appendable separatingAppendable( 1084 Appendable delegate, String separator, int afterEveryChars) { 1085 checkNotNull(delegate); 1086 checkNotNull(separator); 1087 checkArgument(afterEveryChars > 0); 1088 return new Appendable() { 1089 int charsUntilSeparator = afterEveryChars; 1090 1091 @Override 1092 public Appendable append(char c) throws IOException { 1093 if (charsUntilSeparator == 0) { 1094 delegate.append(separator); 1095 charsUntilSeparator = afterEveryChars; 1096 } 1097 delegate.append(c); 1098 charsUntilSeparator--; 1099 return this; 1100 } 1101 1102 @Override 1103 public Appendable append(@CheckForNull CharSequence chars, int off, int len) { 1104 throw new UnsupportedOperationException(); 1105 } 1106 1107 @Override 1108 public Appendable append(@CheckForNull CharSequence chars) { 1109 throw new UnsupportedOperationException(); 1110 } 1111 }; 1112 } 1113 1114 @J2ktIncompatible 1115 @GwtIncompatible // Writer 1116 static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) { 1117 Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars); 1118 return new Writer() { 1119 @Override 1120 public void write(int c) throws IOException { 1121 separatingAppendable.append((char) c); 1122 } 1123 1124 @Override 1125 public void write(char[] chars, int off, int len) throws IOException { 1126 throw new UnsupportedOperationException(); 1127 } 1128 1129 @Override 1130 public void flush() throws IOException { 1131 delegate.flush(); 1132 } 1133 1134 @Override 1135 public void close() throws IOException { 1136 delegate.close(); 1137 } 1138 }; 1139 } 1140 1141 static final class SeparatedBaseEncoding extends BaseEncoding { 1142 private final BaseEncoding delegate; 1143 private final String separator; 1144 private final int afterEveryChars; 1145 1146 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1147 this.delegate = checkNotNull(delegate); 1148 this.separator = checkNotNull(separator); 1149 this.afterEveryChars = afterEveryChars; 1150 checkArgument( 1151 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1152 } 1153 1154 @Override 1155 CharSequence trimTrailingPadding(CharSequence chars) { 1156 return delegate.trimTrailingPadding(chars); 1157 } 1158 1159 @Override 1160 int maxEncodedSize(int bytes) { 1161 int unseparatedSize = delegate.maxEncodedSize(bytes); 1162 return unseparatedSize 1163 + separator.length() * divide(max(0, unseparatedSize - 1), afterEveryChars, FLOOR); 1164 } 1165 1166 @J2ktIncompatible 1167 @GwtIncompatible // Writer,OutputStream 1168 @Override 1169 public OutputStream encodingStream(Writer output) { 1170 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1171 } 1172 1173 @Override 1174 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1175 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1176 } 1177 1178 @Override 1179 int maxDecodedSize(int chars) { 1180 return delegate.maxDecodedSize(chars); 1181 } 1182 1183 @Override 1184 public boolean canDecode(CharSequence chars) { 1185 StringBuilder builder = new StringBuilder(); 1186 for (int i = 0; i < chars.length(); i++) { 1187 char c = chars.charAt(i); 1188 if (separator.indexOf(c) < 0) { 1189 builder.append(c); 1190 } 1191 } 1192 return delegate.canDecode(builder); 1193 } 1194 1195 @Override 1196 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1197 StringBuilder stripped = new StringBuilder(chars.length()); 1198 for (int i = 0; i < chars.length(); i++) { 1199 char c = chars.charAt(i); 1200 if (separator.indexOf(c) < 0) { 1201 stripped.append(c); 1202 } 1203 } 1204 return delegate.decodeTo(target, stripped); 1205 } 1206 1207 @Override 1208 @J2ktIncompatible 1209 @GwtIncompatible // Reader,InputStream 1210 public InputStream decodingStream(Reader reader) { 1211 return delegate.decodingStream(ignoringReader(reader, separator)); 1212 } 1213 1214 @Override 1215 public BaseEncoding omitPadding() { 1216 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1217 } 1218 1219 @Override 1220 public BaseEncoding withPadChar(char padChar) { 1221 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1222 } 1223 1224 @Override 1225 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1226 throw new UnsupportedOperationException("Already have a separator"); 1227 } 1228 1229 @Override 1230 public BaseEncoding upperCase() { 1231 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1232 } 1233 1234 @Override 1235 public BaseEncoding lowerCase() { 1236 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1237 } 1238 1239 @Override 1240 public BaseEncoding ignoreCase() { 1241 return delegate.ignoreCase().withSeparator(separator, afterEveryChars); 1242 } 1243 1244 @Override 1245 public String toString() { 1246 return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 1247 } 1248 } 1249}