001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.math.RoundingMode.CEILING; 024import static java.math.RoundingMode.FLOOR; 025import static java.math.RoundingMode.UNNECESSARY; 026 027import com.google.common.annotations.GwtCompatible; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.annotations.J2ktIncompatible; 030import com.google.common.base.Ascii; 031import com.google.errorprone.annotations.concurrent.LazyInit; 032import java.io.IOException; 033import java.io.InputStream; 034import java.io.OutputStream; 035import java.io.Reader; 036import java.io.Writer; 037import java.util.Arrays; 038import java.util.Objects; 039import javax.annotation.CheckForNull; 040 041/** 042 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 043 * strings. This class includes several constants for encoding schemes specified by <a 044 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. 
For example, the expression: 045 * 046 * <pre>{@code 047 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII)) 048 * }</pre> 049 * 050 * <p>returns the string {@code "MZXW6==="}, and 051 * 052 * <pre>{@code 053 * byte[] decoded = BaseEncoding.base32().decode("MZXW6==="); 054 * }</pre> 055 * 056 * <p>...returns the ASCII bytes of the string {@code "foo"}. 057 * 058 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 059 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 060 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 061 * behavior: 062 * 063 * <pre>{@code 064 * BaseEncoding.base16().lowerCase().decode("deadbeef"); 065 * }</pre> 066 * 067 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 068 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 069 * 070 * <pre>{@code 071 * // Do NOT do this 072 * BaseEncoding hex = BaseEncoding.base16(); 073 * hex.lowerCase(); // does nothing! 074 * return hex.decode("deadbeef"); // throws an IllegalArgumentException 075 * }</pre> 076 * 077 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code 078 * x}, but the reverse does not necessarily hold. 079 * 080 * <table> 081 * <caption>Encodings</caption> 082 * <tr> 083 * <th>Encoding 084 * <th>Alphabet 085 * <th>{@code char:byte} ratio 086 * <th>Default padding 087 * <th>Comments 088 * <tr> 089 * <td>{@link #base16()} 090 * <td>0-9 A-F 091 * <td>2.00 092 * <td>N/A 093 * <td>Traditional hexadecimal. Defaults to upper case. 094 * <tr> 095 * <td>{@link #base32()} 096 * <td>A-Z 2-7 097 * <td>1.60 098 * <td>= 099 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 
100 * <tr> 101 * <td>{@link #base32Hex()} 102 * <td>0-9 A-V 103 * <td>1.60 104 * <td>= 105 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 106 * <tr> 107 * <td>{@link #base64()} 108 * <td>A-Z a-z 0-9 + / 109 * <td>1.33 110 * <td>= 111 * <td> 112 * <tr> 113 * <td>{@link #base64Url()} 114 * <td>A-Z a-z 0-9 - _ 115 * <td>1.33 116 * <td>= 117 * <td>Safe to use as filenames, or to pass in URLs without escaping 118 * </table> 119 * 120 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 121 * 122 * @author Louis Wasserman 123 * @since 14.0 124 */ 125@GwtCompatible(emulated = true) 126@ElementTypesAreNonnullByDefault 127public abstract class BaseEncoding { 128 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 129 130 BaseEncoding() {} 131 132 /** 133 * Exception indicating invalid base-encoded input encountered while decoding. 134 * 135 * @author Louis Wasserman 136 * @since 15.0 137 */ 138 public static final class DecodingException extends IOException { 139 DecodingException(String message) { 140 super(message); 141 } 142 143 DecodingException(Throwable cause) { 144 super(cause); 145 } 146 } 147 148 /** Encodes the specified byte array, and returns the encoded {@code String}. */ 149 public String encode(byte[] bytes) { 150 return encode(bytes, 0, bytes.length); 151 } 152 153 /** 154 * Encodes the specified range of the specified byte array, and returns the encoded {@code 155 * String}. 
156 */ 157 public final String encode(byte[] bytes, int off, int len) { 158 checkPositionIndexes(off, off + len, bytes.length); 159 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 160 try { 161 encodeTo(result, bytes, off, len); 162 } catch (IOException impossible) { 163 throw new AssertionError(impossible); 164 } 165 return result.toString(); 166 } 167 168 /** 169 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 170 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code 171 * Writer}. 172 */ 173 @J2ktIncompatible 174 @GwtIncompatible // Writer,OutputStream 175 public abstract OutputStream encodingStream(Writer writer); 176 177 /** 178 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 179 */ 180 @J2ktIncompatible 181 @GwtIncompatible // ByteSink,CharSink 182 public final ByteSink encodingSink(CharSink encodedSink) { 183 checkNotNull(encodedSink); 184 return new ByteSink() { 185 @Override 186 public OutputStream openStream() throws IOException { 187 return encodingStream(encodedSink.openStream()); 188 } 189 }; 190 } 191 192 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 193 194 private static byte[] extract(byte[] result, int length) { 195 if (length == result.length) { 196 return result; 197 } 198 byte[] trunc = new byte[length]; 199 System.arraycopy(result, 0, trunc, 0, length); 200 return trunc; 201 } 202 203 /** 204 * Determines whether the specified character sequence is a valid encoded string according to this 205 * encoding. 206 * 207 * @since 20.0 208 */ 209 public abstract boolean canDecode(CharSequence chars); 210 211 /** 212 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 213 * inverse operation to {@link #encode(byte[])}. 
214 * 215 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 216 * encoding. 217 */ 218 public final byte[] decode(CharSequence chars) { 219 try { 220 return decodeChecked(chars); 221 } catch (DecodingException badInput) { 222 throw new IllegalArgumentException(badInput); 223 } 224 } 225 226 /** 227 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 228 * inverse operation to {@link #encode(byte[])}. 229 * 230 * @throws DecodingException if the input is not a valid encoded string according to this 231 * encoding. 232 */ 233 final byte[] decodeChecked(CharSequence chars) 234 throws DecodingException { 235 chars = trimTrailingPadding(chars); 236 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 237 int len = decodeTo(tmp, chars); 238 return extract(tmp, len); 239 } 240 241 /** 242 * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code 243 * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors. 244 */ 245 @J2ktIncompatible 246 @GwtIncompatible // Reader,InputStream 247 public abstract InputStream decodingStream(Reader reader); 248 249 /** 250 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code 251 * CharSource}. 
252 */ 253 @J2ktIncompatible 254 @GwtIncompatible // ByteSource,CharSource 255 public final ByteSource decodingSource(CharSource encodedSource) { 256 checkNotNull(encodedSource); 257 return new ByteSource() { 258 @Override 259 public InputStream openStream() throws IOException { 260 return decodingStream(encodedSource.openStream()); 261 } 262 }; 263 } 264 265 // Implementations for encoding/decoding 266 267 abstract int maxEncodedSize(int bytes); 268 269 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 270 271 abstract int maxDecodedSize(int chars); 272 273 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 274 275 CharSequence trimTrailingPadding(CharSequence chars) { 276 return checkNotNull(chars); 277 } 278 279 // Modified encoding generators 280 281 /** 282 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 283 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 284 * section 3.2</a>, Padding of Encoded Data. 285 */ 286 public abstract BaseEncoding omitPadding(); 287 288 /** 289 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 290 * for padding. 291 * 292 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 293 * separator 294 */ 295 public abstract BaseEncoding withPadChar(char padChar); 296 297 /** 298 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 299 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 300 * are skipped over in decoding. 
/**
 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
 * after every {@code n} characters. Any occurrences of any characters that occur in the separator
 * are skipped over in decoding.
 *
 * @param separator the string inserted between groups of encoded characters
 * @param n the number of encoded characters between consecutive separators
 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
 *     string, or if {@code n <= 0}
 * @throws UnsupportedOperationException if this encoding already uses a separator
 */
public abstract BaseEncoding withSeparator(String separator, int n);

/**
 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
 * uppercase letters. Padding and separator characters remain in their original case.
 *
 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
 *     lower-case characters
 */
public abstract BaseEncoding upperCase();

/**
 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
 * lowercase letters. Padding and separator characters remain in their original case.
 *
 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
 *     lower-case characters
 */
public abstract BaseEncoding lowerCase();

/**
 * Returns an encoding that behaves equivalently to this encoding, but decodes letters without
 * regard to case.
 *
 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
 *     lower-case characters
 * @since 32.0.0
 */
public abstract BaseEncoding ignoreCase();

// Shared instance handed out by base64(): the '+' / '/' alphabet with '=' as the padding character.
private static final BaseEncoding BASE64 =
    new Base64Encoding(
        "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');
/**
 * The "base64" base encoding specified by <a
 * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
 * (This is the same as the base 64 encoding from <a
 * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per <a
 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 *
 * @return the shared {@code BASE64} instance; the same object on every call
 */
public static BaseEncoding base64() {
  return BASE64;
}

// Shared instance handed out by base64Url(): the '-' / '_' alphabet with '=' as the padding
// character.
private static final BaseEncoding BASE64_URL =
    new Base64Encoding(
        "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

/**
 * The "base64url" encoding specified by <a
 * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
 * is the same as the base 64 encoding with URL and filename safe alphabet from <a
 * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per <a
 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 *
 * @return the shared {@code BASE64_URL} instance; the same object on every call
 */
public static BaseEncoding base64Url() {
  return BASE64_URL;
}

// Shared instance handed out by base32(): letters A-Z followed by digits 2-7, '=' padding.
private static final BaseEncoding BASE32 =
    new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');
/**
 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
 * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per <a
 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 *
 * @return the shared {@code BASE32} instance; the same object on every call
 */
public static BaseEncoding base32() {
  return BASE32;
}

// Shared instance handed out by base32Hex(): digits 0-9 followed by letters A-V, '=' padding.
private static final BaseEncoding BASE32_HEX =
    new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

/**
 * The "base32hex" encoding specified by <a
 * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per <a
 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 *
 * @return the shared {@code BASE32_HEX} instance; the same object on every call
 */
public static BaseEncoding base32Hex() {
  return BASE32_HEX;
}

// Shared instance handed out by base16(): upper-case hexadecimal digits; constructed without a
// padding character argument.
private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");
427 * 428 * <p>No line feeds are added by default, as per <a 429 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 430 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 431 */ 432 public static BaseEncoding base16() { 433 return BASE16; 434 } 435 436 private static final class Alphabet { 437 private final String name; 438 // this is meant to be immutable -- don't modify it! 439 private final char[] chars; 440 final int mask; 441 final int bitsPerChar; 442 final int charsPerChunk; 443 final int bytesPerChunk; 444 private final byte[] decodabet; 445 private final boolean[] validPadding; 446 private final boolean ignoreCase; 447 448 Alphabet(String name, char[] chars) { 449 this(name, chars, decodabetFor(chars), /* ignoreCase= */ false); 450 } 451 452 private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) { 453 this.name = checkNotNull(name); 454 this.chars = checkNotNull(chars); 455 try { 456 this.bitsPerChar = log2(chars.length, UNNECESSARY); 457 } catch (ArithmeticException e) { 458 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 459 } 460 461 // Compute how input bytes are chunked. For example, with base64 we chunk every 3 bytes into 462 // 4 characters. We have bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. 463 // We're looking for the smallest charsPerChunk such that bitsPerChar * charsPerChunk is a 464 // multiple of 8. A multiple of 8 has 3 low zero bits, so we just need to figure out how many 465 // extra zero bits we need to add to the end of bitsPerChar to get 3 in total. 466 // The logic here would be wrong for bitsPerChar > 8, but since we require distinct ASCII 467 // characters that can't happen. 
468 int zeroesInBitsPerChar = Integer.numberOfTrailingZeros(bitsPerChar); 469 this.charsPerChunk = 1 << (3 - zeroesInBitsPerChar); 470 this.bytesPerChunk = bitsPerChar >> zeroesInBitsPerChar; 471 472 this.mask = chars.length - 1; 473 474 this.decodabet = decodabet; 475 476 boolean[] validPadding = new boolean[charsPerChunk]; 477 for (int i = 0; i < bytesPerChunk; i++) { 478 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 479 } 480 this.validPadding = validPadding; 481 this.ignoreCase = ignoreCase; 482 } 483 484 private static byte[] decodabetFor(char[] chars) { 485 byte[] decodabet = new byte[Ascii.MAX + 1]; 486 Arrays.fill(decodabet, (byte) -1); 487 for (int i = 0; i < chars.length; i++) { 488 char c = chars[i]; 489 checkArgument(c < decodabet.length, "Non-ASCII character: %s", c); 490 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 491 decodabet[c] = (byte) i; 492 } 493 return decodabet; 494 } 495 496 /** Returns an equivalent {@code Alphabet} except it ignores case. */ 497 Alphabet ignoreCase() { 498 if (ignoreCase) { 499 return this; 500 } 501 502 // We can't use .clone() because of GWT. 
503 byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length); 504 for (int upper = 'A'; upper <= 'Z'; upper++) { 505 int lower = upper | 0x20; 506 byte decodeUpper = decodabet[upper]; 507 byte decodeLower = decodabet[lower]; 508 if (decodeUpper == -1) { 509 newDecodabet[upper] = decodeLower; 510 } else { 511 checkState( 512 decodeLower == -1, 513 "Can't ignoreCase() since '%s' and '%s' encode different values", 514 (char) upper, 515 (char) lower); 516 newDecodabet[lower] = decodeUpper; 517 } 518 } 519 return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true); 520 } 521 522 char encode(int bits) { 523 return chars[bits]; 524 } 525 526 boolean isValidPaddingStartPosition(int index) { 527 return validPadding[index % charsPerChunk]; 528 } 529 530 boolean canDecode(char ch) { 531 return ch <= Ascii.MAX && decodabet[ch] != -1; 532 } 533 534 int decode(char ch) throws DecodingException { 535 if (ch > Ascii.MAX) { 536 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 537 } 538 int result = decodabet[ch]; 539 if (result == -1) { 540 if (ch <= 0x20 || ch == Ascii.MAX) { 541 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 542 } else { 543 throw new DecodingException("Unrecognized character: " + ch); 544 } 545 } 546 return result; 547 } 548 549 private boolean hasLowerCase() { 550 for (char c : chars) { 551 if (Ascii.isLowerCase(c)) { 552 return true; 553 } 554 } 555 return false; 556 } 557 558 private boolean hasUpperCase() { 559 for (char c : chars) { 560 if (Ascii.isUpperCase(c)) { 561 return true; 562 } 563 } 564 return false; 565 } 566 567 Alphabet upperCase() { 568 if (!hasLowerCase()) { 569 return this; 570 } 571 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 572 char[] upperCased = new char[chars.length]; 573 for (int i = 0; i < chars.length; i++) { 574 upperCased[i] = Ascii.toUpperCase(chars[i]); 575 } 576 Alphabet upperCase 
= new Alphabet(name + ".upperCase()", upperCased); 577 return ignoreCase ? upperCase.ignoreCase() : upperCase; 578 } 579 580 Alphabet lowerCase() { 581 if (!hasUpperCase()) { 582 return this; 583 } 584 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 585 char[] lowerCased = new char[chars.length]; 586 for (int i = 0; i < chars.length; i++) { 587 lowerCased[i] = Ascii.toLowerCase(chars[i]); 588 } 589 Alphabet lowerCase = new Alphabet(name + ".lowerCase()", lowerCased); 590 return ignoreCase ? lowerCase.ignoreCase() : lowerCase; 591 } 592 593 public boolean matches(char c) { 594 return c < decodabet.length && decodabet[c] != -1; 595 } 596 597 @Override 598 public String toString() { 599 return name; 600 } 601 602 @Override 603 public boolean equals(@CheckForNull Object other) { 604 if (other instanceof Alphabet) { 605 Alphabet that = (Alphabet) other; 606 return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars); 607 } 608 return false; 609 } 610 611 @Override 612 public int hashCode() { 613 return Arrays.hashCode(chars) + (ignoreCase ? 
/**
 * A {@code BaseEncoding} driven by an {@link Alphabet} and an optional padding character. It
 * supplies chunk-based encoding and decoding, the streaming variants, and the
 * configuration methods that derive new encodings through {@link #newInstance}.
 */
static class StandardBaseEncoding extends BaseEncoding {
  final Alphabet alphabet;

  // null means that no padding is written when encoding.
  @CheckForNull final Character paddingChar;

  /** Creates an encoding whose alphabet is built from {@code alphabetChars}. */
  StandardBaseEncoding(String name, String alphabetChars, @CheckForNull Character paddingChar) {
    this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
  }

  /**
   * Creates an encoding over an existing alphabet.
   *
   * @throws IllegalArgumentException if {@code paddingChar} is itself an alphabet character
   */
  StandardBaseEncoding(Alphabet alphabet, @CheckForNull Character paddingChar) {
    this.alphabet = checkNotNull(alphabet);
    checkArgument(
        paddingChar == null || !alphabet.matches(paddingChar),
        "Padding character %s was already in alphabet",
        paddingChar);
    this.paddingChar = paddingChar;
  }

  /** Returns the character count of the whole chunks needed to hold {@code bytes} bytes. */
  @Override
  int maxEncodedSize(int bytes) {
    return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
  }

  /**
   * Returns a stream that encodes each written byte into {@code out}. Closing it flushes the
   * remaining buffered bits (plus padding, if configured) and then closes {@code out}.
   */
  @J2ktIncompatible
  @GwtIncompatible // Writer,OutputStream
  @Override
  public OutputStream encodingStream(Writer out) {
    checkNotNull(out);
    return new OutputStream() {
      // Bits received but not yet emitted live in the low bitBufferLength bits of bitBuffer.
      int bitBuffer = 0;
      int bitBufferLength = 0;
      // Count of characters written so far; used in close() to pad up to a chunk boundary.
      int writtenChars = 0;

      @Override
      public void write(int b) throws IOException {
        bitBuffer <<= 8;
        bitBuffer |= b & 0xFF;
        bitBufferLength += 8;
        // Emit one character for every complete group of bitsPerChar buffered bits.
        while (bitBufferLength >= alphabet.bitsPerChar) {
          int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask;
          out.write(alphabet.encode(charIndex));
          writtenChars++;
          bitBufferLength -= alphabet.bitsPerChar;
        }
      }

      @Override
      public void flush() throws IOException {
        out.flush();
      }

      @Override
      public void close() throws IOException {
        if (bitBufferLength > 0) {
          // Left-align the leftover bits within one character, filling the low bits with zeros.
          int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask;
          out.write(alphabet.encode(charIndex));
          writtenChars++;
          if (paddingChar != null) {
            while (writtenChars % alphabet.charsPerChunk != 0) {
              out.write(paddingChar.charValue());
              writtenChars++;
            }
          }
        }
        out.close();
      }
    };
  }

  /** Encodes the given range by splitting it into pieces of at most {@code bytesPerChunk}. */
  @Override
  void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
    checkNotNull(target);
    checkPositionIndexes(off, off + len, bytes.length);
    for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
      encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
    }
  }

  /**
   * Encodes a single chunk of at most {@code bytesPerChunk} bytes, appending padding when a
   * padding character is configured and the chunk is short.
   *
   * @throws IllegalArgumentException if {@code len} exceeds {@code bytesPerChunk}
   */
  void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
    checkNotNull(target);
    checkPositionIndexes(off, off + len, bytes.length);
    checkArgument(len <= alphabet.bytesPerChunk);
    long bitBuffer = 0;
    for (int i = 0; i < len; ++i) {
      bitBuffer |= bytes[off + i] & 0xFF;
      bitBuffer <<= 8; // Add additional zero byte in the end.
    }
    // Position of first character is length of bitBuffer minus bitsPerChar.
    int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
    int bitsProcessed = 0;
    while (bitsProcessed < len * 8) {
      int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
      target.append(alphabet.encode(charIndex));
      bitsProcessed += alphabet.bitsPerChar;
    }
    if (paddingChar != null) {
      // Fill the rest of the chunk with padding characters.
      while (bitsProcessed < alphabet.bytesPerChunk * 8) {
        target.append(paddingChar.charValue());
        bitsProcessed += alphabet.bitsPerChar;
      }
    }
  }

  /** Returns {@code ceil(bitsPerChar * chars / 8)}, computed in {@code long} arithmetic. */
  @Override
  int maxDecodedSize(int chars) {
    return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
  }

  /** Returns {@code chars} without its trailing run of padding characters, if padding is set. */
  @Override
  CharSequence trimTrailingPadding(CharSequence chars) {
    checkNotNull(chars);
    if (paddingChar == null) {
      return chars;
    }
    char padChar = paddingChar.charValue();
    int l;
    // Scan backwards to the last character that is not padding (l == -1 if there is none).
    for (l = chars.length() - 1; l >= 0; l--) {
      if (chars.charAt(l) != padChar) {
        break;
      }
    }
    return chars.subSequence(0, l + 1);
  }

  /**
   * Returns whether {@code chars}, after trailing padding is removed, has an acceptable length
   * and consists only of characters the alphabet can decode.
   */
  @Override
  public boolean canDecode(CharSequence chars) {
    checkNotNull(chars);
    chars = trimTrailingPadding(chars);
    if (!alphabet.isValidPaddingStartPosition(chars.length())) {
      return false;
    }
    for (int i = 0; i < chars.length(); i++) {
      if (!alphabet.canDecode(chars.charAt(i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Decodes {@code chars} chunk by chunk into {@code target}.
   *
   * @return the number of bytes written to {@code target}
   * @throws DecodingException if the unpadded length is not acceptable or a character is not in
   *     the alphabet
   */
  @Override
  int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
    checkNotNull(target);
    chars = trimTrailingPadding(chars);
    if (!alphabet.isValidPaddingStartPosition(chars.length())) {
      throw new DecodingException("Invalid input length " + chars.length());
    }
    int bytesWritten = 0;
    for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
      long chunk = 0;
      int charsProcessed = 0;
      // Always shift for a full chunk; positions past the end of the input contribute zero bits.
      for (int i = 0; i < alphabet.charsPerChunk; i++) {
        chunk <<= alphabet.bitsPerChar;
        if (charIdx + i < chars.length()) {
          chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
        }
      }
      // Only emit the bytes that are fully covered by the characters actually read.
      int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
      for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
        target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
      }
    }
    return bytesWritten;
  }

  /**
   * Returns a stream that decodes characters pulled one at a time from {@code reader}. Closing it
   * closes {@code reader}.
   */
  @Override
  @J2ktIncompatible
  @GwtIncompatible // Reader,InputStream
  public InputStream decodingStream(Reader reader) {
    checkNotNull(reader);
    return new InputStream() {
      // Decoded bits not yet returned live in the low bitBufferLength bits of bitBuffer.
      int bitBuffer = 0;
      int bitBufferLength = 0;
      // Number of characters consumed from the reader so far, padding included.
      int readChars = 0;
      // Set once the first padding character is seen; only padding may follow.
      boolean hitPadding = false;

      @Override
      public int read() throws IOException {
        // Keep consuming characters until a whole byte is available or the input ends.
        while (true) {
          int readChar = reader.read();
          if (readChar == -1) {
            if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
              throw new DecodingException("Invalid input length " + readChars);
            }
            return -1;
          }
          readChars++;
          char ch = (char) readChar;
          if (paddingChar != null && paddingChar.charValue() == ch) {
            if (!hitPadding
                && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
              throw new DecodingException("Padding cannot start at index " + readChars);
            }
            hitPadding = true;
          } else if (hitPadding) {
            throw new DecodingException(
                "Expected padding character but found '" + ch + "' at index " + readChars);
          } else {
            bitBuffer <<= alphabet.bitsPerChar;
            bitBuffer |= alphabet.decode(ch);
            bitBufferLength += alphabet.bitsPerChar;

            if (bitBufferLength >= 8) {
              bitBufferLength -= 8;
              return (bitBuffer >> bitBufferLength) & 0xFF;
            }
          }
        }
      }

      @Override
      public int read(byte[] buf, int off, int len) throws IOException {
        // Overriding this to work around the fact that InputStream's default implementation of
        // this method will silently swallow exceptions thrown by the single-byte read() method
        // (other than on the first call to it), which in this case can cause invalid encoded
        // strings to not throw an exception.
        // See https://github.com/google/guava/issues/3542
        checkPositionIndexes(off, off + len, buf.length);

        int i = off;
        for (; i < off + len; i++) {
          int b = read();
          if (b == -1) {
            int read = i - off;
            return read == 0 ? -1 : read;
          }
          buf[i] = (byte) b;
        }
        return i - off;
      }

      @Override
      public void close() throws IOException {
        reader.close();
      }
    };
  }

  /** Returns {@code this} when no padding is configured, otherwise a padding-free variant. */
  @Override
  public BaseEncoding omitPadding() {
    return (paddingChar == null) ? this : newInstance(alphabet, null);
  }

  /**
   * Returns {@code this} when {@code bitsPerChar} divides 8 or when {@code padChar} is already
   * the padding character; otherwise a variant using {@code padChar}.
   */
  @Override
  public BaseEncoding withPadChar(char padChar) {
    if (8 % alphabet.bitsPerChar == 0
        || (paddingChar != null && paddingChar.charValue() == padChar)) {
      return this;
    } else {
      return newInstance(alphabet, padChar);
    }
  }

  /**
   * Wraps this encoding in a {@code SeparatedBaseEncoding}.
   *
   * @throws IllegalArgumentException if the separator contains an alphabet character or the
   *     padding character
   */
  @Override
  public BaseEncoding withSeparator(String separator, int afterEveryChars) {
    for (int i = 0; i < separator.length(); i++) {
      checkArgument(
          !alphabet.matches(separator.charAt(i)),
          "Separator (%s) cannot contain alphabet characters",
          separator);
    }
    if (paddingChar != null) {
      checkArgument(
          separator.indexOf(paddingChar.charValue()) < 0,
          "Separator (%s) cannot contain padding character",
          separator);
    }
    return new SeparatedBaseEncoding(this, separator, afterEveryChars);
  }

  // Caches for the derived encodings below; each is filled in by the first call to the
  // corresponding method and returned by later calls.
  @LazyInit @CheckForNull private volatile BaseEncoding upperCase;
  @LazyInit @CheckForNull private volatile BaseEncoding lowerCase;
  @LazyInit @CheckForNull private volatile BaseEncoding ignoreCase;

  @Override
  public BaseEncoding upperCase() {
    BaseEncoding result = upperCase;
    if (result == null) {
      Alphabet upper = alphabet.upperCase();
      // Reuse this instance when the alphabet came back unchanged.
      result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar);
    }
    return result;
  }

  @Override
  public BaseEncoding lowerCase() {
    BaseEncoding result = lowerCase;
    if (result == null) {
      Alphabet lower = alphabet.lowerCase();
      // Reuse this instance when the alphabet came back unchanged.
      result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar);
    }
    return result;
  }

  @Override
  public BaseEncoding ignoreCase() {
    BaseEncoding result = ignoreCase;
    if (result == null) {
      Alphabet ignore = alphabet.ignoreCase();
      // Reuse this instance when the alphabet came back unchanged.
      result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar);
    }
    return result;
  }

  /**
   * Factory used by the configuration methods above to build derived encodings; the subclasses
   * in this file override it to return their own type.
   */
  BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) {
    return new StandardBaseEncoding(alphabet, paddingChar);
  }

  /**
   * Returns {@code "BaseEncoding."} followed by the alphabet name; the padding configuration is
   * appended only when {@code bitsPerChar} does not divide 8.
   */
  @Override
  public String toString() {
    StringBuilder builder = new StringBuilder("BaseEncoding.");
    builder.append(alphabet);
    if (8 % alphabet.bitsPerChar != 0) {
      if (paddingChar == null) {
        builder.append(".omitPadding()");
      } else {
        builder.append(".withPadChar('").append(paddingChar).append("')");
      }
    }
    return builder.toString();
  }

  /** Equal when the other object is a {@code StandardBaseEncoding} with equal alphabet and padding. */
  @Override
  public boolean equals(@CheckForNull Object other) {
    if (other instanceof StandardBaseEncoding) {
      StandardBaseEncoding that = (StandardBaseEncoding) other;
      return this.alphabet.equals(that.alphabet)
          && Objects.equals(this.paddingChar, that.paddingChar);
    }
    return false;
  }

  @Override
  public int hashCode() {
    return alphabet.hashCode() ^ Objects.hashCode(paddingChar);
  }
}
this : newInstance(ignore, paddingChar); 916 } 917 return result; 918 } 919 920 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 921 return new StandardBaseEncoding(alphabet, paddingChar); 922 } 923 924 @Override 925 public String toString() { 926 StringBuilder builder = new StringBuilder("BaseEncoding."); 927 builder.append(alphabet); 928 if (8 % alphabet.bitsPerChar != 0) { 929 if (paddingChar == null) { 930 builder.append(".omitPadding()"); 931 } else { 932 builder.append(".withPadChar('").append(paddingChar).append("')"); 933 } 934 } 935 return builder.toString(); 936 } 937 938 @Override 939 public boolean equals(@CheckForNull Object other) { 940 if (other instanceof StandardBaseEncoding) { 941 StandardBaseEncoding that = (StandardBaseEncoding) other; 942 return this.alphabet.equals(that.alphabet) 943 && Objects.equals(this.paddingChar, that.paddingChar); 944 } 945 return false; 946 } 947 948 @Override 949 public int hashCode() { 950 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 951 } 952 } 953 954 static final class Base16Encoding extends StandardBaseEncoding { 955 final char[] encoding = new char[512]; 956 957 Base16Encoding(String name, String alphabetChars) { 958 this(new Alphabet(name, alphabetChars.toCharArray())); 959 } 960 961 private Base16Encoding(Alphabet alphabet) { 962 super(alphabet, null); 963 checkArgument(alphabet.chars.length == 16); 964 for (int i = 0; i < 256; ++i) { 965 encoding[i] = alphabet.encode(i >>> 4); 966 encoding[i | 0x100] = alphabet.encode(i & 0xF); 967 } 968 } 969 970 @Override 971 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 972 checkNotNull(target); 973 checkPositionIndexes(off, off + len, bytes.length); 974 for (int i = 0; i < len; ++i) { 975 int b = bytes[off + i] & 0xFF; 976 target.append(encoding[b]); 977 target.append(encoding[b | 0x100]); 978 } 979 } 980 981 @Override 982 int decodeTo(byte[] target, CharSequence chars) throws 
DecodingException { 983 checkNotNull(target); 984 if (chars.length() % 2 == 1) { 985 throw new DecodingException("Invalid input length " + chars.length()); 986 } 987 int bytesWritten = 0; 988 for (int i = 0; i < chars.length(); i += 2) { 989 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 990 target[bytesWritten++] = (byte) decoded; 991 } 992 return bytesWritten; 993 } 994 995 @Override 996 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 997 return new Base16Encoding(alphabet); 998 } 999 } 1000 1001 static final class Base64Encoding extends StandardBaseEncoding { 1002 Base64Encoding(String name, String alphabetChars, @CheckForNull Character paddingChar) { 1003 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 1004 } 1005 1006 private Base64Encoding(Alphabet alphabet, @CheckForNull Character paddingChar) { 1007 super(alphabet, paddingChar); 1008 checkArgument(alphabet.chars.length == 64); 1009 } 1010 1011 @Override 1012 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1013 checkNotNull(target); 1014 checkPositionIndexes(off, off + len, bytes.length); 1015 int i = off; 1016 for (int remaining = len; remaining >= 3; remaining -= 3) { 1017 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 1018 target.append(alphabet.encode(chunk >>> 18)); 1019 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 1020 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 1021 target.append(alphabet.encode(chunk & 0x3F)); 1022 } 1023 if (i < off + len) { 1024 encodeChunkTo(target, bytes, i, off + len - i); 1025 } 1026 } 1027 1028 @Override 1029 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1030 checkNotNull(target); 1031 chars = trimTrailingPadding(chars); 1032 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 1033 throw new DecodingException("Invalid input length " + 
chars.length()); 1034 } 1035 int bytesWritten = 0; 1036 for (int i = 0; i < chars.length(); ) { 1037 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 1038 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 1039 target[bytesWritten++] = (byte) (chunk >>> 16); 1040 if (i < chars.length()) { 1041 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 1042 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 1043 if (i < chars.length()) { 1044 chunk |= alphabet.decode(chars.charAt(i++)); 1045 target[bytesWritten++] = (byte) (chunk & 0xFF); 1046 } 1047 } 1048 } 1049 return bytesWritten; 1050 } 1051 1052 @Override 1053 BaseEncoding newInstance(Alphabet alphabet, @CheckForNull Character paddingChar) { 1054 return new Base64Encoding(alphabet, paddingChar); 1055 } 1056 } 1057 1058 @J2ktIncompatible 1059 @GwtIncompatible 1060 static Reader ignoringReader(Reader delegate, String toIgnore) { 1061 checkNotNull(delegate); 1062 checkNotNull(toIgnore); 1063 return new Reader() { 1064 @Override 1065 public int read() throws IOException { 1066 int readChar; 1067 do { 1068 readChar = delegate.read(); 1069 } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0); 1070 return readChar; 1071 } 1072 1073 @Override 1074 public int read(char[] cbuf, int off, int len) throws IOException { 1075 throw new UnsupportedOperationException(); 1076 } 1077 1078 @Override 1079 public void close() throws IOException { 1080 delegate.close(); 1081 } 1082 }; 1083 } 1084 1085 static Appendable separatingAppendable( 1086 Appendable delegate, String separator, int afterEveryChars) { 1087 checkNotNull(delegate); 1088 checkNotNull(separator); 1089 checkArgument(afterEveryChars > 0); 1090 return new Appendable() { 1091 int charsUntilSeparator = afterEveryChars; 1092 1093 @Override 1094 public Appendable append(char c) throws IOException { 1095 if (charsUntilSeparator == 0) { 1096 delegate.append(separator); 1097 charsUntilSeparator = afterEveryChars; 1098 } 1099 delegate.append(c); 1100 
charsUntilSeparator--; 1101 return this; 1102 } 1103 1104 @Override 1105 public Appendable append(@CheckForNull CharSequence chars, int off, int len) { 1106 throw new UnsupportedOperationException(); 1107 } 1108 1109 @Override 1110 public Appendable append(@CheckForNull CharSequence chars) { 1111 throw new UnsupportedOperationException(); 1112 } 1113 }; 1114 } 1115 1116 @J2ktIncompatible 1117 @GwtIncompatible // Writer 1118 static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) { 1119 Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars); 1120 return new Writer() { 1121 @Override 1122 public void write(int c) throws IOException { 1123 separatingAppendable.append((char) c); 1124 } 1125 1126 @Override 1127 public void write(char[] chars, int off, int len) throws IOException { 1128 throw new UnsupportedOperationException(); 1129 } 1130 1131 @Override 1132 public void flush() throws IOException { 1133 delegate.flush(); 1134 } 1135 1136 @Override 1137 public void close() throws IOException { 1138 delegate.close(); 1139 } 1140 }; 1141 } 1142 1143 static final class SeparatedBaseEncoding extends BaseEncoding { 1144 private final BaseEncoding delegate; 1145 private final String separator; 1146 private final int afterEveryChars; 1147 1148 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1149 this.delegate = checkNotNull(delegate); 1150 this.separator = checkNotNull(separator); 1151 this.afterEveryChars = afterEveryChars; 1152 checkArgument( 1153 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1154 } 1155 1156 @Override 1157 CharSequence trimTrailingPadding(CharSequence chars) { 1158 return delegate.trimTrailingPadding(chars); 1159 } 1160 1161 @Override 1162 int maxEncodedSize(int bytes) { 1163 int unseparatedSize = delegate.maxEncodedSize(bytes); 1164 return unseparatedSize 1165 + separator.length() * divide(Math.max(0, 
unseparatedSize - 1), afterEveryChars, FLOOR); 1166 } 1167 1168 @J2ktIncompatible 1169 @GwtIncompatible // Writer,OutputStream 1170 @Override 1171 public OutputStream encodingStream(Writer output) { 1172 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1173 } 1174 1175 @Override 1176 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1177 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1178 } 1179 1180 @Override 1181 int maxDecodedSize(int chars) { 1182 return delegate.maxDecodedSize(chars); 1183 } 1184 1185 @Override 1186 public boolean canDecode(CharSequence chars) { 1187 StringBuilder builder = new StringBuilder(); 1188 for (int i = 0; i < chars.length(); i++) { 1189 char c = chars.charAt(i); 1190 if (separator.indexOf(c) < 0) { 1191 builder.append(c); 1192 } 1193 } 1194 return delegate.canDecode(builder); 1195 } 1196 1197 @Override 1198 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1199 StringBuilder stripped = new StringBuilder(chars.length()); 1200 for (int i = 0; i < chars.length(); i++) { 1201 char c = chars.charAt(i); 1202 if (separator.indexOf(c) < 0) { 1203 stripped.append(c); 1204 } 1205 } 1206 return delegate.decodeTo(target, stripped); 1207 } 1208 1209 @Override 1210 @J2ktIncompatible 1211 @GwtIncompatible // Reader,InputStream 1212 public InputStream decodingStream(Reader reader) { 1213 return delegate.decodingStream(ignoringReader(reader, separator)); 1214 } 1215 1216 @Override 1217 public BaseEncoding omitPadding() { 1218 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1219 } 1220 1221 @Override 1222 public BaseEncoding withPadChar(char padChar) { 1223 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1224 } 1225 1226 @Override 1227 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1228 throw new 
UnsupportedOperationException("Already have a separator"); 1229 } 1230 1231 @Override 1232 public BaseEncoding upperCase() { 1233 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1234 } 1235 1236 @Override 1237 public BaseEncoding lowerCase() { 1238 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1239 } 1240 1241 @Override 1242 public BaseEncoding ignoreCase() { 1243 return delegate.ignoreCase().withSeparator(separator, afterEveryChars); 1244 } 1245 1246 @Override 1247 public String toString() { 1248 return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 1249 } 1250 } 1251}