001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.lang.Math.max; 024import static java.lang.Math.min; 025import static java.math.RoundingMode.CEILING; 026import static java.math.RoundingMode.FLOOR; 027import static java.math.RoundingMode.UNNECESSARY; 028 029import com.google.common.annotations.GwtCompatible; 030import com.google.common.annotations.GwtIncompatible; 031import com.google.common.annotations.J2ktIncompatible; 032import com.google.common.base.Ascii; 033import com.google.errorprone.annotations.concurrent.LazyInit; 034import java.io.IOException; 035import java.io.InputStream; 036import java.io.OutputStream; 037import java.io.Reader; 038import java.io.Writer; 039import java.util.Arrays; 040import java.util.Objects; 041import org.jspecify.annotations.Nullable; 042 043/** 044 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 045 * strings. This class includes several constants for encoding schemes specified by <a 046 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression: 047 * 048 * <pre>{@code 049 * BaseEncoding.base32().encode("foo".getBytes(US_ASCII)) 050 * }</pre> 051 * 052 * <p>returns the string {@code "MZXW6==="}, and 053 * 054 * <pre>{@code 055 * byte[] decoded = BaseEncoding.base32().decode("MZXW6==="); 056 * }</pre> 057 * 058 * <p>...returns the ASCII bytes of the string {@code "foo"}. 059 * 060 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 061 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 062 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 063 * behavior: 064 * 065 * <pre>{@code 066 * BaseEncoding.base16().lowerCase().decode("deadbeef"); 067 * }</pre> 068 * 069 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 070 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 071 * 072 * <pre>{@code 073 * // Do NOT do this 074 * BaseEncoding hex = BaseEncoding.base16(); 075 * hex.lowerCase(); // does nothing! 076 * return hex.decode("deadbeef"); // throws an IllegalArgumentException 077 * }</pre> 078 * 079 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code 080 * x}, but the reverse does not necessarily hold. 081 * 082 * <table> 083 * <caption>Encodings</caption> 084 * <tr> 085 * <th>Encoding 086 * <th>Alphabet 087 * <th>{@code char:byte} ratio 088 * <th>Default padding 089 * <th>Comments 090 * <tr> 091 * <td>{@link #base16()} 092 * <td>0-9 A-F 093 * <td>2.00 094 * <td>N/A 095 * <td>Traditional hexadecimal. Defaults to upper case. 096 * <tr> 097 * <td>{@link #base32()} 098 * <td>A-Z 2-7 099 * <td>1.60 100 * <td>= 101 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 102 * <tr> 103 * <td>{@link #base32Hex()} 104 * <td>0-9 A-V 105 * <td>1.60 106 * <td>= 107 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 108 * <tr> 109 * <td>{@link #base64()} 110 * <td>A-Z a-z 0-9 + / 111 * <td>1.33 112 * <td>= 113 * <td> 114 * <tr> 115 * <td>{@link #base64Url()} 116 * <td>A-Z a-z 0-9 - _ 117 * <td>1.33 118 * <td>= 119 * <td>Safe to use as filenames, or to pass in URLs without escaping 120 * </table> 121 * 122 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 123 * 124 * @author Louis Wasserman 125 * @since 14.0 126 */ 127@GwtCompatible(emulated = true) 128public abstract class BaseEncoding { 129 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 130 131 BaseEncoding() {} 132 133 /** 134 * Exception indicating invalid base-encoded input encountered while decoding. 135 * 136 * @author Louis Wasserman 137 * @since 15.0 138 */ 139 public static final class DecodingException extends IOException { 140 DecodingException(@Nullable String message) { 141 super(message); 142 } 143 } 144 145 /** Encodes the specified byte array, and returns the encoded {@code String}. */ 146 public String encode(byte[] bytes) { 147 return encode(bytes, 0, bytes.length); 148 } 149 150 /** 151 * Encodes the specified range of the specified byte array, and returns the encoded {@code 152 * String}. 153 */ 154 public final String encode(byte[] bytes, int off, int len) { 155 checkPositionIndexes(off, off + len, bytes.length); 156 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 157 try { 158 encodeTo(result, bytes, off, len); 159 } catch (IOException impossible) { 160 throw new AssertionError(impossible); 161 } 162 return result.toString(); 163 } 164 165 /** 166 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 167 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code 168 * Writer}. 169 */ 170 @J2ktIncompatible 171 @GwtIncompatible // Writer,OutputStream 172 public abstract OutputStream encodingStream(Writer writer); 173 174 /** 175 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 176 */ 177 @J2ktIncompatible 178 @GwtIncompatible // ByteSink,CharSink 179 public final ByteSink encodingSink(CharSink encodedSink) { 180 checkNotNull(encodedSink); 181 return new ByteSink() { 182 @Override 183 public OutputStream openStream() throws IOException { 184 return encodingStream(encodedSink.openStream()); 185 } 186 }; 187 } 188 189 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 190 191 private static byte[] extract(byte[] result, int length) { 192 if (length == result.length) { 193 return result; 194 } 195 byte[] trunc = new byte[length]; 196 System.arraycopy(result, 0, trunc, 0, length); 197 return trunc; 198 } 199 200 /** 201 * Determines whether the specified character sequence is a valid encoded string according to this 202 * encoding. 203 * 204 * @since 20.0 205 */ 206 public abstract boolean canDecode(CharSequence chars); 207 208 /** 209 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 210 * inverse operation to {@link #encode(byte[])}. 211 * 212 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 213 * encoding. 214 */ 215 public final byte[] decode(CharSequence chars) { 216 try { 217 return decodeChecked(chars); 218 } catch (DecodingException badInput) { 219 throw new IllegalArgumentException(badInput); 220 } 221 } 222 223 /** 224 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 225 * inverse operation to {@link #encode(byte[])}. 226 * 227 * @throws DecodingException if the input is not a valid encoded string according to this 228 * encoding. 229 */ 230 final byte[] decodeChecked(CharSequence chars) 231 throws DecodingException { 232 chars = trimTrailingPadding(chars); 233 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 234 int len = decodeTo(tmp, chars); 235 return extract(tmp, len); 236 } 237 238 /** 239 * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code 240 * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors. 241 */ 242 @J2ktIncompatible 243 @GwtIncompatible // Reader,InputStream 244 public abstract InputStream decodingStream(Reader reader); 245 246 /** 247 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code 248 * CharSource}. 249 */ 250 @J2ktIncompatible 251 @GwtIncompatible // ByteSource,CharSource 252 public final ByteSource decodingSource(CharSource encodedSource) { 253 checkNotNull(encodedSource); 254 return new ByteSource() { 255 @Override 256 public InputStream openStream() throws IOException { 257 return decodingStream(encodedSource.openStream()); 258 } 259 }; 260 } 261 262 // Implementations for encoding/decoding 263 264 abstract int maxEncodedSize(int bytes); 265 266 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 267 268 abstract int maxDecodedSize(int chars); 269 270 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 271 272 CharSequence trimTrailingPadding(CharSequence chars) { 273 return checkNotNull(chars); 274 } 275 276 // Modified encoding generators 277 278 /** 279 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 280 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 281 * section 3.2</a>, Padding of Encoded Data. 282 */ 283 public abstract BaseEncoding omitPadding(); 284 285 /** 286 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 287 * for padding. 288 * 289 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 290 * separator 291 */ 292 public abstract BaseEncoding withPadChar(char padChar); 293 294 /** 295 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 296 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 297 * are skipped over in decoding. 298 * 299 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 300 * string, or if {@code n <= 0} 301 * @throws UnsupportedOperationException if this encoding already uses a separator 302 */ 303 public abstract BaseEncoding withSeparator(String separator, int n); 304 305 /** 306 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 307 * uppercase letters. Padding and separator characters remain in their original case. 308 * 309 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 310 * lower-case characters 311 */ 312 public abstract BaseEncoding upperCase(); 313 314 /** 315 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 316 * lowercase letters. Padding and separator characters remain in their original case. 317 * 318 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 319 * lower-case characters 320 */ 321 public abstract BaseEncoding lowerCase(); 322 323 /** 324 * Returns an encoding that behaves equivalently to this encoding, but decodes letters without 325 * regard to case. 326 * 327 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 328 * lower-case characters 329 * @since 32.0.0 330 */ 331 public abstract BaseEncoding ignoreCase(); 332 333 private static final BaseEncoding BASE64 = 334 new Base64Encoding( 335 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 336 337 /** 338 * The "base64" base encoding specified by <a 339 * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding. 340 * (This is the same as the base 64 encoding from <a 341 * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 342 * 343 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 344 * omitted} or {@linkplain #withPadChar(char) replaced}. 345 * 346 * <p>No line feeds are added by default, as per <a 347 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 348 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 349 */ 350 public static BaseEncoding base64() { 351 return BASE64; 352 } 353 354 private static final BaseEncoding BASE64_URL = 355 new Base64Encoding( 356 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 357 358 /** 359 * The "base64url" encoding specified by <a 360 * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 361 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This 362 * is the same as the base 64 encoding with URL and filename safe alphabet from <a 363 * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 364 * 365 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 366 * omitted} or {@linkplain #withPadChar(char) replaced}. 367 * 368 * <p>No line feeds are added by default, as per <a 369 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 370 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 371 */ 372 public static BaseEncoding base64Url() { 373 return BASE64_URL; 374 } 375 376 private static final BaseEncoding BASE32 = 377 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 378 379 /** 380 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC 381 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a 382 * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 383 * 384 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 385 * omitted} or {@linkplain #withPadChar(char) replaced}. 386 * 387 * <p>No line feeds are added by default, as per <a 388 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 389 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 390 */ 391 public static BaseEncoding base32() { 392 return BASE32; 393 } 394 395 private static final BaseEncoding BASE32_HEX = 396 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 397 398 /** 399 * The "base32hex" encoding specified by <a 400 * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 401 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 402 * 403 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 404 * omitted} or {@linkplain #withPadChar(char) replaced}. 405 * 406 * <p>No line feeds are added by default, as per <a 407 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 408 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 409 */ 410 public static BaseEncoding base32Hex() { 411 return BASE32_HEX; 412 } 413 414 private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF"); 415 416 /** 417 * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC 418 * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a 419 * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 420 * "hexadecimal" format. 421 * 422 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()} 423 * have no effect. 424 * 425 * <p>No line feeds are added by default, as per <a 426 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 427 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 428 */ 429 public static BaseEncoding base16() { 430 return BASE16; 431 } 432 433 static final class Alphabet { 434 private final String name; 435 // this is meant to be immutable -- don't modify it! 436 private final char[] chars; 437 final int mask; 438 final int bitsPerChar; 439 final int charsPerChunk; 440 final int bytesPerChunk; 441 private final byte[] decodabet; 442 private final boolean[] validPadding; 443 private final boolean ignoreCase; 444 445 Alphabet(String name, char[] chars) { 446 this(name, chars, decodabetFor(chars), /* ignoreCase= */ false); 447 } 448 449 private Alphabet(String name, char[] chars, byte[] decodabet, boolean ignoreCase) { 450 this.name = checkNotNull(name); 451 this.chars = checkNotNull(chars); 452 try { 453 this.bitsPerChar = log2(chars.length, UNNECESSARY); 454 } catch (ArithmeticException e) { 455 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 456 } 457 458 // Compute how input bytes are chunked. For example, with base64 we chunk every 3 bytes into 459 // 4 characters. We have bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. 460 // We're looking for the smallest charsPerChunk such that bitsPerChar * charsPerChunk is a 461 // multiple of 8. A multiple of 8 has 3 low zero bits, so we just need to figure out how many 462 // extra zero bits we need to add to the end of bitsPerChar to get 3 in total. 463 // The logic here would be wrong for bitsPerChar > 8, but since we require distinct ASCII 464 // characters that can't happen. 465 int zeroesInBitsPerChar = Integer.numberOfTrailingZeros(bitsPerChar); 466 this.charsPerChunk = 1 << (3 - zeroesInBitsPerChar); 467 this.bytesPerChunk = bitsPerChar >> zeroesInBitsPerChar; 468 469 this.mask = chars.length - 1; 470 471 this.decodabet = decodabet; 472 473 boolean[] validPadding = new boolean[charsPerChunk]; 474 for (int i = 0; i < bytesPerChunk; i++) { 475 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 476 } 477 this.validPadding = validPadding; 478 this.ignoreCase = ignoreCase; 479 } 480 481 private static byte[] decodabetFor(char[] chars) { 482 byte[] decodabet = new byte[Ascii.MAX + 1]; 483 Arrays.fill(decodabet, (byte) -1); 484 for (int i = 0; i < chars.length; i++) { 485 char c = chars[i]; 486 checkArgument(c < decodabet.length, "Non-ASCII character: %s", c); 487 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 488 decodabet[c] = (byte) i; 489 } 490 return decodabet; 491 } 492 493 /** Returns an equivalent {@code Alphabet} except it ignores case. */ 494 Alphabet ignoreCase() { 495 if (ignoreCase) { 496 return this; 497 } 498 499 // We can't use .clone() because of GWT. 500 byte[] newDecodabet = Arrays.copyOf(decodabet, decodabet.length); 501 for (int upper = 'A'; upper <= 'Z'; upper++) { 502 int lower = upper | 0x20; 503 byte decodeUpper = decodabet[upper]; 504 byte decodeLower = decodabet[lower]; 505 if (decodeUpper == -1) { 506 newDecodabet[upper] = decodeLower; 507 } else { 508 checkState( 509 decodeLower == -1, 510 "Can't ignoreCase() since '%s' and '%s' encode different values", 511 (char) upper, 512 (char) lower); 513 newDecodabet[lower] = decodeUpper; 514 } 515 } 516 return new Alphabet(name + ".ignoreCase()", chars, newDecodabet, /* ignoreCase= */ true); 517 } 518 519 char encode(int bits) { 520 return chars[bits]; 521 } 522 523 boolean isValidPaddingStartPosition(int index) { 524 return validPadding[index % charsPerChunk]; 525 } 526 527 boolean canDecode(char ch) { 528 return ch <= Ascii.MAX && decodabet[ch] != -1; 529 } 530 531 int decode(char ch) throws DecodingException { 532 if (ch > Ascii.MAX) { 533 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 534 } 535 int result = decodabet[ch]; 536 if (result == -1) { 537 if (ch <= 0x20 || ch == Ascii.MAX) { 538 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 539 } else { 540 throw new DecodingException("Unrecognized character: " + ch); 541 } 542 } 543 return result; 544 } 545 546 private boolean hasLowerCase() { 547 for (char c : chars) { 548 if (Ascii.isLowerCase(c)) { 549 return true; 550 } 551 } 552 return false; 553 } 554 555 private boolean hasUpperCase() { 556 for (char c : chars) { 557 if (Ascii.isUpperCase(c)) { 558 return true; 559 } 560 } 561 return false; 562 } 563 564 Alphabet upperCase() { 565 if (!hasLowerCase()) { 566 return this; 567 } 568 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 569 char[] upperCased = new char[chars.length]; 570 for (int i = 0; i < chars.length; i++) { 571 upperCased[i] = Ascii.toUpperCase(chars[i]); 572 } 573 Alphabet upperCase = new Alphabet(name + ".upperCase()", upperCased); 574 return ignoreCase ? upperCase.ignoreCase() : upperCase; 575 } 576 577 Alphabet lowerCase() { 578 if (!hasUpperCase()) { 579 return this; 580 } 581 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 582 char[] lowerCased = new char[chars.length]; 583 for (int i = 0; i < chars.length; i++) { 584 lowerCased[i] = Ascii.toLowerCase(chars[i]); 585 } 586 Alphabet lowerCase = new Alphabet(name + ".lowerCase()", lowerCased); 587 return ignoreCase ? lowerCase.ignoreCase() : lowerCase; 588 } 589 590 public boolean matches(char c) { 591 return c < decodabet.length && decodabet[c] != -1; 592 } 593 594 @Override 595 public String toString() { 596 return name; 597 } 598 599 @Override 600 public boolean equals(@Nullable Object other) { 601 if (other instanceof Alphabet) { 602 Alphabet that = (Alphabet) other; 603 return this.ignoreCase == that.ignoreCase && Arrays.equals(this.chars, that.chars); 604 } 605 return false; 606 } 607 608 @Override 609 public int hashCode() { 610 return Arrays.hashCode(chars) + (ignoreCase ? 1231 : 1237); 611 } 612 } 613 614 private static class StandardBaseEncoding extends BaseEncoding { 615 final Alphabet alphabet; 616 617 final @Nullable Character paddingChar; 618 619 StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) { 620 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 621 } 622 623 StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) { 624 this.alphabet = checkNotNull(alphabet); 625 checkArgument( 626 paddingChar == null || !alphabet.matches(paddingChar), 627 "Padding character %s was already in alphabet", 628 paddingChar); 629 this.paddingChar = paddingChar; 630 } 631 632 @Override 633 int maxEncodedSize(int bytes) { 634 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 635 } 636 637 @J2ktIncompatible 638 @GwtIncompatible // Writer,OutputStream 639 @Override 640 public OutputStream encodingStream(Writer out) { 641 checkNotNull(out); 642 return new OutputStream() { 643 int bitBuffer = 0; 644 int bitBufferLength = 0; 645 int writtenChars = 0; 646 647 @Override 648 public void write(int b) throws IOException { 649 bitBuffer <<= 8; 650 bitBuffer |= b & 0xFF; 651 bitBufferLength += 8; 652 while (bitBufferLength >= alphabet.bitsPerChar) { 653 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 654 out.write(alphabet.encode(charIndex)); 655 writtenChars++; 656 bitBufferLength -= alphabet.bitsPerChar; 657 } 658 } 659 660 @Override 661 public void flush() throws IOException { 662 out.flush(); 663 } 664 665 @Override 666 public void close() throws IOException { 667 if (bitBufferLength > 0) { 668 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 669 out.write(alphabet.encode(charIndex)); 670 writtenChars++; 671 if (paddingChar != null) { 672 while (writtenChars % alphabet.charsPerChunk != 0) { 673 out.write(paddingChar.charValue()); 674 writtenChars++; 675 } 676 } 677 } 678 out.close(); 679 } 680 }; 681 } 682 683 @Override 684 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 685 checkNotNull(target); 686 checkPositionIndexes(off, off + len, bytes.length); 687 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 688 encodeChunkTo(target, bytes, off + i, min(alphabet.bytesPerChunk, len - i)); 689 } 690 } 691 692 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 693 checkNotNull(target); 694 checkPositionIndexes(off, off + len, bytes.length); 695 checkArgument(len <= alphabet.bytesPerChunk); 696 long bitBuffer = 0; 697 for (int i = 0; i < len; ++i) { 698 bitBuffer |= bytes[off + i] & 0xFF; 699 bitBuffer <<= 8; // Add additional zero byte in the end. 700 } 701 // Position of first character is length of bitBuffer minus bitsPerChar. 702 int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 703 int bitsProcessed = 0; 704 while (bitsProcessed < len * 8) { 705 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 706 target.append(alphabet.encode(charIndex)); 707 bitsProcessed += alphabet.bitsPerChar; 708 } 709 if (paddingChar != null) { 710 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 711 target.append(paddingChar.charValue()); 712 bitsProcessed += alphabet.bitsPerChar; 713 } 714 } 715 } 716 717 @Override 718 int maxDecodedSize(int chars) { 719 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 720 } 721 722 @Override 723 CharSequence trimTrailingPadding(CharSequence chars) { 724 checkNotNull(chars); 725 if (paddingChar == null) { 726 return chars; 727 } 728 char padChar = paddingChar.charValue(); 729 int l; 730 for (l = chars.length() - 1; l >= 0; l--) { 731 if (chars.charAt(l) != padChar) { 732 break; 733 } 734 } 735 return chars.subSequence(0, l + 1); 736 } 737 738 @Override 739 public boolean canDecode(CharSequence chars) { 740 checkNotNull(chars); 741 chars = trimTrailingPadding(chars); 742 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 743 return false; 744 } 745 for (int i = 0; i < chars.length(); i++) { 746 if (!alphabet.canDecode(chars.charAt(i))) { 747 return false; 748 } 749 } 750 return true; 751 } 752 753 @Override 754 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 755 checkNotNull(target); 756 chars = trimTrailingPadding(chars); 757 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 758 throw new DecodingException("Invalid input length " + chars.length()); 759 } 760 int bytesWritten = 0; 761 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 762 long chunk = 0; 763 int charsProcessed = 0; 764 for (int i = 0; i < alphabet.charsPerChunk; i++) { 765 chunk <<= alphabet.bitsPerChar; 766 if (charIdx + i < chars.length()) { 767 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 768 } 769 } 770 int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 771 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 772 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 773 } 774 } 775 return bytesWritten; 776 } 777 778 @Override 779 @J2ktIncompatible 780 @GwtIncompatible // Reader,InputStream 781 public InputStream decodingStream(Reader reader) { 782 checkNotNull(reader); 783 return new InputStream() { 784 int bitBuffer = 0; 785 int bitBufferLength = 0; 786 int readChars = 0; 787 boolean hitPadding = false; 788 789 @Override 790 public int read() throws IOException { 791 while (true) { 792 int readChar = reader.read(); 793 if (readChar == -1) { 794 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 795 throw new DecodingException("Invalid input length " + readChars); 796 } 797 return -1; 798 } 799 readChars++; 800 char ch = (char) readChar; 801 if (paddingChar != null && paddingChar.charValue() == ch) { 802 if (!hitPadding 803 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 804 throw new DecodingException("Padding cannot start at index " + readChars); 805 } 806 hitPadding = true; 807 } else if (hitPadding) { 808 throw new DecodingException( 809 "Expected padding character but found '" + ch + "' at index " + readChars); 810 } else { 811 bitBuffer <<= alphabet.bitsPerChar; 812 bitBuffer |= alphabet.decode(ch); 813 bitBufferLength += alphabet.bitsPerChar; 814 815 if (bitBufferLength >= 8) { 816 bitBufferLength -= 8; 817 return (bitBuffer >> bitBufferLength) & 0xFF; 818 } 819 } 820 } 821 } 822 823 @Override 824 public int read(byte[] buf, int off, int len) throws IOException { 825 // Overriding this to work around the fact that InputStream's default implementation of 826 // this method will silently swallow exceptions thrown by the single-byte read() method 827 // (other than on the first call to it), which in this case can cause invalid encoded 828 // strings to not throw an exception. 829 // See https://github.com/google/guava/issues/3542 830 checkPositionIndexes(off, off + len, buf.length); 831 832 int i = off; 833 for (; i < off + len; i++) { 834 int b = read(); 835 if (b == -1) { 836 int read = i - off; 837 return read == 0 ? -1 : read; 838 } 839 buf[i] = (byte) b; 840 } 841 return i - off; 842 } 843 844 @Override 845 public void close() throws IOException { 846 reader.close(); 847 } 848 }; 849 } 850 851 @Override 852 public BaseEncoding omitPadding() { 853 return (paddingChar == null) ? this : newInstance(alphabet, null); 854 } 855 856 @Override 857 public BaseEncoding withPadChar(char padChar) { 858 if (8 % alphabet.bitsPerChar == 0 859 || (paddingChar != null && paddingChar.charValue() == padChar)) { 860 return this; 861 } else { 862 return newInstance(alphabet, padChar); 863 } 864 } 865 866 @Override 867 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 868 for (int i = 0; i < separator.length(); i++) { 869 checkArgument( 870 !alphabet.matches(separator.charAt(i)), 871 "Separator (%s) cannot contain alphabet characters", 872 separator); 873 } 874 if (paddingChar != null) { 875 checkArgument( 876 separator.indexOf(paddingChar.charValue()) < 0, 877 "Separator (%s) cannot contain padding character", 878 separator); 879 } 880 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 881 } 882 883 @LazyInit private volatile @Nullable BaseEncoding upperCase; 884 @LazyInit private volatile @Nullable BaseEncoding lowerCase; 885 @LazyInit private volatile @Nullable BaseEncoding ignoreCase; 886 887 @Override 888 public BaseEncoding upperCase() { 889 BaseEncoding result = upperCase; 890 if (result == null) { 891 Alphabet upper = alphabet.upperCase(); 892 result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar); 893 } 894 return result; 895 } 896 897 @Override 898 public BaseEncoding lowerCase() { 899 BaseEncoding result = lowerCase; 900 if (result == null) { 901 Alphabet lower = alphabet.lowerCase(); 902 result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar); 903 } 904 return result; 905 } 906 907 @Override 908 public BaseEncoding ignoreCase() { 909 BaseEncoding result = ignoreCase; 910 if (result == null) { 911 Alphabet ignore = alphabet.ignoreCase(); 912 result = ignoreCase = (ignore == alphabet) ? this : newInstance(ignore, paddingChar); 913 } 914 return result; 915 } 916 917 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 918 return new StandardBaseEncoding(alphabet, paddingChar); 919 } 920 921 @Override 922 public String toString() { 923 StringBuilder builder = new StringBuilder("BaseEncoding."); 924 builder.append(alphabet); 925 if (8 % alphabet.bitsPerChar != 0) { 926 if (paddingChar == null) { 927 builder.append(".omitPadding()"); 928 } else { 929 builder.append(".withPadChar('").append(paddingChar).append("')"); 930 } 931 } 932 return builder.toString(); 933 } 934 935 @Override 936 public boolean equals(@Nullable Object other) { 937 if (other instanceof StandardBaseEncoding) { 938 StandardBaseEncoding that = (StandardBaseEncoding) other; 939 return this.alphabet.equals(that.alphabet) 940 && Objects.equals(this.paddingChar, that.paddingChar); 941 } 942 return false; 943 } 944 945 @Override 946 public int hashCode() { 947 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 948 } 949 } 950 951 private static final class Base16Encoding extends StandardBaseEncoding { 952 final char[] encoding = new char[512]; 953 954 Base16Encoding(String name, String alphabetChars) { 955 this(new Alphabet(name, alphabetChars.toCharArray())); 956 } 957 958 private Base16Encoding(Alphabet alphabet) { 959 super(alphabet, null); 960 checkArgument(alphabet.chars.length == 16); 961 for (int i = 0; i < 256; ++i) { 962 encoding[i] = alphabet.encode(i >>> 4); 963 encoding[i | 0x100] = alphabet.encode(i & 0xF); 964 } 965 } 966 967 @Override 968 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 969 checkNotNull(target); 970 checkPositionIndexes(off, off + len, bytes.length); 971 for (int i = 0; i < len; ++i) { 972 int b = bytes[off + i] & 0xFF; 973 target.append(encoding[b]); 974 target.append(encoding[b | 0x100]); 975 } 976 } 977 978 @Override 979 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 980 checkNotNull(target); 981 if (chars.length() % 2 == 1) { 982 throw new DecodingException("Invalid input length " + chars.length()); 983 } 984 int bytesWritten = 0; 985 for (int i = 0; i < chars.length(); i += 2) { 986 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 987 target[bytesWritten++] = (byte) decoded; 988 } 989 return bytesWritten; 990 } 991 992 @Override 993 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 994 return new Base16Encoding(alphabet); 995 } 996 } 997 998 private static final class Base64Encoding extends StandardBaseEncoding { 999 Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) { 1000 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 1001 } 1002 1003 private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) { 1004 super(alphabet, paddingChar); 1005 checkArgument(alphabet.chars.length == 64); 1006 } 1007 1008 @Override 1009 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1010 checkNotNull(target); 1011 checkPositionIndexes(off, off + len, bytes.length); 1012 int i = off; 1013 for (int remaining = len; remaining >= 3; remaining -= 3) { 1014 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 1015 target.append(alphabet.encode(chunk >>> 18)); 1016 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 1017 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 1018 target.append(alphabet.encode(chunk & 0x3F)); 1019 } 1020 if (i < off + len) { 1021 encodeChunkTo(target, bytes, i, off + len - i); 1022 } 1023 } 1024 1025 @Override 1026 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1027 checkNotNull(target); 1028 chars = trimTrailingPadding(chars); 1029 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 1030 throw new DecodingException("Invalid input length " + chars.length()); 1031 } 1032 int bytesWritten = 0; 1033 for (int i = 0; i < chars.length(); ) { 1034 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 1035 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 1036 target[bytesWritten++] = (byte) (chunk >>> 16); 1037 if (i < chars.length()) { 1038 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 1039 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 1040 if (i < chars.length()) { 1041 chunk |= alphabet.decode(chars.charAt(i++)); 1042 target[bytesWritten++] = (byte) (chunk & 0xFF); 1043 } 1044 } 1045 } 1046 return bytesWritten; 1047 } 1048 1049 @Override 1050 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 1051 return new Base64Encoding(alphabet, paddingChar); 1052 } 1053 } 1054 1055 @J2ktIncompatible 1056 @GwtIncompatible 1057 static Reader ignoringReader(Reader delegate, String toIgnore) { 1058 checkNotNull(delegate); 1059 checkNotNull(toIgnore); 1060 return new Reader() { 1061 @Override 1062 public int read() throws IOException { 1063 int readChar; 1064 do { 1065 readChar = delegate.read(); 1066 } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0); 1067 return readChar; 1068 } 1069 1070 @Override 1071 public int read(char[] cbuf, int off, int len) throws IOException { 1072 throw new UnsupportedOperationException(); 1073 } 1074 1075 @Override 1076 public void close() throws IOException { 1077 delegate.close(); 1078 } 1079 }; 1080 } 1081 1082 static Appendable separatingAppendable( 1083 Appendable delegate, String separator, int afterEveryChars) { 1084 checkNotNull(delegate); 1085 checkNotNull(separator); 1086 checkArgument(afterEveryChars > 0); 1087 return new Appendable() { 1088 int charsUntilSeparator = afterEveryChars; 1089 1090 @Override 1091 public Appendable append(char c) throws IOException { 1092 if (charsUntilSeparator == 0) { 1093 delegate.append(separator); 1094 charsUntilSeparator = afterEveryChars; 1095 } 1096 delegate.append(c); 1097 charsUntilSeparator--; 1098 return this; 1099 } 1100 1101 @Override 1102 public Appendable append(@Nullable CharSequence chars, int off, int len) { 1103 throw new UnsupportedOperationException(); 1104 } 1105 1106 @Override 1107 public Appendable append(@Nullable CharSequence chars) { 1108 throw new UnsupportedOperationException(); 1109 } 1110 }; 1111 } 1112 1113 @J2ktIncompatible 1114 @GwtIncompatible // Writer 1115 static Writer separatingWriter(Writer delegate, String separator, int afterEveryChars) { 1116 Appendable separatingAppendable = separatingAppendable(delegate, separator, afterEveryChars); 1117 return new Writer() { 1118 @Override 1119 public void write(int c) throws IOException { 1120 separatingAppendable.append((char) c); 1121 } 1122 1123 @Override 1124 public void write(char[] chars, int off, int len) throws IOException { 1125 throw new UnsupportedOperationException(); 1126 } 1127 1128 @Override 1129 public void flush() throws IOException { 1130 delegate.flush(); 1131 } 1132 1133 @Override 1134 public void close() throws IOException { 1135 delegate.close(); 1136 } 1137 }; 1138 } 1139 1140 static final class SeparatedBaseEncoding extends BaseEncoding { 1141 private final BaseEncoding delegate; 1142 private final String separator; 1143 private final int afterEveryChars; 1144 1145 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1146 this.delegate = checkNotNull(delegate); 1147 this.separator = checkNotNull(separator); 1148 this.afterEveryChars = afterEveryChars; 1149 checkArgument( 1150 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1151 } 1152 1153 @Override 1154 CharSequence trimTrailingPadding(CharSequence chars) { 1155 return delegate.trimTrailingPadding(chars); 1156 } 1157 1158 @Override 1159 int maxEncodedSize(int bytes) { 1160 int unseparatedSize = delegate.maxEncodedSize(bytes); 1161 return unseparatedSize 1162 + separator.length() * divide(max(0, unseparatedSize - 1), afterEveryChars, FLOOR); 1163 } 1164 1165 @J2ktIncompatible 1166 @GwtIncompatible // Writer,OutputStream 1167 @Override 1168 public OutputStream encodingStream(Writer output) { 1169 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1170 } 1171 1172 @Override 1173 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1174 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1175 } 1176 1177 @Override 1178 int maxDecodedSize(int chars) { 1179 return delegate.maxDecodedSize(chars); 1180 } 1181 1182 @Override 1183 public boolean canDecode(CharSequence chars) { 1184 StringBuilder builder = new StringBuilder(); 1185 for (int i = 0; i < chars.length(); i++) { 1186 char c = chars.charAt(i); 1187 if (separator.indexOf(c) < 0) { 1188 builder.append(c); 1189 } 1190 } 1191 return delegate.canDecode(builder); 1192 } 1193 1194 @Override 1195 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1196 StringBuilder stripped = new StringBuilder(chars.length()); 1197 for (int i = 0; i < chars.length(); i++) { 1198 char c = chars.charAt(i); 1199 if (separator.indexOf(c) < 0) { 1200 stripped.append(c); 1201 } 1202 } 1203 return delegate.decodeTo(target, stripped); 1204 } 1205 1206 @Override 1207 @J2ktIncompatible 1208 @GwtIncompatible // Reader,InputStream 1209 public InputStream decodingStream(Reader reader) { 1210 return delegate.decodingStream(ignoringReader(reader, separator)); 1211 } 1212 1213 @Override 1214 public BaseEncoding omitPadding() { 1215 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1216 } 1217 1218 @Override 1219 public BaseEncoding withPadChar(char padChar) { 1220 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1221 } 1222 1223 @Override 1224 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1225 throw new UnsupportedOperationException("Already have a separator"); 1226 } 1227 1228 @Override 1229 public BaseEncoding upperCase() { 1230 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1231 } 1232 1233 @Override 1234 public BaseEncoding lowerCase() { 1235 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1236 } 1237 1238 @Override 1239 public BaseEncoding ignoreCase() { 1240 return delegate.ignoreCase().withSeparator(separator, afterEveryChars); 1241 } 1242 1243 @Override 1244 public String toString() { 1245 return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 1246 } 1247 } 1248}