001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.math.RoundingMode.CEILING; 024import static java.math.RoundingMode.FLOOR; 025import static java.math.RoundingMode.UNNECESSARY; 026 027import com.google.common.annotations.GwtCompatible; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.Ascii; 030import com.google.common.base.Objects; 031import com.google.errorprone.annotations.concurrent.LazyInit; 032import java.io.IOException; 033import java.io.InputStream; 034import java.io.OutputStream; 035import java.io.Reader; 036import java.io.Writer; 037import java.util.Arrays; 038import org.checkerframework.checker.nullness.compatqual.NullableDecl; 039 040/** 041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 042 * strings. This class includes several constants for encoding schemes specified by <a 043 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression: 044 * 045 * <pre>{@code 046 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII)) 047 * }</pre> 048 * 049 * <p>returns the string {@code "MZXW6==="}, and 050 * 051 * <pre>{@code 052 * byte[] decoded = BaseEncoding.base32().decode("MZXW6==="); 053 * }</pre> 054 * 055 * <p>...returns the ASCII bytes of the string {@code "foo"}. 056 * 057 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 058 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 059 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 060 * behavior: 061 * 062 * <pre>{@code 063 * BaseEncoding.base16().lowerCase().decode("deadbeef"); 064 * }</pre> 065 * 066 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 067 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 068 * 069 * <pre>{@code 070 * // Do NOT do this 071 * BaseEncoding hex = BaseEncoding.base16(); 072 * hex.lowerCase(); // does nothing! 073 * return hex.decode("deadbeef"); // throws an IllegalArgumentException 074 * }</pre> 075 * 076 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code 077 * x}, but the reverse does not necessarily hold. 078 * 079 * <table> 080 * <caption>Encodings</caption> 081 * <tr> 082 * <th>Encoding 083 * <th>Alphabet 084 * <th>{@code char:byte} ratio 085 * <th>Default padding 086 * <th>Comments 087 * <tr> 088 * <td>{@link #base16()} 089 * <td>0-9 A-F 090 * <td>2.00 091 * <td>N/A 092 * <td>Traditional hexadecimal. Defaults to upper case. 093 * <tr> 094 * <td>{@link #base32()} 095 * <td>A-Z 2-7 096 * <td>1.60 097 * <td>= 098 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 099 * <tr> 100 * <td>{@link #base32Hex()} 101 * <td>0-9 A-V 102 * <td>1.60 103 * <td>= 104 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 105 * <tr> 106 * <td>{@link #base64()} 107 * <td>A-Z a-z 0-9 + / 108 * <td>1.33 109 * <td>= 110 * <td> 111 * <tr> 112 * <td>{@link #base64Url()} 113 * <td>A-Z a-z 0-9 - _ 114 * <td>1.33 115 * <td>= 116 * <td>Safe to use as filenames, or to pass in URLs without escaping 117 * </table> 118 * 119 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 120 * 121 * @author Louis Wasserman 122 * @since 14.0 123 */ 124@GwtCompatible(emulated = true) 125public abstract class BaseEncoding { 126 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 127 128 BaseEncoding() {} 129 130 /** 131 * Exception indicating invalid base-encoded input encountered while decoding. 132 * 133 * @author Louis Wasserman 134 * @since 15.0 135 */ 136 public static final class DecodingException extends IOException { 137 DecodingException(String message) { 138 super(message); 139 } 140 141 DecodingException(Throwable cause) { 142 super(cause); 143 } 144 } 145 146 /** Encodes the specified byte array, and returns the encoded {@code String}. */ 147 public String encode(byte[] bytes) { 148 return encode(bytes, 0, bytes.length); 149 } 150 151 /** 152 * Encodes the specified range of the specified byte array, and returns the encoded {@code 153 * String}. 154 */ 155 public final String encode(byte[] bytes, int off, int len) { 156 checkPositionIndexes(off, off + len, bytes.length); 157 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 158 try { 159 encodeTo(result, bytes, off, len); 160 } catch (IOException impossible) { 161 throw new AssertionError(impossible); 162 } 163 return result.toString(); 164 } 165 166 /** 167 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 168 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing {@code 169 * Writer}. 170 */ 171 @GwtIncompatible // Writer,OutputStream 172 public abstract OutputStream encodingStream(Writer writer); 173 174 /** 175 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 176 */ 177 @GwtIncompatible // ByteSink,CharSink 178 public final ByteSink encodingSink(final CharSink encodedSink) { 179 checkNotNull(encodedSink); 180 return new ByteSink() { 181 @Override 182 public OutputStream openStream() throws IOException { 183 return encodingStream(encodedSink.openStream()); 184 } 185 }; 186 } 187 188 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 189 190 private static byte[] extract(byte[] result, int length) { 191 if (length == result.length) { 192 return result; 193 } else { 194 byte[] trunc = new byte[length]; 195 System.arraycopy(result, 0, trunc, 0, length); 196 return trunc; 197 } 198 } 199 200 /** 201 * Determines whether the specified character sequence is a valid encoded string according to this 202 * encoding. 203 * 204 * @since 20.0 205 */ 206 public abstract boolean canDecode(CharSequence chars); 207 208 /** 209 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 210 * inverse operation to {@link #encode(byte[])}. 211 * 212 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 213 * encoding. 214 */ 215 public final byte[] decode(CharSequence chars) { 216 try { 217 return decodeChecked(chars); 218 } catch (DecodingException badInput) { 219 throw new IllegalArgumentException(badInput); 220 } 221 } 222 223 /** 224 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 225 * inverse operation to {@link #encode(byte[])}. 226 * 227 * @throws DecodingException if the input is not a valid encoded string according to this 228 * encoding. 229 */ final byte[] decodeChecked(CharSequence chars) 230 throws DecodingException { 231 chars = trimTrailingPadding(chars); 232 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 233 int len = decodeTo(tmp, chars); 234 return extract(tmp, len); 235 } 236 237 /** 238 * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code 239 * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors. 240 */ 241 @GwtIncompatible // Reader,InputStream 242 public abstract InputStream decodingStream(Reader reader); 243 244 /** 245 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code 246 * CharSource}. 247 */ 248 @GwtIncompatible // ByteSource,CharSource 249 public final ByteSource decodingSource(final CharSource encodedSource) { 250 checkNotNull(encodedSource); 251 return new ByteSource() { 252 @Override 253 public InputStream openStream() throws IOException { 254 return decodingStream(encodedSource.openStream()); 255 } 256 }; 257 } 258 259 // Implementations for encoding/decoding 260 261 abstract int maxEncodedSize(int bytes); 262 263 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 264 265 abstract int maxDecodedSize(int chars); 266 267 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 268 269 CharSequence trimTrailingPadding(CharSequence chars) { 270 return checkNotNull(chars); 271 } 272 273 // Modified encoding generators 274 275 /** 276 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 277 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 278 * section 3.2</a>, Padding of Encoded Data. 279 */ 280 public abstract BaseEncoding omitPadding(); 281 282 /** 283 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 284 * for padding. 285 * 286 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 287 * separator 288 */ 289 public abstract BaseEncoding withPadChar(char padChar); 290 291 /** 292 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 293 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 294 * are skipped over in decoding. 295 * 296 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 297 * string, or if {@code n <= 0} 298 * @throws UnsupportedOperationException if this encoding already uses a separator 299 */ 300 public abstract BaseEncoding withSeparator(String separator, int n); 301 302 /** 303 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 304 * uppercase letters. Padding and separator characters remain in their original case. 305 * 306 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 307 * lower-case characters 308 */ 309 public abstract BaseEncoding upperCase(); 310 311 /** 312 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 313 * lowercase letters. Padding and separator characters remain in their original case. 314 * 315 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 316 * lower-case characters 317 */ 318 public abstract BaseEncoding lowerCase(); 319 320 private static final BaseEncoding BASE64 = 321 new Base64Encoding( 322 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 323 324 /** 325 * The "base64" base encoding specified by <a 326 * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding. 327 * (This is the same as the base 64 encoding from <a 328 * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 329 * 330 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 331 * omitted} or {@linkplain #withPadChar(char) replaced}. 332 * 333 * <p>No line feeds are added by default, as per <a 334 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 335 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 336 */ 337 public static BaseEncoding base64() { 338 return BASE64; 339 } 340 341 private static final BaseEncoding BASE64_URL = 342 new Base64Encoding( 343 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 344 345 /** 346 * The "base64url" encoding specified by <a 347 * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 348 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This 349 * is the same as the base 64 encoding with URL and filename safe alphabet from <a 350 * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 351 * 352 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 353 * omitted} or {@linkplain #withPadChar(char) replaced}. 354 * 355 * <p>No line feeds are added by default, as per <a 356 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 357 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 358 */ 359 public static BaseEncoding base64Url() { 360 return BASE64_URL; 361 } 362 363 private static final BaseEncoding BASE32 = 364 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 365 366 /** 367 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC 368 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a 369 * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 370 * 371 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 372 * omitted} or {@linkplain #withPadChar(char) replaced}. 373 * 374 * <p>No line feeds are added by default, as per <a 375 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 376 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 377 */ 378 public static BaseEncoding base32() { 379 return BASE32; 380 } 381 382 private static final BaseEncoding BASE32_HEX = 383 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 384 385 /** 386 * The "base32hex" encoding specified by <a 387 * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 388 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 389 * 390 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 391 * omitted} or {@linkplain #withPadChar(char) replaced}. 392 * 393 * <p>No line feeds are added by default, as per <a 394 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 395 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 396 */ 397 public static BaseEncoding base32Hex() { 398 return BASE32_HEX; 399 } 400 401 private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF"); 402 403 /** 404 * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC 405 * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from <a 406 * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 407 * "hexadecimal" format. 408 * 409 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()} 410 * have no effect. 411 * 412 * <p>No line feeds are added by default, as per <a 413 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 414 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 415 */ 416 public static BaseEncoding base16() { 417 return BASE16; 418 } 419 420 private static final class Alphabet { 421 private final String name; 422 // this is meant to be immutable -- don't modify it! 423 private final char[] chars; 424 final int mask; 425 final int bitsPerChar; 426 final int charsPerChunk; 427 final int bytesPerChunk; 428 private final byte[] decodabet; 429 private final boolean[] validPadding; 430 431 Alphabet(String name, char[] chars) { 432 this.name = checkNotNull(name); 433 this.chars = checkNotNull(chars); 434 try { 435 this.bitsPerChar = log2(chars.length, UNNECESSARY); 436 } catch (ArithmeticException e) { 437 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 438 } 439 440 /* 441 * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes 442 * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. 443 */ 444 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); 445 try { 446 this.charsPerChunk = 8 / gcd; 447 this.bytesPerChunk = bitsPerChar / gcd; 448 } catch (ArithmeticException e) { 449 throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e); 450 } 451 452 this.mask = chars.length - 1; 453 454 byte[] decodabet = new byte[Ascii.MAX + 1]; 455 Arrays.fill(decodabet, (byte) -1); 456 for (int i = 0; i < chars.length; i++) { 457 char c = chars[i]; 458 checkArgument(c < decodabet.length, "Non-ASCII character: %s", c); 459 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 460 decodabet[c] = (byte) i; 461 } 462 this.decodabet = decodabet; 463 464 boolean[] validPadding = new boolean[charsPerChunk]; 465 for (int i = 0; i < bytesPerChunk; i++) { 466 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 467 } 468 this.validPadding = validPadding; 469 } 470 471 char encode(int bits) { 472 return chars[bits]; 473 } 474 475 boolean isValidPaddingStartPosition(int index) { 476 return validPadding[index % charsPerChunk]; 477 } 478 479 boolean canDecode(char ch) { 480 return ch <= Ascii.MAX && decodabet[ch] != -1; 481 } 482 483 int decode(char ch) throws DecodingException { 484 if (ch > Ascii.MAX) { 485 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 486 } 487 int result = decodabet[ch]; 488 if (result == -1) { 489 if (ch <= 0x20 || ch == Ascii.MAX) { 490 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 491 } else { 492 throw new DecodingException("Unrecognized character: " + ch); 493 } 494 } 495 return result; 496 } 497 498 private boolean hasLowerCase() { 499 for (char c : chars) { 500 if (Ascii.isLowerCase(c)) { 501 return true; 502 } 503 } 504 return false; 505 } 506 507 private boolean hasUpperCase() { 508 for (char c : chars) { 509 if (Ascii.isUpperCase(c)) { 510 return true; 511 } 512 } 513 return false; 514 } 515 516 Alphabet upperCase() { 517 if (!hasLowerCase()) { 518 return this; 519 } else { 520 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 521 char[] upperCased = new char[chars.length]; 522 for (int i = 0; i < chars.length; i++) { 523 upperCased[i] = Ascii.toUpperCase(chars[i]); 524 } 525 return new Alphabet(name + ".upperCase()", upperCased); 526 } 527 } 528 529 Alphabet lowerCase() { 530 if (!hasUpperCase()) { 531 return this; 532 } else { 533 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 534 char[] lowerCased = new char[chars.length]; 535 for (int i = 0; i < chars.length; i++) { 536 lowerCased[i] = Ascii.toLowerCase(chars[i]); 537 } 538 return new Alphabet(name + ".lowerCase()", lowerCased); 539 } 540 } 541 542 public boolean matches(char c) { 543 return c < decodabet.length && decodabet[c] != -1; 544 } 545 546 @Override 547 public String toString() { 548 return name; 549 } 550 551 @Override 552 public boolean equals(@NullableDecl Object other) { 553 if (other instanceof Alphabet) { 554 Alphabet that = (Alphabet) other; 555 return Arrays.equals(this.chars, that.chars); 556 } 557 return false; 558 } 559 560 @Override 561 public int hashCode() { 562 return Arrays.hashCode(chars); 563 } 564 } 565 566 static class StandardBaseEncoding extends BaseEncoding { 567 // TODO(lowasser): provide a useful toString 568 final Alphabet alphabet; 569 570 @NullableDecl final Character paddingChar; 571 572 StandardBaseEncoding(String name, String alphabetChars, @NullableDecl Character paddingChar) { 573 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 574 } 575 576 StandardBaseEncoding(Alphabet alphabet, @NullableDecl Character paddingChar) { 577 this.alphabet = checkNotNull(alphabet); 578 checkArgument( 579 paddingChar == null || !alphabet.matches(paddingChar), 580 "Padding character %s was already in alphabet", 581 paddingChar); 582 this.paddingChar = paddingChar; 583 } 584 585 @Override 586 int maxEncodedSize(int bytes) { 587 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 588 } 589 590 @GwtIncompatible // Writer,OutputStream 591 @Override 592 public OutputStream encodingStream(final Writer out) { 593 checkNotNull(out); 594 return new OutputStream() { 595 int bitBuffer = 0; 596 int bitBufferLength = 0; 597 int writtenChars = 0; 598 599 @Override 600 public void write(int b) throws IOException { 601 bitBuffer <<= 8; 602 bitBuffer |= b & 0xFF; 603 bitBufferLength += 8; 604 while (bitBufferLength >= alphabet.bitsPerChar) { 605 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 606 out.write(alphabet.encode(charIndex)); 607 writtenChars++; 608 bitBufferLength -= alphabet.bitsPerChar; 609 } 610 } 611 612 @Override 613 public void flush() throws IOException { 614 out.flush(); 615 } 616 617 @Override 618 public void close() throws IOException { 619 if (bitBufferLength > 0) { 620 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 621 out.write(alphabet.encode(charIndex)); 622 writtenChars++; 623 if (paddingChar != null) { 624 while (writtenChars % alphabet.charsPerChunk != 0) { 625 out.write(paddingChar.charValue()); 626 writtenChars++; 627 } 628 } 629 } 630 out.close(); 631 } 632 }; 633 } 634 635 @Override 636 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 637 checkNotNull(target); 638 checkPositionIndexes(off, off + len, bytes.length); 639 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 640 encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i)); 641 } 642 } 643 644 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 645 checkNotNull(target); 646 checkPositionIndexes(off, off + len, bytes.length); 647 checkArgument(len <= alphabet.bytesPerChunk); 648 long bitBuffer = 0; 649 for (int i = 0; i < len; ++i) { 650 bitBuffer |= bytes[off + i] & 0xFF; 651 bitBuffer <<= 8; // Add additional zero byte in the end. 652 } 653 // Position of first character is length of bitBuffer minus bitsPerChar. 654 final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 655 int bitsProcessed = 0; 656 while (bitsProcessed < len * 8) { 657 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 658 target.append(alphabet.encode(charIndex)); 659 bitsProcessed += alphabet.bitsPerChar; 660 } 661 if (paddingChar != null) { 662 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 663 target.append(paddingChar.charValue()); 664 bitsProcessed += alphabet.bitsPerChar; 665 } 666 } 667 } 668 669 @Override 670 int maxDecodedSize(int chars) { 671 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 672 } 673 674 @Override 675 CharSequence trimTrailingPadding(CharSequence chars) { 676 checkNotNull(chars); 677 if (paddingChar == null) { 678 return chars; 679 } 680 char padChar = paddingChar.charValue(); 681 int l; 682 for (l = chars.length() - 1; l >= 0; l--) { 683 if (chars.charAt(l) != padChar) { 684 break; 685 } 686 } 687 return chars.subSequence(0, l + 1); 688 } 689 690 @Override 691 public boolean canDecode(CharSequence chars) { 692 checkNotNull(chars); 693 chars = trimTrailingPadding(chars); 694 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 695 return false; 696 } 697 for (int i = 0; i < chars.length(); i++) { 698 if (!alphabet.canDecode(chars.charAt(i))) { 699 return false; 700 } 701 } 702 return true; 703 } 704 705 @Override 706 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 707 checkNotNull(target); 708 chars = trimTrailingPadding(chars); 709 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 710 throw new DecodingException("Invalid input length " + chars.length()); 711 } 712 int bytesWritten = 0; 713 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 714 long chunk = 0; 715 int charsProcessed = 0; 716 for (int i = 0; i < alphabet.charsPerChunk; i++) { 717 chunk <<= alphabet.bitsPerChar; 718 if (charIdx + i < chars.length()) { 719 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 720 } 721 } 722 final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 723 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 724 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 725 } 726 } 727 return bytesWritten; 728 } 729 730 @Override 731 @GwtIncompatible // Reader,InputStream 732 public InputStream decodingStream(final Reader reader) { 733 checkNotNull(reader); 734 return new InputStream() { 735 int bitBuffer = 0; 736 int bitBufferLength = 0; 737 int readChars = 0; 738 boolean hitPadding = false; 739 740 @Override 741 public int read() throws IOException { 742 while (true) { 743 int readChar = reader.read(); 744 if (readChar == -1) { 745 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 746 throw new DecodingException("Invalid input length " + readChars); 747 } 748 return -1; 749 } 750 readChars++; 751 char ch = (char) readChar; 752 if (paddingChar != null && paddingChar.charValue() == ch) { 753 if (!hitPadding 754 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 755 throw new DecodingException("Padding cannot start at index " + readChars); 756 } 757 hitPadding = true; 758 } else if (hitPadding) { 759 throw new DecodingException( 760 "Expected padding character but found '" + ch + "' at index " + readChars); 761 } else { 762 bitBuffer <<= alphabet.bitsPerChar; 763 bitBuffer |= alphabet.decode(ch); 764 bitBufferLength += alphabet.bitsPerChar; 765 766 if (bitBufferLength >= 8) { 767 bitBufferLength -= 8; 768 return (bitBuffer >> bitBufferLength) & 0xFF; 769 } 770 } 771 } 772 } 773 774 @Override 775 public int read(byte[] buf, int off, int len) throws IOException { 776 // Overriding this to work around the fact that InputStream's default implementation of 777 // this method will silently swallow exceptions thrown by the single-byte read() method 778 // (other than on the first call to it), which in this case can cause invalid encoded 779 // strings to not throw an exception. 780 // See https://github.com/google/guava/issues/3542 781 checkPositionIndexes(off, off + len, buf.length); 782 783 int i = off; 784 for (; i < off + len; i++) { 785 int b = read(); 786 if (b == -1) { 787 int read = i - off; 788 return read == 0 ? -1 : read; 789 } 790 buf[i] = (byte) b; 791 } 792 return i - off; 793 } 794 795 @Override 796 public void close() throws IOException { 797 reader.close(); 798 } 799 }; 800 } 801 802 @Override 803 public BaseEncoding omitPadding() { 804 return (paddingChar == null) ? this : newInstance(alphabet, null); 805 } 806 807 @Override 808 public BaseEncoding withPadChar(char padChar) { 809 if (8 % alphabet.bitsPerChar == 0 810 || (paddingChar != null && paddingChar.charValue() == padChar)) { 811 return this; 812 } else { 813 return newInstance(alphabet, padChar); 814 } 815 } 816 817 @Override 818 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 819 for (int i = 0; i < separator.length(); i++) { 820 checkArgument( 821 !alphabet.matches(separator.charAt(i)), 822 "Separator (%s) cannot contain alphabet characters", 823 separator); 824 } 825 if (paddingChar != null) { 826 checkArgument( 827 separator.indexOf(paddingChar.charValue()) < 0, 828 "Separator (%s) cannot contain padding character", 829 separator); 830 } 831 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 832 } 833 834 @LazyInit @NullableDecl private transient BaseEncoding upperCase; 835 @LazyInit @NullableDecl private transient BaseEncoding lowerCase; 836 837 @Override 838 public BaseEncoding upperCase() { 839 BaseEncoding result = upperCase; 840 if (result == null) { 841 Alphabet upper = alphabet.upperCase(); 842 result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar); 843 } 844 return result; 845 } 846 847 @Override 848 public BaseEncoding lowerCase() { 849 BaseEncoding result = lowerCase; 850 if (result == null) { 851 Alphabet lower = alphabet.lowerCase(); 852 result = lowerCase = (lower == alphabet) ? this : newInstance(lower, paddingChar); 853 } 854 return result; 855 } 856 857 BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) { 858 return new StandardBaseEncoding(alphabet, paddingChar); 859 } 860 861 @Override 862 public String toString() { 863 StringBuilder builder = new StringBuilder("BaseEncoding."); 864 builder.append(alphabet.toString()); 865 if (8 % alphabet.bitsPerChar != 0) { 866 if (paddingChar == null) { 867 builder.append(".omitPadding()"); 868 } else { 869 builder.append(".withPadChar('").append(paddingChar).append("')"); 870 } 871 } 872 return builder.toString(); 873 } 874 875 @Override 876 public boolean equals(@NullableDecl Object other) { 877 if (other instanceof StandardBaseEncoding) { 878 StandardBaseEncoding that = (StandardBaseEncoding) other; 879 return this.alphabet.equals(that.alphabet) 880 && Objects.equal(this.paddingChar, that.paddingChar); 881 } 882 return false; 883 } 884 885 @Override 886 public int hashCode() { 887 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 888 } 889 } 890 891 static final class Base16Encoding extends StandardBaseEncoding { 892 final char[] encoding = new char[512]; 893 894 Base16Encoding(String name, String alphabetChars) { 895 this(new Alphabet(name, alphabetChars.toCharArray())); 896 } 897 898 private Base16Encoding(Alphabet alphabet) { 899 super(alphabet, null); 900 checkArgument(alphabet.chars.length == 16); 901 for (int i = 0; i < 256; ++i) { 902 encoding[i] = alphabet.encode(i >>> 4); 903 encoding[i | 0x100] = alphabet.encode(i & 0xF); 904 } 905 } 906 907 @Override 908 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 909 checkNotNull(target); 910 checkPositionIndexes(off, off + len, bytes.length); 911 for (int i = 0; i < len; ++i) { 912 int b = bytes[off + i] & 0xFF; 913 target.append(encoding[b]); 914 target.append(encoding[b | 0x100]); 915 } 916 } 917 918 @Override 919 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 920 checkNotNull(target); 921 if (chars.length() % 2 == 1) { 922 throw new DecodingException("Invalid input length " + chars.length()); 923 } 924 int bytesWritten = 0; 925 for (int i = 0; i < chars.length(); i += 2) { 926 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 927 target[bytesWritten++] = (byte) decoded; 928 } 929 return bytesWritten; 930 } 931 932 @Override 933 BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) { 934 return new Base16Encoding(alphabet); 935 } 936 } 937 938 static final class Base64Encoding extends StandardBaseEncoding { 939 Base64Encoding(String name, String alphabetChars, @NullableDecl Character paddingChar) { 940 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 941 } 942 943 private Base64Encoding(Alphabet alphabet, @NullableDecl Character paddingChar) { 944 super(alphabet, paddingChar); 945 checkArgument(alphabet.chars.length == 64); 946 } 947 948 @Override 949 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 950 checkNotNull(target); 951 checkPositionIndexes(off, off + len, bytes.length); 952 int i = off; 953 for (int remaining = len; remaining >= 3; remaining -= 3) { 954 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 955 target.append(alphabet.encode(chunk >>> 18)); 956 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 957 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 958 target.append(alphabet.encode(chunk & 0x3F)); 959 } 960 if (i < off + len) { 961 encodeChunkTo(target, bytes, i, off + len - i); 962 } 963 } 964 965 @Override 966 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 967 checkNotNull(target); 968 chars = trimTrailingPadding(chars); 969 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 970 throw new DecodingException("Invalid input length " + chars.length()); 971 } 972 int bytesWritten = 0; 973 for (int i = 0; i < chars.length(); ) { 974 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 975 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 976 target[bytesWritten++] = (byte) (chunk >>> 16); 977 if (i < chars.length()) { 978 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 979 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 980 if (i < chars.length()) { 981 chunk |= alphabet.decode(chars.charAt(i++)); 982 target[bytesWritten++] = (byte) (chunk & 0xFF); 983 } 984 } 985 } 986 return bytesWritten; 987 } 988 989 @Override 990 BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) { 991 return new Base64Encoding(alphabet, paddingChar); 992 } 993 } 994 995 @GwtIncompatible 996 static Reader ignoringReader(final Reader delegate, final String toIgnore) { 997 checkNotNull(delegate); 998 checkNotNull(toIgnore); 999 return new Reader() { 1000 @Override 1001 public int read() throws IOException { 1002 int readChar; 1003 do { 1004 readChar = delegate.read(); 1005 } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0); 1006 return readChar; 1007 } 1008 1009 @Override 1010 public int read(char[] cbuf, int off, int len) throws IOException { 1011 throw new UnsupportedOperationException(); 1012 } 1013 1014 @Override 1015 public void close() throws IOException { 1016 delegate.close(); 1017 } 1018 }; 1019 } 1020 1021 static Appendable separatingAppendable( 1022 final Appendable delegate, final String separator, final int afterEveryChars) { 1023 checkNotNull(delegate); 1024 checkNotNull(separator); 1025 checkArgument(afterEveryChars > 0); 1026 return new Appendable() { 1027 int charsUntilSeparator = afterEveryChars; 1028 1029 @Override 1030 public Appendable append(char c) throws IOException { 1031 if (charsUntilSeparator == 0) { 1032 delegate.append(separator); 1033 charsUntilSeparator = afterEveryChars; 1034 } 1035 delegate.append(c); 1036 charsUntilSeparator--; 1037 return this; 1038 } 1039 1040 @Override 1041 public Appendable append(@NullableDecl CharSequence chars, int off, int len) 1042 throws IOException { 1043 throw new UnsupportedOperationException(); 1044 } 1045 1046 @Override 1047 public Appendable append(@NullableDecl CharSequence chars) throws IOException { 1048 throw new UnsupportedOperationException(); 1049 } 1050 }; 1051 } 1052 1053 @GwtIncompatible // Writer 1054 static Writer separatingWriter( 1055 final Writer delegate, final String separator, final int afterEveryChars) { 1056 final Appendable seperatingAppendable = 1057 separatingAppendable(delegate, separator, afterEveryChars); 1058 return new Writer() { 1059 @Override 1060 public void write(int c) throws IOException { 1061 seperatingAppendable.append((char) c); 1062 } 1063 1064 @Override 1065 public void write(char[] chars, int off, int len) throws IOException { 1066 throw new UnsupportedOperationException(); 1067 } 1068 1069 @Override 1070 public void flush() throws IOException { 1071 delegate.flush(); 1072 } 1073 1074 @Override 1075 public void close() throws IOException { 1076 delegate.close(); 1077 } 1078 }; 1079 } 1080 1081 static final class SeparatedBaseEncoding extends BaseEncoding { 1082 private final BaseEncoding delegate; 1083 private final String separator; 1084 private final int afterEveryChars; 1085 1086 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1087 this.delegate = checkNotNull(delegate); 1088 this.separator = checkNotNull(separator); 1089 this.afterEveryChars = afterEveryChars; 1090 checkArgument( 1091 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1092 } 1093 1094 @Override 1095 CharSequence trimTrailingPadding(CharSequence chars) { 1096 return delegate.trimTrailingPadding(chars); 1097 } 1098 1099 @Override 1100 int maxEncodedSize(int bytes) { 1101 int unseparatedSize = delegate.maxEncodedSize(bytes); 1102 return unseparatedSize 1103 + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR); 1104 } 1105 1106 @GwtIncompatible // Writer,OutputStream 1107 @Override 1108 public OutputStream encodingStream(final Writer output) { 1109 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1110 } 1111 1112 @Override 1113 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1114 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1115 } 1116 1117 @Override 1118 int maxDecodedSize(int chars) { 1119 return delegate.maxDecodedSize(chars); 1120 } 1121 1122 @Override 1123 public boolean canDecode(CharSequence chars) { 1124 StringBuilder builder = new StringBuilder(); 1125 for (int i = 0; i < chars.length(); i++) { 1126 char c = chars.charAt(i); 1127 if (separator.indexOf(c) < 0) { 1128 builder.append(c); 1129 } 1130 } 1131 return delegate.canDecode(builder); 1132 } 1133 1134 @Override 1135 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1136 StringBuilder stripped = new StringBuilder(chars.length()); 1137 for (int i = 0; i < chars.length(); i++) { 1138 char c = chars.charAt(i); 1139 if (separator.indexOf(c) < 0) { 1140 stripped.append(c); 1141 } 1142 } 1143 return delegate.decodeTo(target, stripped); 1144 } 1145 1146 @Override 1147 @GwtIncompatible // Reader,InputStream 1148 public InputStream decodingStream(final Reader reader) { 1149 return delegate.decodingStream(ignoringReader(reader, separator)); 1150 } 1151 1152 @Override 1153 public BaseEncoding omitPadding() { 1154 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1155 } 1156 1157 @Override 1158 public BaseEncoding withPadChar(char padChar) { 1159 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1160 } 1161 1162 @Override 1163 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1164 throw new UnsupportedOperationException("Already have a separator"); 1165 } 1166 1167 @Override 1168 public BaseEncoding upperCase() { 1169 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1170 } 1171 1172 @Override 1173 public BaseEncoding lowerCase() { 1174 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1175 } 1176 1177 @Override 1178 public String toString() { 1179 return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 1180 } 1181 } 1182}