001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.math.RoundingMode.CEILING; 024import static java.math.RoundingMode.FLOOR; 025import static java.math.RoundingMode.UNNECESSARY; 026 027import com.google.common.annotations.GwtCompatible; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.Ascii; 030import com.google.common.base.Objects; 031import java.io.IOException; 032import java.io.InputStream; 033import java.io.OutputStream; 034import java.io.Reader; 035import java.io.Writer; 036import java.util.Arrays; 037import org.checkerframework.checker.nullness.compatqual.NullableDecl; 038 039/** 040 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 041 * strings. This class includes several constants for encoding schemes specified by <a 042 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. 
For example, the expression: 043 * 044 * <pre>{@code 045 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII)) 046 * }</pre> 047 * 048 * <p>returns the string {@code "MZXW6==="}, and 049 * 050 * <pre>{@code 051 * byte[] decoded = BaseEncoding.base32().decode("MZXW6==="); 052 * }</pre> 053 * 054 * <p>...returns the ASCII bytes of the string {@code "foo"}. 055 * 056 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 057 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 058 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 059 * behavior: 060 * 061 * <pre>{@code 062 * BaseEncoding.base16().lowerCase().decode("deadbeef"); 063 * }</pre> 064 * 065 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 066 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 067 * 068 * <pre>{@code 069 * // Do NOT do this 070 * BaseEncoding hex = BaseEncoding.base16(); 071 * hex.lowerCase(); // does nothing! 072 * return hex.decode("deadbeef"); // throws an IllegalArgumentException 073 * }</pre> 074 * 075 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to {@code 076 * x}, but the reverse does not necessarily hold. 077 * 078 * <table> 079 * <caption>Encodings</caption> 080 * <tr> 081 * <th>Encoding 082 * <th>Alphabet 083 * <th>{@code char:byte} ratio 084 * <th>Default padding 085 * <th>Comments 086 * <tr> 087 * <td>{@link #base16()} 088 * <td>0-9 A-F 089 * <td>2.00 090 * <td>N/A 091 * <td>Traditional hexadecimal. Defaults to upper case. 092 * <tr> 093 * <td>{@link #base32()} 094 * <td>A-Z 2-7 095 * <td>1.60 096 * <td>= 097 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 
098 * <tr> 099 * <td>{@link #base32Hex()} 100 * <td>0-9 A-V 101 * <td>1.60 102 * <td>= 103 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 104 * <tr> 105 * <td>{@link #base64()} 106 * <td>A-Z a-z 0-9 + / 107 * <td>1.33 108 * <td>= 109 * <td> 110 * <tr> 111 * <td>{@link #base64Url()} 112 * <td>A-Z a-z 0-9 - _ 113 * <td>1.33 114 * <td>= 115 * <td>Safe to use as filenames, or to pass in URLs without escaping 116 * </table> 117 * 118 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 119 * 120 * @author Louis Wasserman 121 * @since 14.0 122 */ 123@GwtCompatible(emulated = true) 124public abstract class BaseEncoding { 125 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 126 127 BaseEncoding() {} 128 129 /** 130 * Exception indicating invalid base-encoded input encountered while decoding. 131 * 132 * @author Louis Wasserman 133 * @since 15.0 134 */ 135 public static final class DecodingException extends IOException { 136 DecodingException(String message) { 137 super(message); 138 } 139 140 DecodingException(Throwable cause) { 141 super(cause); 142 } 143 } 144 145 /** Encodes the specified byte array, and returns the encoded {@code String}. */ 146 public String encode(byte[] bytes) { 147 return encode(bytes, 0, bytes.length); 148 } 149 150 /** 151 * Encodes the specified range of the specified byte array, and returns the encoded {@code 152 * String}. 153 */ 154 public final String encode(byte[] bytes, int off, int len) { 155 checkPositionIndexes(off, off + len, bytes.length); 156 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 157 try { 158 encodeTo(result, bytes, off, len); 159 } catch (IOException impossible) { 160 throw new AssertionError(impossible); 161 } 162 return result.toString(); 163 } 164 165 /** 166 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 167 * {@code Writer}. 
When the returned {@code OutputStream} is closed, so is the backing {@code 168 * Writer}. 169 */ 170 @GwtIncompatible // Writer,OutputStream 171 public abstract OutputStream encodingStream(Writer writer); 172 173 /** 174 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 175 */ 176 @GwtIncompatible // ByteSink,CharSink 177 public final ByteSink encodingSink(final CharSink encodedSink) { 178 checkNotNull(encodedSink); 179 return new ByteSink() { 180 @Override 181 public OutputStream openStream() throws IOException { 182 return encodingStream(encodedSink.openStream()); 183 } 184 }; 185 } 186 187 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 188 189 private static byte[] extract(byte[] result, int length) { 190 if (length == result.length) { 191 return result; 192 } else { 193 byte[] trunc = new byte[length]; 194 System.arraycopy(result, 0, trunc, 0, length); 195 return trunc; 196 } 197 } 198 199 /** 200 * Determines whether the specified character sequence is a valid encoded string according to this 201 * encoding. 202 * 203 * @since 20.0 204 */ 205 public abstract boolean canDecode(CharSequence chars); 206 207 /** 208 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 209 * inverse operation to {@link #encode(byte[])}. 210 * 211 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 212 * encoding. 213 */ 214 public final byte[] decode(CharSequence chars) { 215 try { 216 return decodeChecked(chars); 217 } catch (DecodingException badInput) { 218 throw new IllegalArgumentException(badInput); 219 } 220 } 221 222 /** 223 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 224 * inverse operation to {@link #encode(byte[])}. 225 * 226 * @throws DecodingException if the input is not a valid encoded string according to this 227 * encoding. 
228 */ final byte[] decodeChecked(CharSequence chars) 229 throws DecodingException { 230 chars = trimTrailingPadding(chars); 231 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 232 int len = decodeTo(tmp, chars); 233 return extract(tmp, len); 234 } 235 236 /** 237 * Returns an {@code InputStream} that decodes base-encoded input from the specified {@code 238 * Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific errors. 239 */ 240 @GwtIncompatible // Reader,InputStream 241 public abstract InputStream decodingStream(Reader reader); 242 243 /** 244 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified {@code 245 * CharSource}. 246 */ 247 @GwtIncompatible // ByteSource,CharSource 248 public final ByteSource decodingSource(final CharSource encodedSource) { 249 checkNotNull(encodedSource); 250 return new ByteSource() { 251 @Override 252 public InputStream openStream() throws IOException { 253 return decodingStream(encodedSource.openStream()); 254 } 255 }; 256 } 257 258 // Implementations for encoding/decoding 259 260 abstract int maxEncodedSize(int bytes); 261 262 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 263 264 abstract int maxDecodedSize(int chars); 265 266 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 267 268 CharSequence trimTrailingPadding(CharSequence chars) { 269 return checkNotNull(chars); 270 } 271 272 // Modified encoding generators 273 274 /** 275 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 276 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 277 * section 3.2</a>, Padding of Encoded Data. 278 */ 279 public abstract BaseEncoding omitPadding(); 280 281 /** 282 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 283 * for padding. 
/**
 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
 * for padding.
 *
 * @param padChar the character the returned encoding uses for padding
 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
 *     separator
 */
public abstract BaseEncoding withPadChar(char padChar);

/**
 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
 * after every {@code n} characters. Any occurrences of any characters that occur in the separator
 * are skipped over in decoding.
 *
 * @param separator the string inserted into the encoded output
 * @param n the number of encoded characters after which the separator is added
 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
 *     string, or if {@code n <= 0}
 * @throws UnsupportedOperationException if this encoding already uses a separator
 */
public abstract BaseEncoding withSeparator(String separator, int n);

/**
 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
 * uppercase letters. Padding and separator characters remain in their original case.
 *
 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
 *     lower-case characters
 */
public abstract BaseEncoding upperCase();

/**
 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
 * lowercase letters. Padding and separator characters remain in their original case.
 *
 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
 *     lower-case characters
 */
public abstract BaseEncoding lowerCase();

// Single shared instance returned by base64(); '=' is its default padding character.
private static final BaseEncoding BASE64 =
    new Base64Encoding(
        "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

/**
 * The "base64" base encoding specified by <a
 * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
 * (This is the same as the base 64 encoding from <a
 * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per <a
 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 */
public static BaseEncoding base64() {
  return BASE64;
}

// Single shared instance returned by base64Url(); differs from BASE64 only in its last two
// alphabet characters ('-' and '_' instead of '+' and '/').
private static final BaseEncoding BASE64_URL =
    new Base64Encoding(
        "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

/**
 * The "base64url" encoding specified by <a
 * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
 * is the same as the base 64 encoding with URL and filename safe alphabet from <a
 * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per <a
 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 */
public static BaseEncoding base64Url() {
  return BASE64_URL;
}

// Single shared instance returned by base32(); '=' is its default padding character.
private static final BaseEncoding BASE32 =
    new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

/**
 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from <a
 * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per <a
 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 */
public static BaseEncoding base32() {
  return BASE32;
}

// Single shared instance returned by base32Hex(); '=' is its default padding character.
private static final BaseEncoding BASE32_HEX =
    new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

/**
 * The "base32hex" encoding specified by <a
 * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per <a
 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in
 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 */
public static BaseEncoding base32Hex() {
  return BASE32_HEX;
}

// Single shared instance returned by base16(); constructed without a padding character.
private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");
410 * 411 * <p>No line feeds are added by default, as per <a 412 * href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds in 413 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 414 */ 415 public static BaseEncoding base16() { 416 return BASE16; 417 } 418 419 private static final class Alphabet { 420 private final String name; 421 // this is meant to be immutable -- don't modify it! 422 private final char[] chars; 423 final int mask; 424 final int bitsPerChar; 425 final int charsPerChunk; 426 final int bytesPerChunk; 427 private final byte[] decodabet; 428 private final boolean[] validPadding; 429 430 Alphabet(String name, char[] chars) { 431 this.name = checkNotNull(name); 432 this.chars = checkNotNull(chars); 433 try { 434 this.bitsPerChar = log2(chars.length, UNNECESSARY); 435 } catch (ArithmeticException e) { 436 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 437 } 438 439 /* 440 * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes 441 * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. 
442 */ 443 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); 444 try { 445 this.charsPerChunk = 8 / gcd; 446 this.bytesPerChunk = bitsPerChar / gcd; 447 } catch (ArithmeticException e) { 448 throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e); 449 } 450 451 this.mask = chars.length - 1; 452 453 byte[] decodabet = new byte[Ascii.MAX + 1]; 454 Arrays.fill(decodabet, (byte) -1); 455 for (int i = 0; i < chars.length; i++) { 456 char c = chars[i]; 457 checkArgument(c < decodabet.length, "Non-ASCII character: %s", c); 458 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 459 decodabet[c] = (byte) i; 460 } 461 this.decodabet = decodabet; 462 463 boolean[] validPadding = new boolean[charsPerChunk]; 464 for (int i = 0; i < bytesPerChunk; i++) { 465 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 466 } 467 this.validPadding = validPadding; 468 } 469 470 char encode(int bits) { 471 return chars[bits]; 472 } 473 474 boolean isValidPaddingStartPosition(int index) { 475 return validPadding[index % charsPerChunk]; 476 } 477 478 boolean canDecode(char ch) { 479 return ch <= Ascii.MAX && decodabet[ch] != -1; 480 } 481 482 int decode(char ch) throws DecodingException { 483 if (ch > Ascii.MAX) { 484 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 485 } 486 int result = decodabet[ch]; 487 if (result == -1) { 488 if (ch <= 0x20 || ch == Ascii.MAX) { 489 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 490 } else { 491 throw new DecodingException("Unrecognized character: " + ch); 492 } 493 } 494 return result; 495 } 496 497 private boolean hasLowerCase() { 498 for (char c : chars) { 499 if (Ascii.isLowerCase(c)) { 500 return true; 501 } 502 } 503 return false; 504 } 505 506 private boolean hasUpperCase() { 507 for (char c : chars) { 508 if (Ascii.isUpperCase(c)) { 509 return true; 510 } 511 } 512 return false; 513 } 514 515 Alphabet upperCase() { 516 if 
(!hasLowerCase()) { 517 return this; 518 } else { 519 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 520 char[] upperCased = new char[chars.length]; 521 for (int i = 0; i < chars.length; i++) { 522 upperCased[i] = Ascii.toUpperCase(chars[i]); 523 } 524 return new Alphabet(name + ".upperCase()", upperCased); 525 } 526 } 527 528 Alphabet lowerCase() { 529 if (!hasUpperCase()) { 530 return this; 531 } else { 532 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 533 char[] lowerCased = new char[chars.length]; 534 for (int i = 0; i < chars.length; i++) { 535 lowerCased[i] = Ascii.toLowerCase(chars[i]); 536 } 537 return new Alphabet(name + ".lowerCase()", lowerCased); 538 } 539 } 540 541 public boolean matches(char c) { 542 return c < decodabet.length && decodabet[c] != -1; 543 } 544 545 @Override 546 public String toString() { 547 return name; 548 } 549 550 @Override 551 public boolean equals(@NullableDecl Object other) { 552 if (other instanceof Alphabet) { 553 Alphabet that = (Alphabet) other; 554 return Arrays.equals(this.chars, that.chars); 555 } 556 return false; 557 } 558 559 @Override 560 public int hashCode() { 561 return Arrays.hashCode(chars); 562 } 563 } 564 565 static class StandardBaseEncoding extends BaseEncoding { 566 // TODO(lowasser): provide a useful toString 567 final Alphabet alphabet; 568 569 @NullableDecl final Character paddingChar; 570 571 StandardBaseEncoding(String name, String alphabetChars, @NullableDecl Character paddingChar) { 572 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 573 } 574 575 StandardBaseEncoding(Alphabet alphabet, @NullableDecl Character paddingChar) { 576 this.alphabet = checkNotNull(alphabet); 577 checkArgument( 578 paddingChar == null || !alphabet.matches(paddingChar), 579 "Padding character %s was already in alphabet", 580 paddingChar); 581 this.paddingChar = paddingChar; 582 } 583 584 @Override 585 int maxEncodedSize(int bytes) { 586 
return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 587 } 588 589 @GwtIncompatible // Writer,OutputStream 590 @Override 591 public OutputStream encodingStream(final Writer out) { 592 checkNotNull(out); 593 return new OutputStream() { 594 int bitBuffer = 0; 595 int bitBufferLength = 0; 596 int writtenChars = 0; 597 598 @Override 599 public void write(int b) throws IOException { 600 bitBuffer <<= 8; 601 bitBuffer |= b & 0xFF; 602 bitBufferLength += 8; 603 while (bitBufferLength >= alphabet.bitsPerChar) { 604 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 605 out.write(alphabet.encode(charIndex)); 606 writtenChars++; 607 bitBufferLength -= alphabet.bitsPerChar; 608 } 609 } 610 611 @Override 612 public void flush() throws IOException { 613 out.flush(); 614 } 615 616 @Override 617 public void close() throws IOException { 618 if (bitBufferLength > 0) { 619 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 620 out.write(alphabet.encode(charIndex)); 621 writtenChars++; 622 if (paddingChar != null) { 623 while (writtenChars % alphabet.charsPerChunk != 0) { 624 out.write(paddingChar.charValue()); 625 writtenChars++; 626 } 627 } 628 } 629 out.close(); 630 } 631 }; 632 } 633 634 @Override 635 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 636 checkNotNull(target); 637 checkPositionIndexes(off, off + len, bytes.length); 638 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 639 encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i)); 640 } 641 } 642 643 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 644 checkNotNull(target); 645 checkPositionIndexes(off, off + len, bytes.length); 646 checkArgument(len <= alphabet.bytesPerChunk); 647 long bitBuffer = 0; 648 for (int i = 0; i < len; ++i) { 649 bitBuffer |= bytes[off + i] & 0xFF; 650 bitBuffer <<= 8; // Add 
additional zero byte in the end. 651 } 652 // Position of first character is length of bitBuffer minus bitsPerChar. 653 final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 654 int bitsProcessed = 0; 655 while (bitsProcessed < len * 8) { 656 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 657 target.append(alphabet.encode(charIndex)); 658 bitsProcessed += alphabet.bitsPerChar; 659 } 660 if (paddingChar != null) { 661 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 662 target.append(paddingChar.charValue()); 663 bitsProcessed += alphabet.bitsPerChar; 664 } 665 } 666 } 667 668 @Override 669 int maxDecodedSize(int chars) { 670 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 671 } 672 673 @Override 674 CharSequence trimTrailingPadding(CharSequence chars) { 675 checkNotNull(chars); 676 if (paddingChar == null) { 677 return chars; 678 } 679 char padChar = paddingChar.charValue(); 680 int l; 681 for (l = chars.length() - 1; l >= 0; l--) { 682 if (chars.charAt(l) != padChar) { 683 break; 684 } 685 } 686 return chars.subSequence(0, l + 1); 687 } 688 689 @Override 690 public boolean canDecode(CharSequence chars) { 691 checkNotNull(chars); 692 chars = trimTrailingPadding(chars); 693 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 694 return false; 695 } 696 for (int i = 0; i < chars.length(); i++) { 697 if (!alphabet.canDecode(chars.charAt(i))) { 698 return false; 699 } 700 } 701 return true; 702 } 703 704 @Override 705 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 706 checkNotNull(target); 707 chars = trimTrailingPadding(chars); 708 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 709 throw new DecodingException("Invalid input length " + chars.length()); 710 } 711 int bytesWritten = 0; 712 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 713 long chunk = 0; 714 int charsProcessed = 0; 715 for (int i = 0; i < 
alphabet.charsPerChunk; i++) { 716 chunk <<= alphabet.bitsPerChar; 717 if (charIdx + i < chars.length()) { 718 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 719 } 720 } 721 final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 722 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 723 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 724 } 725 } 726 return bytesWritten; 727 } 728 729 @Override 730 @GwtIncompatible // Reader,InputStream 731 public InputStream decodingStream(final Reader reader) { 732 checkNotNull(reader); 733 return new InputStream() { 734 int bitBuffer = 0; 735 int bitBufferLength = 0; 736 int readChars = 0; 737 boolean hitPadding = false; 738 739 @Override 740 public int read() throws IOException { 741 while (true) { 742 int readChar = reader.read(); 743 if (readChar == -1) { 744 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 745 throw new DecodingException("Invalid input length " + readChars); 746 } 747 return -1; 748 } 749 readChars++; 750 char ch = (char) readChar; 751 if (paddingChar != null && paddingChar.charValue() == ch) { 752 if (!hitPadding 753 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 754 throw new DecodingException("Padding cannot start at index " + readChars); 755 } 756 hitPadding = true; 757 } else if (hitPadding) { 758 throw new DecodingException( 759 "Expected padding character but found '" + ch + "' at index " + readChars); 760 } else { 761 bitBuffer <<= alphabet.bitsPerChar; 762 bitBuffer |= alphabet.decode(ch); 763 bitBufferLength += alphabet.bitsPerChar; 764 765 if (bitBufferLength >= 8) { 766 bitBufferLength -= 8; 767 return (bitBuffer >> bitBufferLength) & 0xFF; 768 } 769 } 770 } 771 } 772 773 @Override 774 public void close() throws IOException { 775 reader.close(); 776 } 777 }; 778 } 779 780 @Override 781 public BaseEncoding omitPadding() { 782 return 
(paddingChar == null) ? this : newInstance(alphabet, null); 783 } 784 785 @Override 786 public BaseEncoding withPadChar(char padChar) { 787 if (8 % alphabet.bitsPerChar == 0 788 || (paddingChar != null && paddingChar.charValue() == padChar)) { 789 return this; 790 } else { 791 return newInstance(alphabet, padChar); 792 } 793 } 794 795 @Override 796 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 797 for (int i = 0; i < separator.length(); i++) { 798 checkArgument( 799 !alphabet.matches(separator.charAt(i)), 800 "Separator (%s) cannot contain alphabet characters", 801 separator); 802 } 803 if (paddingChar != null) { 804 checkArgument( 805 separator.indexOf(paddingChar.charValue()) < 0, 806 "Separator (%s) cannot contain padding character", 807 separator); 808 } 809 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 810 } 811 812 private transient BaseEncoding upperCase; 813 private transient BaseEncoding lowerCase; 814 815 @Override 816 public BaseEncoding upperCase() { 817 BaseEncoding result = upperCase; 818 if (result == null) { 819 Alphabet upper = alphabet.upperCase(); 820 result = upperCase = (upper == alphabet) ? this : newInstance(upper, paddingChar); 821 } 822 return result; 823 } 824 825 @Override 826 public BaseEncoding lowerCase() { 827 BaseEncoding result = lowerCase; 828 if (result == null) { 829 Alphabet lower = alphabet.lowerCase(); 830 result = lowerCase = (lower == alphabet) ? 
this : newInstance(lower, paddingChar); 831 } 832 return result; 833 } 834 835 BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) { 836 return new StandardBaseEncoding(alphabet, paddingChar); 837 } 838 839 @Override 840 public String toString() { 841 StringBuilder builder = new StringBuilder("BaseEncoding."); 842 builder.append(alphabet.toString()); 843 if (8 % alphabet.bitsPerChar != 0) { 844 if (paddingChar == null) { 845 builder.append(".omitPadding()"); 846 } else { 847 builder.append(".withPadChar('").append(paddingChar).append("')"); 848 } 849 } 850 return builder.toString(); 851 } 852 853 @Override 854 public boolean equals(@NullableDecl Object other) { 855 if (other instanceof StandardBaseEncoding) { 856 StandardBaseEncoding that = (StandardBaseEncoding) other; 857 return this.alphabet.equals(that.alphabet) 858 && Objects.equal(this.paddingChar, that.paddingChar); 859 } 860 return false; 861 } 862 863 @Override 864 public int hashCode() { 865 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 866 } 867 } 868 869 static final class Base16Encoding extends StandardBaseEncoding { 870 final char[] encoding = new char[512]; 871 872 Base16Encoding(String name, String alphabetChars) { 873 this(new Alphabet(name, alphabetChars.toCharArray())); 874 } 875 876 private Base16Encoding(Alphabet alphabet) { 877 super(alphabet, null); 878 checkArgument(alphabet.chars.length == 16); 879 for (int i = 0; i < 256; ++i) { 880 encoding[i] = alphabet.encode(i >>> 4); 881 encoding[i | 0x100] = alphabet.encode(i & 0xF); 882 } 883 } 884 885 @Override 886 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 887 checkNotNull(target); 888 checkPositionIndexes(off, off + len, bytes.length); 889 for (int i = 0; i < len; ++i) { 890 int b = bytes[off + i] & 0xFF; 891 target.append(encoding[b]); 892 target.append(encoding[b | 0x100]); 893 } 894 } 895 896 @Override 897 int decodeTo(byte[] target, CharSequence chars) 
throws DecodingException { 898 checkNotNull(target); 899 if (chars.length() % 2 == 1) { 900 throw new DecodingException("Invalid input length " + chars.length()); 901 } 902 int bytesWritten = 0; 903 for (int i = 0; i < chars.length(); i += 2) { 904 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 905 target[bytesWritten++] = (byte) decoded; 906 } 907 return bytesWritten; 908 } 909 910 @Override 911 BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) { 912 return new Base16Encoding(alphabet); 913 } 914 } 915 916 static final class Base64Encoding extends StandardBaseEncoding { 917 Base64Encoding(String name, String alphabetChars, @NullableDecl Character paddingChar) { 918 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 919 } 920 921 private Base64Encoding(Alphabet alphabet, @NullableDecl Character paddingChar) { 922 super(alphabet, paddingChar); 923 checkArgument(alphabet.chars.length == 64); 924 } 925 926 @Override 927 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 928 checkNotNull(target); 929 checkPositionIndexes(off, off + len, bytes.length); 930 int i = off; 931 for (int remaining = len; remaining >= 3; remaining -= 3) { 932 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 933 target.append(alphabet.encode(chunk >>> 18)); 934 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 935 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 936 target.append(alphabet.encode(chunk & 0x3F)); 937 } 938 if (i < off + len) { 939 encodeChunkTo(target, bytes, i, off + len - i); 940 } 941 } 942 943 @Override 944 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 945 checkNotNull(target); 946 chars = trimTrailingPadding(chars); 947 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 948 throw new DecodingException("Invalid input length " + chars.length()); 949 } 950 int 
bytesWritten = 0; 951 for (int i = 0; i < chars.length(); ) { 952 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 953 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 954 target[bytesWritten++] = (byte) (chunk >>> 16); 955 if (i < chars.length()) { 956 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 957 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 958 if (i < chars.length()) { 959 chunk |= alphabet.decode(chars.charAt(i++)); 960 target[bytesWritten++] = (byte) (chunk & 0xFF); 961 } 962 } 963 } 964 return bytesWritten; 965 } 966 967 @Override 968 BaseEncoding newInstance(Alphabet alphabet, @NullableDecl Character paddingChar) { 969 return new Base64Encoding(alphabet, paddingChar); 970 } 971 } 972 973 @GwtIncompatible 974 static Reader ignoringReader(final Reader delegate, final String toIgnore) { 975 checkNotNull(delegate); 976 checkNotNull(toIgnore); 977 return new Reader() { 978 @Override 979 public int read() throws IOException { 980 int readChar; 981 do { 982 readChar = delegate.read(); 983 } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0); 984 return readChar; 985 } 986 987 @Override 988 public int read(char[] cbuf, int off, int len) throws IOException { 989 throw new UnsupportedOperationException(); 990 } 991 992 @Override 993 public void close() throws IOException { 994 delegate.close(); 995 } 996 }; 997 } 998 999 static Appendable separatingAppendable( 1000 final Appendable delegate, final String separator, final int afterEveryChars) { 1001 checkNotNull(delegate); 1002 checkNotNull(separator); 1003 checkArgument(afterEveryChars > 0); 1004 return new Appendable() { 1005 int charsUntilSeparator = afterEveryChars; 1006 1007 @Override 1008 public Appendable append(char c) throws IOException { 1009 if (charsUntilSeparator == 0) { 1010 delegate.append(separator); 1011 charsUntilSeparator = afterEveryChars; 1012 } 1013 delegate.append(c); 1014 charsUntilSeparator--; 1015 return this; 1016 } 1017 1018 @Override 1019 public 
Appendable append(@NullableDecl CharSequence chars, int off, int len) 1020 throws IOException { 1021 throw new UnsupportedOperationException(); 1022 } 1023 1024 @Override 1025 public Appendable append(@NullableDecl CharSequence chars) throws IOException { 1026 throw new UnsupportedOperationException(); 1027 } 1028 }; 1029 } 1030 1031 @GwtIncompatible // Writer 1032 static Writer separatingWriter( 1033 final Writer delegate, final String separator, final int afterEveryChars) { 1034 final Appendable seperatingAppendable = 1035 separatingAppendable(delegate, separator, afterEveryChars); 1036 return new Writer() { 1037 @Override 1038 public void write(int c) throws IOException { 1039 seperatingAppendable.append((char) c); 1040 } 1041 1042 @Override 1043 public void write(char[] chars, int off, int len) throws IOException { 1044 throw new UnsupportedOperationException(); 1045 } 1046 1047 @Override 1048 public void flush() throws IOException { 1049 delegate.flush(); 1050 } 1051 1052 @Override 1053 public void close() throws IOException { 1054 delegate.close(); 1055 } 1056 }; 1057 } 1058 1059 static final class SeparatedBaseEncoding extends BaseEncoding { 1060 private final BaseEncoding delegate; 1061 private final String separator; 1062 private final int afterEveryChars; 1063 1064 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1065 this.delegate = checkNotNull(delegate); 1066 this.separator = checkNotNull(separator); 1067 this.afterEveryChars = afterEveryChars; 1068 checkArgument( 1069 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1070 } 1071 1072 @Override 1073 CharSequence trimTrailingPadding(CharSequence chars) { 1074 return delegate.trimTrailingPadding(chars); 1075 } 1076 1077 @Override 1078 int maxEncodedSize(int bytes) { 1079 int unseparatedSize = delegate.maxEncodedSize(bytes); 1080 return unseparatedSize 1081 + separator.length() * divide(Math.max(0, unseparatedSize - 1), 
afterEveryChars, FLOOR); 1082 } 1083 1084 @GwtIncompatible // Writer,OutputStream 1085 @Override 1086 public OutputStream encodingStream(final Writer output) { 1087 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1088 } 1089 1090 @Override 1091 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1092 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1093 } 1094 1095 @Override 1096 int maxDecodedSize(int chars) { 1097 return delegate.maxDecodedSize(chars); 1098 } 1099 1100 @Override 1101 public boolean canDecode(CharSequence chars) { 1102 StringBuilder builder = new StringBuilder(); 1103 for (int i = 0; i < chars.length(); i++) { 1104 char c = chars.charAt(i); 1105 if (separator.indexOf(c) < 0) { 1106 builder.append(c); 1107 } 1108 } 1109 return delegate.canDecode(builder); 1110 } 1111 1112 @Override 1113 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1114 StringBuilder stripped = new StringBuilder(chars.length()); 1115 for (int i = 0; i < chars.length(); i++) { 1116 char c = chars.charAt(i); 1117 if (separator.indexOf(c) < 0) { 1118 stripped.append(c); 1119 } 1120 } 1121 return delegate.decodeTo(target, stripped); 1122 } 1123 1124 @Override 1125 @GwtIncompatible // Reader,InputStream 1126 public InputStream decodingStream(final Reader reader) { 1127 return delegate.decodingStream(ignoringReader(reader, separator)); 1128 } 1129 1130 @Override 1131 public BaseEncoding omitPadding() { 1132 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1133 } 1134 1135 @Override 1136 public BaseEncoding withPadChar(char padChar) { 1137 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1138 } 1139 1140 @Override 1141 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1142 throw new UnsupportedOperationException("Already have a separator"); 1143 } 1144 1145 @Override 
1146 public BaseEncoding upperCase() { 1147 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1148 } 1149 1150 @Override 1151 public BaseEncoding lowerCase() { 1152 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1153 } 1154 1155 @Override 1156 public String toString() { 1157 return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 1158 } 1159 } 1160}