001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.math.RoundingMode.CEILING; 024import static java.math.RoundingMode.FLOOR; 025import static java.math.RoundingMode.UNNECESSARY; 026 027import com.google.common.annotations.GwtCompatible; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.Ascii; 030import com.google.common.base.Objects; 031import java.io.IOException; 032import java.io.InputStream; 033import java.io.OutputStream; 034import java.io.Reader; 035import java.io.Writer; 036import java.util.Arrays; 037import javax.annotation.Nullable; 038 039/** 040 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 041 * strings. This class includes several constants for encoding schemes specified by 042 * <a href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression: 043 * 044 * <pre> {@code 045 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre> 046 * 047 * <p>returns the string {@code "MZXW6==="}, and <pre> {@code 048 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre> 049 * 050 * <p>...returns the ASCII bytes of the string {@code "foo"}. 051 * 052 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 053 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 054 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 055 * behavior: 056 * 057 * <pre> {@code 058 * BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre> 059 * 060 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 061 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 062 * 063 * <pre> {@code 064 * // Do NOT do this 065 * BaseEncoding hex = BaseEncoding.base16(); 066 * hex.lowerCase(); // does nothing! 067 * return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre> 068 * 069 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to 070 * {@code x}, but the reverse does not necessarily hold. 071 * 072 * <table> 073 * <caption>Encodings</caption> 074 * <tr> 075 * <th>Encoding 076 * <th>Alphabet 077 * <th>{@code char:byte} ratio 078 * <th>Default padding 079 * <th>Comments 080 * <tr> 081 * <td>{@link #base16()} 082 * <td>0-9 A-F 083 * <td>2.00 084 * <td>N/A 085 * <td>Traditional hexadecimal. Defaults to upper case. 086 * <tr> 087 * <td>{@link #base32()} 088 * <td>A-Z 2-7 089 * <td>1.60 090 * <td>= 091 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 092 * <tr> 093 * <td>{@link #base32Hex()} 094 * <td>0-9 A-V 095 * <td>1.60 096 * <td>= 097 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 098 * <tr> 099 * <td>{@link #base64()} 100 * <td>A-Z a-z 0-9 + / 101 * <td>1.33 102 * <td>= 103 * <td> 104 * <tr> 105 * <td>{@link #base64Url()} 106 * <td>A-Z a-z 0-9 - _ 107 * <td>1.33 108 * <td>= 109 * <td>Safe to use as filenames, or to pass in URLs without escaping 110 * </table> 111 * 112 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 113 * 114 * @author Louis Wasserman 115 * @since 14.0 116 */ 117@GwtCompatible(emulated = true) 118public abstract class BaseEncoding { 119 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 120 121 BaseEncoding() {} 122 123 /** 124 * Exception indicating invalid base-encoded input encountered while decoding. 125 * 126 * @author Louis Wasserman 127 * @since 15.0 128 */ 129 public static final class DecodingException extends IOException { 130 DecodingException(String message) { 131 super(message); 132 } 133 134 DecodingException(Throwable cause) { 135 super(cause); 136 } 137 } 138 139 /** 140 * Encodes the specified byte array, and returns the encoded {@code String}. 141 */ 142 public String encode(byte[] bytes) { 143 return encode(bytes, 0, bytes.length); 144 } 145 146 /** 147 * Encodes the specified range of the specified byte array, and returns the encoded 148 * {@code String}. 149 */ 150 public final String encode(byte[] bytes, int off, int len) { 151 checkPositionIndexes(off, off + len, bytes.length); 152 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 153 try { 154 encodeTo(result, bytes, off, len); 155 } catch (IOException impossible) { 156 throw new AssertionError(impossible); 157 } 158 return result.toString(); 159 } 160 161 /** 162 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 163 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing 164 * {@code Writer}. 165 */ 166 @GwtIncompatible // Writer,OutputStream 167 public abstract OutputStream encodingStream(Writer writer); 168 169 /** 170 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 171 */ 172 @GwtIncompatible // ByteSink,CharSink 173 public final ByteSink encodingSink(final CharSink encodedSink) { 174 checkNotNull(encodedSink); 175 return new ByteSink() { 176 @Override 177 public OutputStream openStream() throws IOException { 178 return encodingStream(encodedSink.openStream()); 179 } 180 }; 181 } 182 183 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 184 185 private static byte[] extract(byte[] result, int length) { 186 if (length == result.length) { 187 return result; 188 } else { 189 byte[] trunc = new byte[length]; 190 System.arraycopy(result, 0, trunc, 0, length); 191 return trunc; 192 } 193 } 194 195 /** 196 * Determines whether the specified character sequence is a valid encoded string according to this 197 * encoding. 198 * 199 * @since 20.0 200 */ 201 public abstract boolean canDecode(CharSequence chars); 202 203 /** 204 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 205 * inverse operation to {@link #encode(byte[])}. 206 * 207 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 208 * encoding. 209 */ 210 public final byte[] decode(CharSequence chars) { 211 try { 212 return decodeChecked(chars); 213 } catch (DecodingException badInput) { 214 throw new IllegalArgumentException(badInput); 215 } 216 } 217 218 /** 219 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 220 * inverse operation to {@link #encode(byte[])}. 221 * 222 * @throws DecodingException if the input is not a valid encoded string according to this 223 * encoding. 224 */ final byte[] decodeChecked(CharSequence chars) 225 throws DecodingException { 226 chars = trimTrailingPadding(chars); 227 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 228 int len = decodeTo(tmp, chars); 229 return extract(tmp, len); 230 } 231 232 /** 233 * Returns an {@code InputStream} that decodes base-encoded input from the specified 234 * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific 235 * errors. 236 */ 237 @GwtIncompatible // Reader,InputStream 238 public abstract InputStream decodingStream(Reader reader); 239 240 /** 241 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified 242 * {@code CharSource}. 243 */ 244 @GwtIncompatible // ByteSource,CharSource 245 public final ByteSource decodingSource(final CharSource encodedSource) { 246 checkNotNull(encodedSource); 247 return new ByteSource() { 248 @Override 249 public InputStream openStream() throws IOException { 250 return decodingStream(encodedSource.openStream()); 251 } 252 }; 253 } 254 255 // Implementations for encoding/decoding 256 257 abstract int maxEncodedSize(int bytes); 258 259 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 260 261 abstract int maxDecodedSize(int chars); 262 263 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 264 265 CharSequence trimTrailingPadding(CharSequence chars) { 266 return checkNotNull(chars); 267 } 268 269 // Modified encoding generators 270 271 /** 272 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 273 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 274 * section 3.2</a>, Padding of Encoded Data. 275 */ 276 public abstract BaseEncoding omitPadding(); 277 278 /** 279 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 280 * for padding. 281 * 282 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 283 * separator 284 */ 285 public abstract BaseEncoding withPadChar(char padChar); 286 287 /** 288 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 289 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 290 * are skipped over in decoding. 291 * 292 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 293 * string, or if {@code n <= 0} 294 * @throws UnsupportedOperationException if this encoding already uses a separator 295 */ 296 public abstract BaseEncoding withSeparator(String separator, int n); 297 298 /** 299 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 300 * uppercase letters. Padding and separator characters remain in their original case. 301 * 302 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 303 * lower-case characters 304 */ 305 public abstract BaseEncoding upperCase(); 306 307 /** 308 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 309 * lowercase letters. Padding and separator characters remain in their original case. 310 * 311 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 312 * lower-case characters 313 */ 314 public abstract BaseEncoding lowerCase(); 315 316 private static final BaseEncoding BASE64 = 317 new Base64Encoding( 318 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 319 320 /** 321 * The "base64" base encoding specified by 322 * <a href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 323 * Encoding. (This is the same as the base 64 encoding from 324 * <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 325 * 326 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 327 * omitted} or {@linkplain #withPadChar(char) replaced}. 328 * 329 * <p>No line feeds are added by default, as per 330 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 331 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 332 */ 333 public static BaseEncoding base64() { 334 return BASE64; 335 } 336 337 private static final BaseEncoding BASE64_URL = 338 new Base64Encoding( 339 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 340 341 /** 342 * The "base64url" encoding specified by 343 * <a href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 344 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This 345 * is the same as the base 64 encoding with URL and filename safe alphabet from 346 * <a href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 347 * 348 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 349 * omitted} or {@linkplain #withPadChar(char) replaced}. 350 * 351 * <p>No line feeds are added by default, as per 352 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 353 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 354 */ 355 public static BaseEncoding base64Url() { 356 return BASE64_URL; 357 } 358 359 private static final BaseEncoding BASE32 = 360 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 361 362 /** 363 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC 364 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from 365 * <a href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 366 * 367 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 368 * omitted} or {@linkplain #withPadChar(char) replaced}. 369 * 370 * <p>No line feeds are added by default, as per 371 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 372 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 373 */ 374 public static BaseEncoding base32() { 375 return BASE32; 376 } 377 378 private static final BaseEncoding BASE32_HEX = 379 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 380 381 /** 382 * The "base32hex" encoding specified by 383 * <a href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 384 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 385 * 386 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 387 * omitted} or {@linkplain #withPadChar(char) replaced}. 388 * 389 * <p>No line feeds are added by default, as per 390 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 391 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 392 */ 393 public static BaseEncoding base32Hex() { 394 return BASE32_HEX; 395 } 396 397 private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF"); 398 399 /** 400 * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC 401 * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from 402 * <a href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 403 * "hexadecimal" format. 404 * 405 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()} 406 * have no effect. 407 * 408 * <p>No line feeds are added by default, as per 409 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 410 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 411 */ 412 public static BaseEncoding base16() { 413 return BASE16; 414 } 415 416 private static final class Alphabet { 417 private final String name; 418 // this is meant to be immutable -- don't modify it! 419 private final char[] chars; 420 final int mask; 421 final int bitsPerChar; 422 final int charsPerChunk; 423 final int bytesPerChunk; 424 private final byte[] decodabet; 425 private final boolean[] validPadding; 426 427 Alphabet(String name, char[] chars) { 428 this.name = checkNotNull(name); 429 this.chars = checkNotNull(chars); 430 try { 431 this.bitsPerChar = log2(chars.length, UNNECESSARY); 432 } catch (ArithmeticException e) { 433 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 434 } 435 436 /* 437 * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes 438 * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. 439 */ 440 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); 441 try { 442 this.charsPerChunk = 8 / gcd; 443 this.bytesPerChunk = bitsPerChar / gcd; 444 } catch (ArithmeticException e) { 445 throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e); 446 } 447 448 this.mask = chars.length - 1; 449 450 byte[] decodabet = new byte[Ascii.MAX + 1]; 451 Arrays.fill(decodabet, (byte) -1); 452 for (int i = 0; i < chars.length; i++) { 453 char c = chars[i]; 454 checkArgument(c < decodabet.length, "Non-ASCII character: %s", c); 455 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 456 decodabet[c] = (byte) i; 457 } 458 this.decodabet = decodabet; 459 460 boolean[] validPadding = new boolean[charsPerChunk]; 461 for (int i = 0; i < bytesPerChunk; i++) { 462 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 463 } 464 this.validPadding = validPadding; 465 } 466 467 char encode(int bits) { 468 return chars[bits]; 469 } 470 471 boolean isValidPaddingStartPosition(int index) { 472 return validPadding[index % charsPerChunk]; 473 } 474 475 boolean canDecode(char ch) { 476 return ch <= Ascii.MAX && decodabet[ch] != -1; 477 } 478 479 int decode(char ch) throws DecodingException { 480 if (ch > Ascii.MAX) { 481 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 482 } 483 int result = decodabet[ch]; 484 if (result == -1) { 485 if (ch <= 0x20 || ch == Ascii.MAX) { 486 throw new DecodingException("Unrecognized character: 0x" + Integer.toHexString(ch)); 487 } else { 488 throw new DecodingException("Unrecognized character: " + ch); 489 } 490 } 491 return result; 492 } 493 494 private boolean hasLowerCase() { 495 for (char c : chars) { 496 if (Ascii.isLowerCase(c)) { 497 return true; 498 } 499 } 500 return false; 501 } 502 503 private boolean hasUpperCase() { 504 for (char c : chars) { 505 if (Ascii.isUpperCase(c)) { 506 return true; 507 } 508 } 509 return false; 510 } 511 512 Alphabet upperCase() { 513 if (!hasLowerCase()) { 514 return this; 515 } else { 516 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 517 char[] upperCased = new char[chars.length]; 518 for (int i = 0; i < chars.length; i++) { 519 upperCased[i] = Ascii.toUpperCase(chars[i]); 520 } 521 return new Alphabet(name + ".upperCase()", upperCased); 522 } 523 } 524 525 Alphabet lowerCase() { 526 if (!hasUpperCase()) { 527 return this; 528 } else { 529 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 530 char[] lowerCased = new char[chars.length]; 531 for (int i = 0; i < chars.length; i++) { 532 lowerCased[i] = Ascii.toLowerCase(chars[i]); 533 } 534 return new Alphabet(name + ".lowerCase()", lowerCased); 535 } 536 } 537 538 public boolean matches(char c) { 539 return c < decodabet.length && decodabet[c] != -1; 540 } 541 542 @Override 543 public String toString() { 544 return name; 545 } 546 547 @Override 548 public boolean equals(@Nullable Object other) { 549 if (other instanceof Alphabet) { 550 Alphabet that = (Alphabet) other; 551 return Arrays.equals(this.chars, that.chars); 552 } 553 return false; 554 } 555 556 @Override 557 public int hashCode() { 558 return Arrays.hashCode(chars); 559 } 560 } 561 562 static class StandardBaseEncoding extends BaseEncoding { 563 // TODO(lowasser): provide a useful toString 564 final Alphabet alphabet; 565 566 @Nullable final Character paddingChar; 567 568 StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) { 569 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 570 } 571 572 StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) { 573 this.alphabet = checkNotNull(alphabet); 574 checkArgument( 575 paddingChar == null || !alphabet.matches(paddingChar), 576 "Padding character %s was already in alphabet", 577 paddingChar); 578 this.paddingChar = paddingChar; 579 } 580 581 @Override 582 int maxEncodedSize(int bytes) { 583 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 584 } 585 586 @GwtIncompatible // Writer,OutputStream 587 @Override 588 public OutputStream encodingStream(final Writer out) { 589 checkNotNull(out); 590 return new OutputStream() { 591 int bitBuffer = 0; 592 int bitBufferLength = 0; 593 int writtenChars = 0; 594 595 @Override 596 public void write(int b) throws IOException { 597 bitBuffer <<= 8; 598 bitBuffer |= b & 0xFF; 599 bitBufferLength += 8; 600 while (bitBufferLength >= alphabet.bitsPerChar) { 601 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 602 out.write(alphabet.encode(charIndex)); 603 writtenChars++; 604 bitBufferLength -= alphabet.bitsPerChar; 605 } 606 } 607 608 @Override 609 public void flush() throws IOException { 610 out.flush(); 611 } 612 613 @Override 614 public void close() throws IOException { 615 if (bitBufferLength > 0) { 616 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 617 out.write(alphabet.encode(charIndex)); 618 writtenChars++; 619 if (paddingChar != null) { 620 while (writtenChars % alphabet.charsPerChunk != 0) { 621 out.write(paddingChar.charValue()); 622 writtenChars++; 623 } 624 } 625 } 626 out.close(); 627 } 628 }; 629 } 630 631 @Override 632 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 633 checkNotNull(target); 634 checkPositionIndexes(off, off + len, bytes.length); 635 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 636 encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i)); 637 } 638 } 639 640 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 641 checkNotNull(target); 642 checkPositionIndexes(off, off + len, bytes.length); 643 checkArgument(len <= alphabet.bytesPerChunk); 644 long bitBuffer = 0; 645 for (int i = 0; i < len; ++i) { 646 bitBuffer |= bytes[off + i] & 0xFF; 647 bitBuffer <<= 8; // Add additional zero byte in the end. 648 } 649 // Position of first character is length of bitBuffer minus bitsPerChar. 650 final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 651 int bitsProcessed = 0; 652 while (bitsProcessed < len * 8) { 653 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 654 target.append(alphabet.encode(charIndex)); 655 bitsProcessed += alphabet.bitsPerChar; 656 } 657 if (paddingChar != null) { 658 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 659 target.append(paddingChar.charValue()); 660 bitsProcessed += alphabet.bitsPerChar; 661 } 662 } 663 } 664 665 @Override 666 int maxDecodedSize(int chars) { 667 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 668 } 669 670 @Override 671 CharSequence trimTrailingPadding(CharSequence chars) { 672 checkNotNull(chars); 673 if (paddingChar == null) { 674 return chars; 675 } 676 char padChar = paddingChar.charValue(); 677 int l; 678 for (l = chars.length() - 1; l >= 0; l--) { 679 if (chars.charAt(l) != padChar) { 680 break; 681 } 682 } 683 return chars.subSequence(0, l + 1); 684 } 685 686 @Override 687 public boolean canDecode(CharSequence chars) { 688 checkNotNull(chars); 689 chars = trimTrailingPadding(chars); 690 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 691 return false; 692 } 693 for (int i = 0; i < chars.length(); i++) { 694 if (!alphabet.canDecode(chars.charAt(i))) { 695 return false; 696 } 697 } 698 return true; 699 } 700 701 @Override 702 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 703 checkNotNull(target); 704 chars = trimTrailingPadding(chars); 705 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 706 throw new DecodingException("Invalid input length " + chars.length()); 707 } 708 int bytesWritten = 0; 709 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 710 long chunk = 0; 711 int charsProcessed = 0; 712 for (int i = 0; i < alphabet.charsPerChunk; i++) { 713 chunk <<= alphabet.bitsPerChar; 714 if (charIdx + i < chars.length()) { 715 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 716 } 717 } 718 final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 719 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 720 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 721 } 722 } 723 return bytesWritten; 724 } 725 726 @Override 727 @GwtIncompatible // Reader,InputStream 728 public InputStream decodingStream(final Reader reader) { 729 checkNotNull(reader); 730 return new InputStream() { 731 int bitBuffer = 0; 732 int bitBufferLength = 0; 733 int readChars = 0; 734 boolean hitPadding = false; 735 736 @Override 737 public int read() throws IOException { 738 while (true) { 739 int readChar = reader.read(); 740 if (readChar == -1) { 741 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 742 throw new DecodingException("Invalid input length " + readChars); 743 } 744 return -1; 745 } 746 readChars++; 747 char ch = (char) readChar; 748 if (paddingChar != null && paddingChar.charValue() == ch) { 749 if (!hitPadding 750 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 751 throw new DecodingException("Padding cannot start at index " + readChars); 752 } 753 hitPadding = true; 754 } else if (hitPadding) { 755 throw new DecodingException( 756 "Expected padding character but found '" + ch + "' at index " + readChars); 757 } else { 758 bitBuffer <<= alphabet.bitsPerChar; 759 bitBuffer |= alphabet.decode(ch); 760 bitBufferLength += alphabet.bitsPerChar; 761 762 if (bitBufferLength >= 8) { 763 bitBufferLength -= 8; 764 return (bitBuffer >> bitBufferLength) & 0xFF; 765 } 766 } 767 } 768 } 769 770 @Override 771 public void close() throws IOException { 772 reader.close(); 773 } 774 }; 775 } 776 777 @Override 778 public BaseEncoding omitPadding() { 779 return (paddingChar == null) ? this : newInstance(alphabet, null); 780 } 781 782 @Override 783 public BaseEncoding withPadChar(char padChar) { 784 if (8 % alphabet.bitsPerChar == 0 785 || (paddingChar != null && paddingChar.charValue() == padChar)) { 786 return this; 787 } else { 788 return newInstance(alphabet, padChar); 789 } 790 } 791 792 @Override 793 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 794 for (int i = 0; i < separator.length(); i++) { 795 checkArgument( 796 !alphabet.matches(separator.charAt(i)), 797 "Separator (%s) cannot contain alphabet characters", 798 separator); 799 } 800 if (paddingChar != null) { 801 checkArgument( 802 separator.indexOf(paddingChar.charValue()) < 0, 803 "Separator (%s) cannot contain padding character", 804 separator); 805 } 806 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 807 } 808 809 private transient BaseEncoding upperCase; 810 private transient BaseEncoding lowerCase; 811 812 @Override 813 public BaseEncoding upperCase() { 814 BaseEncoding result = upperCase; 815 if (result == null) { 816 Alphabet upper = alphabet.upperCase(); 817 result = upperCase = 818 (upper == alphabet) ? this : newInstance(upper, paddingChar); 819 } 820 return result; 821 } 822 823 @Override 824 public BaseEncoding lowerCase() { 825 BaseEncoding result = lowerCase; 826 if (result == null) { 827 Alphabet lower = alphabet.lowerCase(); 828 result = lowerCase = 829 (lower == alphabet) ? this : newInstance(lower, paddingChar); 830 } 831 return result; 832 } 833 834 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 835 return new StandardBaseEncoding(alphabet, paddingChar); 836 } 837 838 @Override 839 public String toString() { 840 StringBuilder builder = new StringBuilder("BaseEncoding."); 841 builder.append(alphabet.toString()); 842 if (8 % alphabet.bitsPerChar != 0) { 843 if (paddingChar == null) { 844 builder.append(".omitPadding()"); 845 } else { 846 builder.append(".withPadChar('").append(paddingChar).append("')"); 847 } 848 } 849 return builder.toString(); 850 } 851 852 @Override 853 public boolean equals(@Nullable Object other) { 854 if (other instanceof StandardBaseEncoding) { 855 StandardBaseEncoding that = (StandardBaseEncoding) other; 856 return this.alphabet.equals(that.alphabet) 857 && Objects.equal(this.paddingChar, that.paddingChar); 858 } 859 return false; 860 } 861 862 @Override 863 public int hashCode() { 864 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 865 } 866 } 867 868 static final class Base16Encoding extends StandardBaseEncoding { 869 final char[] encoding = new char[512]; 870 871 Base16Encoding(String name, String alphabetChars) { 872 this(new Alphabet(name, alphabetChars.toCharArray())); 873 } 874 875 private Base16Encoding(Alphabet alphabet) { 876 super(alphabet, null); 877 checkArgument(alphabet.chars.length == 16); 878 for (int i = 0; i < 256; ++i) { 879 encoding[i] = alphabet.encode(i >>> 4); 880 encoding[i | 0x100] = alphabet.encode(i & 0xF); 881 } 882 } 883 884 @Override 885 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 886 checkNotNull(target); 887 checkPositionIndexes(off, off + len, bytes.length); 888 for (int i = 0; i < len; ++i) { 889 int b = bytes[off + i] & 0xFF; 890 target.append(encoding[b]); 891 target.append(encoding[b | 0x100]); 892 } 893 } 894 895 @Override 896 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 897 checkNotNull(target); 898 if (chars.length() % 2 == 1) { 899 throw new DecodingException("Invalid input length " + chars.length()); 900 } 901 int bytesWritten = 0; 902 for (int i = 0; i < chars.length(); i += 2) { 903 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 904 target[bytesWritten++] = (byte) decoded; 905 } 906 return bytesWritten; 907 } 908 909 @Override 910 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 911 return new Base16Encoding(alphabet); 912 } 913 } 914 915 static final class Base64Encoding extends StandardBaseEncoding { 916 Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) { 917 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 918 } 919 920 private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) { 921 super(alphabet, paddingChar); 922 checkArgument(alphabet.chars.length == 64); 923 } 924 925 @Override 926 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 927 checkNotNull(target); 928 checkPositionIndexes(off, off + len, bytes.length); 929 int i = off; 930 for (int remaining = len; remaining >= 3; remaining -= 3) { 931 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 932 target.append(alphabet.encode(chunk >>> 18)); 933 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 934 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 935 target.append(alphabet.encode(chunk & 0x3F)); 936 } 937 if (i < off + len) { 938 encodeChunkTo(target, bytes, i, off + len - i); 939 } 940 } 941 942 @Override 943 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 944 checkNotNull(target); 945 chars = trimTrailingPadding(chars); 946 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 947 throw new DecodingException("Invalid input length " + chars.length()); 948 } 949 int bytesWritten = 0; 950 for (int i = 0; i < chars.length(); ) { 951 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 952 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 953 target[bytesWritten++] = (byte) (chunk >>> 16); 954 if (i < chars.length()) { 955 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 956 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 957 if (i < chars.length()) { 958 chunk |= alphabet.decode(chars.charAt(i++)); 959 target[bytesWritten++] = (byte) (chunk & 0xFF); 960 } 961 } 962 } 963 return bytesWritten; 964 } 965 966 @Override 967 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 968 return new Base64Encoding(alphabet, paddingChar); 969 } 970 } 971 972 @GwtIncompatible 973 static Reader ignoringReader(final Reader delegate, final String toIgnore) { 974 checkNotNull(delegate); 975 checkNotNull(toIgnore); 976 return new Reader() { 977 @Override 978 public int read() throws IOException { 979 int readChar; 980 do { 981 readChar = delegate.read(); 982 } while (readChar != -1 && toIgnore.indexOf((char) readChar) >= 0); 983 return readChar; 984 } 985 986 @Override 987 public int read(char[] cbuf, int off, int len) throws IOException { 988 throw new UnsupportedOperationException(); 989 } 990 991 @Override 992 public void close() throws IOException { 993 delegate.close(); 994 } 995 }; 996 } 997 998 static Appendable separatingAppendable( 999 final Appendable delegate, final String separator, final int afterEveryChars) { 1000 checkNotNull(delegate); 1001 checkNotNull(separator); 1002 checkArgument(afterEveryChars > 0); 1003 return new Appendable() { 1004 int charsUntilSeparator = afterEveryChars; 1005 1006 @Override 1007 public Appendable append(char c) throws IOException { 1008 if (charsUntilSeparator == 0) { 1009 delegate.append(separator); 1010 charsUntilSeparator = afterEveryChars; 1011 } 1012 delegate.append(c); 1013 charsUntilSeparator--; 1014 return this; 1015 } 1016 1017 @Override 1018 public Appendable append(CharSequence chars, int off, int len) throws IOException { 1019 throw new UnsupportedOperationException(); 1020 } 1021 1022 @Override 1023 public Appendable append(CharSequence chars) throws IOException { 1024 throw new UnsupportedOperationException(); 1025 } 1026 }; 1027 } 1028 1029 @GwtIncompatible // Writer 1030 static Writer separatingWriter( 1031 final Writer delegate, final String separator, final int afterEveryChars) { 1032 final Appendable seperatingAppendable = 1033 separatingAppendable(delegate, separator, afterEveryChars); 1034 return new Writer() { 1035 @Override 1036 public void write(int c) throws IOException { 1037 seperatingAppendable.append((char) c); 1038 } 1039 1040 @Override 1041 public void write(char[] chars, int off, int len) throws IOException { 1042 throw new UnsupportedOperationException(); 1043 } 1044 1045 @Override 1046 public void flush() throws IOException { 1047 delegate.flush(); 1048 } 1049 1050 @Override 1051 public void close() throws IOException { 1052 delegate.close(); 1053 } 1054 }; 1055 } 1056 1057 static final class SeparatedBaseEncoding extends BaseEncoding { 1058 private final BaseEncoding delegate; 1059 private final String separator; 1060 private final int afterEveryChars; 1061 1062 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1063 this.delegate = checkNotNull(delegate); 1064 this.separator = checkNotNull(separator); 1065 this.afterEveryChars = afterEveryChars; 1066 checkArgument( 1067 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1068 } 1069 1070 @Override 1071 CharSequence trimTrailingPadding(CharSequence chars) { 1072 return delegate.trimTrailingPadding(chars); 1073 } 1074 1075 @Override 1076 int maxEncodedSize(int bytes) { 1077 int unseparatedSize = delegate.maxEncodedSize(bytes); 1078 return unseparatedSize 1079 + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR); 1080 } 1081 1082 @GwtIncompatible // Writer,OutputStream 1083 @Override 1084 public OutputStream encodingStream(final Writer output) { 1085 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1086 } 1087 1088 @Override 1089 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1090 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1091 } 1092 1093 @Override 1094 int maxDecodedSize(int chars) { 1095 return delegate.maxDecodedSize(chars); 1096 } 1097 1098 @Override 1099 public boolean canDecode(CharSequence chars) { 1100 StringBuilder builder = new StringBuilder(); 1101 for (int i = 0; i < chars.length(); i++) { 1102 char c = chars.charAt(i); 1103 if (separator.indexOf(c) < 0) { 1104 builder.append(c); 1105 } 1106 } 1107 return delegate.canDecode(builder); 1108 } 1109 1110 @Override 1111 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1112 StringBuilder stripped = new StringBuilder(chars.length()); 1113 for (int i = 0; i < chars.length(); i++) { 1114 char c = chars.charAt(i); 1115 if (separator.indexOf(c) < 0) { 1116 stripped.append(c); 1117 } 1118 } 1119 return delegate.decodeTo(target, stripped); 1120 } 1121 1122 @Override 1123 @GwtIncompatible // Reader,InputStream 1124 public InputStream decodingStream(final Reader reader) { 1125 return delegate.decodingStream(ignoringReader(reader, separator)); 1126 } 1127 1128 @Override 1129 public BaseEncoding omitPadding() { 1130 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1131 } 1132 1133 @Override 1134 public BaseEncoding withPadChar(char padChar) { 1135 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1136 } 1137 1138 @Override 1139 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1140 throw new UnsupportedOperationException("Already have a separator"); 1141 } 1142 1143 @Override 1144 public BaseEncoding upperCase() { 1145 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1146 } 1147 1148 @Override 1149 public BaseEncoding lowerCase() { 1150 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1151 } 1152 1153 @Override 1154 public String toString() { 1155 return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 1156 } 1157 } 1158}