001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.math.RoundingMode.CEILING; 024import static java.math.RoundingMode.FLOOR; 025import static java.math.RoundingMode.UNNECESSARY; 026 027import com.google.common.annotations.GwtCompatible; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.Ascii; 030import com.google.common.base.CharMatcher; 031import com.google.common.base.Objects; 032import java.io.IOException; 033import java.io.InputStream; 034import java.io.OutputStream; 035import java.io.Reader; 036import java.io.Writer; 037import java.util.Arrays; 038import javax.annotation.Nullable; 039 040/** 041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 042 * strings. This class includes several constants for encoding schemes specified by 043 * <a href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression: 044 * 045 * <pre> {@code 046 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre> 047 * 048 * <p>returns the string {@code "MZXW6==="}, and <pre> {@code 049 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre> 050 * 051 * <p>...returns the ASCII bytes of the string {@code "foo"}. 052 * 053 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 054 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 055 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 056 * behavior: 057 * 058 * <pre> {@code 059 * BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre> 060 * 061 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 062 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 063 * 064 * <pre> {@code 065 * // Do NOT do this 066 * BaseEncoding hex = BaseEncoding.base16(); 067 * hex.lowerCase(); // does nothing! 068 * return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre> 069 * 070 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to 071 * {@code x}, but the reverse does not necessarily hold. 072 * 073 * <table> 074 * <caption>Encodings</caption> 075 * <tr> 076 * <th>Encoding 077 * <th>Alphabet 078 * <th>{@code char:byte} ratio 079 * <th>Default padding 080 * <th>Comments 081 * <tr> 082 * <td>{@link #base16()} 083 * <td>0-9 A-F 084 * <td>2.00 085 * <td>N/A 086 * <td>Traditional hexadecimal. Defaults to upper case. 087 * <tr> 088 * <td>{@link #base32()} 089 * <td>A-Z 2-7 090 * <td>1.60 091 * <td>= 092 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 093 * <tr> 094 * <td>{@link #base32Hex()} 095 * <td>0-9 A-V 096 * <td>1.60 097 * <td>= 098 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 099 * <tr> 100 * <td>{@link #base64()} 101 * <td>A-Z a-z 0-9 + / 102 * <td>1.33 103 * <td>= 104 * <td> 105 * <tr> 106 * <td>{@link #base64Url()} 107 * <td>A-Z a-z 0-9 - _ 108 * <td>1.33 109 * <td>= 110 * <td>Safe to use as filenames, or to pass in URLs without escaping 111 * </table> 112 * 113 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 114 * 115 * @author Louis Wasserman 116 * @since 14.0 117 */ 118@GwtCompatible(emulated = true) 119public abstract class BaseEncoding { 120 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 121 122 BaseEncoding() {} 123 124 /** 125 * Exception indicating invalid base-encoded input encountered while decoding. 126 * 127 * @author Louis Wasserman 128 * @since 15.0 129 */ 130 public static final class DecodingException extends IOException { 131 DecodingException(String message) { 132 super(message); 133 } 134 135 DecodingException(Throwable cause) { 136 super(cause); 137 } 138 } 139 140 /** 141 * Encodes the specified byte array, and returns the encoded {@code String}. 142 */ 143 public String encode(byte[] bytes) { 144 return encode(bytes, 0, bytes.length); 145 } 146 147 /** 148 * Encodes the specified range of the specified byte array, and returns the encoded 149 * {@code String}. 150 */ 151 public final String encode(byte[] bytes, int off, int len) { 152 checkPositionIndexes(off, off + len, bytes.length); 153 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 154 try { 155 encodeTo(result, bytes, off, len); 156 } catch (IOException impossible) { 157 throw new AssertionError(impossible); 158 } 159 return result.toString(); 160 } 161 162 /** 163 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 164 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing 165 * {@code Writer}. 166 */ 167 @GwtIncompatible // Writer,OutputStream 168 public abstract OutputStream encodingStream(Writer writer); 169 170 /** 171 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 172 */ 173 @GwtIncompatible // ByteSink,CharSink 174 public final ByteSink encodingSink(final CharSink encodedSink) { 175 checkNotNull(encodedSink); 176 return new ByteSink() { 177 @Override 178 public OutputStream openStream() throws IOException { 179 return encodingStream(encodedSink.openStream()); 180 } 181 }; 182 } 183 184 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 185 186 private static byte[] extract(byte[] result, int length) { 187 if (length == result.length) { 188 return result; 189 } else { 190 byte[] trunc = new byte[length]; 191 System.arraycopy(result, 0, trunc, 0, length); 192 return trunc; 193 } 194 } 195 196 /** 197 * Determines whether the specified character sequence is a valid encoded string according to this 198 * encoding. 199 * 200 * @since 20.0 201 */ 202 public abstract boolean canDecode(CharSequence chars); 203 204 /** 205 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 206 * inverse operation to {@link #encode(byte[])}. 207 * 208 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 209 * encoding. 210 */ 211 public final byte[] decode(CharSequence chars) { 212 try { 213 return decodeChecked(chars); 214 } catch (DecodingException badInput) { 215 throw new IllegalArgumentException(badInput); 216 } 217 } 218 219 /** 220 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 221 * inverse operation to {@link #encode(byte[])}. 222 * 223 * @throws DecodingException if the input is not a valid encoded string according to this 224 * encoding. 225 */ final byte[] decodeChecked(CharSequence chars) 226 throws DecodingException { 227 chars = padding().trimTrailingFrom(chars); 228 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 229 int len = decodeTo(tmp, chars); 230 return extract(tmp, len); 231 } 232 233 /** 234 * Returns an {@code InputStream} that decodes base-encoded input from the specified 235 * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific 236 * errors. 237 */ 238 @GwtIncompatible // Reader,InputStream 239 public abstract InputStream decodingStream(Reader reader); 240 241 /** 242 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified 243 * {@code CharSource}. 244 */ 245 @GwtIncompatible // ByteSource,CharSource 246 public final ByteSource decodingSource(final CharSource encodedSource) { 247 checkNotNull(encodedSource); 248 return new ByteSource() { 249 @Override 250 public InputStream openStream() throws IOException { 251 return decodingStream(encodedSource.openStream()); 252 } 253 }; 254 } 255 256 // Implementations for encoding/decoding 257 258 abstract int maxEncodedSize(int bytes); 259 260 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 261 262 abstract int maxDecodedSize(int chars); 263 264 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 265 266 abstract CharMatcher padding(); 267 268 // Modified encoding generators 269 270 /** 271 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 272 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 273 * section 3.2</a>, Padding of Encoded Data. 274 */ 275 public abstract BaseEncoding omitPadding(); 276 277 /** 278 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 279 * for padding. 280 * 281 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 282 * separator 283 */ 284 public abstract BaseEncoding withPadChar(char padChar); 285 286 /** 287 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 288 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 289 * are skipped over in decoding. 290 * 291 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 292 * string, or if {@code n <= 0} 293 * @throws UnsupportedOperationException if this encoding already uses a separator 294 */ 295 public abstract BaseEncoding withSeparator(String separator, int n); 296 297 /** 298 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 299 * uppercase letters. Padding and separator characters remain in their original case. 300 * 301 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 302 * lower-case characters 303 */ 304 public abstract BaseEncoding upperCase(); 305 306 /** 307 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 308 * lowercase letters. Padding and separator characters remain in their original case. 309 * 310 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 311 * lower-case characters 312 */ 313 public abstract BaseEncoding lowerCase(); 314 315 private static final BaseEncoding BASE64 = 316 new Base64Encoding( 317 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 318 319 /** 320 * The "base64" base encoding specified by 321 * <a href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 322 * Encoding. (This is the same as the base 64 encoding from 323 * <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 324 * 325 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 326 * omitted} or {@linkplain #withPadChar(char) replaced}. 327 * 328 * <p>No line feeds are added by default, as per 329 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 330 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 331 */ 332 public static BaseEncoding base64() { 333 return BASE64; 334 } 335 336 private static final BaseEncoding BASE64_URL = 337 new Base64Encoding( 338 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 339 340 /** 341 * The "base64url" encoding specified by 342 * <a href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 343 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This 344 * is the same as the base 64 encoding with URL and filename safe alphabet from 345 * <a href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 346 * 347 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 348 * omitted} or {@linkplain #withPadChar(char) replaced}. 349 * 350 * <p>No line feeds are added by default, as per 351 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 352 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 353 */ 354 public static BaseEncoding base64Url() { 355 return BASE64_URL; 356 } 357 358 private static final BaseEncoding BASE32 = 359 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 360 361 /** 362 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC 363 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from 364 * <a href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 365 * 366 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 367 * omitted} or {@linkplain #withPadChar(char) replaced}. 368 * 369 * <p>No line feeds are added by default, as per 370 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 371 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 372 */ 373 public static BaseEncoding base32() { 374 return BASE32; 375 } 376 377 private static final BaseEncoding BASE32_HEX = 378 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 379 380 /** 381 * The "base32hex" encoding specified by 382 * <a href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 383 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 384 * 385 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 386 * omitted} or {@linkplain #withPadChar(char) replaced}. 387 * 388 * <p>No line feeds are added by default, as per 389 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 390 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 391 */ 392 public static BaseEncoding base32Hex() { 393 return BASE32_HEX; 394 } 395 396 private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF"); 397 398 /** 399 * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC 400 * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from 401 * <a href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 402 * "hexadecimal" format. 403 * 404 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()} 405 * have no effect. 406 * 407 * <p>No line feeds are added by default, as per 408 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 409 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 410 */ 411 public static BaseEncoding base16() { 412 return BASE16; 413 } 414 415 private static final class Alphabet extends CharMatcher { 416 private final String name; 417 // this is meant to be immutable -- don't modify it! 418 private final char[] chars; 419 final int mask; 420 final int bitsPerChar; 421 final int charsPerChunk; 422 final int bytesPerChunk; 423 private final byte[] decodabet; 424 private final boolean[] validPadding; 425 426 Alphabet(String name, char[] chars) { 427 this.name = checkNotNull(name); 428 this.chars = checkNotNull(chars); 429 try { 430 this.bitsPerChar = log2(chars.length, UNNECESSARY); 431 } catch (ArithmeticException e) { 432 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 433 } 434 435 /* 436 * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes 437 * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. 438 */ 439 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); 440 try { 441 this.charsPerChunk = 8 / gcd; 442 this.bytesPerChunk = bitsPerChar / gcd; 443 } catch (ArithmeticException e) { 444 throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e); 445 } 446 447 this.mask = chars.length - 1; 448 449 byte[] decodabet = new byte[Ascii.MAX + 1]; 450 Arrays.fill(decodabet, (byte) -1); 451 for (int i = 0; i < chars.length; i++) { 452 char c = chars[i]; 453 checkArgument(CharMatcher.ascii().matches(c), "Non-ASCII character: %s", c); 454 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 455 decodabet[c] = (byte) i; 456 } 457 this.decodabet = decodabet; 458 459 boolean[] validPadding = new boolean[charsPerChunk]; 460 for (int i = 0; i < bytesPerChunk; i++) { 461 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 462 } 463 this.validPadding = validPadding; 464 } 465 466 char encode(int bits) { 467 return chars[bits]; 468 } 469 470 boolean isValidPaddingStartPosition(int index) { 471 return validPadding[index % charsPerChunk]; 472 } 473 474 boolean canDecode(char ch) { 475 return ch <= Ascii.MAX && decodabet[ch] != -1; 476 } 477 478 int decode(char ch) throws DecodingException { 479 if (ch > Ascii.MAX || decodabet[ch] == -1) { 480 throw new DecodingException( 481 "Unrecognized character: " 482 + (CharMatcher.invisible().matches(ch) ? "0x" + Integer.toHexString(ch) : ch)); 483 } 484 return decodabet[ch]; 485 } 486 487 private boolean hasLowerCase() { 488 for (char c : chars) { 489 if (Ascii.isLowerCase(c)) { 490 return true; 491 } 492 } 493 return false; 494 } 495 496 private boolean hasUpperCase() { 497 for (char c : chars) { 498 if (Ascii.isUpperCase(c)) { 499 return true; 500 } 501 } 502 return false; 503 } 504 505 Alphabet upperCase() { 506 if (!hasLowerCase()) { 507 return this; 508 } else { 509 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 510 char[] upperCased = new char[chars.length]; 511 for (int i = 0; i < chars.length; i++) { 512 upperCased[i] = Ascii.toUpperCase(chars[i]); 513 } 514 return new Alphabet(name + ".upperCase()", upperCased); 515 } 516 } 517 518 Alphabet lowerCase() { 519 if (!hasUpperCase()) { 520 return this; 521 } else { 522 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 523 char[] lowerCased = new char[chars.length]; 524 for (int i = 0; i < chars.length; i++) { 525 lowerCased[i] = Ascii.toLowerCase(chars[i]); 526 } 527 return new Alphabet(name + ".lowerCase()", lowerCased); 528 } 529 } 530 531 @Override 532 public boolean matches(char c) { 533 return CharMatcher.ascii().matches(c) && decodabet[c] != -1; 534 } 535 536 @Override 537 public String toString() { 538 return name; 539 } 540 541 @Override 542 public boolean equals(@Nullable Object other) { 543 if (other instanceof Alphabet) { 544 Alphabet that = (Alphabet) other; 545 return Arrays.equals(this.chars, that.chars); 546 } 547 return false; 548 } 549 550 @Override 551 public int hashCode() { 552 return Arrays.hashCode(chars); 553 } 554 } 555 556 static class StandardBaseEncoding extends BaseEncoding { 557 // TODO(lowasser): provide a useful toString 558 final Alphabet alphabet; 559 560 @Nullable final Character paddingChar; 561 562 StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) { 563 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 564 } 565 566 StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) { 567 this.alphabet = checkNotNull(alphabet); 568 checkArgument( 569 paddingChar == null || !alphabet.matches(paddingChar), 570 "Padding character %s was already in alphabet", 571 paddingChar); 572 this.paddingChar = paddingChar; 573 } 574 575 @Override 576 CharMatcher padding() { 577 return (paddingChar == null) ? CharMatcher.none() : CharMatcher.is(paddingChar.charValue()); 578 } 579 580 @Override 581 int maxEncodedSize(int bytes) { 582 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 583 } 584 585 @GwtIncompatible // Writer,OutputStream 586 @Override 587 public OutputStream encodingStream(final Writer out) { 588 checkNotNull(out); 589 return new OutputStream() { 590 int bitBuffer = 0; 591 int bitBufferLength = 0; 592 int writtenChars = 0; 593 594 @Override 595 public void write(int b) throws IOException { 596 bitBuffer <<= 8; 597 bitBuffer |= b & 0xFF; 598 bitBufferLength += 8; 599 while (bitBufferLength >= alphabet.bitsPerChar) { 600 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 601 out.write(alphabet.encode(charIndex)); 602 writtenChars++; 603 bitBufferLength -= alphabet.bitsPerChar; 604 } 605 } 606 607 @Override 608 public void flush() throws IOException { 609 out.flush(); 610 } 611 612 @Override 613 public void close() throws IOException { 614 if (bitBufferLength > 0) { 615 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 616 out.write(alphabet.encode(charIndex)); 617 writtenChars++; 618 if (paddingChar != null) { 619 while (writtenChars % alphabet.charsPerChunk != 0) { 620 out.write(paddingChar.charValue()); 621 writtenChars++; 622 } 623 } 624 } 625 out.close(); 626 } 627 }; 628 } 629 630 @Override 631 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 632 checkNotNull(target); 633 checkPositionIndexes(off, off + len, bytes.length); 634 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 635 encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i)); 636 } 637 } 638 639 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 640 checkNotNull(target); 641 checkPositionIndexes(off, off + len, bytes.length); 642 checkArgument(len <= alphabet.bytesPerChunk); 643 long bitBuffer = 0; 644 for (int i = 0; i < len; ++i) { 645 bitBuffer |= bytes[off + i] & 0xFF; 646 bitBuffer <<= 8; // Add additional zero byte in the end. 647 } 648 // Position of first character is length of bitBuffer minus bitsPerChar. 649 final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 650 int bitsProcessed = 0; 651 while (bitsProcessed < len * 8) { 652 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 653 target.append(alphabet.encode(charIndex)); 654 bitsProcessed += alphabet.bitsPerChar; 655 } 656 if (paddingChar != null) { 657 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 658 target.append(paddingChar.charValue()); 659 bitsProcessed += alphabet.bitsPerChar; 660 } 661 } 662 } 663 664 @Override 665 int maxDecodedSize(int chars) { 666 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 667 } 668 669 @Override 670 public boolean canDecode(CharSequence chars) { 671 chars = padding().trimTrailingFrom(chars); 672 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 673 return false; 674 } 675 for (int i = 0; i < chars.length(); i++) { 676 if (!alphabet.canDecode(chars.charAt(i))) { 677 return false; 678 } 679 } 680 return true; 681 } 682 683 @Override 684 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 685 checkNotNull(target); 686 chars = padding().trimTrailingFrom(chars); 687 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 688 throw new DecodingException("Invalid input length " + chars.length()); 689 } 690 int bytesWritten = 0; 691 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 692 long chunk = 0; 693 int charsProcessed = 0; 694 for (int i = 0; i < alphabet.charsPerChunk; i++) { 695 chunk <<= alphabet.bitsPerChar; 696 if (charIdx + i < chars.length()) { 697 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 698 } 699 } 700 final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 701 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 702 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 703 } 704 } 705 return bytesWritten; 706 } 707 708 @GwtIncompatible // Reader,InputStream 709 @Override 710 public InputStream decodingStream(final Reader reader) { 711 checkNotNull(reader); 712 return new InputStream() { 713 int bitBuffer = 0; 714 int bitBufferLength = 0; 715 int readChars = 0; 716 boolean hitPadding = false; 717 final CharMatcher paddingMatcher = padding(); 718 719 @Override 720 public int read() throws IOException { 721 while (true) { 722 int readChar = reader.read(); 723 if (readChar == -1) { 724 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 725 throw new DecodingException("Invalid input length " + readChars); 726 } 727 return -1; 728 } 729 readChars++; 730 char ch = (char) readChar; 731 if (paddingMatcher.matches(ch)) { 732 if (!hitPadding 733 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 734 throw new DecodingException("Padding cannot start at index " + readChars); 735 } 736 hitPadding = true; 737 } else if (hitPadding) { 738 throw new DecodingException( 739 "Expected padding character but found '" + ch + "' at index " + readChars); 740 } else { 741 bitBuffer <<= alphabet.bitsPerChar; 742 bitBuffer |= alphabet.decode(ch); 743 bitBufferLength += alphabet.bitsPerChar; 744 745 if (bitBufferLength >= 8) { 746 bitBufferLength -= 8; 747 return (bitBuffer >> bitBufferLength) & 0xFF; 748 } 749 } 750 } 751 } 752 753 @Override 754 public void close() throws IOException { 755 reader.close(); 756 } 757 }; 758 } 759 760 @Override 761 public BaseEncoding omitPadding() { 762 return (paddingChar == null) ? this : newInstance(alphabet, null); 763 } 764 765 @Override 766 public BaseEncoding withPadChar(char padChar) { 767 if (8 % alphabet.bitsPerChar == 0 768 || (paddingChar != null && paddingChar.charValue() == padChar)) { 769 return this; 770 } else { 771 return newInstance(alphabet, padChar); 772 } 773 } 774 775 @Override 776 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 777 checkArgument( 778 padding().or(alphabet).matchesNoneOf(separator), 779 "Separator (%s) cannot contain alphabet or padding characters", 780 separator); 781 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 782 } 783 784 private transient BaseEncoding upperCase; 785 private transient BaseEncoding lowerCase; 786 787 @Override 788 public BaseEncoding upperCase() { 789 BaseEncoding result = upperCase; 790 if (result == null) { 791 Alphabet upper = alphabet.upperCase(); 792 result = upperCase = 793 (upper == alphabet) ? this : newInstance(upper, paddingChar); 794 } 795 return result; 796 } 797 798 @Override 799 public BaseEncoding lowerCase() { 800 BaseEncoding result = lowerCase; 801 if (result == null) { 802 Alphabet lower = alphabet.lowerCase(); 803 result = lowerCase = 804 (lower == alphabet) ? this : newInstance(lower, paddingChar); 805 } 806 return result; 807 } 808 809 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 810 return new StandardBaseEncoding(alphabet, paddingChar); 811 } 812 813 @Override 814 public String toString() { 815 StringBuilder builder = new StringBuilder("BaseEncoding."); 816 builder.append(alphabet.toString()); 817 if (8 % alphabet.bitsPerChar != 0) { 818 if (paddingChar == null) { 819 builder.append(".omitPadding()"); 820 } else { 821 builder.append(".withPadChar('").append(paddingChar).append("')"); 822 } 823 } 824 return builder.toString(); 825 } 826 827 @Override 828 public boolean equals(@Nullable Object other) { 829 if (other instanceof StandardBaseEncoding) { 830 StandardBaseEncoding that = (StandardBaseEncoding) other; 831 return this.alphabet.equals(that.alphabet) 832 && Objects.equal(this.paddingChar, that.paddingChar); 833 } 834 return false; 835 } 836 837 @Override 838 public int hashCode() { 839 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 840 } 841 } 842 843 static final class Base16Encoding extends StandardBaseEncoding { 844 final char[] encoding = new char[512]; 845 846 Base16Encoding(String name, String alphabetChars) { 847 this(new Alphabet(name, alphabetChars.toCharArray())); 848 } 849 850 private Base16Encoding(Alphabet alphabet) { 851 super(alphabet, null); 852 checkArgument(alphabet.chars.length == 16); 853 for (int i = 0; i < 256; ++i) { 854 encoding[i] = alphabet.encode(i >>> 4); 855 encoding[i | 0x100] = alphabet.encode(i & 0xF); 856 } 857 } 858 859 @Override 860 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 861 checkNotNull(target); 862 checkPositionIndexes(off, off + len, bytes.length); 863 for (int i = 0; i < len; ++i) { 864 int b = bytes[off + i] & 0xFF; 865 target.append(encoding[b]); 866 target.append(encoding[b | 0x100]); 867 } 868 } 869 870 @Override 871 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 872 checkNotNull(target); 873 if (chars.length() % 2 == 1) { 874 throw new DecodingException("Invalid input length " + chars.length()); 875 } 876 int bytesWritten = 0; 877 for (int i = 0; i < chars.length(); i += 2) { 878 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 879 target[bytesWritten++] = (byte) decoded; 880 } 881 return bytesWritten; 882 } 883 884 @Override 885 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 886 return new Base16Encoding(alphabet); 887 } 888 } 889 890 static final class Base64Encoding extends StandardBaseEncoding { 891 Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) { 892 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 893 } 894 895 private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) { 896 super(alphabet, paddingChar); 897 checkArgument(alphabet.chars.length == 64); 898 } 899 900 @Override 901 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 902 checkNotNull(target); 903 checkPositionIndexes(off, off + len, bytes.length); 904 int i = off; 905 for (int remaining = len; remaining >= 3; remaining -= 3) { 906 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 907 target.append(alphabet.encode(chunk >>> 18)); 908 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 909 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 910 target.append(alphabet.encode(chunk & 0x3F)); 911 } 912 if (i < off + len) { 913 encodeChunkTo(target, bytes, i, off + len - i); 914 } 915 } 916 917 @Override 918 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 919 checkNotNull(target); 920 chars = padding().trimTrailingFrom(chars); 921 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 922 throw new DecodingException("Invalid input length " + chars.length()); 923 } 924 int bytesWritten = 0; 925 for (int i = 0; i < chars.length(); ) { 926 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 927 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 928 target[bytesWritten++] = (byte) (chunk >>> 16); 929 if (i < chars.length()) { 930 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 931 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 932 if (i < chars.length()) { 933 chunk |= alphabet.decode(chars.charAt(i++)); 934 target[bytesWritten++] = (byte) (chunk & 0xFF); 935 } 936 } 937 } 938 return bytesWritten; 939 } 940 941 @Override 942 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 943 return new Base64Encoding(alphabet, paddingChar); 944 } 945 } 946 947 @GwtIncompatible // Reader 948 static Reader ignoringReader(final Reader delegate, final CharMatcher toIgnore) { 949 checkNotNull(delegate); 950 checkNotNull(toIgnore); 951 return new Reader() { 952 @Override 953 public int read() throws IOException { 954 int readChar; 955 do { 956 readChar = delegate.read(); 957 } while (readChar != -1 && toIgnore.matches((char) readChar)); 958 return readChar; 959 } 960 961 @Override 962 public int read(char[] cbuf, int off, int len) throws IOException { 963 throw new UnsupportedOperationException(); 964 } 965 966 @Override 967 public void close() throws IOException { 968 delegate.close(); 969 } 970 }; 971 } 972 973 static Appendable separatingAppendable( 974 final Appendable delegate, final String separator, final int afterEveryChars) { 975 checkNotNull(delegate); 976 checkNotNull(separator); 977 checkArgument(afterEveryChars > 0); 978 return new Appendable() { 979 int charsUntilSeparator = afterEveryChars; 980 981 @Override 982 public Appendable append(char c) throws IOException { 983 if (charsUntilSeparator == 0) { 984 delegate.append(separator); 985 charsUntilSeparator = afterEveryChars; 986 } 987 delegate.append(c); 988 charsUntilSeparator--; 989 return this; 990 } 991 992 @Override 993 public Appendable append(CharSequence chars, int off, int len) throws IOException { 994 throw new UnsupportedOperationException(); 995 } 996 997 @Override 998 public Appendable append(CharSequence chars) throws IOException { 999 throw new UnsupportedOperationException(); 1000 } 1001 }; 1002 } 1003 1004 @GwtIncompatible // Writer 1005 static Writer separatingWriter( 1006 final Writer delegate, final String separator, final int afterEveryChars) { 1007 final Appendable seperatingAppendable = 1008 separatingAppendable(delegate, separator, afterEveryChars); 1009 return new Writer() { 1010 @Override 1011 public void write(int c) throws IOException { 1012 seperatingAppendable.append((char) c); 1013 } 1014 1015 @Override 1016 public void write(char[] chars, int off, int len) throws IOException { 1017 throw new UnsupportedOperationException(); 1018 } 1019 1020 @Override 1021 public void flush() throws IOException { 1022 delegate.flush(); 1023 } 1024 1025 @Override 1026 public void close() throws IOException { 1027 delegate.close(); 1028 } 1029 }; 1030 } 1031 1032 static final class SeparatedBaseEncoding extends BaseEncoding { 1033 private final BaseEncoding delegate; 1034 private final String separator; 1035 private final int afterEveryChars; 1036 private final CharMatcher separatorChars; 1037 1038 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1039 this.delegate = checkNotNull(delegate); 1040 this.separator = checkNotNull(separator); 1041 this.afterEveryChars = afterEveryChars; 1042 checkArgument( 1043 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1044 this.separatorChars = CharMatcher.anyOf(separator).precomputed(); 1045 } 1046 1047 @Override 1048 CharMatcher padding() { 1049 return delegate.padding(); 1050 } 1051 1052 @Override 1053 int maxEncodedSize(int bytes) { 1054 int unseparatedSize = delegate.maxEncodedSize(bytes); 1055 return unseparatedSize 1056 + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR); 1057 } 1058 1059 @GwtIncompatible // Writer,OutputStream 1060 @Override 1061 public OutputStream encodingStream(final Writer output) { 1062 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1063 } 1064 1065 @Override 1066 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1067 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1068 } 1069 1070 @Override 1071 int maxDecodedSize(int chars) { 1072 return delegate.maxDecodedSize(chars); 1073 } 1074 1075 @Override 1076 public boolean canDecode(CharSequence chars) { 1077 return delegate.canDecode(separatorChars.removeFrom(chars)); 1078 } 1079 1080 @Override 1081 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1082 return delegate.decodeTo(target, separatorChars.removeFrom(chars)); 1083 } 1084 1085 @GwtIncompatible // Reader,InputStream 1086 @Override 1087 public InputStream decodingStream(final Reader reader) { 1088 return delegate.decodingStream(ignoringReader(reader, separatorChars)); 1089 } 1090 1091 @Override 1092 public BaseEncoding omitPadding() { 1093 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1094 } 1095 1096 @Override 1097 public BaseEncoding withPadChar(char padChar) { 1098 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1099 } 1100 1101 @Override 1102 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1103 throw new UnsupportedOperationException("Already have a separator"); 1104 } 1105 1106 @Override 1107 public BaseEncoding upperCase() { 1108 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1109 } 1110 1111 @Override 1112 public BaseEncoding lowerCase() { 1113 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1114 } 1115 1116 @Override 1117 public String toString() { 1118 return delegate + ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 1119 } 1120 } 1121}