001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.math.RoundingMode.CEILING; 024import static java.math.RoundingMode.FLOOR; 025import static java.math.RoundingMode.UNNECESSARY; 026 027import com.google.common.annotations.GwtCompatible; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.Ascii; 030import com.google.common.base.CharMatcher; 031import com.google.common.base.Objects; 032import java.io.IOException; 033import java.io.InputStream; 034import java.io.OutputStream; 035import java.io.Reader; 036import java.io.Writer; 037import java.util.Arrays; 038import javax.annotation.Nullable; 039 040/** 041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 042 * strings. This class includes several constants for encoding schemes specified by 043 * <a href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. 
For example, the expression:
 *
 * <pre> {@code
 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
 *
 * <p>returns the string {@code "MZXW6==="}, and <pre> {@code
 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
 *
 * <p>...returns the ASCII bytes of the string {@code "foo"}.
 *
 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC
 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify
 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified
 * behavior:
 *
 * <pre> {@code
 * BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
 *
 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
 *
 * <pre> {@code
 * // Do NOT do this
 * BaseEncoding hex = BaseEncoding.base16();
 * hex.lowerCase(); // does nothing!
 * return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
 *
 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
 * {@code x}, but the reverse does not necessarily hold.
 *
 * <table>
 * <tr>
 * <th>Encoding
 * <th>Alphabet
 * <th>{@code char:byte} ratio
 * <th>Default padding
 * <th>Comments
 * <tr>
 * <td>{@link #base16()}
 * <td>0-9 A-F
 * <td>2.00
 * <td>N/A
 * <td>Traditional hexadecimal. Defaults to upper case.
 * <tr>
 * <td>{@link #base32()}
 * <td>A-Z 2-7
 * <td>1.60
 * <td>=
 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
 * <tr>
 * <td>{@link #base32Hex()}
 * <td>0-9 A-V
 * <td>1.60
 * <td>=
 * <td>"Numerical" base 32; extended from the traditional hex alphabet.
Defaults to upper case. 098 * <tr> 099 * <td>{@link #base64()} 100 * <td>A-Z a-z 0-9 + / 101 * <td>1.33 102 * <td>= 103 * <td> 104 * <tr> 105 * <td>{@link #base64Url()} 106 * <td>A-Z a-z 0-9 - _ 107 * <td>1.33 108 * <td>= 109 * <td>Safe to use as filenames, or to pass in URLs without escaping 110 * </table> 111 * 112 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 113 * 114 * @author Louis Wasserman 115 * @since 14.0 116 */ 117@GwtCompatible(emulated = true) 118public abstract class BaseEncoding { 119 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 120 121 BaseEncoding() {} 122 123 /** 124 * Exception indicating invalid base-encoded input encountered while decoding. 125 * 126 * @author Louis Wasserman 127 * @since 15.0 128 */ 129 public static final class DecodingException extends IOException { 130 DecodingException(String message) { 131 super(message); 132 } 133 134 DecodingException(Throwable cause) { 135 super(cause); 136 } 137 } 138 139 /** 140 * Encodes the specified byte array, and returns the encoded {@code String}. 141 */ 142 public String encode(byte[] bytes) { 143 return encode(bytes, 0, bytes.length); 144 } 145 146 /** 147 * Encodes the specified range of the specified byte array, and returns the encoded 148 * {@code String}. 149 */ 150 public final String encode(byte[] bytes, int off, int len) { 151 checkPositionIndexes(off, off + len, bytes.length); 152 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 153 try { 154 encodeTo(result, bytes, off, len); 155 } catch (IOException impossible) { 156 throw new AssertionError(impossible); 157 } 158 return result.toString(); 159 } 160 161 /** 162 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 163 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing 164 * {@code Writer}. 
165 */ 166 @GwtIncompatible // Writer,OutputStream 167 public abstract OutputStream encodingStream(Writer writer); 168 169 /** 170 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 171 */ 172 @GwtIncompatible // ByteSink,CharSink 173 public final ByteSink encodingSink(final CharSink encodedSink) { 174 checkNotNull(encodedSink); 175 return new ByteSink() { 176 @Override 177 public OutputStream openStream() throws IOException { 178 return encodingStream(encodedSink.openStream()); 179 } 180 }; 181 } 182 183 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 184 185 private static byte[] extract(byte[] result, int length) { 186 if (length == result.length) { 187 return result; 188 } else { 189 byte[] trunc = new byte[length]; 190 System.arraycopy(result, 0, trunc, 0, length); 191 return trunc; 192 } 193 } 194 195 /** 196 * Determines whether the specified character sequence is a valid encoded string according to this 197 * encoding. 198 * 199 * @since 20.0 200 */ 201 public abstract boolean canDecode(CharSequence chars); 202 203 /** 204 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 205 * inverse operation to {@link #encode(byte[])}. 206 * 207 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 208 * encoding. 209 */ 210 public final byte[] decode(CharSequence chars) { 211 try { 212 return decodeChecked(chars); 213 } catch (DecodingException badInput) { 214 throw new IllegalArgumentException(badInput); 215 } 216 } 217 218 /** 219 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 220 * inverse operation to {@link #encode(byte[])}. 221 * 222 * @throws DecodingException if the input is not a valid encoded string according to this 223 * encoding. 
224 */ final byte[] decodeChecked(CharSequence chars) 225 throws DecodingException { 226 chars = padding().trimTrailingFrom(chars); 227 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 228 int len = decodeTo(tmp, chars); 229 return extract(tmp, len); 230 } 231 232 /** 233 * Returns an {@code InputStream} that decodes base-encoded input from the specified 234 * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific 235 * errors. 236 */ 237 @GwtIncompatible // Reader,InputStream 238 public abstract InputStream decodingStream(Reader reader); 239 240 /** 241 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified 242 * {@code CharSource}. 243 */ 244 @GwtIncompatible // ByteSource,CharSource 245 public final ByteSource decodingSource(final CharSource encodedSource) { 246 checkNotNull(encodedSource); 247 return new ByteSource() { 248 @Override 249 public InputStream openStream() throws IOException { 250 return decodingStream(encodedSource.openStream()); 251 } 252 }; 253 } 254 255 // Implementations for encoding/decoding 256 257 abstract int maxEncodedSize(int bytes); 258 259 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 260 261 abstract int maxDecodedSize(int chars); 262 263 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 264 265 abstract CharMatcher padding(); 266 267 // Modified encoding generators 268 269 /** 270 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 271 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 272 * section 3.2</a>, Padding of Encoded Data. 273 */ 274 public abstract BaseEncoding omitPadding(); 275 276 /** 277 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 278 * for padding. 
279 * 280 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 281 * separator 282 */ 283 public abstract BaseEncoding withPadChar(char padChar); 284 285 /** 286 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 287 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 288 * are skipped over in decoding. 289 * 290 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 291 * string, or if {@code n <= 0} 292 * @throws UnsupportedOperationException if this encoding already uses a separator 293 */ 294 public abstract BaseEncoding withSeparator(String separator, int n); 295 296 /** 297 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 298 * uppercase letters. Padding and separator characters remain in their original case. 299 * 300 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 301 * lower-case characters 302 */ 303 public abstract BaseEncoding upperCase(); 304 305 /** 306 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 307 * lowercase letters. Padding and separator characters remain in their original case. 308 * 309 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 310 * lower-case characters 311 */ 312 public abstract BaseEncoding lowerCase(); 313 314 private static final BaseEncoding BASE64 = 315 new Base64Encoding( 316 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 317 318 /** 319 * The "base64" base encoding specified by 320 * <a href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 321 * Encoding. (This is the same as the base 64 encoding from 322 * <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 
323 * 324 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 325 * omitted} or {@linkplain #withPadChar(char) replaced}. 326 * 327 * <p>No line feeds are added by default, as per 328 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 329 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 330 */ 331 public static BaseEncoding base64() { 332 return BASE64; 333 } 334 335 private static final BaseEncoding BASE64_URL = 336 new Base64Encoding( 337 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 338 339 /** 340 * The "base64url" encoding specified by 341 * <a href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 342 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This 343 * is the same as the base 64 encoding with URL and filename safe alphabet from 344 * <a href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 345 * 346 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 347 * omitted} or {@linkplain #withPadChar(char) replaced}. 348 * 349 * <p>No line feeds are added by default, as per 350 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 351 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 352 */ 353 public static BaseEncoding base64Url() { 354 return BASE64_URL; 355 } 356 357 private static final BaseEncoding BASE32 = 358 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 359 360 /** 361 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC 362 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from 363 * <a href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 
364 * 365 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 366 * omitted} or {@linkplain #withPadChar(char) replaced}. 367 * 368 * <p>No line feeds are added by default, as per 369 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 370 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 371 */ 372 public static BaseEncoding base32() { 373 return BASE32; 374 } 375 376 private static final BaseEncoding BASE32_HEX = 377 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 378 379 /** 380 * The "base32hex" encoding specified by 381 * <a href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 382 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 383 * 384 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 385 * omitted} or {@linkplain #withPadChar(char) replaced}. 386 * 387 * <p>No line feeds are added by default, as per 388 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 389 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 390 */ 391 public static BaseEncoding base32Hex() { 392 return BASE32_HEX; 393 } 394 395 private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF"); 396 397 /** 398 * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC 399 * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from 400 * <a href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 401 * "hexadecimal" format. 402 * 403 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()} 404 * have no effect. 
405 * 406 * <p>No line feeds are added by default, as per 407 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 408 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 409 */ 410 public static BaseEncoding base16() { 411 return BASE16; 412 } 413 414 private static final class Alphabet extends CharMatcher { 415 private final String name; 416 // this is meant to be immutable -- don't modify it! 417 private final char[] chars; 418 final int mask; 419 final int bitsPerChar; 420 final int charsPerChunk; 421 final int bytesPerChunk; 422 private final byte[] decodabet; 423 private final boolean[] validPadding; 424 425 Alphabet(String name, char[] chars) { 426 this.name = checkNotNull(name); 427 this.chars = checkNotNull(chars); 428 try { 429 this.bitsPerChar = log2(chars.length, UNNECESSARY); 430 } catch (ArithmeticException e) { 431 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 432 } 433 434 /* 435 * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes 436 * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. 
437 */ 438 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); 439 try { 440 this.charsPerChunk = 8 / gcd; 441 this.bytesPerChunk = bitsPerChar / gcd; 442 } catch (ArithmeticException e) { 443 throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e); 444 } 445 446 this.mask = chars.length - 1; 447 448 byte[] decodabet = new byte[Ascii.MAX + 1]; 449 Arrays.fill(decodabet, (byte) -1); 450 for (int i = 0; i < chars.length; i++) { 451 char c = chars[i]; 452 checkArgument(CharMatcher.ascii().matches(c), "Non-ASCII character: %s", c); 453 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 454 decodabet[c] = (byte) i; 455 } 456 this.decodabet = decodabet; 457 458 boolean[] validPadding = new boolean[charsPerChunk]; 459 for (int i = 0; i < bytesPerChunk; i++) { 460 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 461 } 462 this.validPadding = validPadding; 463 } 464 465 char encode(int bits) { 466 return chars[bits]; 467 } 468 469 boolean isValidPaddingStartPosition(int index) { 470 return validPadding[index % charsPerChunk]; 471 } 472 473 boolean canDecode(char ch) { 474 return ch <= Ascii.MAX && decodabet[ch] != -1; 475 } 476 477 int decode(char ch) throws DecodingException { 478 if (ch > Ascii.MAX || decodabet[ch] == -1) { 479 throw new DecodingException( 480 "Unrecognized character: " 481 + (CharMatcher.invisible().matches(ch) ? 
"0x" + Integer.toHexString(ch) : ch)); 482 } 483 return decodabet[ch]; 484 } 485 486 private boolean hasLowerCase() { 487 for (char c : chars) { 488 if (Ascii.isLowerCase(c)) { 489 return true; 490 } 491 } 492 return false; 493 } 494 495 private boolean hasUpperCase() { 496 for (char c : chars) { 497 if (Ascii.isUpperCase(c)) { 498 return true; 499 } 500 } 501 return false; 502 } 503 504 Alphabet upperCase() { 505 if (!hasLowerCase()) { 506 return this; 507 } else { 508 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 509 char[] upperCased = new char[chars.length]; 510 for (int i = 0; i < chars.length; i++) { 511 upperCased[i] = Ascii.toUpperCase(chars[i]); 512 } 513 return new Alphabet(name + ".upperCase()", upperCased); 514 } 515 } 516 517 Alphabet lowerCase() { 518 if (!hasUpperCase()) { 519 return this; 520 } else { 521 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 522 char[] lowerCased = new char[chars.length]; 523 for (int i = 0; i < chars.length; i++) { 524 lowerCased[i] = Ascii.toLowerCase(chars[i]); 525 } 526 return new Alphabet(name + ".lowerCase()", lowerCased); 527 } 528 } 529 530 @Override 531 public boolean matches(char c) { 532 return CharMatcher.ascii().matches(c) && decodabet[c] != -1; 533 } 534 535 @Override 536 public String toString() { 537 return name; 538 } 539 540 @Override 541 public boolean equals(@Nullable Object other) { 542 if (other instanceof Alphabet) { 543 Alphabet that = (Alphabet) other; 544 return Arrays.equals(this.chars, that.chars); 545 } 546 return false; 547 } 548 549 @Override 550 public int hashCode() { 551 return Arrays.hashCode(chars); 552 } 553 } 554 555 static class StandardBaseEncoding extends BaseEncoding { 556 // TODO(lowasser): provide a useful toString 557 final Alphabet alphabet; 558 559 @Nullable final Character paddingChar; 560 561 StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) { 562 this(new 
Alphabet(name, alphabetChars.toCharArray()), paddingChar); 563 } 564 565 StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) { 566 this.alphabet = checkNotNull(alphabet); 567 checkArgument( 568 paddingChar == null || !alphabet.matches(paddingChar), 569 "Padding character %s was already in alphabet", 570 paddingChar); 571 this.paddingChar = paddingChar; 572 } 573 574 @Override 575 CharMatcher padding() { 576 return (paddingChar == null) ? CharMatcher.none() : CharMatcher.is(paddingChar.charValue()); 577 } 578 579 @Override 580 int maxEncodedSize(int bytes) { 581 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 582 } 583 584 @GwtIncompatible // Writer,OutputStream 585 @Override 586 public OutputStream encodingStream(final Writer out) { 587 checkNotNull(out); 588 return new OutputStream() { 589 int bitBuffer = 0; 590 int bitBufferLength = 0; 591 int writtenChars = 0; 592 593 @Override 594 public void write(int b) throws IOException { 595 bitBuffer <<= 8; 596 bitBuffer |= b & 0xFF; 597 bitBufferLength += 8; 598 while (bitBufferLength >= alphabet.bitsPerChar) { 599 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 600 out.write(alphabet.encode(charIndex)); 601 writtenChars++; 602 bitBufferLength -= alphabet.bitsPerChar; 603 } 604 } 605 606 @Override 607 public void flush() throws IOException { 608 out.flush(); 609 } 610 611 @Override 612 public void close() throws IOException { 613 if (bitBufferLength > 0) { 614 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 615 out.write(alphabet.encode(charIndex)); 616 writtenChars++; 617 if (paddingChar != null) { 618 while (writtenChars % alphabet.charsPerChunk != 0) { 619 out.write(paddingChar.charValue()); 620 writtenChars++; 621 } 622 } 623 } 624 out.close(); 625 } 626 }; 627 } 628 629 @Override 630 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 631 
checkNotNull(target); 632 checkPositionIndexes(off, off + len, bytes.length); 633 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 634 encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i)); 635 } 636 } 637 638 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 639 checkNotNull(target); 640 checkPositionIndexes(off, off + len, bytes.length); 641 checkArgument(len <= alphabet.bytesPerChunk); 642 long bitBuffer = 0; 643 for (int i = 0; i < len; ++i) { 644 bitBuffer |= bytes[off + i] & 0xFF; 645 bitBuffer <<= 8; // Add additional zero byte in the end. 646 } 647 // Position of first character is length of bitBuffer minus bitsPerChar. 648 final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 649 int bitsProcessed = 0; 650 while (bitsProcessed < len * 8) { 651 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 652 target.append(alphabet.encode(charIndex)); 653 bitsProcessed += alphabet.bitsPerChar; 654 } 655 if (paddingChar != null) { 656 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 657 target.append(paddingChar.charValue()); 658 bitsProcessed += alphabet.bitsPerChar; 659 } 660 } 661 } 662 663 @Override 664 int maxDecodedSize(int chars) { 665 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 666 } 667 668 @Override 669 public boolean canDecode(CharSequence chars) { 670 chars = padding().trimTrailingFrom(chars); 671 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 672 return false; 673 } 674 for (int i = 0; i < chars.length(); i++) { 675 if (!alphabet.canDecode(chars.charAt(i))) { 676 return false; 677 } 678 } 679 return true; 680 } 681 682 @Override 683 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 684 checkNotNull(target); 685 chars = padding().trimTrailingFrom(chars); 686 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 687 throw new DecodingException("Invalid input length " + 
chars.length()); 688 } 689 int bytesWritten = 0; 690 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 691 long chunk = 0; 692 int charsProcessed = 0; 693 for (int i = 0; i < alphabet.charsPerChunk; i++) { 694 chunk <<= alphabet.bitsPerChar; 695 if (charIdx + i < chars.length()) { 696 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 697 } 698 } 699 final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 700 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 701 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 702 } 703 } 704 return bytesWritten; 705 } 706 707 @GwtIncompatible // Reader,InputStream 708 @Override 709 public InputStream decodingStream(final Reader reader) { 710 checkNotNull(reader); 711 return new InputStream() { 712 int bitBuffer = 0; 713 int bitBufferLength = 0; 714 int readChars = 0; 715 boolean hitPadding = false; 716 final CharMatcher paddingMatcher = padding(); 717 718 @Override 719 public int read() throws IOException { 720 while (true) { 721 int readChar = reader.read(); 722 if (readChar == -1) { 723 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 724 throw new DecodingException("Invalid input length " + readChars); 725 } 726 return -1; 727 } 728 readChars++; 729 char ch = (char) readChar; 730 if (paddingMatcher.matches(ch)) { 731 if (!hitPadding 732 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 733 throw new DecodingException("Padding cannot start at index " + readChars); 734 } 735 hitPadding = true; 736 } else if (hitPadding) { 737 throw new DecodingException( 738 "Expected padding character but found '" + ch + "' at index " + readChars); 739 } else { 740 bitBuffer <<= alphabet.bitsPerChar; 741 bitBuffer |= alphabet.decode(ch); 742 bitBufferLength += alphabet.bitsPerChar; 743 744 if (bitBufferLength >= 8) { 745 bitBufferLength -= 8; 746 return 
(bitBuffer >> bitBufferLength) & 0xFF; 747 } 748 } 749 } 750 } 751 752 @Override 753 public void close() throws IOException { 754 reader.close(); 755 } 756 }; 757 } 758 759 @Override 760 public BaseEncoding omitPadding() { 761 return (paddingChar == null) ? this : newInstance(alphabet, null); 762 } 763 764 @Override 765 public BaseEncoding withPadChar(char padChar) { 766 if (8 % alphabet.bitsPerChar == 0 767 || (paddingChar != null && paddingChar.charValue() == padChar)) { 768 return this; 769 } else { 770 return newInstance(alphabet, padChar); 771 } 772 } 773 774 @Override 775 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 776 checkArgument( 777 padding().or(alphabet).matchesNoneOf(separator), 778 "Separator (%s) cannot contain alphabet or padding characters", 779 separator); 780 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 781 } 782 783 private transient BaseEncoding upperCase; 784 private transient BaseEncoding lowerCase; 785 786 @Override 787 public BaseEncoding upperCase() { 788 BaseEncoding result = upperCase; 789 if (result == null) { 790 Alphabet upper = alphabet.upperCase(); 791 result = upperCase = 792 (upper == alphabet) ? this : newInstance(upper, paddingChar); 793 } 794 return result; 795 } 796 797 @Override 798 public BaseEncoding lowerCase() { 799 BaseEncoding result = lowerCase; 800 if (result == null) { 801 Alphabet lower = alphabet.lowerCase(); 802 result = lowerCase = 803 (lower == alphabet) ? 
this : newInstance(lower, paddingChar); 804 } 805 return result; 806 } 807 808 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 809 return new StandardBaseEncoding(alphabet, paddingChar); 810 } 811 812 @Override 813 public String toString() { 814 StringBuilder builder = new StringBuilder("BaseEncoding."); 815 builder.append(alphabet.toString()); 816 if (8 % alphabet.bitsPerChar != 0) { 817 if (paddingChar == null) { 818 builder.append(".omitPadding()"); 819 } else { 820 builder.append(".withPadChar('").append(paddingChar).append("')"); 821 } 822 } 823 return builder.toString(); 824 } 825 826 @Override 827 public boolean equals(@Nullable Object other) { 828 if (other instanceof StandardBaseEncoding) { 829 StandardBaseEncoding that = (StandardBaseEncoding) other; 830 return this.alphabet.equals(that.alphabet) 831 && Objects.equal(this.paddingChar, that.paddingChar); 832 } 833 return false; 834 } 835 836 @Override 837 public int hashCode() { 838 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 839 } 840 } 841 842 static final class Base16Encoding extends StandardBaseEncoding { 843 final char[] encoding = new char[512]; 844 845 Base16Encoding(String name, String alphabetChars) { 846 this(new Alphabet(name, alphabetChars.toCharArray())); 847 } 848 849 private Base16Encoding(Alphabet alphabet) { 850 super(alphabet, null); 851 checkArgument(alphabet.chars.length == 16); 852 for (int i = 0; i < 256; ++i) { 853 encoding[i] = alphabet.encode(i >>> 4); 854 encoding[i | 0x100] = alphabet.encode(i & 0xF); 855 } 856 } 857 858 @Override 859 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 860 checkNotNull(target); 861 checkPositionIndexes(off, off + len, bytes.length); 862 for (int i = 0; i < len; ++i) { 863 int b = bytes[off + i] & 0xFF; 864 target.append(encoding[b]); 865 target.append(encoding[b | 0x100]); 866 } 867 } 868 869 @Override 870 int decodeTo(byte[] target, CharSequence chars) throws 
DecodingException { 871 checkNotNull(target); 872 if (chars.length() % 2 == 1) { 873 throw new DecodingException("Invalid input length " + chars.length()); 874 } 875 int bytesWritten = 0; 876 for (int i = 0; i < chars.length(); i += 2) { 877 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 878 target[bytesWritten++] = (byte) decoded; 879 } 880 return bytesWritten; 881 } 882 883 @Override 884 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 885 return new Base16Encoding(alphabet); 886 } 887 } 888 889 static final class Base64Encoding extends StandardBaseEncoding { 890 Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) { 891 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 892 } 893 894 private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) { 895 super(alphabet, paddingChar); 896 checkArgument(alphabet.chars.length == 64); 897 } 898 899 @Override 900 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 901 checkNotNull(target); 902 checkPositionIndexes(off, off + len, bytes.length); 903 int i = off; 904 for (int remaining = len; remaining >= 3; remaining -= 3) { 905 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 906 target.append(alphabet.encode(chunk >>> 18)); 907 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 908 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 909 target.append(alphabet.encode(chunk & 0x3F)); 910 } 911 if (i < off + len) { 912 encodeChunkTo(target, bytes, i, off + len - i); 913 } 914 } 915 916 @Override 917 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 918 checkNotNull(target); 919 chars = padding().trimTrailingFrom(chars); 920 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 921 throw new DecodingException("Invalid input length " + chars.length()); 922 } 923 int bytesWritten = 0; 
924 for (int i = 0; i < chars.length(); ) { 925 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 926 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 927 target[bytesWritten++] = (byte) (chunk >>> 16); 928 if (i < chars.length()) { 929 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 930 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 931 if (i < chars.length()) { 932 chunk |= alphabet.decode(chars.charAt(i++)); 933 target[bytesWritten++] = (byte) (chunk & 0xFF); 934 } 935 } 936 } 937 return bytesWritten; 938 } 939 940 @Override 941 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 942 return new Base64Encoding(alphabet, paddingChar); 943 } 944 } 945 946 @GwtIncompatible // Reader 947 static Reader ignoringReader(final Reader delegate, final CharMatcher toIgnore) { 948 checkNotNull(delegate); 949 checkNotNull(toIgnore); 950 return new Reader() { 951 @Override 952 public int read() throws IOException { 953 int readChar; 954 do { 955 readChar = delegate.read(); 956 } while (readChar != -1 && toIgnore.matches((char) readChar)); 957 return readChar; 958 } 959 960 @Override 961 public int read(char[] cbuf, int off, int len) throws IOException { 962 throw new UnsupportedOperationException(); 963 } 964 965 @Override 966 public void close() throws IOException { 967 delegate.close(); 968 } 969 }; 970 } 971 972 static Appendable separatingAppendable( 973 final Appendable delegate, final String separator, final int afterEveryChars) { 974 checkNotNull(delegate); 975 checkNotNull(separator); 976 checkArgument(afterEveryChars > 0); 977 return new Appendable() { 978 int charsUntilSeparator = afterEveryChars; 979 980 @Override 981 public Appendable append(char c) throws IOException { 982 if (charsUntilSeparator == 0) { 983 delegate.append(separator); 984 charsUntilSeparator = afterEveryChars; 985 } 986 delegate.append(c); 987 charsUntilSeparator--; 988 return this; 989 } 990 991 @Override 992 public Appendable append(CharSequence chars, 
int off, int len) throws IOException { 993 throw new UnsupportedOperationException(); 994 } 995 996 @Override 997 public Appendable append(CharSequence chars) throws IOException { 998 throw new UnsupportedOperationException(); 999 } 1000 }; 1001 } 1002 1003 @GwtIncompatible // Writer 1004 static Writer separatingWriter( 1005 final Writer delegate, final String separator, final int afterEveryChars) { 1006 final Appendable seperatingAppendable = 1007 separatingAppendable(delegate, separator, afterEveryChars); 1008 return new Writer() { 1009 @Override 1010 public void write(int c) throws IOException { 1011 seperatingAppendable.append((char) c); 1012 } 1013 1014 @Override 1015 public void write(char[] chars, int off, int len) throws IOException { 1016 throw new UnsupportedOperationException(); 1017 } 1018 1019 @Override 1020 public void flush() throws IOException { 1021 delegate.flush(); 1022 } 1023 1024 @Override 1025 public void close() throws IOException { 1026 delegate.close(); 1027 } 1028 }; 1029 } 1030 1031 static final class SeparatedBaseEncoding extends BaseEncoding { 1032 private final BaseEncoding delegate; 1033 private final String separator; 1034 private final int afterEveryChars; 1035 private final CharMatcher separatorChars; 1036 1037 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1038 this.delegate = checkNotNull(delegate); 1039 this.separator = checkNotNull(separator); 1040 this.afterEveryChars = afterEveryChars; 1041 checkArgument( 1042 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1043 this.separatorChars = CharMatcher.anyOf(separator).precomputed(); 1044 } 1045 1046 @Override 1047 CharMatcher padding() { 1048 return delegate.padding(); 1049 } 1050 1051 @Override 1052 int maxEncodedSize(int bytes) { 1053 int unseparatedSize = delegate.maxEncodedSize(bytes); 1054 return unseparatedSize 1055 + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, 
FLOOR); 1056 } 1057 1058 @GwtIncompatible // Writer,OutputStream 1059 @Override 1060 public OutputStream encodingStream(final Writer output) { 1061 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1062 } 1063 1064 @Override 1065 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1066 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1067 } 1068 1069 @Override 1070 int maxDecodedSize(int chars) { 1071 return delegate.maxDecodedSize(chars); 1072 } 1073 1074 @Override 1075 public boolean canDecode(CharSequence chars) { 1076 return delegate.canDecode(separatorChars.removeFrom(chars)); 1077 } 1078 1079 @Override 1080 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1081 return delegate.decodeTo(target, separatorChars.removeFrom(chars)); 1082 } 1083 1084 @GwtIncompatible // Reader,InputStream 1085 @Override 1086 public InputStream decodingStream(final Reader reader) { 1087 return delegate.decodingStream(ignoringReader(reader, separatorChars)); 1088 } 1089 1090 @Override 1091 public BaseEncoding omitPadding() { 1092 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1093 } 1094 1095 @Override 1096 public BaseEncoding withPadChar(char padChar) { 1097 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1098 } 1099 1100 @Override 1101 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1102 throw new UnsupportedOperationException("Already have a separator"); 1103 } 1104 1105 @Override 1106 public BaseEncoding upperCase() { 1107 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1108 } 1109 1110 @Override 1111 public BaseEncoding lowerCase() { 1112 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1113 } 1114 1115 @Override 1116 public String toString() { 1117 return delegate + ".withSeparator(\"" + separator + "\", " + 
afterEveryChars + ")"; 1118 } 1119 } 1120}