001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.math.IntMath.divide; 022import static com.google.common.math.IntMath.log2; 023import static java.math.RoundingMode.CEILING; 024import static java.math.RoundingMode.FLOOR; 025import static java.math.RoundingMode.UNNECESSARY; 026 027import com.google.common.annotations.GwtCompatible; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.Ascii; 030import com.google.common.base.CharMatcher; 031import com.google.common.base.Objects; 032import java.io.IOException; 033import java.io.InputStream; 034import java.io.OutputStream; 035import java.io.Reader; 036import java.io.Writer; 037import java.util.Arrays; 038import javax.annotation.Nullable; 039 040/** 041 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 042 * strings. This class includes several constants for encoding schemes specified by 043 * <a href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. 
For example, the expression: 044 * 045 * <pre> {@code 046 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre> 047 * 048 * <p>returns the string {@code "MZXW6==="}, and <pre> {@code 049 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre> 050 * 051 * <p>...returns the ASCII bytes of the string {@code "foo"}. 052 * 053 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with RFC 054 * 4648. Decoding rejects characters in the wrong case, though padding is optional. To modify 055 * encoding and decoding behavior, use configuration methods to obtain a new encoding with modified 056 * behavior: 057 * 058 * <pre> {@code 059 * BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre> 060 * 061 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 062 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 063 * 064 * <pre> {@code 065 * // Do NOT do this 066 * BaseEncoding hex = BaseEncoding.base16(); 067 * hex.lowerCase(); // does nothing! 068 * return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre> 069 * 070 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to 071 * {@code x}, but the reverse does not necessarily hold. 072 * 073 * <table> 074 * <tr> 075 * <th>Encoding 076 * <th>Alphabet 077 * <th>{@code char:byte} ratio 078 * <th>Default padding 079 * <th>Comments 080 * <tr> 081 * <td>{@link #base16()} 082 * <td>0-9 A-F 083 * <td>2.00 084 * <td>N/A 085 * <td>Traditional hexadecimal. Defaults to upper case. 086 * <tr> 087 * <td>{@link #base32()} 088 * <td>A-Z 2-7 089 * <td>1.60 090 * <td>= 091 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 092 * <tr> 093 * <td>{@link #base32Hex()} 094 * <td>0-9 A-V 095 * <td>1.60 096 * <td>= 097 * <td>"Numerical" base 32; extended from the traditional hex alphabet. 
Defaults to upper case. 098 * <tr> 099 * <td>{@link #base64()} 100 * <td>A-Z a-z 0-9 + / 101 * <td>1.33 102 * <td>= 103 * <td> 104 * <tr> 105 * <td>{@link #base64Url()} 106 * <td>A-Z a-z 0-9 - _ 107 * <td>1.33 108 * <td>= 109 * <td>Safe to use as filenames, or to pass in URLs without escaping 110 * </table> 111 * 112 * <p>All instances of this class are immutable, so they may be stored safely as static constants. 113 * 114 * @author Louis Wasserman 115 * @since 14.0 116 */ 117@GwtCompatible(emulated = true) 118public abstract class BaseEncoding { 119 // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public. 120 121 BaseEncoding() {} 122 123 /** 124 * Exception indicating invalid base-encoded input encountered while decoding. 125 * 126 * @author Louis Wasserman 127 * @since 15.0 128 */ 129 public static final class DecodingException extends IOException { 130 DecodingException(String message) { 131 super(message); 132 } 133 134 DecodingException(Throwable cause) { 135 super(cause); 136 } 137 } 138 139 /** 140 * Encodes the specified byte array, and returns the encoded {@code String}. 141 */ 142 public String encode(byte[] bytes) { 143 return encode(bytes, 0, bytes.length); 144 } 145 146 /** 147 * Encodes the specified range of the specified byte array, and returns the encoded 148 * {@code String}. 149 */ 150 public final String encode(byte[] bytes, int off, int len) { 151 checkPositionIndexes(off, off + len, bytes.length); 152 StringBuilder result = new StringBuilder(maxEncodedSize(len)); 153 try { 154 encodeTo(result, bytes, off, len); 155 } catch (IOException impossible) { 156 throw new AssertionError(impossible); 157 } 158 return result.toString(); 159 } 160 161 /** 162 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 163 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing 164 * {@code Writer}. 
165 */ 166 @GwtIncompatible // Writer,OutputStream 167 public abstract OutputStream encodingStream(Writer writer); 168 169 /** 170 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 171 */ 172 @GwtIncompatible // ByteSink,CharSink 173 public final ByteSink encodingSink(final CharSink encodedSink) { 174 checkNotNull(encodedSink); 175 return new ByteSink() { 176 @Override 177 public OutputStream openStream() throws IOException { 178 return encodingStream(encodedSink.openStream()); 179 } 180 }; 181 } 182 183 // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher) 184 185 private static byte[] extract(byte[] result, int length) { 186 if (length == result.length) { 187 return result; 188 } else { 189 byte[] trunc = new byte[length]; 190 System.arraycopy(result, 0, trunc, 0, length); 191 return trunc; 192 } 193 } 194 195 /** 196 * Determines whether the specified character sequence is a valid encoded string according to this 197 * encoding. 198 */ 199 public abstract boolean canDecode(CharSequence chars); 200 201 /** 202 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 203 * inverse operation to {@link #encode(byte[])}. 204 * 205 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 206 * encoding. 207 */ 208 public final byte[] decode(CharSequence chars) { 209 try { 210 return decodeChecked(chars); 211 } catch (DecodingException badInput) { 212 throw new IllegalArgumentException(badInput); 213 } 214 } 215 216 /** 217 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. This is the 218 * inverse operation to {@link #encode(byte[])}. 219 * 220 * @throws DecodingException if the input is not a valid encoded string according to this 221 * encoding. 
222 */ final byte[] decodeChecked(CharSequence chars) 223 throws DecodingException { 224 chars = padding().trimTrailingFrom(chars); 225 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 226 int len = decodeTo(tmp, chars); 227 return extract(tmp, len); 228 } 229 230 /** 231 * Returns an {@code InputStream} that decodes base-encoded input from the specified 232 * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific 233 * errors. 234 */ 235 @GwtIncompatible // Reader,InputStream 236 public abstract InputStream decodingStream(Reader reader); 237 238 /** 239 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified 240 * {@code CharSource}. 241 */ 242 @GwtIncompatible // ByteSource,CharSource 243 public final ByteSource decodingSource(final CharSource encodedSource) { 244 checkNotNull(encodedSource); 245 return new ByteSource() { 246 @Override 247 public InputStream openStream() throws IOException { 248 return decodingStream(encodedSource.openStream()); 249 } 250 }; 251 } 252 253 // Implementations for encoding/decoding 254 255 abstract int maxEncodedSize(int bytes); 256 257 abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException; 258 259 abstract int maxDecodedSize(int chars); 260 261 abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException; 262 263 abstract CharMatcher padding(); 264 265 // Modified encoding generators 266 267 /** 268 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 269 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 270 * section 3.2</a>, Padding of Encoded Data. 271 */ 272 public abstract BaseEncoding omitPadding(); 273 274 /** 275 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 276 * for padding. 
/**
 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
 * for padding.
 *
 * @param padChar the character the returned encoding uses for padding
 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
 *     separator
 */
public abstract BaseEncoding withPadChar(char padChar);

/**
 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
 * after every {@code n} characters. Any occurrences of any characters that occur in the separator
 * are skipped over in decoding.
 *
 * @param separator the string inserted between groups of encoded characters
 * @param n the number of encoded characters in each group
 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
 *     string, or if {@code n <= 0}
 * @throws UnsupportedOperationException if this encoding already uses a separator
 */
public abstract BaseEncoding withSeparator(String separator, int n);

/**
 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
 * uppercase letters. Padding and separator characters remain in their original case.
 *
 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
 *     lower-case characters
 */
public abstract BaseEncoding upperCase();

/**
 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
 * lowercase letters. Padding and separator characters remain in their original case.
 *
 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
 *     lower-case characters
 */
public abstract BaseEncoding lowerCase();
// Shared instance returned by base64(); 64-character alphabet ending in '+' and '/', padded
// with '='.
private static final BaseEncoding BASE64 =
    new Base64Encoding(
        "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

/**
 * The "base64" base encoding specified by
 * <a href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64
 * Encoding. (This is the same as the base 64 encoding from
 * <a href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per
 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 *
 * @return the shared {@code base64} encoding instance
 */
public static BaseEncoding base64() {
  return BASE64;
}

// Shared instance returned by base64Url(); same as BASE64 except that the last two alphabet
// characters are '-' and '_'.
private static final BaseEncoding BASE64_URL =
    new Base64Encoding(
        "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

/**
 * The "base64url" encoding specified by
 * <a href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." (This
 * is the same as the base 64 encoding with URL and filename safe alphabet from
 * <a href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per
 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 *
 * @return the shared {@code base64url} encoding instance
 */
public static BaseEncoding base64Url() {
  return BASE64_URL;
}
// Shared instance returned by base32(); 32-character alphabet A-Z then 2-7, padded with '='.
private static final BaseEncoding BASE32 =
    new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

/**
 * The "base32" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-6">RFC
 * 4648 section 6</a>, Base 32 Encoding. (This is the same as the base 32 encoding from
 * <a href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per
 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 *
 * @return the shared {@code base32} encoding instance
 */
public static BaseEncoding base32() {
  return BASE32;
}

// Shared instance returned by base32Hex(); 32-character alphabet 0-9 then A-V, padded with '='.
private static final BaseEncoding BASE32_HEX =
    new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

/**
 * The "base32hex" encoding specified by
 * <a href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
 *
 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
 * omitted} or {@linkplain #withPadChar(char) replaced}.
 *
 * <p>No line feeds are added by default, as per
 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 *
 * @return the shared {@code base32hex} encoding instance
 */
public static BaseEncoding base32Hex() {
  return BASE32_HEX;
}

// Shared instance returned by base16(); upper-case hexadecimal digits, constructed without a
// padding character.
private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");

/**
 * The "base16" encoding specified by <a href="http://tools.ietf.org/html/rfc4648#section-8">RFC
 * 4648 section 8</a>, Base 16 Encoding. (This is the same as the base 16 encoding from
 * <a href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
 * "hexadecimal" format.
 *
 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and {@link #omitPadding()}
 * have no effect.
 *
 * <p>No line feeds are added by default, as per
 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds
 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
 *
 * @return the shared {@code base16} encoding instance
 */
public static BaseEncoding base16() {
  return BASE16;
}
403 * 404 * <p>No line feeds are added by default, as per 405 * <a href="http://tools.ietf.org/html/rfc4648#section-3.1">RFC 4648 section 3.1</a>, Line Feeds 406 * in Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 407 */ 408 public static BaseEncoding base16() { 409 return BASE16; 410 } 411 412 private static final class Alphabet extends CharMatcher { 413 private final String name; 414 // this is meant to be immutable -- don't modify it! 415 private final char[] chars; 416 final int mask; 417 final int bitsPerChar; 418 final int charsPerChunk; 419 final int bytesPerChunk; 420 private final byte[] decodabet; 421 private final boolean[] validPadding; 422 423 Alphabet(String name, char[] chars) { 424 this.name = checkNotNull(name); 425 this.chars = checkNotNull(chars); 426 try { 427 this.bitsPerChar = log2(chars.length, UNNECESSARY); 428 } catch (ArithmeticException e) { 429 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 430 } 431 432 /* 433 * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes 434 * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. 
435 */ 436 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); 437 try { 438 this.charsPerChunk = 8 / gcd; 439 this.bytesPerChunk = bitsPerChar / gcd; 440 } catch (ArithmeticException e) { 441 throw new IllegalArgumentException("Illegal alphabet " + new String(chars), e); 442 } 443 444 this.mask = chars.length - 1; 445 446 byte[] decodabet = new byte[Ascii.MAX + 1]; 447 Arrays.fill(decodabet, (byte) -1); 448 for (int i = 0; i < chars.length; i++) { 449 char c = chars[i]; 450 checkArgument(CharMatcher.ascii().matches(c), "Non-ASCII character: %s", c); 451 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 452 decodabet[c] = (byte) i; 453 } 454 this.decodabet = decodabet; 455 456 boolean[] validPadding = new boolean[charsPerChunk]; 457 for (int i = 0; i < bytesPerChunk; i++) { 458 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 459 } 460 this.validPadding = validPadding; 461 } 462 463 char encode(int bits) { 464 return chars[bits]; 465 } 466 467 boolean isValidPaddingStartPosition(int index) { 468 return validPadding[index % charsPerChunk]; 469 } 470 471 boolean canDecode(char ch) { 472 return ch <= Ascii.MAX && decodabet[ch] != -1; 473 } 474 475 int decode(char ch) throws DecodingException { 476 if (ch > Ascii.MAX || decodabet[ch] == -1) { 477 throw new DecodingException( 478 "Unrecognized character: " 479 + (CharMatcher.invisible().matches(ch) ? 
"0x" + Integer.toHexString(ch) : ch)); 480 } 481 return decodabet[ch]; 482 } 483 484 private boolean hasLowerCase() { 485 for (char c : chars) { 486 if (Ascii.isLowerCase(c)) { 487 return true; 488 } 489 } 490 return false; 491 } 492 493 private boolean hasUpperCase() { 494 for (char c : chars) { 495 if (Ascii.isUpperCase(c)) { 496 return true; 497 } 498 } 499 return false; 500 } 501 502 Alphabet upperCase() { 503 if (!hasLowerCase()) { 504 return this; 505 } else { 506 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 507 char[] upperCased = new char[chars.length]; 508 for (int i = 0; i < chars.length; i++) { 509 upperCased[i] = Ascii.toUpperCase(chars[i]); 510 } 511 return new Alphabet(name + ".upperCase()", upperCased); 512 } 513 } 514 515 Alphabet lowerCase() { 516 if (!hasUpperCase()) { 517 return this; 518 } else { 519 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 520 char[] lowerCased = new char[chars.length]; 521 for (int i = 0; i < chars.length; i++) { 522 lowerCased[i] = Ascii.toLowerCase(chars[i]); 523 } 524 return new Alphabet(name + ".lowerCase()", lowerCased); 525 } 526 } 527 528 @Override 529 public boolean matches(char c) { 530 return CharMatcher.ascii().matches(c) && decodabet[c] != -1; 531 } 532 533 @Override 534 public String toString() { 535 return name; 536 } 537 538 @Override 539 public boolean equals(@Nullable Object other) { 540 if (other instanceof Alphabet) { 541 Alphabet that = (Alphabet) other; 542 return Arrays.equals(this.chars, that.chars); 543 } 544 return false; 545 } 546 547 @Override 548 public int hashCode() { 549 return Arrays.hashCode(chars); 550 } 551 } 552 553 static class StandardBaseEncoding extends BaseEncoding { 554 // TODO(lowasser): provide a useful toString 555 final Alphabet alphabet; 556 557 @Nullable final Character paddingChar; 558 559 StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) { 560 this(new 
Alphabet(name, alphabetChars.toCharArray()), paddingChar); 561 } 562 563 StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) { 564 this.alphabet = checkNotNull(alphabet); 565 checkArgument( 566 paddingChar == null || !alphabet.matches(paddingChar), 567 "Padding character %s was already in alphabet", 568 paddingChar); 569 this.paddingChar = paddingChar; 570 } 571 572 @Override 573 CharMatcher padding() { 574 return (paddingChar == null) ? CharMatcher.none() : CharMatcher.is(paddingChar.charValue()); 575 } 576 577 @Override 578 int maxEncodedSize(int bytes) { 579 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 580 } 581 582 @GwtIncompatible // Writer,OutputStream 583 @Override 584 public OutputStream encodingStream(final Writer out) { 585 checkNotNull(out); 586 return new OutputStream() { 587 int bitBuffer = 0; 588 int bitBufferLength = 0; 589 int writtenChars = 0; 590 591 @Override 592 public void write(int b) throws IOException { 593 bitBuffer <<= 8; 594 bitBuffer |= b & 0xFF; 595 bitBufferLength += 8; 596 while (bitBufferLength >= alphabet.bitsPerChar) { 597 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) & alphabet.mask; 598 out.write(alphabet.encode(charIndex)); 599 writtenChars++; 600 bitBufferLength -= alphabet.bitsPerChar; 601 } 602 } 603 604 @Override 605 public void flush() throws IOException { 606 out.flush(); 607 } 608 609 @Override 610 public void close() throws IOException { 611 if (bitBufferLength > 0) { 612 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) & alphabet.mask; 613 out.write(alphabet.encode(charIndex)); 614 writtenChars++; 615 if (paddingChar != null) { 616 while (writtenChars % alphabet.charsPerChunk != 0) { 617 out.write(paddingChar.charValue()); 618 writtenChars++; 619 } 620 } 621 } 622 out.close(); 623 } 624 }; 625 } 626 627 @Override 628 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 629 
checkNotNull(target); 630 checkPositionIndexes(off, off + len, bytes.length); 631 for (int i = 0; i < len; i += alphabet.bytesPerChunk) { 632 encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i)); 633 } 634 } 635 636 void encodeChunkTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 637 checkNotNull(target); 638 checkPositionIndexes(off, off + len, bytes.length); 639 checkArgument(len <= alphabet.bytesPerChunk); 640 long bitBuffer = 0; 641 for (int i = 0; i < len; ++i) { 642 bitBuffer |= bytes[off + i] & 0xFF; 643 bitBuffer <<= 8; // Add additional zero byte in the end. 644 } 645 // Position of first character is length of bitBuffer minus bitsPerChar. 646 final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar; 647 int bitsProcessed = 0; 648 while (bitsProcessed < len * 8) { 649 int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask; 650 target.append(alphabet.encode(charIndex)); 651 bitsProcessed += alphabet.bitsPerChar; 652 } 653 if (paddingChar != null) { 654 while (bitsProcessed < alphabet.bytesPerChunk * 8) { 655 target.append(paddingChar.charValue()); 656 bitsProcessed += alphabet.bitsPerChar; 657 } 658 } 659 } 660 661 @Override 662 int maxDecodedSize(int chars) { 663 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 664 } 665 666 @Override 667 public boolean canDecode(CharSequence chars) { 668 chars = padding().trimTrailingFrom(chars); 669 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 670 return false; 671 } 672 for (int i = 0; i < chars.length(); i++) { 673 if (!alphabet.canDecode(chars.charAt(i))) { 674 return false; 675 } 676 } 677 return true; 678 } 679 680 @Override 681 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 682 checkNotNull(target); 683 chars = padding().trimTrailingFrom(chars); 684 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 685 throw new DecodingException("Invalid input length " + 
chars.length()); 686 } 687 int bytesWritten = 0; 688 for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) { 689 long chunk = 0; 690 int charsProcessed = 0; 691 for (int i = 0; i < alphabet.charsPerChunk; i++) { 692 chunk <<= alphabet.bitsPerChar; 693 if (charIdx + i < chars.length()) { 694 chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++)); 695 } 696 } 697 final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar; 698 for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) { 699 target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF); 700 } 701 } 702 return bytesWritten; 703 } 704 705 @GwtIncompatible // Reader,InputStream 706 @Override 707 public InputStream decodingStream(final Reader reader) { 708 checkNotNull(reader); 709 return new InputStream() { 710 int bitBuffer = 0; 711 int bitBufferLength = 0; 712 int readChars = 0; 713 boolean hitPadding = false; 714 final CharMatcher paddingMatcher = padding(); 715 716 @Override 717 public int read() throws IOException { 718 while (true) { 719 int readChar = reader.read(); 720 if (readChar == -1) { 721 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 722 throw new DecodingException("Invalid input length " + readChars); 723 } 724 return -1; 725 } 726 readChars++; 727 char ch = (char) readChar; 728 if (paddingMatcher.matches(ch)) { 729 if (!hitPadding 730 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 731 throw new DecodingException("Padding cannot start at index " + readChars); 732 } 733 hitPadding = true; 734 } else if (hitPadding) { 735 throw new DecodingException( 736 "Expected padding character but found '" + ch + "' at index " + readChars); 737 } else { 738 bitBuffer <<= alphabet.bitsPerChar; 739 bitBuffer |= alphabet.decode(ch); 740 bitBufferLength += alphabet.bitsPerChar; 741 742 if (bitBufferLength >= 8) { 743 bitBufferLength -= 8; 744 return 
(bitBuffer >> bitBufferLength) & 0xFF; 745 } 746 } 747 } 748 } 749 750 @Override 751 public void close() throws IOException { 752 reader.close(); 753 } 754 }; 755 } 756 757 @Override 758 public BaseEncoding omitPadding() { 759 return (paddingChar == null) ? this : newInstance(alphabet, null); 760 } 761 762 @Override 763 public BaseEncoding withPadChar(char padChar) { 764 if (8 % alphabet.bitsPerChar == 0 765 || (paddingChar != null && paddingChar.charValue() == padChar)) { 766 return this; 767 } else { 768 return newInstance(alphabet, padChar); 769 } 770 } 771 772 @Override 773 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 774 checkArgument( 775 padding().or(alphabet).matchesNoneOf(separator), 776 "Separator (%s) cannot contain alphabet or padding characters", 777 separator); 778 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 779 } 780 781 private transient BaseEncoding upperCase; 782 private transient BaseEncoding lowerCase; 783 784 @Override 785 public BaseEncoding upperCase() { 786 BaseEncoding result = upperCase; 787 if (result == null) { 788 Alphabet upper = alphabet.upperCase(); 789 result = upperCase = 790 (upper == alphabet) ? this : newInstance(upper, paddingChar); 791 } 792 return result; 793 } 794 795 @Override 796 public BaseEncoding lowerCase() { 797 BaseEncoding result = lowerCase; 798 if (result == null) { 799 Alphabet lower = alphabet.lowerCase(); 800 result = lowerCase = 801 (lower == alphabet) ? 
this : newInstance(lower, paddingChar); 802 } 803 return result; 804 } 805 806 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 807 return new StandardBaseEncoding(alphabet, paddingChar); 808 } 809 810 @Override 811 public String toString() { 812 StringBuilder builder = new StringBuilder("BaseEncoding."); 813 builder.append(alphabet.toString()); 814 if (8 % alphabet.bitsPerChar != 0) { 815 if (paddingChar == null) { 816 builder.append(".omitPadding()"); 817 } else { 818 builder.append(".withPadChar('").append(paddingChar).append("')"); 819 } 820 } 821 return builder.toString(); 822 } 823 824 @Override 825 public boolean equals(@Nullable Object other) { 826 if (other instanceof StandardBaseEncoding) { 827 StandardBaseEncoding that = (StandardBaseEncoding) other; 828 return this.alphabet.equals(that.alphabet) 829 && Objects.equal(this.paddingChar, that.paddingChar); 830 } 831 return false; 832 } 833 834 @Override 835 public int hashCode() { 836 return alphabet.hashCode() ^ Objects.hashCode(paddingChar); 837 } 838 } 839 840 static final class Base16Encoding extends StandardBaseEncoding { 841 final char[] encoding = new char[512]; 842 843 Base16Encoding(String name, String alphabetChars) { 844 this(new Alphabet(name, alphabetChars.toCharArray())); 845 } 846 847 private Base16Encoding(Alphabet alphabet) { 848 super(alphabet, null); 849 checkArgument(alphabet.chars.length == 16); 850 for (int i = 0; i < 256; ++i) { 851 encoding[i] = alphabet.encode(i >>> 4); 852 encoding[i | 0x100] = alphabet.encode(i & 0xF); 853 } 854 } 855 856 @Override 857 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 858 checkNotNull(target); 859 checkPositionIndexes(off, off + len, bytes.length); 860 for (int i = 0; i < len; ++i) { 861 int b = bytes[off + i] & 0xFF; 862 target.append(encoding[b]); 863 target.append(encoding[b | 0x100]); 864 } 865 } 866 867 @Override 868 int decodeTo(byte[] target, CharSequence chars) throws 
DecodingException { 869 checkNotNull(target); 870 if (chars.length() % 2 == 1) { 871 throw new DecodingException("Invalid input length " + chars.length()); 872 } 873 int bytesWritten = 0; 874 for (int i = 0; i < chars.length(); i += 2) { 875 int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1)); 876 target[bytesWritten++] = (byte) decoded; 877 } 878 return bytesWritten; 879 } 880 881 @Override 882 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 883 return new Base16Encoding(alphabet); 884 } 885 } 886 887 static final class Base64Encoding extends StandardBaseEncoding { 888 Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) { 889 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 890 } 891 892 private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) { 893 super(alphabet, paddingChar); 894 checkArgument(alphabet.chars.length == 64); 895 } 896 897 @Override 898 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 899 checkNotNull(target); 900 checkPositionIndexes(off, off + len, bytes.length); 901 int i = off; 902 for (int remaining = len; remaining >= 3; remaining -= 3) { 903 int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF; 904 target.append(alphabet.encode(chunk >>> 18)); 905 target.append(alphabet.encode((chunk >>> 12) & 0x3F)); 906 target.append(alphabet.encode((chunk >>> 6) & 0x3F)); 907 target.append(alphabet.encode(chunk & 0x3F)); 908 } 909 if (i < off + len) { 910 encodeChunkTo(target, bytes, i, off + len - i); 911 } 912 } 913 914 @Override 915 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 916 checkNotNull(target); 917 chars = padding().trimTrailingFrom(chars); 918 if (!alphabet.isValidPaddingStartPosition(chars.length())) { 919 throw new DecodingException("Invalid input length " + chars.length()); 920 } 921 int bytesWritten = 0; 
922 for (int i = 0; i < chars.length(); ) { 923 int chunk = alphabet.decode(chars.charAt(i++)) << 18; 924 chunk |= alphabet.decode(chars.charAt(i++)) << 12; 925 target[bytesWritten++] = (byte) (chunk >>> 16); 926 if (i < chars.length()) { 927 chunk |= alphabet.decode(chars.charAt(i++)) << 6; 928 target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF); 929 if (i < chars.length()) { 930 chunk |= alphabet.decode(chars.charAt(i++)); 931 target[bytesWritten++] = (byte) (chunk & 0xFF); 932 } 933 } 934 } 935 return bytesWritten; 936 } 937 938 @Override 939 BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) { 940 return new Base64Encoding(alphabet, paddingChar); 941 } 942 } 943 944 @GwtIncompatible // Reader 945 static Reader ignoringReader(final Reader delegate, final CharMatcher toIgnore) { 946 checkNotNull(delegate); 947 checkNotNull(toIgnore); 948 return new Reader() { 949 @Override 950 public int read() throws IOException { 951 int readChar; 952 do { 953 readChar = delegate.read(); 954 } while (readChar != -1 && toIgnore.matches((char) readChar)); 955 return readChar; 956 } 957 958 @Override 959 public int read(char[] cbuf, int off, int len) throws IOException { 960 throw new UnsupportedOperationException(); 961 } 962 963 @Override 964 public void close() throws IOException { 965 delegate.close(); 966 } 967 }; 968 } 969 970 static Appendable separatingAppendable( 971 final Appendable delegate, final String separator, final int afterEveryChars) { 972 checkNotNull(delegate); 973 checkNotNull(separator); 974 checkArgument(afterEveryChars > 0); 975 return new Appendable() { 976 int charsUntilSeparator = afterEveryChars; 977 978 @Override 979 public Appendable append(char c) throws IOException { 980 if (charsUntilSeparator == 0) { 981 delegate.append(separator); 982 charsUntilSeparator = afterEveryChars; 983 } 984 delegate.append(c); 985 charsUntilSeparator--; 986 return this; 987 } 988 989 @Override 990 public Appendable append(CharSequence chars, 
int off, int len) throws IOException { 991 throw new UnsupportedOperationException(); 992 } 993 994 @Override 995 public Appendable append(CharSequence chars) throws IOException { 996 throw new UnsupportedOperationException(); 997 } 998 }; 999 } 1000 1001 @GwtIncompatible // Writer 1002 static Writer separatingWriter( 1003 final Writer delegate, final String separator, final int afterEveryChars) { 1004 final Appendable seperatingAppendable = 1005 separatingAppendable(delegate, separator, afterEveryChars); 1006 return new Writer() { 1007 @Override 1008 public void write(int c) throws IOException { 1009 seperatingAppendable.append((char) c); 1010 } 1011 1012 @Override 1013 public void write(char[] chars, int off, int len) throws IOException { 1014 throw new UnsupportedOperationException(); 1015 } 1016 1017 @Override 1018 public void flush() throws IOException { 1019 delegate.flush(); 1020 } 1021 1022 @Override 1023 public void close() throws IOException { 1024 delegate.close(); 1025 } 1026 }; 1027 } 1028 1029 static final class SeparatedBaseEncoding extends BaseEncoding { 1030 private final BaseEncoding delegate; 1031 private final String separator; 1032 private final int afterEveryChars; 1033 private final CharMatcher separatorChars; 1034 1035 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 1036 this.delegate = checkNotNull(delegate); 1037 this.separator = checkNotNull(separator); 1038 this.afterEveryChars = afterEveryChars; 1039 checkArgument( 1040 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 1041 this.separatorChars = CharMatcher.anyOf(separator).precomputed(); 1042 } 1043 1044 @Override 1045 CharMatcher padding() { 1046 return delegate.padding(); 1047 } 1048 1049 @Override 1050 int maxEncodedSize(int bytes) { 1051 int unseparatedSize = delegate.maxEncodedSize(bytes); 1052 return unseparatedSize 1053 + separator.length() * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, 
FLOOR); 1054 } 1055 1056 @GwtIncompatible // Writer,OutputStream 1057 @Override 1058 public OutputStream encodingStream(final Writer output) { 1059 return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars)); 1060 } 1061 1062 @Override 1063 void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException { 1064 delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len); 1065 } 1066 1067 @Override 1068 int maxDecodedSize(int chars) { 1069 return delegate.maxDecodedSize(chars); 1070 } 1071 1072 @Override 1073 public boolean canDecode(CharSequence chars) { 1074 return delegate.canDecode(separatorChars.removeFrom(chars)); 1075 } 1076 1077 @Override 1078 int decodeTo(byte[] target, CharSequence chars) throws DecodingException { 1079 return delegate.decodeTo(target, separatorChars.removeFrom(chars)); 1080 } 1081 1082 @GwtIncompatible // Reader,InputStream 1083 @Override 1084 public InputStream decodingStream(final Reader reader) { 1085 return delegate.decodingStream(ignoringReader(reader, separatorChars)); 1086 } 1087 1088 @Override 1089 public BaseEncoding omitPadding() { 1090 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 1091 } 1092 1093 @Override 1094 public BaseEncoding withPadChar(char padChar) { 1095 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 1096 } 1097 1098 @Override 1099 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 1100 throw new UnsupportedOperationException("Already have a separator"); 1101 } 1102 1103 @Override 1104 public BaseEncoding upperCase() { 1105 return delegate.upperCase().withSeparator(separator, afterEveryChars); 1106 } 1107 1108 @Override 1109 public BaseEncoding lowerCase() { 1110 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 1111 } 1112 1113 @Override 1114 public String toString() { 1115 return delegate + ".withSeparator(\"" + separator + "\", " + 
afterEveryChars + ")"; 1116 } 1117 } 1118}