/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.math.IntMath.divide;
import static com.google.common.math.IntMath.log2;
import static java.math.RoundingMode.CEILING;
import static java.math.RoundingMode.FLOOR;
import static java.math.RoundingMode.UNNECESSARY;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.util.Arrays;

import javax.annotation.CheckReturnValue;
import javax.annotation.Nullable;

/**
 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
 * strings. This class includes several constants for encoding schemes specified by <a
 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
 *
 * <pre>   {@code
 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
 *
 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
 *
 * <p>...returns the ASCII bytes of the string {@code "foo"}.
 *
 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
 * RFC 4648.  Decoding rejects characters in the wrong case, though padding is optional.
 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
 * with modified behavior:
 *
 * <pre>   {@code
 *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
 *
 * <p>Warning: BaseEncoding instances are immutable.  Invoking a configuration method has no effect
 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
 *
 * <pre>   {@code
 *   // Do NOT do this
 *   BaseEncoding hex = BaseEncoding.base16();
 *   hex.lowerCase(); // does nothing!
 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
 *
 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
 * {@code x}, but the reverse does not necessarily hold.
 *
 * <p>
 * <table>
 * <tr>
 * <th>Encoding
 * <th>Alphabet
 * <th>{@code char:byte} ratio
 * <th>Default padding
 * <th>Comments
 * <tr>
 * <td>{@link #base16()}
 * <td>0-9 A-F
 * <td>2.00
 * <td>N/A
 * <td>Traditional hexadecimal.  Defaults to upper case.
 * <tr>
 * <td>{@link #base32()}
 * <td>A-Z 2-7
 * <td>1.60
 * <td>=
 * <td>Human-readable; no possibility of mixing up 0/O or 1/I.  Defaults to upper case.
 * <tr>
 * <td>{@link #base32Hex()}
 * <td>0-9 A-V
 * <td>1.60
 * <td>=
 * <td>"Numerical" base 32; extended from the traditional hex alphabet.  Defaults to upper case.
 * <tr>
 * <td>{@link #base64()}
 * <td>A-Z a-z 0-9 + /
 * <td>1.33
 * <td>=
 * <td>
 * <tr>
 * <td>{@link #base64Url()}
 * <td>A-Z a-z 0-9 - _
 * <td>1.33
 * <td>=
 * <td>Safe to use as filenames, or to pass in URLs without escaping
 * </table>
 *
 * <p>
 * All instances of this class are immutable, so they may be stored safely as static constants.
 *
 * @author Louis Wasserman
 * @since 14.0
 */
@Beta
@GwtCompatible(emulated = true)
public abstract class BaseEncoding {
  // TODO(lowasser): consider making encodeTo(Appendable, byte[], int, int) public.

  /** Package-private constructor: only the nested implementations in this file subclass this. */
  BaseEncoding() {}

  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    /** Creates an exception carrying only a description of the invalid input. */
    DecodingException(String message) {
      super(message);
    }

    /** Creates an exception that wraps an underlying cause. */
    DecodingException(Throwable cause) {
      super(cause);
    }
  }

  /**
   * Encodes the specified byte array, and returns the encoded {@code String}.
   *
   * <p>Equivalent to {@code encode(bytes, 0, bytes.length)}.
   */
  public String encode(byte[] bytes) {
    return encode(bytes, 0, bytes.length);
  }

  /**
   * Encodes the specified range of the specified byte array, and returns the encoded
   * {@code String}.
   *
   * @param bytes the array holding the bytes to encode
   * @param off index of the first byte to encode
   * @param len number of bytes to encode; the range {@code [off, off + len)} is validated against
   *     {@code bytes.length} with {@code checkPositionIndexes}
   */
  public final String encode(byte[] bytes, int off, int len) {
    checkPositionIndexes(off, off + len, bytes.length);
    // Presize the builder with the encoding's own estimate of the output length.
    StringBuilder result = new StringBuilder(maxEncodedSize(len));
    try {
      encodeTo(result, bytes, off, len);
    } catch (IOException impossible) {
      // encodeTo declares IOException because it writes to an arbitrary Appendable; the target
      // here is an in-memory StringBuilder, so reaching this handler is treated as a bug.
      throw new AssertionError(impossible);
    }
    return result.toString();
  }

  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}.  When the returned {@code OutputStream} is closed, so is the backing
   * {@code Writer}.
   */
  @GwtIncompatible("Writer,OutputStream")
  public abstract OutputStream encodingStream(Writer writer);

  /**
   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
   *
   * <p>Each call to {@code openStream()} on the returned sink opens a new stream on
   * {@code encodedSink} and wraps it with {@link #encodingStream(Writer)}.
   */
  @GwtIncompatible("ByteSink,CharSink")
  public final ByteSink encodingSink(final CharSink encodedSink) {
    checkNotNull(encodedSink);
    return new ByteSink() {
      @Override
      public OutputStream openStream() throws IOException {
        return encodingStream(encodedSink.openStream());
      }
    };
  }

  // TODO(lowasser): document the extent of leniency, probably after adding ignore(CharMatcher)

  /**
   * Returns the first {@code length} bytes of {@code result}. When {@code length} equals
   * {@code result.length} the same array instance is returned; otherwise a new array of exactly
   * {@code length} bytes is allocated and filled from the start of {@code result}.
   */
  private static byte[] extract(byte[] result, int length) {
    if (length == result.length) {
      return result;
    } else {
      byte[] trunc = new byte[length];
      System.arraycopy(result, 0, trunc, 0, length);
      return trunc;
    }
  }

  /**
   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
   * This is the inverse operation to {@link #encode(byte[])}.
   *
   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
   *         encoding.
   */
  public final byte[] decode(CharSequence chars) {
    try {
      return decodeChecked(chars);
    } catch (DecodingException badInput) {
      // Re-throw the checked decoding failure as an unchecked exception, keeping it as the cause.
      throw new IllegalArgumentException(badInput);
    }
  }

  /**
   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
   * This is the inverse operation to {@link #encode(byte[])}.
   *
   * @throws DecodingException if the input is not a valid encoded string according to this
   *         encoding.
   */
  final byte[] decodeChecked(CharSequence chars) throws DecodingException {
    // Strip trailing padding first so the buffer is sized from the data characters.
    chars = padding().trimTrailingFrom(chars);
    byte[] tmp = new byte[maxDecodedSize(chars.length())];
    int len = decodeTo(tmp, chars);
    // decodeTo reports how many bytes it actually wrote; drop any unused tail of the buffer.
    return extract(tmp, len);
  }

  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified
   * {@code Reader}.  The returned stream throws a {@link DecodingException} upon decoding-specific
   * errors.
   */
  @GwtIncompatible("Reader,InputStream")
  public abstract InputStream decodingStream(Reader reader);

  /**
   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
   * {@code CharSource}.
   *
   * <p>Each call to {@code openStream()} on the returned source opens a new stream on
   * {@code encodedSource} and wraps it with {@link #decodingStream(Reader)}.
   */
  @GwtIncompatible("ByteSource,CharSource")
  public final ByteSource decodingSource(final CharSource encodedSource) {
    checkNotNull(encodedSource);
    return new ByteSource() {
      @Override
      public InputStream openStream() throws IOException {
        return decodingStream(encodedSource.openStream());
      }
    };
  }

  // Implementations for encoding/decoding

  /**
   * Returns the number of characters to allow for when encoding {@code bytes} bytes.
   * {@link #encode(byte[], int, int)} uses it as the initial capacity of its output builder.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Appends the encoded form of {@code len} bytes of {@code bytes}, starting at index
   * {@code off}, to {@code target}.
   */
  abstract void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException;

  /**
   * Returns the number of bytes to allow for when decoding {@code chars} characters.
   * {@link #decodeChecked(CharSequence)} allocates its working buffer with this size.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Decodes {@code chars} into {@code target}, writing from index 0, and returns the number of
   * bytes written.
   *
   * @throws DecodingException if {@code chars} is not valid input for this encoding
   */
  abstract int decodeTo(byte[] target, CharSequence chars) throws DecodingException;

  /**
   * Returns a matcher for this encoding's padding character(s);
   * {@link #decodeChecked(CharSequence)} trims trailing matches before decoding.
   */
  abstract CharMatcher padding();

  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *         separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters.  Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *         string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters.  Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters.  Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();

  /** Shared instance returned by {@link #base64()}. */
  private static final BaseEncoding BASE64 = new Base64Encoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }

  /** Shared instance returned by {@link #base64Url()}. */
  private static final BaseEncoding BASE64_URL = new Base64Encoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
   * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }

  /** Shared instance returned by {@link #base32()}. */
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding.
   * (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }

  /** Shared instance returned by {@link #base32Hex()}. */
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet.  There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }

  /** Shared instance returned by {@link #base16()}. */
  private static final BaseEncoding BASE16 = new Base16Encoding("base16()", "0123456789ABCDEF");

  /**
   * The "base16" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding.
   * (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }

  /**
   * The set of characters used by an encoding, together with the lookup tables and chunk
   * geometry derived from it. As a {@code CharMatcher} it matches exactly the characters of the
   * alphabet.
   */
  private static final class Alphabet extends CharMatcher {
    /** Display name; returned by {@link #toString()} and extended by the case conversions. */
    private final String name;
    // this is meant to be immutable -- don't modify it!
    private final char[] chars;
    /** {@code chars.length - 1}; used by callers to isolate the low bits of one character. */
    final int mask;
    /** Base-2 logarithm of {@code chars.length}. */
    final int bitsPerChar;
    /** Number of characters in one chunk (see the constructor comment). */
    final int charsPerChunk;
    /** Number of bytes in one chunk (see the constructor comment). */
    final int bytesPerChunk;
    /** Indexed by ASCII code: the value of that character, or -1 if it is not in the alphabet. */
    private final byte[] decodabet;
    /** Indexed by character position within a chunk; see isValidPaddingStartPosition. */
    private final boolean[] validPadding;

    /**
     * Builds an alphabet whose character at index {@code i} encodes the value {@code i}.
     *
     * @throws IllegalArgumentException if {@code chars.length} is not accepted by
     *     {@code log2(..., UNNECESSARY)}; non-ASCII and duplicate characters are rejected through
     *     {@code checkArgument}
     */
    Alphabet(String name, char[] chars) {
      this.name = checkNotNull(name);
      this.chars = checkNotNull(chars);
      try {
        this.bitsPerChar = log2(chars.length, UNNECESSARY);
      } catch (ArithmeticException e) {
        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
      }

      /*
       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3.  This makes
       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
       */
      // NOTE(review): a one-character alphabet would presumably give bitsPerChar == 0 and so
      // gcd == 0, making the divisions below fail -- confirm that no caller can pass one.
      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
      this.charsPerChunk = 8 / gcd;
      this.bytesPerChunk = bitsPerChar / gcd;

      this.mask = chars.length - 1;

      // Build the reverse table; -1 marks ASCII codes that are not part of the alphabet.
      byte[] decodabet = new byte[Ascii.MAX + 1];
      Arrays.fill(decodabet, (byte) -1);
      for (int i = 0; i < chars.length; i++) {
        char c = chars[i];
        checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
        decodabet[c] = (byte) i;
      }
      this.decodabet = decodabet;

      // For each count i of whole bytes in a partial chunk, mark the number of characters
      // needed to hold i * 8 bits, rounded up.
      boolean[] validPadding = new boolean[charsPerChunk];
      for (int i = 0; i < bytesPerChunk; i++) {
        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
      }
      this.validPadding = validPadding;
    }

    /** Returns the alphabet character at index {@code bits}. */
    char encode(int bits) {
      return chars[bits];
    }

    /**
     * Returns whether {@code index}, reduced modulo {@code charsPerChunk}, was marked in the
     * constructor as a character count corresponding to a whole number of bytes. Callers pass
     * the number of data characters seen so far.
     */
    boolean isValidPaddingStartPosition(int index) {
      return validPadding[index % charsPerChunk];
    }

    /**
     * Returns the value encoded by {@code ch}.
     *
     * @throws DecodingException if {@code ch} is above {@code Ascii.MAX} or is not in this
     *     alphabet; characters matched by {@code CharMatcher.INVISIBLE} are reported in hex
     */
    int decode(char ch) throws DecodingException {
      if (ch > Ascii.MAX || decodabet[ch] == -1) {
        throw new DecodingException("Unrecognized character: "
            + (CharMatcher.INVISIBLE.matches(ch) ? "0x" + Integer.toHexString(ch) : ch));
      }
      return decodabet[ch];
    }

    /** Returns whether any alphabet character is an ASCII lower-case letter. */
    private boolean hasLowerCase() {
      for (char c : chars) {
        if (Ascii.isLowerCase(c)) {
          return true;
        }
      }
      return false;
    }

    /** Returns whether any alphabet character is an ASCII upper-case letter. */
    private boolean hasUpperCase() {
      for (char c : chars) {
        if (Ascii.isUpperCase(c)) {
          return true;
        }
      }
      return false;
    }

    /**
     * Returns this alphabet if it has no lower-case letters; otherwise a new alphabet with every
     * character upper-cased and {@code ".upperCase()"} appended to the name. The
     * {@code checkState} call rejects alphabets that contain both cases.
     */
    Alphabet upperCase() {
      if (!hasLowerCase()) {
        return this;
      } else {
        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
        char[] upperCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          upperCased[i] = Ascii.toUpperCase(chars[i]);
        }
        return new Alphabet(name + ".upperCase()", upperCased);
      }
    }

    /**
     * Returns this alphabet if it has no upper-case letters; otherwise a new alphabet with every
     * character lower-cased and {@code ".lowerCase()"} appended to the name. The
     * {@code checkState} call rejects alphabets that contain both cases.
     */
    Alphabet lowerCase() {
      if (!hasUpperCase()) {
        return this;
      } else {
        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
        char[] lowerCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          lowerCased[i] = Ascii.toLowerCase(chars[i]);
        }
        return new Alphabet(name + ".lowerCase()", lowerCased);
      }
    }

    /** Matches ASCII characters that have an entry in the decoding table. */
    @Override
    public boolean matches(char c) {
      return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
    }

    @Override
    public String toString() {
      return name;
    }
  }

  /**
   * General-purpose encoding driven by an {@link Alphabet} and an optional padding character.
   * {@code Base16Encoding} and {@code Base64Encoding} extend it with specialized array
   * encode/decode loops.
   */
  static class StandardBaseEncoding extends BaseEncoding {
    // TODO(lowasser): provide a useful toString
    final Alphabet alphabet;

    /** Padding character, or {@code null} when this encoding does not use padding. */
    @Nullable
    final Character paddingChar;

    /** Creates an encoding from an alphabet name and its characters in value order. */
    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }

    /** Creates an encoding over {@code alphabet}; the padding character must not be in it. */
    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
      this.alphabet = checkNotNull(alphabet);
      checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
          "Padding character %s was already in alphabet", paddingChar);
      this.paddingChar = paddingChar;
    }

    /** Matches the padding character, or nothing when there is none. */
    @Override
    CharMatcher padding() {
      return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
    }

    /** Size of the output counted in whole chunks: the byte count is rounded up to chunks. */
    @Override
    int maxEncodedSize(int bytes) {
      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
    }

    @GwtIncompatible("Writer,OutputStream")
    @Override
    public OutputStream encodingStream(final Writer out) {
      checkNotNull(out);
      return new OutputStream() {
        // Bits received but not yet emitted occupy the low bitBufferLength bits of bitBuffer.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        // Total characters written so far, padding included; used to pad to a chunk boundary.
        int writtenChars = 0;

        @Override
        public void write(int b) throws IOException {
          bitBuffer <<= 8;
          bitBuffer |= b & 0xFF;
          bitBufferLength += 8;
          // Emit one character for every complete group of bitsPerChar bits, highest first.
          while (bitBufferLength >= alphabet.bitsPerChar) {
            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            bitBufferLength -= alphabet.bitsPerChar;
          }
        }

        @Override
        public void flush() throws IOException {
          out.flush();
        }

        @Override
        public void close() throws IOException {
          // NOTE(review): bitBufferLength is not reset here, so a second close() call would
          // attempt to write the trailing character again -- confirm whether callers rely on
          // close() being safe to repeat.
          if (bitBufferLength > 0) {
            // Left-align the leftover bits within one character, filling the rest with zeros.
            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            if (paddingChar != null) {
              // Pad the output up to the next multiple of charsPerChunk.
              while (writtenChars % alphabet.charsPerChunk != 0) {
                out.write(paddingChar.charValue());
                writtenChars++;
              }
            }
          }
          out.close();
        }
      };
    }

    /** Encodes the range one chunk at a time; the final chunk may be shorter than the rest. */
    @Override
    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
      checkNotNull(target);
      checkPositionIndexes(off, off + len, bytes.length);
      for (int i = 0; i < len; i += alphabet.bytesPerChunk) {
        encodeChunkTo(target, bytes, off + i, Math.min(alphabet.bytesPerChunk, len - i));
      }
    }

    /**
     * Encodes at most one chunk ({@code len <= alphabet.bytesPerChunk} bytes starting at
     * {@code off}) to {@code target}, followed by padding up to a full chunk when a padding
     * character is configured.
     */
    void encodeChunkTo(Appendable target, byte[] bytes, int off, int len)
        throws IOException {
      checkNotNull(target);
      checkPositionIndexes(off, off + len, bytes.length);
      checkArgument(len <= alphabet.bytesPerChunk);
      long bitBuffer = 0;
      for (int i = 0; i < len; ++i) {
        bitBuffer |= bytes[off + i] & 0xFF;
        bitBuffer <<= 8; // Add additional zero byte in the end.
      }
      // Position of first character is length of bitBuffer minus bitsPerChar.
      final int bitOffset = (len + 1) * 8 - alphabet.bitsPerChar;
      int bitsProcessed = 0;
      // Emit characters from the most significant bits down until all data bits are covered;
      // the extra zero byte supplies the fill bits of the last character.
      while (bitsProcessed < len * 8) {
        int charIndex = (int) (bitBuffer >>> (bitOffset - bitsProcessed)) & alphabet.mask;
        target.append(alphabet.encode(charIndex));
        bitsProcessed += alphabet.bitsPerChar;
      }
      if (paddingChar != null) {
        // Continue counting in character-sized steps to fill the rest of the chunk with padding.
        while (bitsProcessed < alphabet.bytesPerChunk * 8) {
          target.append(paddingChar.charValue());
          bitsProcessed += alphabet.bitsPerChar;
        }
      }
    }

    /** Number of bits carried by {@code chars} characters, rounded up to bytes (long math). */
    @Override
    int maxDecodedSize(int chars) {
      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
    }

    /**
     * Decodes chunk by chunk after trimming trailing padding.
     *
     * @throws DecodingException if the trimmed length is not a valid stopping point or a
     *     character is not in the alphabet
     */
    @Override
    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
      checkNotNull(target);
      chars = padding().trimTrailingFrom(chars);
      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
        throw new DecodingException("Invalid input length " + chars.length());
      }
      int bytesWritten = 0;
      for (int charIdx = 0; charIdx < chars.length(); charIdx += alphabet.charsPerChunk) {
        long chunk = 0;
        int charsProcessed = 0;
        // Always shift for a full chunk so the data stays left-aligned; positions past the end
        // of the input contribute zero bits.
        for (int i = 0; i < alphabet.charsPerChunk; i++) {
          chunk <<= alphabet.bitsPerChar;
          if (charIdx + i < chars.length()) {
            chunk |= alphabet.decode(chars.charAt(charIdx + charsProcessed++));
          }
        }
        // Only bytes fully covered by the characters actually read are written out.
        final int minOffset = alphabet.bytesPerChunk * 8 - charsProcessed * alphabet.bitsPerChar;
        for (int offset = (alphabet.bytesPerChunk - 1) * 8; offset >= minOffset; offset -= 8) {
          target[bytesWritten++] = (byte) ((chunk >>> offset) & 0xFF);
        }
      }
      return bytesWritten;
    }

    @GwtIncompatible("Reader,InputStream")
    @Override
    public InputStream decodingStream(final Reader reader) {
      checkNotNull(reader);
      return new InputStream() {
        // Decoded bits not yet returned occupy the low bitBufferLength bits of bitBuffer.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        // Number of characters consumed from the reader so far, padding included.
        int readChars = 0;
        // Set once a padding character is seen; only padding may follow.
        boolean hitPadding = false;
        final CharMatcher paddingMatcher = padding();

        @Override
        public int read() throws IOException {
          // Keep consuming characters until a full byte is available or the input ends.
          while (true) {
            int readChar = reader.read();
            if (readChar == -1) {
              // Without padding, the input must end at a valid stopping point.
              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
                throw new DecodingException("Invalid input length " + readChars);
              }
              return -1;
            }
            readChars++;
            char ch = (char) readChar;
            if (paddingMatcher.matches(ch)) {
              // The first padding character must not be the first character overall and must
              // follow a valid number of data characters.
              if (!hitPadding
                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
                throw new DecodingException("Padding cannot start at index " + readChars);
              }
              hitPadding = true;
            } else if (hitPadding) {
              throw new DecodingException(
                  "Expected padding character but found '" + ch + "' at index " + readChars);
            } else {
              bitBuffer <<= alphabet.bitsPerChar;
              bitBuffer |= alphabet.decode(ch);
              bitBufferLength += alphabet.bitsPerChar;

              if (bitBufferLength >= 8) {
                bitBufferLength -= 8;
                return (bitBuffer >> bitBufferLength) & 0xFF;
              }
            }
          }
        }

        @Override
        public void close() throws IOException {
          reader.close();
        }
      };
    }

    /** Returns this encoding if it already has no padding character. */
    @Override
    public BaseEncoding omitPadding() {
      return (paddingChar == null) ? this : newInstance(alphabet, null);
    }

    /**
     * Returns this encoding unchanged when 8 is a multiple of {@code bitsPerChar} or when
     * {@code padChar} is already the padding character; otherwise a new instance using it.
     */
    @Override
    public BaseEncoding withPadChar(char padChar) {
      if (8 % alphabet.bitsPerChar == 0 ||
          (paddingChar != null && paddingChar.charValue() == padChar)) {
        return this;
      } else {
        return newInstance(alphabet, padChar);
      }
    }

    /** Wraps this encoding in a {@code SeparatedBaseEncoding} after validating the separator. */
    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      checkArgument(padding().or(alphabet).matchesNoneOf(separator),
          "Separator (%s) cannot contain alphabet or padding characters", separator);
      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
    }

    // Results of upperCase() and lowerCase(), computed on the first call and reused afterwards.
    // NOTE(review): these fields are written without synchronization -- confirm that is intended.
    private transient BaseEncoding upperCase;
    private transient BaseEncoding lowerCase;

    @Override
    public BaseEncoding upperCase() {
      BaseEncoding result = upperCase;
      if (result == null) {
        Alphabet upper = alphabet.upperCase();
        // The alphabet returns itself when no conversion is needed; reuse this encoding then.
        result = upperCase =
            (upper == alphabet) ? this : newInstance(upper, paddingChar);
      }
      return result;
    }

    @Override
    public BaseEncoding lowerCase() {
      BaseEncoding result = lowerCase;
      if (result == null) {
        Alphabet lower = alphabet.lowerCase();
        // The alphabet returns itself when no conversion is needed; reuse this encoding then.
        result = lowerCase =
            (lower == alphabet) ? this : newInstance(lower, paddingChar);
      }
      return result;
    }

    /**
     * Creates the encoding returned by the configuration methods above. Subclasses override this
     * so that derived encodings keep the subclass type.
     */
    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
      return new StandardBaseEncoding(alphabet, paddingChar);
    }

    /**
     * Returns {@code "BaseEncoding."} followed by the alphabet name; the padding configuration
     * is appended only when 8 is not a multiple of {@code bitsPerChar}.
     */
    @Override
    public String toString() {
      StringBuilder builder = new StringBuilder("BaseEncoding.");
      builder.append(alphabet.toString());
      if (8 % alphabet.bitsPerChar != 0) {
        if (paddingChar == null) {
          builder.append(".omitPadding()");
        } else {
          builder.append(".withPadChar(").append(paddingChar).append(')');
        }
      }
      return builder.toString();
    }
  }

  /**
   * Specialization for 16-character alphabets that encodes each byte with two table lookups and
   * decodes two characters per byte. It is always constructed without a padding character.
   */
  static final class Base16Encoding extends StandardBaseEncoding {
    /**
     * For a byte value {@code b} (0-255), {@code encoding[b]} is the character for its high four
     * bits and {@code encoding[b | 0x100]} the character for its low four bits.
     */
    final char[] encoding = new char[512];

    Base16Encoding(String name, String alphabetChars) {
      this(new Alphabet(name, alphabetChars.toCharArray()));
    }

    private Base16Encoding(Alphabet alphabet) {
      super(alphabet, null);
      checkArgument(alphabet.chars.length == 16);
      // Precompute both output characters for every possible byte value.
      for (int i = 0; i < 256; ++i) {
        encoding[i] = alphabet.encode(i >>> 4);
        encoding[i | 0x100] = alphabet.encode(i & 0xF);
      }
    }

    /** Appends two characters per input byte using the precomputed table. */
    @Override
    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
      checkNotNull(target);
      checkPositionIndexes(off, off + len, bytes.length);
      for (int i = 0; i < len; ++i) {
        int b = bytes[off + i] & 0xFF;
        target.append(encoding[b]);
        target.append(encoding[b | 0x100]);
      }
    }

    /**
     * Decodes pairs of characters into bytes.
     *
     * @throws DecodingException if the input length is odd or a character is not in the alphabet
     */
    @Override
    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
      checkNotNull(target);
      if (chars.length() % 2 == 1) {
        throw new DecodingException("Invalid input length " + chars.length());
      }
      int bytesWritten = 0;
      for (int i = 0; i < chars.length(); i += 2) {
        int decoded = alphabet.decode(chars.charAt(i)) << 4 | alphabet.decode(chars.charAt(i + 1));
        target[bytesWritten++] = (byte) decoded;
      }
      return bytesWritten;
    }

    /** Keeps derived encodings as {@code Base16Encoding}; the padding argument is not used. */
    @Override
    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
      return new Base16Encoding(alphabet);
    }
  }

  /**
   * Specialization for 64-character alphabets that handles three bytes / four characters at a
   * time with unrolled code.
   */
  static final class Base64Encoding extends StandardBaseEncoding {
    Base64Encoding(String name, String alphabetChars, @Nullable Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }

    private Base64Encoding(Alphabet alphabet, @Nullable Character paddingChar) {
      super(alphabet, paddingChar);
      checkArgument(alphabet.chars.length == 64);
    }

    /**
     * Encodes every complete group of three bytes as four characters, then hands any remaining
     * one or two bytes to {@code encodeChunkTo}.
     */
    @Override
    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
      checkNotNull(target);
      checkPositionIndexes(off, off + len, bytes.length);
      int i = off;
      for (int remaining = len; remaining >= 3; remaining -= 3) {
        // Pack three bytes into the low 24 bits, then emit them as four 6-bit values.
        int chunk = (bytes[i++] & 0xFF) << 16 | (bytes[i++] & 0xFF) << 8 | bytes[i++] & 0xFF;
        target.append(alphabet.encode(chunk >>> 18));
        target.append(alphabet.encode((chunk >>> 12) & 0x3F));
        target.append(alphabet.encode((chunk >>> 6) & 0x3F));
        target.append(alphabet.encode(chunk & 0x3F));
      }
      if (i < off + len) {
        encodeChunkTo(target, bytes, i, off + len - i);
      }
    }

    /**
     * Decodes groups of up to four characters into up to three bytes after trimming trailing
     * padding.
     *
     * @throws DecodingException if the trimmed length is not a valid stopping point or a
     *     character is not in the alphabet
     */
    @Override
    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
      checkNotNull(target);
      chars = padding().trimTrailingFrom(chars);
      if (!alphabet.isValidPaddingStartPosition(chars.length())) {
        throw new DecodingException("Invalid input length " + chars.length());
      }
      int bytesWritten = 0;
      for (int i = 0; i < chars.length();) {
        // The first two characters of a group are read unconditionally and yield one byte.
        int chunk = alphabet.decode(chars.charAt(i++)) << 18;
        chunk |= alphabet.decode(chars.charAt(i++)) << 12;
        target[bytesWritten++] = (byte) (chunk >>> 16);
        // A third and a fourth character, when present, each complete one more byte.
        if (i < chars.length()) {
          chunk |= alphabet.decode(chars.charAt(i++)) << 6;
          target[bytesWritten++] = (byte) ((chunk >>> 8) & 0xFF);
          if (i < chars.length()) {
            chunk |= alphabet.decode(chars.charAt(i++));
            target[bytesWritten++] = (byte) (chunk & 0xFF);
          }
        }
      }
      return bytesWritten;
    }

    /** Keeps derived encodings as {@code Base64Encoding}. */
    @Override
    BaseEncoding newInstance(Alphabet alphabet, @Nullable Character paddingChar) {
      return new Base64Encoding(alphabet, paddingChar);
    }
  }

  /**
   * Returns a {@code Reader} over {@code delegate} whose single-character {@code read()} skips
   * every character matched by {@code toIgnore}. The bulk {@code read(char[], int, int)} method
   * is not supported and throws {@code UnsupportedOperationException}.
   */
  @GwtIncompatible("Reader")
  static Reader ignoringReader(final Reader delegate, final CharMatcher toIgnore) {
    checkNotNull(delegate);
    checkNotNull(toIgnore);
    return new Reader() {
      @Override
      public int read() throws IOException {
        int readChar;
        // Read until end of input or the first character that is not ignored.
        do {
          readChar = delegate.read();
        } while (readChar != -1 && toIgnore.matches((char) readChar));
        return readChar;
      }

      @Override
      public int read(char[] cbuf, int off, int len) throws IOException {
        throw new UnsupportedOperationException();
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * Returns an {@code Appendable} that forwards characters to {@code delegate}, inserting
   * {@code separator} between every run of {@code afterEveryChars} characters. The separator is
   * written just before the next character, so none is appended after the last one. Only
   * {@code append(char)} is supported; the {@code CharSequence} overloads throw
   * {@code UnsupportedOperationException}.
   */
  static Appendable separatingAppendable(
      final Appendable delegate, final String separator, final int afterEveryChars) {
    checkNotNull(delegate);
    checkNotNull(separator);
    checkArgument(afterEveryChars > 0);
    return new Appendable() {
      // Characters that may still be written before the next separator is due.
      int charsUntilSeparator = afterEveryChars;

      @Override
      public Appendable append(char c) throws IOException {
        if (charsUntilSeparator == 0) {
          delegate.append(separator);
          charsUntilSeparator = afterEveryChars;
        }
        delegate.append(c);
        charsUntilSeparator--;
        return this;
      }

      @Override
      public Appendable append(CharSequence chars, int off, int len) throws IOException {
        throw new UnsupportedOperationException();
      }

      @Override
      public Appendable append(CharSequence chars) throws IOException {
        throw new UnsupportedOperationException();
      }
    };
  }

  /**
   * Returns a {@code Writer} whose {@code write(int)} goes through
   * {@link #separatingAppendable} wrapped around {@code delegate}, while {@code flush()} and
   * {@code close()} are forwarded to {@code delegate} directly. The bulk
   * {@code write(char[], int, int)} method throws {@code UnsupportedOperationException}.
   */
  @GwtIncompatible("Writer")
  static Writer separatingWriter(
      final Writer delegate, final String separator, final int afterEveryChars) {
    final Appendable seperatingAppendable =
        separatingAppendable(delegate, separator, afterEveryChars);
    return new Writer() {
      @Override
      public void write(int c) throws IOException {
        seperatingAppendable.append((char) c);
      }

      @Override
      public void write(char[] chars, int off, int len) throws IOException {
        throw new UnsupportedOperationException();
      }

      @Override
      public void flush() throws IOException {
        delegate.flush();
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * Decorator that adds a separator to the output of another encoding and removes the
   * separator's characters from input before handing it to that encoding.
   */
  static final class SeparatedBaseEncoding extends BaseEncoding {
    private final BaseEncoding delegate;
    private final String separator;
    private final int afterEveryChars;
    /** Matches any character of {@code separator}; such characters are dropped when decoding. */
    private final CharMatcher separatorChars;

    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
      this.delegate = checkNotNull(delegate);
      this.separator = checkNotNull(separator);
      this.afterEveryChars = afterEveryChars;
      checkArgument(
          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
    }

    @Override
    CharMatcher padding() {
      return delegate.padding();
    }

    /**
     * The delegate's size plus one separator for each full group of {@code afterEveryChars}
     * characters that is followed by at least one more character.
     */
    @Override
    int maxEncodedSize(int bytes) {
      int unseparatedSize = delegate.maxEncodedSize(bytes);
      return unseparatedSize + separator.length()
          * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
    }

    @GwtIncompatible("Writer,OutputStream")
    @Override
    public OutputStream encodingStream(final Writer output) {
      return delegate.encodingStream(separatingWriter(output, separator, afterEveryChars));
    }

    @Override
    void encodeTo(Appendable target, byte[] bytes, int off, int len) throws IOException {
      delegate.encodeTo(separatingAppendable(target, separator, afterEveryChars), bytes, off, len);
    }

    /** Delegates with the raw character count, separator characters included. */
    @Override
    int maxDecodedSize(int chars) {
      return delegate.maxDecodedSize(chars);
    }

    /** Removes every separator character from {@code chars}, then delegates. */
    @Override
    int decodeTo(byte[] target, CharSequence chars) throws DecodingException {
      return delegate.decodeTo(target, separatorChars.removeFrom(chars));
    }

    @GwtIncompatible("Reader,InputStream")
    @Override
    public InputStream decodingStream(final Reader reader) {
      return delegate.decodingStream(ignoringReader(reader, separatorChars));
    }

    // Each configuration method below reconfigures the delegate and then re-applies the same
    // separator to the result.

    @Override
    public BaseEncoding omitPadding() {
      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
    }

    /** Always throws: only one separator may be configured. */
    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      throw new UnsupportedOperationException("Already have a separator");
    }

    @Override
    public BaseEncoding upperCase() {
      return delegate.upperCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding lowerCase() {
      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public String toString() {
      return delegate.toString() +
          ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
    }
  }
}