/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.io.GwtWorkarounds.asCharInput;
import static com.google.common.io.GwtWorkarounds.asCharOutput;
import static com.google.common.io.GwtWorkarounds.asInputStream;
import static com.google.common.io.GwtWorkarounds.asOutputStream;
import static com.google.common.io.GwtWorkarounds.stringBuilderOutput;
import static com.google.common.math.IntMath.divide;
import static com.google.common.math.IntMath.log2;
import static java.math.RoundingMode.CEILING;
import static java.math.RoundingMode.FLOOR;
import static java.math.RoundingMode.UNNECESSARY;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;
import com.google.common.io.GwtWorkarounds.ByteInput;
import com.google.common.io.GwtWorkarounds.ByteOutput;
import com.google.common.io.GwtWorkarounds.CharInput;
import com.google.common.io.GwtWorkarounds.CharOutput;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.util.Arrays;

import javax.annotation.CheckReturnValue;
import javax.annotation.Nullable;

/**
 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
 * strings. This class includes several constants for encoding schemes specified by <a
 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>.  For example, the expression:
 * <pre>   {@code
 *
 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))
 * }</pre>
 * returns the string {@code "MZXW6==="}, and <pre>   {@code
 *
 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");
 * }</pre>
 *
 * ...returns the ASCII bytes of the string {@code "foo"}.
 *
 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
 * RFC 4648.  Decoding rejects characters in the wrong case, though padding is optional.
 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
 * with modified behavior: <pre>   {@code
 *
 *  BaseEncoding.base16().lowerCase().decode("deadbeef");
 * }</pre>
 *
 * <p>Warning: BaseEncoding instances are immutable.  Invoking a configuration method has no effect
 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
 * <pre>   {@code
 *
 *   // Do NOT do this
 *   BaseEncoding hex = BaseEncoding.base16();
 *   hex.lowerCase(); // does nothing!
 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException
 * }</pre>
 *
 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
 * {@code x}, but the reverse does not necessarily hold.
 *
 * <p>
 * <table>
 * <tr>
 * <th>Encoding
 * <th>Alphabet
 * <th>{@code char:byte} ratio
 * <th>Default padding
 * <th>Comments
 * <tr>
 * <td>{@link #base16()}
 * <td>0-9 A-F
 * <td>2.00
 * <td>N/A
 * <td>Traditional hexadecimal.  Defaults to upper case.
 * <tr>
 * <td>{@link #base32()}
 * <td>A-Z 2-7
 * <td>1.60
 * <td>=
 * <td>Human-readable; no possibility of mixing up 0/O or 1/I.  Defaults to upper case.
 * <tr>
 * <td>{@link #base32Hex()}
 * <td>0-9 A-V
 * <td>1.60
 * <td>=
 * <td>"Numerical" base 32; extended from the traditional hex alphabet.  Defaults to upper case.
 * <tr>
 * <td>{@link #base64()}
 * <td>A-Z a-z 0-9 + /
 * <td>1.33
 * <td>=
 * <td>
 * <tr>
 * <td>{@link #base64Url()}
 * <td>A-Z a-z 0-9 - _
 * <td>1.33
 * <td>=
 * <td>Safe to use as filenames, or to pass in URLs without escaping
 * </table>
 *
 * <p>
 * All instances of this class are immutable, so they may be stored safely as static constants.
 *
 * @author Louis Wasserman
 * @since 14.0
 */
@Beta
@GwtCompatible(emulated = true)
public abstract class BaseEncoding {
  // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int])

  /** Package-private: the only implementations are the nested classes in this file. */
  BaseEncoding() {}

  /**
   * Encodes the specified byte array, and returns the encoded {@code String}.
   */
  public String encode(byte[] bytes) {
    return encode(checkNotNull(bytes), 0, bytes.length);
  }

  /**
   * Encodes the specified range of the specified byte array, and returns the encoded
   * {@code String}.
   *
   * @param bytes the array to read from
   * @param off index of the first byte to encode
   * @param len number of bytes to encode
   * @throws IndexOutOfBoundsException if {@code off} and {@code off + len} do not describe a valid
   *     range of {@code bytes} (as checked by {@code checkPositionIndexes})
   */
  public final String encode(byte[] bytes, int off, int len) {
    checkNotNull(bytes);
    checkPositionIndexes(off, off + len, bytes.length);
    CharOutput result = stringBuilderOutput(maxEncodedSize(len));
    ByteOutput byteOutput = encodingStream(result);
    try {
      for (int i = 0; i < len; i++) {
        byteOutput.write(bytes[off + i]);
      }
      // Closing flushes any buffered partial character and emits padding.
      byteOutput.close();
    } catch (IOException impossible) {
      // Not expected when writing to the CharOutput obtained from stringBuilderOutput. Pass the
      // exception along so that it becomes the cause and an unexpected failure stays diagnosable.
      throw new AssertionError(impossible);
    }
    return result.toString();
  }

  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}.  When the returned {@code OutputStream} is closed, so is the backing
   * {@code Writer}.
   */
  @GwtIncompatible("Writer,OutputStream")
  public final OutputStream encodingStream(Writer writer) {
    return asOutputStream(encodingStream(asCharOutput(writer)));
  }

  /**
   * Returns an {@code OutputSupplier} that supplies streams that encode bytes using this encoding
   * into writers from the specified {@code OutputSupplier}.
   */
  @GwtIncompatible("Writer,OutputStream")
  public final OutputSupplier<OutputStream> encodingStream(
      final OutputSupplier<? extends Writer> writerSupplier) {
    checkNotNull(writerSupplier);
    return new OutputSupplier<OutputStream>() {
      @Override
      public OutputStream getOutput() throws IOException {
        return encodingStream(writerSupplier.getOutput());
      }
    };
  }

  /**
   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
   */
  @GwtIncompatible("ByteSink,CharSink")
  public final ByteSink encodingSink(final CharSink encodedSink) {
    checkNotNull(encodedSink);
    return new ByteSink() {
      @Override
      public OutputStream openStream() throws IOException {
        return encodingStream(encodedSink.openStream());
      }
    };
  }

  // TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher)

  /**
   * Returns the first {@code length} bytes of {@code result}: the array itself when it is already
   * exactly that long, otherwise a truncated copy.
   */
  private static byte[] extract(byte[] result, int length) {
    if (length == result.length) {
      return result;
    } else {
      byte[] trunc = new byte[length];
      System.arraycopy(result, 0, trunc, 0, length);
      return trunc;
    }
  }

  /**
   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
   * This is the inverse operation to {@link #encode(byte[])}.
   *
   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
   *         encoding.
   */
  public final byte[] decode(CharSequence chars) {
    // Trailing padding carries no data, so drop it before sizing the buffer and decoding.
    chars = padding().trimTrailingFrom(chars);
    ByteInput decodedInput = decodingStream(asCharInput(chars));
    // maxDecodedSize is only an upper bound; the result is trimmed to the bytes actually read.
    byte[] tmp = new byte[maxDecodedSize(chars.length())];
    int index = 0;
    try {
      for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
        tmp[index++] = (byte) i;
      }
    } catch (IOException badInput) {
      throw new IllegalArgumentException(badInput);
    }
    return extract(tmp, index);
  }

  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified
   * {@code Reader}.
   */
  @GwtIncompatible("Reader,InputStream")
  public final InputStream decodingStream(Reader reader) {
    return asInputStream(decodingStream(asCharInput(reader)));
  }

  /**
   * Returns an {@code InputSupplier} that supplies input streams that decode base-encoded input
   * from readers from the specified supplier.
   */
  @GwtIncompatible("Reader,InputStream")
  public final InputSupplier<InputStream> decodingStream(
      final InputSupplier<? extends Reader> readerSupplier) {
    checkNotNull(readerSupplier);
    return new InputSupplier<InputStream>() {
      @Override
      public InputStream getInput() throws IOException {
        return decodingStream(readerSupplier.getInput());
      }
    };
  }

  /**
   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
   * {@code CharSource}.
   */
  @GwtIncompatible("ByteSource,CharSource")
  public final ByteSource decodingSource(final CharSource encodedSource) {
    checkNotNull(encodedSource);
    return new ByteSource() {
      @Override
      public InputStream openStream() throws IOException {
        return decodingStream(encodedSource.openStream());
      }
    };
  }

  // Implementations for encoding/decoding

  /**
   * Returns the number of characters used to presize the output when encoding {@code bytes}
   * bytes; see {@link #encode(byte[], int, int)}.
   */
  abstract int maxEncodedSize(int bytes);

  /** Returns a {@code ByteOutput} that writes the encoded form of its input to {@code charOutput}. */
  abstract ByteOutput encodingStream(CharOutput charOutput);

  /**
   * Returns the size of the buffer allocated when decoding {@code chars} characters; see
   * {@link #decode(CharSequence)}, which trims the buffer to the bytes actually produced.
   */
  abstract int maxDecodedSize(int chars);

  /** Returns a {@code ByteInput} that yields the bytes decoded from {@code charInput}. */
  abstract ByteInput decodingStream(CharInput charInput);

  /** Returns a matcher for this encoding's padding characters. */
  abstract CharMatcher padding();

  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *         separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters.  Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *         string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters.  Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters.  Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();

  private static final BaseEncoding BASE64 = new StandardBaseEncoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }

  private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
   * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }

  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding.
   * (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }

  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet.  There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }

  private static final BaseEncoding BASE16 =
      new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);

  /**
   * The "base16" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding.
   * (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }

  /**
   * The set of characters used by an encoding, together with the chunk geometry derived from its
   * size.  As a {@code CharMatcher} it matches exactly the characters of the alphabet.
   */
  private static final class Alphabet extends CharMatcher {
    private final String name;
    // this is meant to be immutable -- don't modify it!
    private final char[] chars;
    /** {@code chars.length - 1}; selects the low {@code bitsPerChar} bits of a value. */
    final int mask;
    /** Base-2 logarithm of the alphabet size. */
    final int bitsPerChar;
    final int charsPerChunk;
    final int bytesPerChunk;
    /** Maps an ASCII character to its index in {@code chars}, or -1 if it is not in the alphabet. */
    private final byte[] decodabet;
    /** Indexed by character position within a chunk; see the constructor for how it is filled. */
    private final boolean[] validPadding;

    /**
     * @throws IllegalArgumentException if the number of characters is not a power of two, or if
     *     any character is non-ASCII or appears more than once
     */
    Alphabet(String name, char[] chars) {
      this.name = checkNotNull(name);
      this.chars = checkNotNull(chars);
      try {
        // UNNECESSARY makes log2 throw unless the length is an exact power of two.
        this.bitsPerChar = log2(chars.length, UNNECESSARY);
      } catch (ArithmeticException e) {
        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
      }

      /*
       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3.  This makes
       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
       */
      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
      this.charsPerChunk = 8 / gcd;
      this.bytesPerChunk = bitsPerChar / gcd;

      this.mask = chars.length - 1;

      byte[] decodabet = new byte[Ascii.MAX + 1];
      Arrays.fill(decodabet, (byte) -1);
      for (int i = 0; i < chars.length; i++) {
        char c = chars[i];
        checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
        decodabet[c] = (byte) i;
      }
      this.decodabet = decodabet;

      // Mark, for each whole number of bytes i within a chunk, the number of characters needed to
      // hold i bytes: those are the offsets at which data may stop and padding may begin.
      boolean[] validPadding = new boolean[charsPerChunk];
      for (int i = 0; i < bytesPerChunk; i++) {
        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
      }
      this.validPadding = validPadding;
    }

    /** Returns the alphabet character at index {@code bits}. */
    char encode(int bits) {
      return chars[bits];
    }

    /** Returns whether {@code index}, reduced modulo {@code charsPerChunk}, is marked valid. */
    boolean isValidPaddingStartPosition(int index) {
      return validPadding[index % charsPerChunk];
    }

    /**
     * Returns the index of {@code ch} within this alphabet.
     *
     * @throws IOException if {@code ch} is not a character of this alphabet
     */
    int decode(char ch) throws IOException {
      if (ch > Ascii.MAX || decodabet[ch] == -1) {
        throw new IOException("Unrecognized character: " + ch);
      }
      return decodabet[ch];
    }

    private boolean hasLowerCase() {
      for (char c : chars) {
        if (Ascii.isLowerCase(c)) {
          return true;
        }
      }
      return false;
    }

    private boolean hasUpperCase() {
      for (char c : chars) {
        if (Ascii.isUpperCase(c)) {
          return true;
        }
      }
      return false;
    }

    /**
     * Returns this alphabet if it has no lower-case characters, otherwise an upper-cased copy.
     *
     * @throws IllegalStateException if this alphabet contains both upper- and lower-case characters
     */
    Alphabet upperCase() {
      if (!hasLowerCase()) {
        return this;
      } else {
        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
        char[] upperCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          upperCased[i] = Ascii.toUpperCase(chars[i]);
        }
        return new Alphabet(name + ".upperCase()", upperCased);
      }
    }

    /**
     * Returns this alphabet if it has no upper-case characters, otherwise a lower-cased copy.
     *
     * @throws IllegalStateException if this alphabet contains both upper- and lower-case characters
     */
    Alphabet lowerCase() {
      if (!hasUpperCase()) {
        return this;
      } else {
        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
        char[] lowerCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          lowerCased[i] = Ascii.toLowerCase(chars[i]);
        }
        return new Alphabet(name + ".lowerCase()", lowerCased);
      }
    }

    @Override
    public boolean matches(char c) {
      return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
    }

    @Override
    public String toString() {
      return name;
    }
  }

  /**
   * An encoding defined by an {@link Alphabet} and an optional padding character, with no
   * separators.
   */
  static final class StandardBaseEncoding extends BaseEncoding {
    private final Alphabet alphabet;

    /** The padding character, or {@code null} if this encoding writes no padding. */
    @Nullable
    private final Character paddingChar;

    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }

    /**
     * @throws IllegalArgumentException if {@code paddingChar} is a character of {@code alphabet}
     */
    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
      this.alphabet = checkNotNull(alphabet);
      checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
          "Padding character %s was already in alphabet", paddingChar);
      this.paddingChar = paddingChar;
    }

    @Override
    CharMatcher padding() {
      return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
    }

    @Override
    int maxEncodedSize(int bytes) {
      // Whole chunks only: a partial trailing chunk is counted as a full one.
      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
    }

    @Override
    ByteOutput encodingStream(final CharOutput out) {
      checkNotNull(out);
      return new ByteOutput() {
        // Bits received but not yet written out; only the low bitBufferLength bits are meaningful.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        int writtenChars = 0;

        @Override
        public void write(byte b) throws IOException {
          bitBuffer <<= 8;
          bitBuffer |= b & 0xFF;
          bitBufferLength += 8;
          // Emit one character for every complete group of bitsPerChar bits, oldest bits first.
          while (bitBufferLength >= alphabet.bitsPerChar) {
            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            bitBufferLength -= alphabet.bitsPerChar;
          }
        }

        @Override
        public void flush() throws IOException {
          out.flush();
        }

        @Override
        public void close() throws IOException {
          if (bitBufferLength > 0) {
            // Left-align the leftover bits in a final character; the low bits become zero.
            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            if (paddingChar != null) {
              // Pad the output up to a whole number of chunks.
              while (writtenChars % alphabet.charsPerChunk != 0) {
                out.write(paddingChar.charValue());
                writtenChars++;
              }
            }
          }
          out.close();
        }
      };
    }

    @Override
    int maxDecodedSize(int chars) {
      // ceil(bitsPerChar * chars / 8), computed in long so the product cannot overflow an int.
      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
    }

    @Override
    ByteInput decodingStream(final CharInput reader) {
      checkNotNull(reader);
      return new ByteInput() {
        // Bits decoded but not yet returned; only the low bitBufferLength bits are meaningful.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        int readChars = 0;
        boolean hitPadding = false;
        final CharMatcher paddingMatcher = padding();

        @Override
        public int read() throws IOException {
          while (true) {
            int readChar = reader.read();
            if (readChar == -1) {
              // Unpadded input may only end where padding would have been allowed to start.
              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
                throw new IOException("Invalid input length " + readChars);
              }
              return -1;
            }
            readChars++;
            char ch = (char) readChar;
            if (paddingMatcher.matches(ch)) {
              // The first padding character must not be the first character of the input and
              // must sit at a position the alphabet marks as a valid padding start.
              if (!hitPadding
                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
                throw new IOException("Padding cannot start at index " + readChars);
              }
              hitPadding = true;
            } else if (hitPadding) {
              throw new IOException(
                  "Expected padding character but found '" + ch + "' at index " + readChars);
            } else {
              bitBuffer <<= alphabet.bitsPerChar;
              bitBuffer |= alphabet.decode(ch);
              bitBufferLength += alphabet.bitsPerChar;

              if (bitBufferLength >= 8) {
                bitBufferLength -= 8;
                return (bitBuffer >> bitBufferLength) & 0xFF;
              }
            }
          }
        }

        @Override
        public void close() throws IOException {
          reader.close();
        }
      };
    }

    @Override
    public BaseEncoding omitPadding() {
      return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      // When bitsPerChar divides 8 (e.g. base16), this is a no-op, as is re-requesting the
      // current padding character.
      if (8 % alphabet.bitsPerChar == 0 ||
          (paddingChar != null && paddingChar.charValue() == padChar)) {
        return this;
      } else {
        return new StandardBaseEncoding(alphabet, padChar);
      }
    }

    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      checkNotNull(separator);
      checkArgument(padding().or(alphabet).matchesNoneOf(separator),
          "Separator cannot contain alphabet or padding characters");
      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
    }

    // Lazily computed results of upperCase() and lowerCase().
    private transient BaseEncoding upperCase;
    private transient BaseEncoding lowerCase;

    @Override
    public BaseEncoding upperCase() {
      BaseEncoding result = upperCase;
      if (result == null) {
        Alphabet upper = alphabet.upperCase();
        result = upperCase =
            (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
      }
      return result;
    }

    @Override
    public BaseEncoding lowerCase() {
      BaseEncoding result = lowerCase;
      if (result == null) {
        Alphabet lower = alphabet.lowerCase();
        result = lowerCase =
            (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
      }
      return result;
    }

    @Override
    public String toString() {
      StringBuilder builder = new StringBuilder("BaseEncoding.");
      builder.append(alphabet.toString());
      // Padding is only mentioned when bitsPerChar does not divide 8.
      if (8 % alphabet.bitsPerChar != 0) {
        if (paddingChar == null) {
          builder.append(".omitPadding()");
        } else {
          builder.append(".withPadChar(").append(paddingChar).append(')');
        }
      }
      return builder.toString();
    }
  }

  /**
   * Returns a {@code CharInput} that reads from {@code delegate} but skips every character
   * matched by {@code toIgnore}.
   */
  static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
    checkNotNull(delegate);
    checkNotNull(toIgnore);
    return new CharInput() {
      @Override
      public int read() throws IOException {
        int readChar;
        do {
          readChar = delegate.read();
        } while (readChar != -1 && toIgnore.matches((char) readChar));
        return readChar;
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * Returns a {@code CharOutput} that writes to {@code delegate}, inserting {@code separator}
   * between each run of {@code afterEveryChars} characters.  The separator is written lazily,
   * just before the next character, so the output never ends with a separator.
   *
   * @throws IllegalArgumentException if {@code afterEveryChars} is not positive
   */
  static CharOutput separatingOutput(
      final CharOutput delegate, final String separator, final int afterEveryChars) {
    checkNotNull(delegate);
    checkNotNull(separator);
    checkArgument(afterEveryChars > 0);
    return new CharOutput() {
      int charsUntilSeparator = afterEveryChars;

      @Override
      public void write(char c) throws IOException {
        if (charsUntilSeparator == 0) {
          for (int i = 0; i < separator.length(); i++) {
            delegate.write(separator.charAt(i));
          }
          charsUntilSeparator = afterEveryChars;
        }
        delegate.write(c);
        charsUntilSeparator--;
      }

      @Override
      public void flush() throws IOException {
        delegate.flush();
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * An encoding that wraps another encoding, inserting a separator string into its encoded output
   * and skipping the separator's characters when decoding.
   */
  static final class SeparatedBaseEncoding extends BaseEncoding {
    private final BaseEncoding delegate;
    private final String separator;
    private final int afterEveryChars;
    /** Matches any character that occurs in {@code separator}. */
    private final CharMatcher separatorChars;

    /**
     * @throws IllegalArgumentException if {@code afterEveryChars} is not positive
     */
    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
      this.delegate = checkNotNull(delegate);
      this.separator = checkNotNull(separator);
      this.afterEveryChars = afterEveryChars;
      checkArgument(
          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
    }

    @Override
    CharMatcher padding() {
      return delegate.padding();
    }

    @Override
    int maxEncodedSize(int bytes) {
      int unseparatedSize = delegate.maxEncodedSize(bytes);
      // One separator for each full group of afterEveryChars characters that is followed by at
      // least one more character.
      return unseparatedSize + separator.length()
          * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
    }

    @Override
    ByteOutput encodingStream(final CharOutput output) {
      return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
    }

    @Override
    int maxDecodedSize(int chars) {
      return delegate.maxDecodedSize(chars);
    }

    @Override
    ByteInput decodingStream(final CharInput input) {
      return delegate.decodingStream(ignoringInput(input, separatorChars));
    }

    @Override
    public BaseEncoding omitPadding() {
      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      throw new UnsupportedOperationException("Already have a separator");
    }

    @Override
    public BaseEncoding upperCase() {
      return delegate.upperCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding lowerCase() {
      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public String toString() {
      return delegate.toString() +
          ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
    }
  }
}