/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.io.GwtWorkarounds.asCharInput;
import static com.google.common.io.GwtWorkarounds.asCharOutput;
import static com.google.common.io.GwtWorkarounds.asInputStream;
import static com.google.common.io.GwtWorkarounds.asOutputStream;
import static com.google.common.io.GwtWorkarounds.stringBuilderOutput;
import static com.google.common.math.IntMath.divide;
import static com.google.common.math.IntMath.log2;
import static java.math.RoundingMode.CEILING;
import static java.math.RoundingMode.FLOOR;
import static java.math.RoundingMode.UNNECESSARY;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;
import com.google.common.io.GwtWorkarounds.ByteInput;
import com.google.common.io.GwtWorkarounds.ByteOutput;
import com.google.common.io.GwtWorkarounds.CharInput;
import com.google.common.io.GwtWorkarounds.CharOutput;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.util.Arrays;

import javax.annotation.CheckReturnValue;
import javax.annotation.Nullable;

/**
 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
 * strings. This class includes several constants for encoding schemes specified by <a
 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
 *
 * <pre>   {@code
 *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
 *
 * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
 *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
 *
 * <p>...returns the ASCII bytes of the string {@code "foo"}.
 *
 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
 * RFC 4648. Decoding rejects characters in the wrong case, though padding is optional.
 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
 * with modified behavior:
 *
 * <pre>   {@code
 *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
 *
 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect
 * on the receiving instance; you must store and use the new encoding instance it returns, instead.
 *
 * <pre>   {@code
 *   // Do NOT do this
 *   BaseEncoding hex = BaseEncoding.base16();
 *   hex.lowerCase(); // does nothing!
 *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
 *
 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
 * {@code x}, but the reverse does not necessarily hold.
 *
 * <p>
 * <table>
 * <tr>
 * <th>Encoding
 * <th>Alphabet
 * <th>{@code char:byte} ratio
 * <th>Default padding
 * <th>Comments
 * <tr>
 * <td>{@link #base16()}
 * <td>0-9 A-F
 * <td>2.00
 * <td>N/A
 * <td>Traditional hexadecimal. Defaults to upper case.
 * <tr>
 * <td>{@link #base32()}
 * <td>A-Z 2-7
 * <td>1.60
 * <td>=
 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case.
 * <tr>
 * <td>{@link #base32Hex()}
 * <td>0-9 A-V
 * <td>1.60
 * <td>=
 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case.
 * <tr>
 * <td>{@link #base64()}
 * <td>A-Z a-z 0-9 + /
 * <td>1.33
 * <td>=
 * <td>
 * <tr>
 * <td>{@link #base64Url()}
 * <td>A-Z a-z 0-9 - _
 * <td>1.33
 * <td>=
 * <td>Safe to use as filenames, or to pass in URLs without escaping
 * </table>
 *
 * <p>
 * All instances of this class are immutable, so they may be stored safely as static constants.
 *
 * @author Louis Wasserman
 * @since 14.0
 */
@Beta
@GwtCompatible(emulated = true)
public abstract class BaseEncoding {
  // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int])

  BaseEncoding() {}

  /**
   * Exception indicating invalid base-encoded input encountered while decoding.
   *
   * @author Louis Wasserman
   * @since 15.0
   */
  public static final class DecodingException extends IOException {
    DecodingException(String message) {
      super(message);
    }

    DecodingException(Throwable cause) {
      super(cause);
    }
  }

  /**
   * Encodes the specified byte array, and returns the encoded {@code String}.
   */
  public String encode(byte[] bytes) {
    return encode(checkNotNull(bytes), 0, bytes.length);
  }

  /**
   * Encodes the specified range of the specified byte array, and returns the encoded
   * {@code String}.
   */
  public final String encode(byte[] bytes, int off, int len) {
    checkNotNull(bytes);
    checkPositionIndexes(off, off + len, bytes.length);
    CharOutput result = stringBuilderOutput(maxEncodedSize(len));
    ByteOutput byteOutput = encodingStream(result);
    try {
      for (int i = 0; i < len; i++) {
        byteOutput.write(bytes[off + i]);
      }
      // Closing flushes any partially filled chunk and emits padding where applicable.
      byteOutput.close();
    } catch (IOException impossible) {
      // The output comes from stringBuilderOutput, so an IOException is not expected here. Keep
      // the exception as the cause (as decodeChecked does) so it can be diagnosed if it happens.
      throw new AssertionError(impossible);
    }
    return result.toString();
  }

  /**
   * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
   * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing
   * {@code Writer}.
   */
  @GwtIncompatible("Writer,OutputStream")
  public final OutputStream encodingStream(Writer writer) {
    return asOutputStream(encodingStream(asCharOutput(writer)));
  }

  /**
   * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
   */
  @GwtIncompatible("ByteSink,CharSink")
  public final ByteSink encodingSink(final CharSink encodedSink) {
    checkNotNull(encodedSink);
    return new ByteSink() {
      @Override
      public OutputStream openStream() throws IOException {
        return encodingStream(encodedSink.openStream());
      }
    };
  }

  // TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher)

  /**
   * Returns the first {@code length} bytes of {@code result}: the array itself when it is already
   * exactly that long, otherwise a newly allocated copy of that prefix.
   */
  private static byte[] extract(byte[] result, int length) {
    if (length == result.length) {
      return result;
    } else {
      byte[] trunc = new byte[length];
      System.arraycopy(result, 0, trunc, 0, length);
      return trunc;
    }
  }

  /**
   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
   * This is the inverse operation to {@link #encode(byte[])}.
   *
   * @throws IllegalArgumentException if the input is not a valid encoded string according to this
   *         encoding.
   */
  public final byte[] decode(CharSequence chars) {
    try {
      return decodeChecked(chars);
    } catch (DecodingException badInput) {
      throw new IllegalArgumentException(badInput);
    }
  }

  /**
   * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
   * This is the inverse operation to {@link #encode(byte[])}.
   *
   * @throws DecodingException if the input is not a valid encoded string according to this
   *         encoding.
   */
  final byte[] decodeChecked(CharSequence chars) throws DecodingException {
    // Trailing padding carries no data, so drop it before sizing the buffer and decoding.
    chars = padding().trimTrailingFrom(chars);
    ByteInput decodedInput = decodingStream(asCharInput(chars));
    byte[] tmp = new byte[maxDecodedSize(chars.length())];
    int index = 0;
    try {
      for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
        tmp[index++] = (byte) i;
      }
    } catch (DecodingException badInput) {
      throw badInput;
    } catch (IOException impossible) {
      throw new AssertionError(impossible);
    }
    // tmp was sized with maxDecodedSize; return only the bytes actually decoded.
    return extract(tmp, index);
  }

  /**
   * Returns an {@code InputStream} that decodes base-encoded input from the specified
   * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific
   * errors.
   */
  @GwtIncompatible("Reader,InputStream")
  public final InputStream decodingStream(Reader reader) {
    return asInputStream(decodingStream(asCharInput(reader)));
  }

  /**
   * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
   * {@code CharSource}.
   */
  @GwtIncompatible("ByteSource,CharSource")
  public final ByteSource decodingSource(final CharSource encodedSource) {
    checkNotNull(encodedSource);
    return new ByteSource() {
      @Override
      public InputStream openStream() throws IOException {
        return decodingStream(encodedSource.openStream());
      }
    };
  }

  // Implementations for encoding/decoding

  /**
   * Returns the capacity to reserve for the encoded form of {@code bytes} input bytes; used by
   * {@link #encode(byte[], int, int)} to size its output.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Returns a {@code ByteOutput} that encodes the bytes written to it and writes the resulting
   * characters to {@code charOutput}.
   */
  abstract ByteOutput encodingStream(CharOutput charOutput);

  /**
   * Returns the capacity to reserve for the decoded form of {@code chars} input characters; used
   * by {@link #decodeChecked(CharSequence)} to size its temporary buffer.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Returns a {@code ByteInput} that yields the bytes decoded from the characters read from
   * {@code charInput}.
   */
  abstract ByteInput decodingStream(CharInput charInput);

  /**
   * Returns a matcher for this encoding's padding characters; {@link #decodeChecked(CharSequence)}
   * trims characters it matches from the end of the input.
   */
  abstract CharMatcher padding();

  // Modified encoding generators

  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *         separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *         string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();

  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();

  private static final BaseEncoding BASE64 = new StandardBaseEncoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }

  private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
   * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }

  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding.
   * (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }

  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }

  private static final BaseEncoding BASE16 =
      new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);

  /**
   * The "base16" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding.
   * (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }

  /**
   * The set of ASCII characters used as the digits of an encoding, together with the lookup tables
   * derived from it. As a {@code CharMatcher} it matches exactly the characters of the alphabet.
   */
  private static final class Alphabet extends CharMatcher {
    private final String name;
    // this is meant to be immutable -- don't modify it!
    private final char[] chars;
    /** Bit mask selecting the low {@code bitsPerChar} bits ({@code chars.length - 1}). */
    final int mask;
    /** Number of bits of data carried by each encoded character. */
    final int bitsPerChar;
    /** Number of encoded characters in one chunk. */
    final int charsPerChunk;
    /** Number of decoded bytes in one chunk. */
    final int bytesPerChunk;
    /** Maps an ASCII character to its index in {@code chars}, or -1 if it is not in the alphabet. */
    private final byte[] decodabet;
    /** Indexed by position within a chunk; true where the data may end and padding may begin. */
    private final boolean[] validPadding;

    /**
     * Builds the alphabet and its lookup tables.
     *
     * @param name the text returned by {@link #toString()}
     * @param chars the alphabet characters, in digit order; stored without copying
     * @throws IllegalArgumentException if the alphabet length is illegal (it must be a power of two
     *         greater than one), or if {@code chars} contains a non-ASCII or duplicate character
     */
    Alphabet(String name, char[] chars) {
      this.name = checkNotNull(name);
      this.chars = checkNotNull(chars);
      try {
        // UNNECESSARY rounding makes log2 fail unless the length is an exact power of two.
        this.bitsPerChar = log2(chars.length, UNNECESSARY);
      } catch (ArithmeticException e) {
        throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
      }
      // A single-character alphabet yields bitsPerChar == 0, which would make gcd zero below and
      // fail with a raw ArithmeticException on division; reject it like other illegal lengths.
      checkArgument(bitsPerChar > 0, "Illegal alphabet length %s", chars.length);

      /*
       * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
       * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
       */
      int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
      this.charsPerChunk = 8 / gcd;
      this.bytesPerChunk = bitsPerChar / gcd;

      this.mask = chars.length - 1;

      byte[] decodabet = new byte[Ascii.MAX + 1];
      Arrays.fill(decodabet, (byte) -1);
      for (int i = 0; i < chars.length; i++) {
        char c = chars[i];
        checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
        checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
        decodabet[c] = (byte) i;
      }
      this.decodabet = decodabet;

      // For each whole number of bytes i that fits in a chunk, mark the number of characters
      // needed to carry i bytes as a position where the data may stop.
      boolean[] validPadding = new boolean[charsPerChunk];
      for (int i = 0; i < bytesPerChunk; i++) {
        validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
      }
      this.validPadding = validPadding;
    }

    /** Returns the alphabet character at index {@code bits}. */
    char encode(int bits) {
      return chars[bits];
    }

    /**
     * Returns whether the data may end (and padding may start) after {@code index} characters,
     * judged by the position of {@code index} within a chunk.
     */
    boolean isValidPaddingStartPosition(int index) {
      return validPadding[index % charsPerChunk];
    }

    /**
     * Returns the index of {@code ch} within this alphabet.
     *
     * @throws DecodingException if {@code ch} is not a character of this alphabet
     */
    int decode(char ch) throws IOException {
      if (ch > Ascii.MAX || decodabet[ch] == -1) {
        throw new DecodingException("Unrecognized character: " + ch);
      }
      return decodabet[ch];
    }

    /** Returns whether any alphabet character is a lowercase ASCII letter. */
    private boolean hasLowerCase() {
      for (char c : chars) {
        if (Ascii.isLowerCase(c)) {
          return true;
        }
      }
      return false;
    }

    /** Returns whether any alphabet character is an uppercase ASCII letter. */
    private boolean hasUpperCase() {
      for (char c : chars) {
        if (Ascii.isUpperCase(c)) {
          return true;
        }
      }
      return false;
    }

    /**
     * Returns this alphabet if it has no lowercase letters, otherwise a new alphabet with every
     * character upper-cased.
     *
     * @throws IllegalStateException if the alphabet contains both upper- and lowercase letters
     */
    Alphabet upperCase() {
      if (!hasLowerCase()) {
        return this;
      } else {
        checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
        char[] upperCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          upperCased[i] = Ascii.toUpperCase(chars[i]);
        }
        return new Alphabet(name + ".upperCase()", upperCased);
      }
    }

    /**
     * Returns this alphabet if it has no uppercase letters, otherwise a new alphabet with every
     * character lower-cased.
     *
     * @throws IllegalStateException if the alphabet contains both upper- and lowercase letters
     */
    Alphabet lowerCase() {
      if (!hasUpperCase()) {
        return this;
      } else {
        checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
        char[] lowerCased = new char[chars.length];
        for (int i = 0; i < chars.length; i++) {
          lowerCased[i] = Ascii.toLowerCase(chars[i]);
        }
        return new Alphabet(name + ".lowerCase()", lowerCased);
      }
    }

    @Override
    public boolean matches(char c) {
      return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
    }

    @Override
    public String toString() {
      return name;
    }
  }

  /**
   * An encoding defined by an {@link Alphabet} and an optional padding character ({@code null}
   * meaning that no padding is written).
   */
  static final class StandardBaseEncoding extends BaseEncoding {
    private final Alphabet alphabet;

    @Nullable
    private final Character paddingChar;

    StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
      this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    }

    /**
     * @throws IllegalArgumentException if {@code paddingChar} is non-null and is a character of
     *         {@code alphabet}
     */
    StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
      this.alphabet = checkNotNull(alphabet);
      checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
          "Padding character %s was already in alphabet", paddingChar);
      this.paddingChar = paddingChar;
    }

    @Override
    CharMatcher padding() {
      return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
    }

    @Override
    int maxEncodedSize(int bytes) {
      // Whole chunks only: round the byte count up to a multiple of bytesPerChunk.
      return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
    }

    @Override
    ByteOutput encodingStream(final CharOutput out) {
      checkNotNull(out);
      return new ByteOutput() {
        // Bits received but not yet emitted occupy the low bitBufferLength bits of bitBuffer.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        int writtenChars = 0;

        @Override
        public void write(byte b) throws IOException {
          bitBuffer <<= 8;
          bitBuffer |= b & 0xFF;
          bitBufferLength += 8;
          // Emit one character for every full group of bitsPerChar bits, most significant first.
          while (bitBufferLength >= alphabet.bitsPerChar) {
            int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            bitBufferLength -= alphabet.bitsPerChar;
          }
        }

        @Override
        public void flush() throws IOException {
          out.flush();
        }

        @Override
        public void close() throws IOException {
          if (bitBufferLength > 0) {
            // Left-align the leftover bits within a final character; the low bits are zero.
            int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
                & alphabet.mask;
            out.write(alphabet.encode(charIndex));
            writtenChars++;
            if (paddingChar != null) {
              // Pad the output up to a whole number of chunks.
              while (writtenChars % alphabet.charsPerChunk != 0) {
                out.write(paddingChar.charValue());
                writtenChars++;
              }
            }
          }
          out.close();
        }
      };
    }

    @Override
    int maxDecodedSize(int chars) {
      // Computed in long arithmetic so that the multiplication cannot overflow an int.
      return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
    }

    @Override
    ByteInput decodingStream(final CharInput reader) {
      checkNotNull(reader);
      return new ByteInput() {
        // Decoded bits not yet returned occupy the low bitBufferLength bits of bitBuffer.
        int bitBuffer = 0;
        int bitBufferLength = 0;
        // Counts every character consumed from the reader, padding included.
        int readChars = 0;
        boolean hitPadding = false;
        final CharMatcher paddingMatcher = padding();

        @Override
        public int read() throws IOException {
          while (true) {
            int readChar = reader.read();
            if (readChar == -1) {
              // Unpadded input must still end at a position where a chunk may legally stop.
              if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
                throw new DecodingException("Invalid input length " + readChars);
              }
              return -1;
            }
            readChars++;
            char ch = (char) readChar;
            if (paddingMatcher.matches(ch)) {
              // Only the first padding character is position-checked; it must not be the very
              // first character of the input.
              if (!hitPadding
                  && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
                throw new DecodingException("Padding cannot start at index " + readChars);
              }
              hitPadding = true;
            } else if (hitPadding) {
              throw new DecodingException(
                  "Expected padding character but found '" + ch + "' at index " + readChars);
            } else {
              bitBuffer <<= alphabet.bitsPerChar;
              bitBuffer |= alphabet.decode(ch);
              bitBufferLength += alphabet.bitsPerChar;

              if (bitBufferLength >= 8) {
                bitBufferLength -= 8;
                return (bitBuffer >> bitBufferLength) & 0xFF;
              }
            }
          }
        }

        @Override
        public void close() throws IOException {
          reader.close();
        }
      };
    }

    @Override
    public BaseEncoding omitPadding() {
      return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      // When bitsPerChar divides 8 no padding is ever written, so the pad character is moot.
      if (8 % alphabet.bitsPerChar == 0 ||
          (paddingChar != null && paddingChar.charValue() == padChar)) {
        return this;
      } else {
        return new StandardBaseEncoding(alphabet, padChar);
      }
    }

    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      checkNotNull(separator);
      checkArgument(padding().or(alphabet).matchesNoneOf(separator),
          "Separator cannot contain alphabet or padding characters");
      return new SeparatedBaseEncoding(this, separator, afterEveryChars);
    }

    // Lazily computed results of upperCase() and lowerCase(); null until first requested.
    private transient BaseEncoding upperCase;
    private transient BaseEncoding lowerCase;

    @Override
    public BaseEncoding upperCase() {
      BaseEncoding result = upperCase;
      if (result == null) {
        Alphabet upper = alphabet.upperCase();
        result = upperCase =
            (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
      }
      return result;
    }

    @Override
    public BaseEncoding lowerCase() {
      BaseEncoding result = lowerCase;
      if (result == null) {
        Alphabet lower = alphabet.lowerCase();
        result = lowerCase =
            (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
      }
      return result;
    }

    @Override
    public String toString() {
      StringBuilder builder = new StringBuilder("BaseEncoding.");
      builder.append(alphabet.toString());
      // Padding is only described for alphabets whose bitsPerChar does not divide 8.
      if (8 % alphabet.bitsPerChar != 0) {
        if (paddingChar == null) {
          builder.append(".omitPadding()");
        } else {
          builder.append(".withPadChar(").append(paddingChar).append(')');
        }
      }
      return builder.toString();
    }
  }

  /**
   * Returns a {@code CharInput} that reads from {@code delegate} but silently skips every
   * character matched by {@code toIgnore}. Closing the result closes {@code delegate}.
   */
  static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
    checkNotNull(delegate);
    checkNotNull(toIgnore);
    return new CharInput() {
      @Override
      public int read() throws IOException {
        int readChar;
        do {
          readChar = delegate.read();
        } while (readChar != -1 && toIgnore.matches((char) readChar));
        return readChar;
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * Returns a {@code CharOutput} that forwards to {@code delegate}, writing {@code separator}
   * between every group of {@code afterEveryChars} characters. The separator is written just
   * before the first character of the next group, so no separator trails the output.
   *
   * @throws IllegalArgumentException if {@code afterEveryChars <= 0}
   */
  static CharOutput separatingOutput(
      final CharOutput delegate, final String separator, final int afterEveryChars) {
    checkNotNull(delegate);
    checkNotNull(separator);
    checkArgument(afterEveryChars > 0);
    return new CharOutput() {
      int charsUntilSeparator = afterEveryChars;

      @Override
      public void write(char c) throws IOException {
        if (charsUntilSeparator == 0) {
          for (int i = 0; i < separator.length(); i++) {
            delegate.write(separator.charAt(i));
          }
          charsUntilSeparator = afterEveryChars;
        }
        delegate.write(c);
        charsUntilSeparator--;
      }

      @Override
      public void flush() throws IOException {
        delegate.flush();
      }

      @Override
      public void close() throws IOException {
        delegate.close();
      }
    };
  }

  /**
   * An encoding that wraps another encoding, inserting a separator string into its encoded output
   * and ignoring the separator's characters when decoding.
   */
  static final class SeparatedBaseEncoding extends BaseEncoding {
    private final BaseEncoding delegate;
    private final String separator;
    private final int afterEveryChars;
    /** Matches any character occurring in {@code separator}; these are skipped when decoding. */
    private final CharMatcher separatorChars;

    /**
     * @throws IllegalArgumentException if {@code afterEveryChars <= 0}
     */
    SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
      this.delegate = checkNotNull(delegate);
      this.separator = checkNotNull(separator);
      this.afterEveryChars = afterEveryChars;
      checkArgument(
          afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
      this.separatorChars = CharMatcher.anyOf(separator).precomputed();
    }

    @Override
    CharMatcher padding() {
      return delegate.padding();
    }

    @Override
    int maxEncodedSize(int bytes) {
      int unseparatedSize = delegate.maxEncodedSize(bytes);
      // One separator between each full group of afterEveryChars characters, none at the end.
      return unseparatedSize + separator.length()
          * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
    }

    @Override
    ByteOutput encodingStream(final CharOutput output) {
      return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
    }

    @Override
    int maxDecodedSize(int chars) {
      return delegate.maxDecodedSize(chars);
    }

    @Override
    ByteInput decodingStream(final CharInput input) {
      return delegate.decodingStream(ignoringInput(input, separatorChars));
    }

    @Override
    public BaseEncoding omitPadding() {
      return delegate.omitPadding().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding withPadChar(char padChar) {
      return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding withSeparator(String separator, int afterEveryChars) {
      throw new UnsupportedOperationException("Already have a separator");
    }

    @Override
    public BaseEncoding upperCase() {
      return delegate.upperCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public BaseEncoding lowerCase() {
      return delegate.lowerCase().withSeparator(separator, afterEveryChars);
    }

    @Override
    public String toString() {
      return delegate.toString() +
          ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
    }
  }
}