001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkPositionIndexes; 020import static com.google.common.base.Preconditions.checkState; 021import static com.google.common.io.GwtWorkarounds.asCharInput; 022import static com.google.common.io.GwtWorkarounds.asCharOutput; 023import static com.google.common.io.GwtWorkarounds.asInputStream; 024import static com.google.common.io.GwtWorkarounds.asOutputStream; 025import static com.google.common.io.GwtWorkarounds.stringBuilderOutput; 026import static com.google.common.math.IntMath.divide; 027import static com.google.common.math.IntMath.log2; 028import static java.math.RoundingMode.CEILING; 029import static java.math.RoundingMode.FLOOR; 030import static java.math.RoundingMode.UNNECESSARY; 031 032import com.google.common.annotations.Beta; 033import com.google.common.annotations.GwtCompatible; 034import com.google.common.annotations.GwtIncompatible; 035import com.google.common.base.Ascii; 036import com.google.common.base.CharMatcher; 037import com.google.common.io.GwtWorkarounds.ByteInput; 038import com.google.common.io.GwtWorkarounds.ByteOutput; 039import com.google.common.io.GwtWorkarounds.CharInput; 040import com.google.common.io.GwtWorkarounds.CharOutput; 041 042import java.io.IOException; 043import java.io.InputStream; 044import java.io.OutputStream; 045import java.io.Reader; 046import java.io.Writer; 047import java.util.Arrays; 048 049import javax.annotation.CheckReturnValue; 050import javax.annotation.Nullable; 051 052/** 053 * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII 054 * strings. This class includes several constants for encoding schemes specified by <a 055 * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression: 056 * 057 * <pre> {@code 058 * BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre> 059 * 060 * <p>returns the string {@code "MZXW6==="}, and <pre> {@code 061 * byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre> 062 * 063 * <p>...returns the ASCII bytes of the string {@code "foo"}. 064 * 065 * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with 066 * RFC 4648. Decoding rejects characters in the wrong case, though padding is optional. 067 * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding 068 * with modified behavior: 069 * 070 * <pre> {@code 071 * BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre> 072 * 073 * <p>Warning: BaseEncoding instances are immutable. Invoking a configuration method has no effect 074 * on the receiving instance; you must store and use the new encoding instance it returns, instead. 075 * 076 * <pre> {@code 077 * // Do NOT do this 078 * BaseEncoding hex = BaseEncoding.base16(); 079 * hex.lowerCase(); // does nothing! 080 * return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre> 081 * 082 * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to 083 * {@code x}, but the reverse does not necessarily hold. 084 * 085 * <p> 086 * <table> 087 * <tr> 088 * <th>Encoding 089 * <th>Alphabet 090 * <th>{@code char:byte} ratio 091 * <th>Default padding 092 * <th>Comments 093 * <tr> 094 * <td>{@link #base16()} 095 * <td>0-9 A-F 096 * <td>2.00 097 * <td>N/A 098 * <td>Traditional hexadecimal. Defaults to upper case. 099 * <tr> 100 * <td>{@link #base32()} 101 * <td>A-Z 2-7 102 * <td>1.60 103 * <td>= 104 * <td>Human-readable; no possibility of mixing up 0/O or 1/I. Defaults to upper case. 105 * <tr> 106 * <td>{@link #base32Hex()} 107 * <td>0-9 A-V 108 * <td>1.60 109 * <td>= 110 * <td>"Numerical" base 32; extended from the traditional hex alphabet. Defaults to upper case. 111 * <tr> 112 * <td>{@link #base64()} 113 * <td>A-Z a-z 0-9 + / 114 * <td>1.33 115 * <td>= 116 * <td> 117 * <tr> 118 * <td>{@link #base64Url()} 119 * <td>A-Z a-z 0-9 - _ 120 * <td>1.33 121 * <td>= 122 * <td>Safe to use as filenames, or to pass in URLs without escaping 123 * </table> 124 * 125 * <p> 126 * All instances of this class are immutable, so they may be stored safely as static constants. 127 * 128 * @author Louis Wasserman 129 * @since 14.0 130 */ 131@Beta 132@GwtCompatible(emulated = true) 133public abstract class BaseEncoding { 134 // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int]) 135 136 BaseEncoding() {} 137 138 /** 139 * Exception indicating invalid base-encoded input encountered while decoding. 140 * 141 * @author Louis Wasserman 142 * @since 15.0 143 */ 144 public static final class DecodingException extends IOException { 145 DecodingException(String message) { 146 super(message); 147 } 148 149 DecodingException(Throwable cause) { 150 super(cause); 151 } 152 } 153 154 /** 155 * Encodes the specified byte array, and returns the encoded {@code String}. 156 */ 157 public String encode(byte[] bytes) { 158 return encode(checkNotNull(bytes), 0, bytes.length); 159 } 160 161 /** 162 * Encodes the specified range of the specified byte array, and returns the encoded 163 * {@code String}. 164 */ 165 public final String encode(byte[] bytes, int off, int len) { 166 checkNotNull(bytes); 167 checkPositionIndexes(off, off + len, bytes.length); 168 CharOutput result = stringBuilderOutput(maxEncodedSize(len)); 169 ByteOutput byteOutput = encodingStream(result); 170 try { 171 for (int i = 0; i < len; i++) { 172 byteOutput.write(bytes[off + i]); 173 } 174 byteOutput.close(); 175 } catch (IOException impossible) { 176 throw new AssertionError("impossible"); 177 } 178 return result.toString(); 179 } 180 181 /** 182 * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified 183 * {@code Writer}. When the returned {@code OutputStream} is closed, so is the backing 184 * {@code Writer}. 185 */ 186 @GwtIncompatible("Writer,OutputStream") 187 public final OutputStream encodingStream(Writer writer) { 188 return asOutputStream(encodingStream(asCharOutput(writer))); 189 } 190 191 /** 192 * Returns an {@code OutputSupplier} that supplies streams that encode bytes using this encoding 193 * into writers from the specified {@code OutputSupplier}. 194 * 195 * @deprecated Use {@link #encodingSink(CharSink)} instead. This method is scheduled to be 196 * removed in Guava 16.0. 197 */ 198 @Deprecated 199 @GwtIncompatible("Writer,OutputStream") 200 public final OutputSupplier<OutputStream> encodingStream( 201 final OutputSupplier<? extends Writer> writerSupplier) { 202 checkNotNull(writerSupplier); 203 return new OutputSupplier<OutputStream>() { 204 @Override 205 public OutputStream getOutput() throws IOException { 206 return encodingStream(writerSupplier.getOutput()); 207 } 208 }; 209 } 210 211 /** 212 * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}. 213 */ 214 @GwtIncompatible("ByteSink,CharSink") 215 public final ByteSink encodingSink(final CharSink encodedSink) { 216 checkNotNull(encodedSink); 217 return new ByteSink() { 218 @Override 219 public OutputStream openStream() throws IOException { 220 return encodingStream(encodedSink.openStream()); 221 } 222 }; 223 } 224 225 // TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher) 226 227 private static byte[] extract(byte[] result, int length) { 228 if (length == result.length) { 229 return result; 230 } else { 231 byte[] trunc = new byte[length]; 232 System.arraycopy(result, 0, trunc, 0, length); 233 return trunc; 234 } 235 } 236 237 /** 238 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. 239 * This is the inverse operation to {@link #encode(byte[])}. 240 * 241 * @throws IllegalArgumentException if the input is not a valid encoded string according to this 242 * encoding. 243 */ 244 public final byte[] decode(CharSequence chars) { 245 try { 246 return decodeChecked(chars); 247 } catch (DecodingException badInput) { 248 throw new IllegalArgumentException(badInput); 249 } 250 } 251 252 /** 253 * Decodes the specified character sequence, and returns the resulting {@code byte[]}. 254 * This is the inverse operation to {@link #encode(byte[])}. 255 * 256 * @throws DecodingException if the input is not a valid encoded string according to this 257 * encoding. 258 */ 259 final byte[] decodeChecked(CharSequence chars) throws DecodingException { 260 chars = padding().trimTrailingFrom(chars); 261 ByteInput decodedInput = decodingStream(asCharInput(chars)); 262 byte[] tmp = new byte[maxDecodedSize(chars.length())]; 263 int index = 0; 264 try { 265 for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) { 266 tmp[index++] = (byte) i; 267 } 268 } catch (DecodingException badInput) { 269 throw badInput; 270 } catch (IOException impossible) { 271 throw new AssertionError(impossible); 272 } 273 return extract(tmp, index); 274 } 275 276 /** 277 * Returns an {@code InputStream} that decodes base-encoded input from the specified 278 * {@code Reader}. The returned stream throws a {@link DecodingException} upon decoding-specific 279 * errors. 280 */ 281 @GwtIncompatible("Reader,InputStream") 282 public final InputStream decodingStream(Reader reader) { 283 return asInputStream(decodingStream(asCharInput(reader))); 284 } 285 286 /** 287 * Returns an {@code InputSupplier} that supplies input streams that decode base-encoded input 288 * from readers from the specified supplier. 289 * 290 * @deprecated Use {@link #decodingSource(CharSource)} instead. This method is scheduled to be 291 * removed in Guava 16.0. 292 */ 293 @Deprecated 294 @GwtIncompatible("Reader,InputStream") 295 public final InputSupplier<InputStream> decodingStream( 296 final InputSupplier<? extends Reader> readerSupplier) { 297 checkNotNull(readerSupplier); 298 return new InputSupplier<InputStream>() { 299 @Override 300 public InputStream getInput() throws IOException { 301 return decodingStream(readerSupplier.getInput()); 302 } 303 }; 304 } 305 306 /** 307 * Returns a {@code ByteSource} that reads base-encoded bytes from the specified 308 * {@code CharSource}. 309 */ 310 @GwtIncompatible("ByteSource,CharSource") 311 public final ByteSource decodingSource(final CharSource encodedSource) { 312 checkNotNull(encodedSource); 313 return new ByteSource() { 314 @Override 315 public InputStream openStream() throws IOException { 316 return decodingStream(encodedSource.openStream()); 317 } 318 }; 319 } 320 321 // Implementations for encoding/decoding 322 323 abstract int maxEncodedSize(int bytes); 324 325 abstract ByteOutput encodingStream(CharOutput charOutput); 326 327 abstract int maxDecodedSize(int chars); 328 329 abstract ByteInput decodingStream(CharInput charInput); 330 331 abstract CharMatcher padding(); 332 333 // Modified encoding generators 334 335 /** 336 * Returns an encoding that behaves equivalently to this encoding, but omits any padding 337 * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648 338 * section 3.2</a>, Padding of Encoded Data. 339 */ 340 @CheckReturnValue 341 public abstract BaseEncoding omitPadding(); 342 343 /** 344 * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character 345 * for padding. 346 * 347 * @throws IllegalArgumentException if this padding character is already used in the alphabet or a 348 * separator 349 */ 350 @CheckReturnValue 351 public abstract BaseEncoding withPadChar(char padChar); 352 353 /** 354 * Returns an encoding that behaves equivalently to this encoding, but adds a separator string 355 * after every {@code n} characters. Any occurrences of any characters that occur in the separator 356 * are skipped over in decoding. 357 * 358 * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator 359 * string, or if {@code n <= 0} 360 * @throws UnsupportedOperationException if this encoding already uses a separator 361 */ 362 @CheckReturnValue 363 public abstract BaseEncoding withSeparator(String separator, int n); 364 365 /** 366 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 367 * uppercase letters. Padding and separator characters remain in their original case. 368 * 369 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 370 * lower-case characters 371 */ 372 @CheckReturnValue 373 public abstract BaseEncoding upperCase(); 374 375 /** 376 * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with 377 * lowercase letters. Padding and separator characters remain in their original case. 378 * 379 * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and 380 * lower-case characters 381 */ 382 @CheckReturnValue 383 public abstract BaseEncoding lowerCase(); 384 385 private static final BaseEncoding BASE64 = new StandardBaseEncoding( 386 "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '='); 387 388 /** 389 * The "base64" base encoding specified by <a 390 * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding. 391 * (This is the same as the base 64 encoding from <a 392 * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.) 393 * 394 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 395 * omitted} or {@linkplain #withPadChar(char) replaced}. 396 * 397 * <p>No line feeds are added by default, as per <a 398 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 399 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 400 */ 401 public static BaseEncoding base64() { 402 return BASE64; 403 } 404 405 private static final BaseEncoding BASE64_URL = new StandardBaseEncoding( 406 "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '='); 407 408 /** 409 * The "base64url" encoding specified by <a 410 * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding 411 * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64." 412 * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a 413 * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.) 414 * 415 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 416 * omitted} or {@linkplain #withPadChar(char) replaced}. 417 * 418 * <p>No line feeds are added by default, as per <a 419 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 420 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 421 */ 422 public static BaseEncoding base64Url() { 423 return BASE64_URL; 424 } 425 426 private static final BaseEncoding BASE32 = 427 new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '='); 428 429 /** 430 * The "base32" encoding specified by <a 431 * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding. 432 * (This is the same as the base 32 encoding from <a 433 * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.) 434 * 435 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 436 * omitted} or {@linkplain #withPadChar(char) replaced}. 437 * 438 * <p>No line feeds are added by default, as per <a 439 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 440 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 441 */ 442 public static BaseEncoding base32() { 443 return BASE32; 444 } 445 446 private static final BaseEncoding BASE32_HEX = 447 new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '='); 448 449 /** 450 * The "base32hex" encoding specified by <a 451 * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding 452 * with Extended Hex Alphabet. There is no corresponding encoding in RFC 3548. 453 * 454 * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding() 455 * omitted} or {@linkplain #withPadChar(char) replaced}. 456 * 457 * <p>No line feeds are added by default, as per <a 458 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 459 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 460 */ 461 public static BaseEncoding base32Hex() { 462 return BASE32_HEX; 463 } 464 465 private static final BaseEncoding BASE16 = 466 new StandardBaseEncoding("base16()", "0123456789ABCDEF", null); 467 468 /** 469 * The "base16" encoding specified by <a 470 * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding. 471 * (This is the same as the base 16 encoding from <a 472 * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as 473 * "hexadecimal" format. 474 * 475 * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and 476 * {@link #omitPadding()} have no effect. 477 * 478 * <p>No line feeds are added by default, as per <a 479 * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in 480 * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}. 481 */ 482 public static BaseEncoding base16() { 483 return BASE16; 484 } 485 486 private static final class Alphabet extends CharMatcher { 487 private final String name; 488 // this is meant to be immutable -- don't modify it! 489 private final char[] chars; 490 final int mask; 491 final int bitsPerChar; 492 final int charsPerChunk; 493 final int bytesPerChunk; 494 private final byte[] decodabet; 495 private final boolean[] validPadding; 496 497 Alphabet(String name, char[] chars) { 498 this.name = checkNotNull(name); 499 this.chars = checkNotNull(chars); 500 try { 501 this.bitsPerChar = log2(chars.length, UNNECESSARY); 502 } catch (ArithmeticException e) { 503 throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e); 504 } 505 506 /* 507 * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes 508 * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8. 509 */ 510 int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar)); 511 this.charsPerChunk = 8 / gcd; 512 this.bytesPerChunk = bitsPerChar / gcd; 513 514 this.mask = chars.length - 1; 515 516 byte[] decodabet = new byte[Ascii.MAX + 1]; 517 Arrays.fill(decodabet, (byte) -1); 518 for (int i = 0; i < chars.length; i++) { 519 char c = chars[i]; 520 checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c); 521 checkArgument(decodabet[c] == -1, "Duplicate character: %s", c); 522 decodabet[c] = (byte) i; 523 } 524 this.decodabet = decodabet; 525 526 boolean[] validPadding = new boolean[charsPerChunk]; 527 for (int i = 0; i < bytesPerChunk; i++) { 528 validPadding[divide(i * 8, bitsPerChar, CEILING)] = true; 529 } 530 this.validPadding = validPadding; 531 } 532 533 char encode(int bits) { 534 return chars[bits]; 535 } 536 537 boolean isValidPaddingStartPosition(int index) { 538 return validPadding[index % charsPerChunk]; 539 } 540 541 int decode(char ch) throws IOException { 542 if (ch > Ascii.MAX || decodabet[ch] == -1) { 543 throw new DecodingException("Unrecognized character: " + ch); 544 } 545 return decodabet[ch]; 546 } 547 548 private boolean hasLowerCase() { 549 for (char c : chars) { 550 if (Ascii.isLowerCase(c)) { 551 return true; 552 } 553 } 554 return false; 555 } 556 557 private boolean hasUpperCase() { 558 for (char c : chars) { 559 if (Ascii.isUpperCase(c)) { 560 return true; 561 } 562 } 563 return false; 564 } 565 566 Alphabet upperCase() { 567 if (!hasLowerCase()) { 568 return this; 569 } else { 570 checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet"); 571 char[] upperCased = new char[chars.length]; 572 for (int i = 0; i < chars.length; i++) { 573 upperCased[i] = Ascii.toUpperCase(chars[i]); 574 } 575 return new Alphabet(name + ".upperCase()", upperCased); 576 } 577 } 578 579 Alphabet lowerCase() { 580 if (!hasUpperCase()) { 581 return this; 582 } else { 583 checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet"); 584 char[] lowerCased = new char[chars.length]; 585 for (int i = 0; i < chars.length; i++) { 586 lowerCased[i] = Ascii.toLowerCase(chars[i]); 587 } 588 return new Alphabet(name + ".lowerCase()", lowerCased); 589 } 590 } 591 592 @Override 593 public boolean matches(char c) { 594 return CharMatcher.ASCII.matches(c) && decodabet[c] != -1; 595 } 596 597 @Override 598 public String toString() { 599 return name; 600 } 601 } 602 603 static final class StandardBaseEncoding extends BaseEncoding { 604 // TODO(user): provide a useful toString 605 private final Alphabet alphabet; 606 607 @Nullable 608 private final Character paddingChar; 609 610 StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) { 611 this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar); 612 } 613 614 StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) { 615 this.alphabet = checkNotNull(alphabet); 616 checkArgument(paddingChar == null || !alphabet.matches(paddingChar), 617 "Padding character %s was already in alphabet", paddingChar); 618 this.paddingChar = paddingChar; 619 } 620 621 @Override 622 CharMatcher padding() { 623 return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue()); 624 } 625 626 @Override 627 int maxEncodedSize(int bytes) { 628 return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING); 629 } 630 631 @Override 632 ByteOutput encodingStream(final CharOutput out) { 633 checkNotNull(out); 634 return new ByteOutput() { 635 int bitBuffer = 0; 636 int bitBufferLength = 0; 637 int writtenChars = 0; 638 639 @Override 640 public void write(byte b) throws IOException { 641 bitBuffer <<= 8; 642 bitBuffer |= b & 0xFF; 643 bitBufferLength += 8; 644 while (bitBufferLength >= alphabet.bitsPerChar) { 645 int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar)) 646 & alphabet.mask; 647 out.write(alphabet.encode(charIndex)); 648 writtenChars++; 649 bitBufferLength -= alphabet.bitsPerChar; 650 } 651 } 652 653 @Override 654 public void flush() throws IOException { 655 out.flush(); 656 } 657 658 @Override 659 public void close() throws IOException { 660 if (bitBufferLength > 0) { 661 int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength)) 662 & alphabet.mask; 663 out.write(alphabet.encode(charIndex)); 664 writtenChars++; 665 if (paddingChar != null) { 666 while (writtenChars % alphabet.charsPerChunk != 0) { 667 out.write(paddingChar.charValue()); 668 writtenChars++; 669 } 670 } 671 } 672 out.close(); 673 } 674 }; 675 } 676 677 @Override 678 int maxDecodedSize(int chars) { 679 return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L); 680 } 681 682 @Override 683 ByteInput decodingStream(final CharInput reader) { 684 checkNotNull(reader); 685 return new ByteInput() { 686 int bitBuffer = 0; 687 int bitBufferLength = 0; 688 int readChars = 0; 689 boolean hitPadding = false; 690 final CharMatcher paddingMatcher = padding(); 691 692 @Override 693 public int read() throws IOException { 694 while (true) { 695 int readChar = reader.read(); 696 if (readChar == -1) { 697 if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) { 698 throw new DecodingException("Invalid input length " + readChars); 699 } 700 return -1; 701 } 702 readChars++; 703 char ch = (char) readChar; 704 if (paddingMatcher.matches(ch)) { 705 if (!hitPadding 706 && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) { 707 throw new DecodingException("Padding cannot start at index " + readChars); 708 } 709 hitPadding = true; 710 } else if (hitPadding) { 711 throw new DecodingException( 712 "Expected padding character but found '" + ch + "' at index " + readChars); 713 } else { 714 bitBuffer <<= alphabet.bitsPerChar; 715 bitBuffer |= alphabet.decode(ch); 716 bitBufferLength += alphabet.bitsPerChar; 717 718 if (bitBufferLength >= 8) { 719 bitBufferLength -= 8; 720 return (bitBuffer >> bitBufferLength) & 0xFF; 721 } 722 } 723 } 724 } 725 726 @Override 727 public void close() throws IOException { 728 reader.close(); 729 } 730 }; 731 } 732 733 @Override 734 public BaseEncoding omitPadding() { 735 return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null); 736 } 737 738 @Override 739 public BaseEncoding withPadChar(char padChar) { 740 if (8 % alphabet.bitsPerChar == 0 || 741 (paddingChar != null && paddingChar.charValue() == padChar)) { 742 return this; 743 } else { 744 return new StandardBaseEncoding(alphabet, padChar); 745 } 746 } 747 748 @Override 749 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 750 checkNotNull(separator); 751 checkArgument(padding().or(alphabet).matchesNoneOf(separator), 752 "Separator cannot contain alphabet or padding characters"); 753 return new SeparatedBaseEncoding(this, separator, afterEveryChars); 754 } 755 756 private transient BaseEncoding upperCase; 757 private transient BaseEncoding lowerCase; 758 759 @Override 760 public BaseEncoding upperCase() { 761 BaseEncoding result = upperCase; 762 if (result == null) { 763 Alphabet upper = alphabet.upperCase(); 764 result = upperCase = 765 (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar); 766 } 767 return result; 768 } 769 770 @Override 771 public BaseEncoding lowerCase() { 772 BaseEncoding result = lowerCase; 773 if (result == null) { 774 Alphabet lower = alphabet.lowerCase(); 775 result = lowerCase = 776 (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar); 777 } 778 return result; 779 } 780 781 @Override 782 public String toString() { 783 StringBuilder builder = new StringBuilder("BaseEncoding."); 784 builder.append(alphabet.toString()); 785 if (8 % alphabet.bitsPerChar != 0) { 786 if (paddingChar == null) { 787 builder.append(".omitPadding()"); 788 } else { 789 builder.append(".withPadChar(").append(paddingChar).append(')'); 790 } 791 } 792 return builder.toString(); 793 } 794 } 795 796 static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) { 797 checkNotNull(delegate); 798 checkNotNull(toIgnore); 799 return new CharInput() { 800 @Override 801 public int read() throws IOException { 802 int readChar; 803 do { 804 readChar = delegate.read(); 805 } while (readChar != -1 && toIgnore.matches((char) readChar)); 806 return readChar; 807 } 808 809 @Override 810 public void close() throws IOException { 811 delegate.close(); 812 } 813 }; 814 } 815 816 static CharOutput separatingOutput( 817 final CharOutput delegate, final String separator, final int afterEveryChars) { 818 checkNotNull(delegate); 819 checkNotNull(separator); 820 checkArgument(afterEveryChars > 0); 821 return new CharOutput() { 822 int charsUntilSeparator = afterEveryChars; 823 824 @Override 825 public void write(char c) throws IOException { 826 if (charsUntilSeparator == 0) { 827 for (int i = 0; i < separator.length(); i++) { 828 delegate.write(separator.charAt(i)); 829 } 830 charsUntilSeparator = afterEveryChars; 831 } 832 delegate.write(c); 833 charsUntilSeparator--; 834 } 835 836 @Override 837 public void flush() throws IOException { 838 delegate.flush(); 839 } 840 841 @Override 842 public void close() throws IOException { 843 delegate.close(); 844 } 845 }; 846 } 847 848 static final class SeparatedBaseEncoding extends BaseEncoding { 849 private final BaseEncoding delegate; 850 private final String separator; 851 private final int afterEveryChars; 852 private final CharMatcher separatorChars; 853 854 SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) { 855 this.delegate = checkNotNull(delegate); 856 this.separator = checkNotNull(separator); 857 this.afterEveryChars = afterEveryChars; 858 checkArgument( 859 afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars); 860 this.separatorChars = CharMatcher.anyOf(separator).precomputed(); 861 } 862 863 @Override 864 CharMatcher padding() { 865 return delegate.padding(); 866 } 867 868 @Override 869 int maxEncodedSize(int bytes) { 870 int unseparatedSize = delegate.maxEncodedSize(bytes); 871 return unseparatedSize + separator.length() 872 * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR); 873 } 874 875 @Override 876 ByteOutput encodingStream(final CharOutput output) { 877 return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars)); 878 } 879 880 @Override 881 int maxDecodedSize(int chars) { 882 return delegate.maxDecodedSize(chars); 883 } 884 885 @Override 886 ByteInput decodingStream(final CharInput input) { 887 return delegate.decodingStream(ignoringInput(input, separatorChars)); 888 } 889 890 @Override 891 public BaseEncoding omitPadding() { 892 return delegate.omitPadding().withSeparator(separator, afterEveryChars); 893 } 894 895 @Override 896 public BaseEncoding withPadChar(char padChar) { 897 return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars); 898 } 899 900 @Override 901 public BaseEncoding withSeparator(String separator, int afterEveryChars) { 902 throw new UnsupportedOperationException("Already have a separator"); 903 } 904 905 @Override 906 public BaseEncoding upperCase() { 907 return delegate.upperCase().withSeparator(separator, afterEveryChars); 908 } 909 910 @Override 911 public BaseEncoding lowerCase() { 912 return delegate.lowerCase().withSeparator(separator, afterEveryChars); 913 } 914 915 @Override 916 public String toString() { 917 return delegate.toString() + 918 ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")"; 919 } 920 } 921}