001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.io.ByteStreams.createBuffer; 020import static com.google.common.io.ByteStreams.skipUpTo; 021 022import com.google.common.annotations.Beta; 023import com.google.common.annotations.GwtIncompatible; 024import com.google.common.base.Ascii; 025import com.google.common.base.Optional; 026import com.google.common.collect.ImmutableList; 027import com.google.common.hash.Funnels; 028import com.google.common.hash.HashCode; 029import com.google.common.hash.HashFunction; 030import com.google.common.hash.Hasher; 031import com.google.errorprone.annotations.CanIgnoreReturnValue; 032import java.io.BufferedInputStream; 033import java.io.ByteArrayInputStream; 034import java.io.IOException; 035import java.io.InputStream; 036import java.io.InputStreamReader; 037import java.io.OutputStream; 038import java.io.Reader; 039import java.nio.charset.Charset; 040import java.util.Arrays; 041import java.util.Collection; 042import java.util.Iterator; 043 044/** 045 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a {@code ByteSource} 046 * is not an open, stateful stream for input that can be read and closed. Instead, it is an 047 * immutable <i>supplier</i> of {@code InputStream} instances. 048 * 049 * <p>{@code ByteSource} provides two kinds of methods: 050 * 051 * <ul> 052 * <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent 053 * instance each time they are called. The caller is responsible for ensuring that the 054 * returned stream is closed. 055 * <li><b>Convenience methods:</b> These are implementations of common operations that are 056 * typically implemented by opening a stream using one of the methods in the first category, 057 * doing something and finally closing the stream that was opened. 058 * </ul> 059 * 060 * @since 14.0 061 * @author Colin Decker 062 */ 063@GwtIncompatible 064public abstract class ByteSource { 065 066 /** Constructor for use by subclasses. */ 067 protected ByteSource() {} 068 069 /** 070 * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source 071 * as characters using the given {@link Charset}. 072 * 073 * <p>If {@link CharSource#asByteSource} is called on the returned source with the same charset, 074 * the default implementation of this method will ensure that the original {@code ByteSource} is 075 * returned, rather than round-trip encoding. Subclasses that override this method should behave 076 * the same way. 077 */ 078 public CharSource asCharSource(Charset charset) { 079 return new AsCharSource(charset); 080 } 081 082 /** 083 * Opens a new {@link InputStream} for reading from this source. This method returns a new, 084 * independent stream each time it is called. 085 * 086 * <p>The caller is responsible for ensuring that the returned stream is closed. 087 * 088 * @throws IOException if an I/O error occurs while opening the stream 089 */ 090 public abstract InputStream openStream() throws IOException; 091 092 /** 093 * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is 094 * not required to be a {@link BufferedInputStream} in order to allow implementations to simply 095 * delegate to {@link #openStream()} when the stream returned by that method does not benefit from 096 * additional buffering (for example, a {@code ByteArrayInputStream}). This method returns a new, 097 * independent stream each time it is called. 098 * 099 * <p>The caller is responsible for ensuring that the returned stream is closed. 100 * 101 * @throws IOException if an I/O error occurs while opening the stream 102 * @since 15.0 (in 14.0 with return type {@link BufferedInputStream}) 103 */ 104 public InputStream openBufferedStream() throws IOException { 105 InputStream in = openStream(); 106 return (in instanceof BufferedInputStream) 107 ? (BufferedInputStream) in 108 : new BufferedInputStream(in); 109 } 110 111 /** 112 * Returns a view of a slice of this byte source that is at most {@code length} bytes long 113 * starting at the given {@code offset}. If {@code offset} is greater than the size of this 114 * source, the returned source will be empty. If {@code offset + length} is greater than the size 115 * of this source, the returned source will contain the slice starting at {@code offset} and 116 * ending at the end of this source. 117 * 118 * @throws IllegalArgumentException if {@code offset} or {@code length} is negative 119 */ 120 public ByteSource slice(long offset, long length) { 121 return new SlicedByteSource(offset, length); 122 } 123 124 /** 125 * Returns whether the source has zero bytes. The default implementation first checks {@link 126 * #sizeIfKnown}, returning true if it's known to be zero and false if it's known to be non-zero. 127 * If the size is not known, it falls back to opening a stream and checking for EOF. 128 * 129 * <p>Note that, in cases where {@code sizeIfKnown} returns zero, it is <i>possible</i> that bytes 130 * are actually available for reading. (For example, some special files may return a size of 0 131 * despite actually having content when read.) This means that a source may return {@code true} 132 * from {@code isEmpty()} despite having readable content. 133 * 134 * @throws IOException if an I/O error occurs 135 * @since 15.0 136 */ 137 public boolean isEmpty() throws IOException { 138 Optional<Long> sizeIfKnown = sizeIfKnown(); 139 if (sizeIfKnown.isPresent()) { 140 return sizeIfKnown.get() == 0L; 141 } 142 Closer closer = Closer.create(); 143 try { 144 InputStream in = closer.register(openStream()); 145 return in.read() == -1; 146 } catch (Throwable e) { 147 throw closer.rethrow(e); 148 } finally { 149 closer.close(); 150 } 151 } 152 153 /** 154 * Returns the size of this source in bytes, if the size can be easily determined without actually 155 * opening the data stream. 156 * 157 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a file, 158 * may return a non-absent value. Note that in such cases, it is <i>possible</i> that this method 159 * will return a different number of bytes than would be returned by reading all of the bytes (for 160 * example, some special files may return a size of 0 despite actually having content when read). 161 * 162 * <p>Additionally, for mutable sources such as files, a subsequent read may return a different 163 * number of bytes if the contents are changed. 164 * 165 * @since 19.0 166 */ 167 @Beta 168 public Optional<Long> sizeIfKnown() { 169 return Optional.absent(); 170 } 171 172 /** 173 * Returns the size of this source in bytes, even if doing so requires opening and traversing an 174 * entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}. 175 * 176 * <p>The default implementation calls {@link #sizeIfKnown} and returns the value if present. If 177 * absent, it will fall back to a heavyweight operation that will open a stream, read (or {@link 178 * InputStream#skip(long) skip}, if possible) to the end of the stream and return the total number 179 * of bytes that were read. 180 * 181 * <p>Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient 182 * implementation, it is <i>possible</i> that this method will return a different number of bytes 183 * than would be returned by reading all of the bytes (for example, some special files may return 184 * a size of 0 despite actually having content when read). 185 * 186 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 187 * number of bytes if the contents are changed. 188 * 189 * @throws IOException if an I/O error occurs while reading the size of this source 190 */ 191 public long size() throws IOException { 192 Optional<Long> sizeIfKnown = sizeIfKnown(); 193 if (sizeIfKnown.isPresent()) { 194 return sizeIfKnown.get(); 195 } 196 197 Closer closer = Closer.create(); 198 try { 199 InputStream in = closer.register(openStream()); 200 return countBySkipping(in); 201 } catch (IOException e) { 202 // skip may not be supported... at any rate, try reading 203 } finally { 204 closer.close(); 205 } 206 207 closer = Closer.create(); 208 try { 209 InputStream in = closer.register(openStream()); 210 return ByteStreams.exhaust(in); 211 } catch (Throwable e) { 212 throw closer.rethrow(e); 213 } finally { 214 closer.close(); 215 } 216 } 217 218 /** 219 * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the 220 * first call to skip threw, in which case skip may just not be supported. 221 */ 222 private long countBySkipping(InputStream in) throws IOException { 223 long count = 0; 224 long skipped; 225 while ((skipped = skipUpTo(in, Integer.MAX_VALUE)) > 0) { 226 count += skipped; 227 } 228 return count; 229 } 230 231 /** 232 * Copies the contents of this byte source to the given {@code OutputStream}. Does not close 233 * {@code output}. 234 * 235 * @return the number of bytes copied 236 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 237 * output} 238 */ 239 @CanIgnoreReturnValue 240 public long copyTo(OutputStream output) throws IOException { 241 checkNotNull(output); 242 243 Closer closer = Closer.create(); 244 try { 245 InputStream in = closer.register(openStream()); 246 return ByteStreams.copy(in, output); 247 } catch (Throwable e) { 248 throw closer.rethrow(e); 249 } finally { 250 closer.close(); 251 } 252 } 253 254 /** 255 * Copies the contents of this byte source to the given {@code ByteSink}. 256 * 257 * @return the number of bytes copied 258 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 259 * sink} 260 */ 261 @CanIgnoreReturnValue 262 public long copyTo(ByteSink sink) throws IOException { 263 checkNotNull(sink); 264 265 Closer closer = Closer.create(); 266 try { 267 InputStream in = closer.register(openStream()); 268 OutputStream out = closer.register(sink.openStream()); 269 return ByteStreams.copy(in, out); 270 } catch (Throwable e) { 271 throw closer.rethrow(e); 272 } finally { 273 closer.close(); 274 } 275 } 276 277 /** 278 * Reads the full contents of this byte source as a byte array. 279 * 280 * @throws IOException if an I/O error occurs while reading from this source 281 */ 282 public byte[] read() throws IOException { 283 Closer closer = Closer.create(); 284 try { 285 InputStream in = closer.register(openStream()); 286 Optional<Long> size = sizeIfKnown(); 287 return size.isPresent() 288 ? ByteStreams.toByteArray(in, size.get()) 289 : ByteStreams.toByteArray(in); 290 } catch (Throwable e) { 291 throw closer.rethrow(e); 292 } finally { 293 closer.close(); 294 } 295 } 296 297 /** 298 * Reads the contents of this byte source using the given {@code processor} to process bytes as 299 * they are read. Stops when all bytes have been read or the consumer returns {@code false}. 300 * Returns the result produced by the processor. 301 * 302 * @throws IOException if an I/O error occurs while reading from this source or if {@code 303 * processor} throws an {@code IOException} 304 * @since 16.0 305 */ 306 @Beta 307 @CanIgnoreReturnValue // some processors won't return a useful result 308 public <T> T read(ByteProcessor<T> processor) throws IOException { 309 checkNotNull(processor); 310 311 Closer closer = Closer.create(); 312 try { 313 InputStream in = closer.register(openStream()); 314 return ByteStreams.readBytes(in, processor); 315 } catch (Throwable e) { 316 throw closer.rethrow(e); 317 } finally { 318 closer.close(); 319 } 320 } 321 322 /** 323 * Hashes the contents of this byte source using the given hash function. 324 * 325 * @throws IOException if an I/O error occurs while reading from this source 326 */ 327 public HashCode hash(HashFunction hashFunction) throws IOException { 328 Hasher hasher = hashFunction.newHasher(); 329 copyTo(Funnels.asOutputStream(hasher)); 330 return hasher.hash(); 331 } 332 333 /** 334 * Checks that the contents of this byte source are equal to the contents of the given byte 335 * source. 336 * 337 * @throws IOException if an I/O error occurs while reading from this source or {@code other} 338 */ 339 public boolean contentEquals(ByteSource other) throws IOException { 340 checkNotNull(other); 341 342 byte[] buf1 = createBuffer(); 343 byte[] buf2 = createBuffer(); 344 345 Closer closer = Closer.create(); 346 try { 347 InputStream in1 = closer.register(openStream()); 348 InputStream in2 = closer.register(other.openStream()); 349 while (true) { 350 int read1 = ByteStreams.read(in1, buf1, 0, buf1.length); 351 int read2 = ByteStreams.read(in2, buf2, 0, buf2.length); 352 if (read1 != read2 || !Arrays.equals(buf1, buf2)) { 353 return false; 354 } else if (read1 != buf1.length) { 355 return true; 356 } 357 } 358 } catch (Throwable e) { 359 throw closer.rethrow(e); 360 } finally { 361 closer.close(); 362 } 363 } 364 365 /** 366 * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from 367 * the source will contain the concatenated data from the streams of the underlying sources. 368 * 369 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 370 * close the open underlying stream. 371 * 372 * @param sources the sources to concatenate 373 * @return a {@code ByteSource} containing the concatenated data 374 * @since 15.0 375 */ 376 public static ByteSource concat(Iterable<? extends ByteSource> sources) { 377 return new ConcatenatedByteSource(sources); 378 } 379 380 /** 381 * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from 382 * the source will contain the concatenated data from the streams of the underlying sources. 383 * 384 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 385 * close the open underlying stream. 386 * 387 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 388 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 389 * eagerly fetches data for each source when iterated (rather than producing sources that only 390 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 391 * possible. 392 * 393 * @param sources the sources to concatenate 394 * @return a {@code ByteSource} containing the concatenated data 395 * @throws NullPointerException if any of {@code sources} is {@code null} 396 * @since 15.0 397 */ 398 public static ByteSource concat(Iterator<? extends ByteSource> sources) { 399 return concat(ImmutableList.copyOf(sources)); 400 } 401 402 /** 403 * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from 404 * the source will contain the concatenated data from the streams of the underlying sources. 405 * 406 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 407 * close the open underlying stream. 408 * 409 * @param sources the sources to concatenate 410 * @return a {@code ByteSource} containing the concatenated data 411 * @throws NullPointerException if any of {@code sources} is {@code null} 412 * @since 15.0 413 */ 414 public static ByteSource concat(ByteSource... sources) { 415 return concat(ImmutableList.copyOf(sources)); 416 } 417 418 /** 419 * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range 420 * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}. 421 * 422 * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}). 423 */ 424 public static ByteSource wrap(byte[] b) { 425 return new ByteArrayByteSource(b); 426 } 427 428 /** 429 * Returns an immutable {@link ByteSource} that contains no bytes. 430 * 431 * @since 15.0 432 */ 433 public static ByteSource empty() { 434 return EmptyByteSource.INSTANCE; 435 } 436 437 /** 438 * A char source that reads bytes from this source and decodes them as characters using a charset. 439 */ 440 class AsCharSource extends CharSource { 441 442 final Charset charset; 443 444 AsCharSource(Charset charset) { 445 this.charset = checkNotNull(charset); 446 } 447 448 @Override 449 public ByteSource asByteSource(Charset charset) { 450 if (charset.equals(this.charset)) { 451 return ByteSource.this; 452 } 453 return super.asByteSource(charset); 454 } 455 456 @Override 457 public Reader openStream() throws IOException { 458 return new InputStreamReader(ByteSource.this.openStream(), charset); 459 } 460 461 @Override 462 public String read() throws IOException { 463 // Reading all the data as a byte array is more efficient than the default read() 464 // implementation because: 465 // 1. the string constructor can avoid an extra copy most of the time by correctly sizing the 466 // internal char array (hard to avoid using StringBuilder) 467 // 2. we avoid extra copies into temporary buffers altogether 468 // The downside is that this will cause us to store the file bytes in memory twice for a short 469 // amount of time. 470 return new String(ByteSource.this.read(), charset); 471 } 472 473 @Override 474 public String toString() { 475 return ByteSource.this.toString() + ".asCharSource(" + charset + ")"; 476 } 477 } 478 479 /** A view of a subsection of the containing byte source. */ 480 private final class SlicedByteSource extends ByteSource { 481 482 final long offset; 483 final long length; 484 485 SlicedByteSource(long offset, long length) { 486 checkArgument(offset >= 0, "offset (%s) may not be negative", offset); 487 checkArgument(length >= 0, "length (%s) may not be negative", length); 488 this.offset = offset; 489 this.length = length; 490 } 491 492 @Override 493 public InputStream openStream() throws IOException { 494 return sliceStream(ByteSource.this.openStream()); 495 } 496 497 @Override 498 public InputStream openBufferedStream() throws IOException { 499 return sliceStream(ByteSource.this.openBufferedStream()); 500 } 501 502 private InputStream sliceStream(InputStream in) throws IOException { 503 if (offset > 0) { 504 long skipped; 505 try { 506 skipped = ByteStreams.skipUpTo(in, offset); 507 } catch (Throwable e) { 508 Closer closer = Closer.create(); 509 closer.register(in); 510 try { 511 throw closer.rethrow(e); 512 } finally { 513 closer.close(); 514 } 515 } 516 517 if (skipped < offset) { 518 // offset was beyond EOF 519 in.close(); 520 return new ByteArrayInputStream(new byte[0]); 521 } 522 } 523 return ByteStreams.limit(in, length); 524 } 525 526 @Override 527 public ByteSource slice(long offset, long length) { 528 checkArgument(offset >= 0, "offset (%s) may not be negative", offset); 529 checkArgument(length >= 0, "length (%s) may not be negative", length); 530 long maxLength = this.length - offset; 531 return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength)); 532 } 533 534 @Override 535 public boolean isEmpty() throws IOException { 536 return length == 0 || super.isEmpty(); 537 } 538 539 @Override 540 public Optional<Long> sizeIfKnown() { 541 Optional<Long> optionalUnslicedSize = ByteSource.this.sizeIfKnown(); 542 if (optionalUnslicedSize.isPresent()) { 543 long unslicedSize = optionalUnslicedSize.get(); 544 long off = Math.min(offset, unslicedSize); 545 return Optional.of(Math.min(length, unslicedSize - off)); 546 } 547 return Optional.absent(); 548 } 549 550 @Override 551 public String toString() { 552 return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")"; 553 } 554 } 555 556 private static class ByteArrayByteSource extends ByteSource { 557 558 final byte[] bytes; 559 final int offset; 560 final int length; 561 562 ByteArrayByteSource(byte[] bytes) { 563 this(bytes, 0, bytes.length); 564 } 565 566 // NOTE: Preconditions are enforced by slice, the only non-trivial caller. 567 ByteArrayByteSource(byte[] bytes, int offset, int length) { 568 this.bytes = bytes; 569 this.offset = offset; 570 this.length = length; 571 } 572 573 @Override 574 public InputStream openStream() { 575 return new ByteArrayInputStream(bytes, offset, length); 576 } 577 578 @Override 579 public InputStream openBufferedStream() throws IOException { 580 return openStream(); 581 } 582 583 @Override 584 public boolean isEmpty() { 585 return length == 0; 586 } 587 588 @Override 589 public long size() { 590 return length; 591 } 592 593 @Override 594 public Optional<Long> sizeIfKnown() { 595 return Optional.of((long) length); 596 } 597 598 @Override 599 public byte[] read() { 600 return Arrays.copyOfRange(bytes, offset, offset + length); 601 } 602 603 @SuppressWarnings("CheckReturnValue") // it doesn't matter what processBytes returns here 604 @Override 605 public <T> T read(ByteProcessor<T> processor) throws IOException { 606 processor.processBytes(bytes, offset, length); 607 return processor.getResult(); 608 } 609 610 @Override 611 public long copyTo(OutputStream output) throws IOException { 612 output.write(bytes, offset, length); 613 return length; 614 } 615 616 @Override 617 public HashCode hash(HashFunction hashFunction) throws IOException { 618 return hashFunction.hashBytes(bytes, offset, length); 619 } 620 621 @Override 622 public ByteSource slice(long offset, long length) { 623 checkArgument(offset >= 0, "offset (%s) may not be negative", offset); 624 checkArgument(length >= 0, "length (%s) may not be negative", length); 625 626 offset = Math.min(offset, this.length); 627 length = Math.min(length, this.length - offset); 628 int newOffset = this.offset + (int) offset; 629 return new ByteArrayByteSource(bytes, newOffset, (int) length); 630 } 631 632 @Override 633 public String toString() { 634 return "ByteSource.wrap(" 635 + Ascii.truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...") 636 + ")"; 637 } 638 } 639 640 private static final class EmptyByteSource extends ByteArrayByteSource { 641 642 static final EmptyByteSource INSTANCE = new EmptyByteSource(); 643 644 EmptyByteSource() { 645 super(new byte[0]); 646 } 647 648 @Override 649 public CharSource asCharSource(Charset charset) { 650 checkNotNull(charset); 651 return CharSource.empty(); 652 } 653 654 @Override 655 public byte[] read() { 656 return bytes; // length is 0, no need to clone 657 } 658 659 @Override 660 public String toString() { 661 return "ByteSource.empty()"; 662 } 663 } 664 665 private static final class ConcatenatedByteSource extends ByteSource { 666 667 final Iterable<? extends ByteSource> sources; 668 669 ConcatenatedByteSource(Iterable<? extends ByteSource> sources) { 670 this.sources = checkNotNull(sources); 671 } 672 673 @Override 674 public InputStream openStream() throws IOException { 675 return new MultiInputStream(sources.iterator()); 676 } 677 678 @Override 679 public boolean isEmpty() throws IOException { 680 for (ByteSource source : sources) { 681 if (!source.isEmpty()) { 682 return false; 683 } 684 } 685 return true; 686 } 687 688 @Override 689 public Optional<Long> sizeIfKnown() { 690 if (!(sources instanceof Collection)) { 691 // Infinite Iterables can cause problems here. Of course, it's true that most of the other 692 // methods on this class also have potential problems with infinite Iterables. But unlike 693 // those, this method can cause issues even if the user is dealing with a (finite) slice() 694 // of this source, since the slice's sizeIfKnown() method needs to know the size of the 695 // underlying source to know what its size actually is. 696 return Optional.absent(); 697 } 698 long result = 0L; 699 for (ByteSource source : sources) { 700 Optional<Long> sizeIfKnown = source.sizeIfKnown(); 701 if (!sizeIfKnown.isPresent()) { 702 return Optional.absent(); 703 } 704 result += sizeIfKnown.get(); 705 if (result < 0) { 706 // Overflow (or one or more sources that returned a negative size, but all bets are off in 707 // that case) 708 // Can't represent anything higher, and realistically there probably isn't anything that 709 // can actually be done anyway with the supposed 8+ exbibytes of data the source is 710 // claiming to have if we get here, so just stop. 711 return Optional.of(Long.MAX_VALUE); 712 } 713 } 714 return Optional.of(result); 715 } 716 717 @Override 718 public long size() throws IOException { 719 long result = 0L; 720 for (ByteSource source : sources) { 721 result += source.size(); 722 if (result < 0) { 723 // Overflow (or one or more sources that returned a negative size, but all bets are off in 724 // that case) 725 // Can't represent anything higher, and realistically there probably isn't anything that 726 // can actually be done anyway with the supposed 8+ exbibytes of data the source is 727 // claiming to have if we get here, so just stop. 728 return Long.MAX_VALUE; 729 } 730 } 731 return result; 732 } 733 734 @Override 735 public String toString() { 736 return "ByteSource.concat(" + sources + ")"; 737 } 738 } 739}