/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.io.ByteStreams.createBuffer;
import static com.google.common.io.ByteStreams.skipUpTo;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.hash.Funnels;
import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Iterator;

/**
 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a {@code ByteSource}
 * is not an open, stateful stream for input that can be read and closed. Instead, it is an
 * immutable <i>supplier</i> of {@code InputStream} instances.
 *
 * <p>{@code ByteSource} provides two kinds of methods:
 *
 * <ul>
 *   <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
 *       instance each time they are called. The caller is responsible for ensuring that the
 *       returned stream is closed.
 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
 *       typically implemented by opening a stream using one of the methods in the first category,
 *       doing something and finally closing the stream that was opened.
 * </ul>
 *
 * @since 14.0
 * @author Colin Decker
 */
@GwtIncompatible
public abstract class ByteSource {

  /** Constructor for use by subclasses. */
  protected ByteSource() {}

  /**
   * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
   * as characters using the given {@link Charset}.
   *
   * <p>If {@link CharSource#asByteSource} is called on the returned source with the same charset,
   * the default implementation of this method will ensure that the original {@code ByteSource} is
   * returned, rather than round-trip encoding. Subclasses that override this method should behave
   * the same way.
   */
  public CharSource asCharSource(Charset charset) {
    return new AsCharSource(charset);
  }

  /**
   * Opens a new {@link InputStream} for reading from this source. This method returns a new,
   * independent stream each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned stream is closed.
   *
   * @throws IOException if an I/O error occurs while opening the stream
   */
  public abstract InputStream openStream() throws IOException;

  /**
   * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
   * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
   * delegate to {@link #openStream()} when the stream returned by that method does not benefit from
   * additional buffering (for example, a {@code ByteArrayInputStream}). This method returns a new,
   * independent stream each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned stream is closed.
   *
   * @throws IOException if an I/O error occurs while opening the stream
   * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
   */
  public InputStream openBufferedStream() throws IOException {
    InputStream in = openStream();
    // Avoid double-buffering a stream that is already buffered.
    return (in instanceof BufferedInputStream)
        ? (BufferedInputStream) in
        : new BufferedInputStream(in);
  }

  /**
   * Returns a view of a slice of this byte source that is at most {@code length} bytes long
   * starting at the given {@code offset}. If {@code offset} is greater than the size of this
   * source, the returned source will be empty. If {@code offset + length} is greater than the size
   * of this source, the returned source will contain the slice starting at {@code offset} and
   * ending at the end of this source.
   *
   * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
   */
  public ByteSource slice(long offset, long length) {
    return new SlicedByteSource(offset, length);
  }

  /**
   * Returns whether the source has zero bytes. The default implementation first checks {@link
   * #sizeIfKnown}, returning true if it's known to be zero and false if it's known to be non-zero.
   * If the size is not known, it falls back to opening a stream and checking for EOF.
   *
   * <p>Note that, in cases where {@code sizeIfKnown} returns zero, it is <i>possible</i> that bytes
   * are actually available for reading. (For example, some special files may return a size of 0
   * despite actually having content when read.) This means that a source may return {@code true}
   * from {@code isEmpty()} despite having readable content.
   *
   * @throws IOException if an I/O error occurs
   * @since 15.0
   */
  public boolean isEmpty() throws IOException {
    Optional<Long> sizeIfKnown = sizeIfKnown();
    if (sizeIfKnown.isPresent()) {
      return sizeIfKnown.get() == 0L;
    }
    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return in.read() == -1;
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Returns the size of this source in bytes, if the size can be easily determined without actually
   * opening the data stream.
   *
   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a file,
   * may return a non-absent value. Note that in such cases, it is <i>possible</i> that this method
   * will return a different number of bytes than would be returned by reading all of the bytes (for
   * example, some special files may return a size of 0 despite actually having content when read).
   *
   * <p>Additionally, for mutable sources such as files, a subsequent read may return a different
   * number of bytes if the contents are changed.
   *
   * @since 19.0
   */
  @Beta
  public Optional<Long> sizeIfKnown() {
    return Optional.absent();
  }

  /**
   * Returns the size of this source in bytes, even if doing so requires opening and traversing an
   * entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}.
   *
   * <p>The default implementation calls {@link #sizeIfKnown} and returns the value if present. If
   * absent, it will fall back to a heavyweight operation that will open a stream, read (or {@link
   * InputStream#skip(long) skip}, if possible) to the end of the stream and return the total number
   * of bytes that were read.
   *
   * <p>Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient
   * implementation, it is <i>possible</i> that this method will return a different number of bytes
   * than would be returned by reading all of the bytes (for example, some special files may return
   * a size of 0 despite actually having content when read).
   *
   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
   * number of bytes if the contents are changed.
   *
   * @throws IOException if an I/O error occurs while reading the size of this source
   */
  public long size() throws IOException {
    Optional<Long> sizeIfKnown = sizeIfKnown();
    if (sizeIfKnown.isPresent()) {
      return sizeIfKnown.get();
    }

    // First attempt: count by skipping, which avoids copying any bytes.
    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return countBySkipping(in);
    } catch (IOException e) {
      // skip may not be supported... at any rate, try reading
    } finally {
      closer.close();
    }

    // Second attempt: open a fresh stream and read it to exhaustion.
    closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return ByteStreams.exhaust(in);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Counts the bytes in the given input stream by repeatedly skipping until no more bytes can be
   * skipped. Any {@code IOException} thrown while skipping is propagated to the caller; {@link
   * #size()} treats that as "skip may not be supported" and falls back to reading the stream.
   */
  private long countBySkipping(InputStream in) throws IOException {
    long count = 0;
    long skipped;
    while ((skipped = skipUpTo(in, Integer.MAX_VALUE)) > 0) {
      count += skipped;
    }
    return count;
  }

  /**
   * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
   * {@code output}.
   *
   * @return the number of bytes copied
   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
   *     output}
   */
  @CanIgnoreReturnValue
  public long copyTo(OutputStream output) throws IOException {
    checkNotNull(output);

    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return ByteStreams.copy(in, output);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Copies the contents of this byte source to the given {@code ByteSink}.
   *
   * @return the number of bytes copied
   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
   *     sink}
   */
  @CanIgnoreReturnValue
  public long copyTo(ByteSink sink) throws IOException {
    checkNotNull(sink);

    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      OutputStream out = closer.register(sink.openStream());
      return ByteStreams.copy(in, out);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads the full contents of this byte source as a byte array.
   *
   * @throws IOException if an I/O error occurs while reading from this source
   */
  public byte[] read() throws IOException {
    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return ByteStreams.toByteArray(in);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads the contents of this byte source using the given {@code processor} to process bytes as
   * they are read. Stops when all bytes have been read or the processor returns {@code false}.
   * Returns the result produced by the processor.
   *
   * @throws IOException if an I/O error occurs while reading from this source or if {@code
   *     processor} throws an {@code IOException}
   * @since 16.0
   */
  @Beta
  @CanIgnoreReturnValue // some processors won't return a useful result
  public <T> T read(ByteProcessor<T> processor) throws IOException {
    checkNotNull(processor);

    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return ByteStreams.readBytes(in, processor);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Hashes the contents of this byte source using the given hash function.
   *
   * @throws IOException if an I/O error occurs while reading from this source
   */
  public HashCode hash(HashFunction hashFunction) throws IOException {
    Hasher hasher = hashFunction.newHasher();
    copyTo(Funnels.asOutputStream(hasher));
    return hasher.hash();
  }

  /**
   * Checks that the contents of this byte source are equal to the contents of the given byte
   * source.
   *
   * @throws IOException if an I/O error occurs while reading from this source or {@code other}
   */
  public boolean contentEquals(ByteSource other) throws IOException {
    checkNotNull(other);

    byte[] buf1 = createBuffer();
    byte[] buf2 = createBuffer();

    Closer closer = Closer.create();
    try {
      InputStream in1 = closer.register(openStream());
      InputStream in2 = closer.register(other.openStream());
      while (true) {
        int read1 = ByteStreams.read(in1, buf1, 0, buf1.length);
        int read2 = ByteStreams.read(in2, buf2, 0, buf2.length);
        if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
          return false;
        } else if (read1 != buf1.length) {
          // A short read means both streams hit EOF at the same position with equal content.
          return true;
        }
      }
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code ByteSource} containing the concatenated data
   * @since 15.0
   */
  public static ByteSource concat(Iterable<? extends ByteSource> sources) {
    return new ConcatenatedByteSource(sources);
  }

  /**
   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
   * eagerly fetches data for each source when iterated (rather than producing sources that only
   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
   * possible.
   *
   * @param sources the sources to concatenate
   * @return a {@code ByteSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static ByteSource concat(Iterator<? extends ByteSource> sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code ByteSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static ByteSource concat(ByteSource... sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
   * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
   *
   * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
   */
  public static ByteSource wrap(byte[] b) {
    return new ByteArrayByteSource(b);
  }

  /**
   * Returns an immutable {@link ByteSource} that contains no bytes.
   *
   * @since 15.0
   */
  public static ByteSource empty() {
    return EmptyByteSource.INSTANCE;
  }

  /**
   * A char source that reads bytes from this source and decodes them as characters using a charset.
   */
  class AsCharSource extends CharSource {

    final Charset charset;

    AsCharSource(Charset charset) {
      this.charset = checkNotNull(charset);
    }

    @Override
    public ByteSource asByteSource(Charset charset) {
      // Same charset: hand back the original bytes instead of decoding and re-encoding them.
      if (charset.equals(this.charset)) {
        return ByteSource.this;
      }
      return super.asByteSource(charset);
    }

    @Override
    public Reader openStream() throws IOException {
      return new InputStreamReader(ByteSource.this.openStream(), charset);
    }

    @Override
    public String read() throws IOException {
      // Reading all the data as a byte array is more efficient than the default read()
      // implementation because:
      // 1. the string constructor can avoid an extra copy most of the time by correctly sizing the
      //    internal char array (hard to avoid using StringBuilder)
      // 2. we avoid extra copies into temporary buffers altogether
      // The downside is that this will cause us to store the file bytes in memory twice for a short
      // amount of time.
      return new String(ByteSource.this.read(), charset);
    }

    @Override
    public String toString() {
      return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
    }
  }

  /** A view of a subsection of the containing byte source. */
  private final class SlicedByteSource extends ByteSource {

    final long offset;
    final long length;

    SlicedByteSource(long offset, long length) {
      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
      checkArgument(length >= 0, "length (%s) may not be negative", length);
      this.offset = offset;
      this.length = length;
    }

    @Override
    public InputStream openStream() throws IOException {
      return sliceStream(ByteSource.this.openStream());
    }

    @Override
    public InputStream openBufferedStream() throws IOException {
      return sliceStream(ByteSource.this.openBufferedStream());
    }

    /**
     * Positions {@code in} at this slice's offset and limits it to this slice's length. If the
     * offset lies beyond the end of {@code in}, the stream is closed and an empty stream is
     * returned instead.
     */
    private InputStream sliceStream(InputStream in) throws IOException {
      if (offset > 0) {
        long skipped;
        try {
          skipped = ByteStreams.skipUpTo(in, offset);
        } catch (Throwable e) {
          // The caller never receives the stream if skipping fails, so close it here.
          Closer closer = Closer.create();
          closer.register(in);
          try {
            throw closer.rethrow(e);
          } finally {
            closer.close();
          }
        }

        if (skipped < offset) {
          // offset was beyond EOF
          in.close();
          return new ByteArrayInputStream(new byte[0]);
        }
      }
      return ByteStreams.limit(in, length);
    }

    /**
     * Slices the underlying source directly rather than stacking a slice on top of this slice. The
     * requested range is clamped to this slice's own bounds.
     */
    @Override
    public ByteSource slice(long offset, long length) {
      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
      checkArgument(length >= 0, "length (%s) may not be negative", length);
      long maxLength = this.length - offset;
      if (maxLength <= 0) {
        // The requested offset is at or past the end of this slice. The result must be empty;
        // passing the non-positive maxLength on as a length would be rejected as negative.
        return ByteSource.empty();
      }
      return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
    }

    @Override
    public boolean isEmpty() throws IOException {
      return length == 0 || super.isEmpty();
    }

    @Override
    public Optional<Long> sizeIfKnown() {
      Optional<Long> optionalUnslicedSize = ByteSource.this.sizeIfKnown();
      if (optionalUnslicedSize.isPresent()) {
        long unslicedSize = optionalUnslicedSize.get();
        // Clamp the offset so the remaining size never goes negative.
        long off = Math.min(offset, unslicedSize);
        return Optional.of(Math.min(length, unslicedSize - off));
      }
      return Optional.absent();
    }

    @Override
    public String toString() {
      return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
    }
  }

  /** A byte source backed by a range of a byte array. */
  private static class ByteArrayByteSource extends ByteSource {

    final byte[] bytes;
    final int offset;
    final int length;

    ByteArrayByteSource(byte[] bytes) {
      this(bytes, 0, bytes.length);
    }

    // NOTE: Preconditions are enforced by slice, the only non-trivial caller.
    ByteArrayByteSource(byte[] bytes, int offset, int length) {
      this.bytes = bytes;
      this.offset = offset;
      this.length = length;
    }

    @Override
    public InputStream openStream() {
      return new ByteArrayInputStream(bytes, offset, length);
    }

    @Override
    public InputStream openBufferedStream() throws IOException {
      return openStream();
    }

    @Override
    public boolean isEmpty() {
      return length == 0;
    }

    @Override
    public long size() {
      return length;
    }

    @Override
    public Optional<Long> sizeIfKnown() {
      return Optional.of((long) length);
    }

    @Override
    public byte[] read() {
      return Arrays.copyOfRange(bytes, offset, offset + length);
    }

    @SuppressWarnings("CheckReturnValue") // it doesn't matter what processBytes returns here
    @Override
    public <T> T read(ByteProcessor<T> processor) throws IOException {
      processor.processBytes(bytes, offset, length);
      return processor.getResult();
    }

    @Override
    public long copyTo(OutputStream output) throws IOException {
      output.write(bytes, offset, length);
      return length;
    }

    @Override
    public HashCode hash(HashFunction hashFunction) throws IOException {
      return hashFunction.hashBytes(bytes, offset, length);
    }

    @Override
    public ByteSource slice(long offset, long length) {
      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
      checkArgument(length >= 0, "length (%s) may not be negative", length);

      // Clamp to this source's range first so the int casts below cannot truncate.
      offset = Math.min(offset, this.length);
      length = Math.min(length, this.length - offset);
      int newOffset = this.offset + (int) offset;
      return new ByteArrayByteSource(bytes, newOffset, (int) length);
    }

    @Override
    public String toString() {
      return "ByteSource.wrap("
          + Ascii.truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...")
          + ")";
    }
  }

  /** The singleton byte source that contains no bytes. */
  private static final class EmptyByteSource extends ByteArrayByteSource {

    static final EmptyByteSource INSTANCE = new EmptyByteSource();

    EmptyByteSource() {
      super(new byte[0]);
    }

    @Override
    public CharSource asCharSource(Charset charset) {
      checkNotNull(charset);
      return CharSource.empty();
    }

    @Override
    public byte[] read() {
      return bytes; // length is 0, no need to clone
    }

    @Override
    public String toString() {
      return "ByteSource.empty()";
    }
  }

  /** A byte source whose content is the content of several sources read one after another. */
  private static final class ConcatenatedByteSource extends ByteSource {

    final Iterable<? extends ByteSource> sources;

    ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
      this.sources = checkNotNull(sources);
    }

    @Override
    public InputStream openStream() throws IOException {
      return new MultiInputStream(sources.iterator());
    }

    @Override
    public boolean isEmpty() throws IOException {
      for (ByteSource source : sources) {
        if (!source.isEmpty()) {
          return false;
        }
      }
      return true;
    }

    @Override
    public Optional<Long> sizeIfKnown() {
      long result = 0L;
      for (ByteSource source : sources) {
        Optional<Long> sizeIfKnown = source.sizeIfKnown();
        if (!sizeIfKnown.isPresent()) {
          return Optional.absent();
        }
        result += sizeIfKnown.get();
        if (result < 0) {
          // The sum overflowed a long. Nothing larger can be represented, so saturate instead of
          // reporting a negative size.
          return Optional.of(Long.MAX_VALUE);
        }
      }
      return Optional.of(result);
    }

    @Override
    public long size() throws IOException {
      long result = 0L;
      for (ByteSource source : sources) {
        result += source.size();
        if (result < 0) {
          // The sum overflowed a long. Nothing larger can be represented, so saturate instead of
          // reporting a negative size.
          return Long.MAX_VALUE;
        }
      }
      return result;
    }

    @Override
    public String toString() {
      return "ByteSource.concat(" + sources + ")";
    }
  }
}