001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.io; 018 019import static com.google.common.base.Preconditions.checkArgument; 020import static com.google.common.base.Preconditions.checkNotNull; 021import static com.google.common.io.ByteStreams.BUF_SIZE; 022import static com.google.common.io.ByteStreams.skipUpTo; 023 024import com.google.common.annotations.Beta; 025import com.google.common.base.Ascii; 026import com.google.common.base.Optional; 027import com.google.common.collect.ImmutableList; 028import com.google.common.hash.Funnels; 029import com.google.common.hash.HashCode; 030import com.google.common.hash.HashFunction; 031import com.google.common.hash.Hasher; 032 033import java.io.BufferedInputStream; 034import java.io.ByteArrayInputStream; 035import java.io.IOException; 036import java.io.InputStream; 037import java.io.InputStreamReader; 038import java.io.OutputStream; 039import java.io.Reader; 040import java.nio.charset.Charset; 041import java.util.Arrays; 042import java.util.Iterator; 043 044/** 045 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a 046 * {@code ByteSource} is not an open, stateful stream for input that can be read and closed. 047 * Instead, it is an immutable <i>supplier</i> of {@code InputStream} instances. 048 * 049 * <p>{@code ByteSource} provides two kinds of methods: 050 * <ul> 051 * <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent 052 * instance each time they are called. The caller is responsible for ensuring that the returned 053 * stream is closed. 054 * <li><b>Convenience methods:</b> These are implementations of common operations that are 055 * typically implemented by opening a stream using one of the methods in the first category, doing 056 * something and finally closing the stream that was opened. 057 * </ul> 058 * 059 * @since 14.0 060 * @author Colin Decker 061 */ 062public abstract class ByteSource { 063 064 /** 065 * Constructor for use by subclasses. 066 */ 067 protected ByteSource() {} 068 069 /** 070 * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source 071 * as characters using the given {@link Charset}. 072 */ 073 public CharSource asCharSource(Charset charset) { 074 return new AsCharSource(charset); 075 } 076 077 /** 078 * Opens a new {@link InputStream} for reading from this source. This method should return a new, 079 * independent stream each time it is called. 080 * 081 * <p>The caller is responsible for ensuring that the returned stream is closed. 082 * 083 * @throws IOException if an I/O error occurs in the process of opening the stream 084 */ 085 public abstract InputStream openStream() throws IOException; 086 087 /** 088 * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is 089 * not required to be a {@link BufferedInputStream} in order to allow implementations to simply 090 * delegate to {@link #openStream()} when the stream returned by that method does not benefit 091 * from additional buffering (for example, a {@code ByteArrayInputStream}). This method should 092 * return a new, independent stream each time it is called. 093 * 094 * <p>The caller is responsible for ensuring that the returned stream is closed. 095 * 096 * @throws IOException if an I/O error occurs in the process of opening the stream 097 * @since 15.0 (in 14.0 with return type {@link BufferedInputStream}) 098 */ 099 public InputStream openBufferedStream() throws IOException { 100 InputStream in = openStream(); 101 return (in instanceof BufferedInputStream) 102 ? (BufferedInputStream) in 103 : new BufferedInputStream(in); 104 } 105 106 /** 107 * Returns a view of a slice of this byte source that is at most {@code length} bytes long 108 * starting at the given {@code offset}. If {@code offset} is greater than the size of this 109 * source, the returned source will be empty. If {@code offset + length} is greater than the size 110 * of this source, the returned source will contain the slice starting at {@code offset} and 111 * ending at the end of this source. 112 * 113 * @throws IllegalArgumentException if {@code offset} or {@code length} is negative 114 */ 115 public ByteSource slice(long offset, long length) { 116 return new SlicedByteSource(offset, length); 117 } 118 119 /** 120 * Returns whether the source has zero bytes. The default implementation returns true if 121 * {@link #sizeIfKnown} returns zero, falling back to opening a stream and checking for 122 * EOF if the size is not known. 123 * 124 * <p>Note that, in cases where {@code sizeIfKnown} returns zero, it is <i>possible</i> that bytes 125 * are actually available for reading. (For example, some special files may return a size of 0 126 * despite actually having content when read.) This means that a source may return {@code true} 127 * from {@code isEmpty()} despite having readable content. 128 * 129 * @throws IOException if an I/O error occurs 130 * @since 15.0 131 */ 132 public boolean isEmpty() throws IOException { 133 Optional<Long> sizeIfKnown = sizeIfKnown(); 134 if (sizeIfKnown.isPresent() && sizeIfKnown.get() == 0L) { 135 return true; 136 } 137 Closer closer = Closer.create(); 138 try { 139 InputStream in = closer.register(openStream()); 140 return in.read() == -1; 141 } catch (Throwable e) { 142 throw closer.rethrow(e); 143 } finally { 144 closer.close(); 145 } 146 } 147 148 /** 149 * Returns the size of this source in bytes, if the size can be easily determined without 150 * actually opening the data stream. 151 * 152 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a file, 153 * may return a non-absent value. Note that in such cases, it is <i>possible</i> that this method 154 * will return a different number of bytes than would be returned by reading all of the bytes (for 155 * example, some special files may return a size of 0 despite actually having content when read). 156 * 157 * <p>Additionally, for mutable sources such as files, a subsequent read may return a different 158 * number of bytes if the contents are changed. 159 * 160 * @since 19.0 161 */ 162 @Beta 163 public Optional<Long> sizeIfKnown() { 164 return Optional.absent(); 165 } 166 167 /** 168 * Returns the size of this source in bytes, even if doing so requires opening and traversing 169 * an entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}. 170 * 171 * <p>The default implementation calls {@link #sizeIfKnown} and returns the value if present. 172 * If absent, it will fall back to a heavyweight operation that will open a stream, read (or 173 * {@link InputStream#skip(long) skip}, if possible) to the end of the stream and return the total 174 * number of bytes that were read. 175 * 176 * <p>Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient 177 * implementation, it is <i>possible</i> that this method will return a different number of bytes 178 * than would be returned by reading all of the bytes (for example, some special files may return 179 * a size of 0 despite actually having content when read). 180 * 181 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 182 * number of bytes if the contents are changed. 183 * 184 * @throws IOException if an I/O error occurs in the process of reading the size of this source 185 */ 186 public long size() throws IOException { 187 Optional<Long> sizeIfKnown = sizeIfKnown(); 188 if (sizeIfKnown.isPresent()) { 189 return sizeIfKnown.get(); 190 } 191 192 Closer closer = Closer.create(); 193 try { 194 InputStream in = closer.register(openStream()); 195 return countBySkipping(in); 196 } catch (IOException e) { 197 // skip may not be supported... at any rate, try reading 198 } finally { 199 closer.close(); 200 } 201 202 closer = Closer.create(); 203 try { 204 InputStream in = closer.register(openStream()); 205 return countByReading(in); 206 } catch (Throwable e) { 207 throw closer.rethrow(e); 208 } finally { 209 closer.close(); 210 } 211 } 212 213 /** 214 * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the 215 * first call to skip threw, in which case skip may just not be supported. 216 */ 217 private long countBySkipping(InputStream in) throws IOException { 218 long count = 0; 219 long skipped; 220 while ((skipped = skipUpTo(in, Integer.MAX_VALUE)) > 0) { 221 count += skipped; 222 } 223 return count; 224 } 225 226 private long countByReading(InputStream in) throws IOException { 227 long count = 0; 228 long read; 229 while ((read = in.read(ByteStreams.skipBuffer)) != -1) { 230 count += read; 231 } 232 return count; 233 } 234 235 /** 236 * Copies the contents of this byte source to the given {@code OutputStream}. Does not close 237 * {@code output}. 238 * 239 * @throws IOException if an I/O error occurs in the process of reading from this source or 240 * writing to {@code output} 241 */ 242 public long copyTo(OutputStream output) throws IOException { 243 checkNotNull(output); 244 245 Closer closer = Closer.create(); 246 try { 247 InputStream in = closer.register(openStream()); 248 return ByteStreams.copy(in, output); 249 } catch (Throwable e) { 250 throw closer.rethrow(e); 251 } finally { 252 closer.close(); 253 } 254 } 255 256 /** 257 * Copies the contents of this byte source to the given {@code ByteSink}. 258 * 259 * @throws IOException if an I/O error occurs in the process of reading from this source or 260 * writing to {@code sink} 261 */ 262 public long copyTo(ByteSink sink) throws IOException { 263 checkNotNull(sink); 264 265 Closer closer = Closer.create(); 266 try { 267 InputStream in = closer.register(openStream()); 268 OutputStream out = closer.register(sink.openStream()); 269 return ByteStreams.copy(in, out); 270 } catch (Throwable e) { 271 throw closer.rethrow(e); 272 } finally { 273 closer.close(); 274 } 275 } 276 277 /** 278 * Reads the full contents of this byte source as a byte array. 279 * 280 * @throws IOException if an I/O error occurs in the process of reading from this source 281 */ 282 public byte[] read() throws IOException { 283 Closer closer = Closer.create(); 284 try { 285 InputStream in = closer.register(openStream()); 286 return ByteStreams.toByteArray(in); 287 } catch (Throwable e) { 288 throw closer.rethrow(e); 289 } finally { 290 closer.close(); 291 } 292 } 293 294 /** 295 * Reads the contents of this byte source using the given {@code processor} to process bytes as 296 * they are read. Stops when all bytes have been read or the consumer returns {@code false}. 297 * Returns the result produced by the processor. 298 * 299 * @throws IOException if an I/O error occurs in the process of reading from this source or if 300 * {@code processor} throws an {@code IOException} 301 * @since 16.0 302 */ 303 @Beta 304 public <T> T read(ByteProcessor<T> processor) throws IOException { 305 checkNotNull(processor); 306 307 Closer closer = Closer.create(); 308 try { 309 InputStream in = closer.register(openStream()); 310 return ByteStreams.readBytes(in, processor); 311 } catch (Throwable e) { 312 throw closer.rethrow(e); 313 } finally { 314 closer.close(); 315 } 316 } 317 318 /** 319 * Hashes the contents of this byte source using the given hash function. 320 * 321 * @throws IOException if an I/O error occurs in the process of reading from this source 322 */ 323 public HashCode hash(HashFunction hashFunction) throws IOException { 324 Hasher hasher = hashFunction.newHasher(); 325 copyTo(Funnels.asOutputStream(hasher)); 326 return hasher.hash(); 327 } 328 329 /** 330 * Checks that the contents of this byte source are equal to the contents of the given byte 331 * source. 332 * 333 * @throws IOException if an I/O error occurs in the process of reading from this source or 334 * {@code other} 335 */ 336 public boolean contentEquals(ByteSource other) throws IOException { 337 checkNotNull(other); 338 339 byte[] buf1 = new byte[BUF_SIZE]; 340 byte[] buf2 = new byte[BUF_SIZE]; 341 342 Closer closer = Closer.create(); 343 try { 344 InputStream in1 = closer.register(openStream()); 345 InputStream in2 = closer.register(other.openStream()); 346 while (true) { 347 int read1 = ByteStreams.read(in1, buf1, 0, BUF_SIZE); 348 int read2 = ByteStreams.read(in2, buf2, 0, BUF_SIZE); 349 if (read1 != read2 || !Arrays.equals(buf1, buf2)) { 350 return false; 351 } else if (read1 != BUF_SIZE) { 352 return true; 353 } 354 } 355 } catch (Throwable e) { 356 throw closer.rethrow(e); 357 } finally { 358 closer.close(); 359 } 360 } 361 362 /** 363 * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from 364 * the source will contain the concatenated data from the streams of the underlying sources. 365 * 366 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 367 * close the open underlying stream. 368 * 369 * @param sources the sources to concatenate 370 * @return a {@code ByteSource} containing the concatenated data 371 * @since 15.0 372 */ 373 public static ByteSource concat(Iterable<? extends ByteSource> sources) { 374 return new ConcatenatedByteSource(sources); 375 } 376 377 /** 378 * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from 379 * the source will contain the concatenated data from the streams of the underlying sources. 380 * 381 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 382 * close the open underlying stream. 383 * 384 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this 385 * method is called. This will fail if the iterator is infinite and may cause problems if the 386 * iterator eagerly fetches data for each source when iterated (rather than producing sources 387 * that only load data through their streams). Prefer using the {@link #concat(Iterable)} 388 * overload if possible. 389 * 390 * @param sources the sources to concatenate 391 * @return a {@code ByteSource} containing the concatenated data 392 * @throws NullPointerException if any of {@code sources} is {@code null} 393 * @since 15.0 394 */ 395 public static ByteSource concat(Iterator<? extends ByteSource> sources) { 396 return concat(ImmutableList.copyOf(sources)); 397 } 398 399 /** 400 * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from 401 * the source will contain the concatenated data from the streams of the underlying sources. 402 * 403 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 404 * close the open underlying stream. 405 * 406 * @param sources the sources to concatenate 407 * @return a {@code ByteSource} containing the concatenated data 408 * @throws NullPointerException if any of {@code sources} is {@code null} 409 * @since 15.0 410 */ 411 public static ByteSource concat(ByteSource... sources) { 412 return concat(ImmutableList.copyOf(sources)); 413 } 414 415 /** 416 * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range 417 * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}. 418 * 419 * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}). 420 */ 421 public static ByteSource wrap(byte[] b) { 422 return new ByteArrayByteSource(b); 423 } 424 425 /** 426 * Returns an immutable {@link ByteSource} that contains no bytes. 427 * 428 * @since 15.0 429 */ 430 public static ByteSource empty() { 431 return EmptyByteSource.INSTANCE; 432 } 433 434 /** 435 * A char source that reads bytes from this source and decodes them as characters using a 436 * charset. 437 */ 438 private final class AsCharSource extends CharSource { 439 440 private final Charset charset; 441 442 private AsCharSource(Charset charset) { 443 this.charset = checkNotNull(charset); 444 } 445 446 @Override 447 public Reader openStream() throws IOException { 448 return new InputStreamReader(ByteSource.this.openStream(), charset); 449 } 450 451 @Override 452 public String toString() { 453 return ByteSource.this.toString() + ".asCharSource(" + charset + ")"; 454 } 455 } 456 457 /** 458 * A view of a subsection of the containing byte source. 459 */ 460 private final class SlicedByteSource extends ByteSource { 461 462 final long offset; 463 final long length; 464 465 SlicedByteSource(long offset, long length) { 466 checkArgument(offset >= 0, "offset (%s) may not be negative", offset); 467 checkArgument(length >= 0, "length (%s) may not be negative", length); 468 this.offset = offset; 469 this.length = length; 470 } 471 472 @Override 473 public InputStream openStream() throws IOException { 474 return sliceStream(ByteSource.this.openStream()); 475 } 476 477 @Override 478 public InputStream openBufferedStream() throws IOException { 479 return sliceStream(ByteSource.this.openBufferedStream()); 480 } 481 482 private InputStream sliceStream(InputStream in) throws IOException { 483 if (offset > 0) { 484 long skipped; 485 try { 486 skipped = ByteStreams.skipUpTo(in, offset); 487 } catch (Throwable e) { 488 Closer closer = Closer.create(); 489 closer.register(in); 490 try { 491 throw closer.rethrow(e); 492 } finally { 493 closer.close(); 494 } 495 } 496 497 if (skipped < offset) { 498 // offset was beyond EOF 499 in.close(); 500 return new ByteArrayInputStream(new byte[0]); 501 } 502 } 503 return ByteStreams.limit(in, length); 504 } 505 506 @Override 507 public ByteSource slice(long offset, long length) { 508 checkArgument(offset >= 0, "offset (%s) may not be negative", offset); 509 checkArgument(length >= 0, "length (%s) may not be negative", length); 510 long maxLength = this.length - offset; 511 return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength)); 512 } 513 514 @Override 515 public boolean isEmpty() throws IOException { 516 return length == 0 || super.isEmpty(); 517 } 518 519 @Override 520 public Optional<Long> sizeIfKnown() { 521 Optional<Long> optionalUnslicedSize = ByteSource.this.sizeIfKnown(); 522 if (optionalUnslicedSize.isPresent()) { 523 long unslicedSize = optionalUnslicedSize.get(); 524 long off = Math.min(offset, unslicedSize); 525 return Optional.of(Math.min(length, unslicedSize - off)); 526 } 527 return Optional.absent(); 528 } 529 530 @Override 531 public String toString() { 532 return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")"; 533 } 534 } 535 536 private static class ByteArrayByteSource extends ByteSource { 537 538 final byte[] bytes; 539 final int offset; 540 final int length; 541 542 ByteArrayByteSource(byte[] bytes) { 543 this(bytes, 0, bytes.length); 544 } 545 546 // NOTE: Preconditions are enforced by slice, the only non-trivial caller. 547 ByteArrayByteSource(byte[] bytes, int offset, int length) { 548 this.bytes = bytes; 549 this.offset = offset; 550 this.length = length; 551 } 552 553 @Override 554 public InputStream openStream() { 555 return new ByteArrayInputStream(bytes, offset, length); 556 } 557 558 @Override 559 public InputStream openBufferedStream() throws IOException { 560 return openStream(); 561 } 562 563 @Override 564 public boolean isEmpty() { 565 return length == 0; 566 } 567 568 @Override 569 public long size() { 570 return length; 571 } 572 573 @Override 574 public Optional<Long> sizeIfKnown() { 575 return Optional.of((long) length); 576 } 577 578 @Override 579 public byte[] read() { 580 return Arrays.copyOfRange(bytes, offset, offset + length); 581 } 582 583 @Override 584 public long copyTo(OutputStream output) throws IOException { 585 output.write(bytes, offset, length); 586 return length; 587 } 588 589 @Override 590 public <T> T read(ByteProcessor<T> processor) throws IOException { 591 processor.processBytes(bytes, offset, length); 592 return processor.getResult(); 593 } 594 595 @Override 596 public HashCode hash(HashFunction hashFunction) throws IOException { 597 return hashFunction.hashBytes(bytes, offset, length); 598 } 599 600 @Override 601 public ByteSource slice(long offset, long length) { 602 checkArgument(offset >= 0, "offset (%s) may not be negative", offset); 603 checkArgument(length >= 0, "length (%s) may not be negative", length); 604 605 offset = Math.min(offset, this.length); 606 length = Math.min(length, this.length - offset); 607 int newOffset = this.offset + (int) offset; 608 return new ByteArrayByteSource(bytes, newOffset, (int) length); 609 } 610 611 @Override 612 public String toString() { 613 return "ByteSource.wrap(" 614 + Ascii.truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...") + ")"; 615 } 616 } 617 618 private static final class EmptyByteSource extends ByteArrayByteSource { 619 620 static final EmptyByteSource INSTANCE = new EmptyByteSource(); 621 622 EmptyByteSource() { 623 super(new byte[0]); 624 } 625 626 @Override 627 public CharSource asCharSource(Charset charset) { 628 checkNotNull(charset); 629 return CharSource.empty(); 630 } 631 632 @Override 633 public byte[] read() { 634 return bytes; // length is 0, no need to clone 635 } 636 637 @Override 638 public String toString() { 639 return "ByteSource.empty()"; 640 } 641 } 642 643 private static final class ConcatenatedByteSource extends ByteSource { 644 645 final Iterable<? extends ByteSource> sources; 646 647 ConcatenatedByteSource(Iterable<? extends ByteSource> sources) { 648 this.sources = checkNotNull(sources); 649 } 650 651 @Override 652 public InputStream openStream() throws IOException { 653 return new MultiInputStream(sources.iterator()); 654 } 655 656 @Override 657 public boolean isEmpty() throws IOException { 658 for (ByteSource source : sources) { 659 if (!source.isEmpty()) { 660 return false; 661 } 662 } 663 return true; 664 } 665 666 @Override 667 public Optional<Long> sizeIfKnown() { 668 long result = 0L; 669 for (ByteSource source : sources) { 670 Optional<Long> sizeIfKnown = source.sizeIfKnown(); 671 if (!sizeIfKnown.isPresent()) { 672 return Optional.absent(); 673 } 674 result += sizeIfKnown.get(); 675 } 676 return Optional.of(result); 677 } 678 679 @Override 680 public long size() throws IOException { 681 long result = 0L; 682 for (ByteSource source : sources) { 683 result += source.size(); 684 } 685 return result; 686 } 687 688 @Override 689 public String toString() { 690 return "ByteSource.concat(" + sources + ")"; 691 } 692 } 693}