/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.Beta;
import com.google.common.base.Ascii;
import com.google.common.collect.ImmutableList;
import com.google.common.hash.Funnels;
import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Iterator;

/**
 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a
 * {@code ByteSource} is not an open, stateful stream for input that can be read and closed.
 * Instead, it is an immutable <i>supplier</i> of {@code InputStream} instances.
 *
 * <p>{@code ByteSource} provides two kinds of methods:
 * <ul>
 *   <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
 *   instance each time they are called. The caller is responsible for ensuring that the returned
 *   stream is closed.
 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
 *   typically implemented by opening a stream using one of the methods in the first category, doing
 *   something and finally closing the stream that was opened.
 * </ul>
 *
 * @since 14.0
 * @author Colin Decker
 */
public abstract class ByteSource implements InputSupplier<InputStream> {

  private static final int BUF_SIZE = 0x1000; // 4K

  /**
   * Constructor for use by subclasses.
   */
  protected ByteSource() {}

  /**
   * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
   * as characters using the given {@link Charset}.
   */
  public CharSource asCharSource(Charset charset) {
    return new AsCharSource(charset);
  }

  /**
   * Opens a new {@link InputStream} for reading from this source. This method should return a new,
   * independent stream each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned stream is closed.
   *
   * @throws IOException if an I/O error occurs in the process of opening the stream
   */
  public abstract InputStream openStream() throws IOException;

  /**
   * This method is a temporary method provided for easing migration from suppliers to sources and
   * sinks.
   *
   * @since 15.0
   * @deprecated This method is only provided for temporary compatibility with the
   *     {@link InputSupplier} interface and should not be called directly. Use {@link #openStream}
   *     instead. This method is scheduled for removal in Guava 18.0.
   */
  @Override
  @Deprecated
  public final InputStream getInput() throws IOException {
    return openStream();
  }

  /**
   * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
   * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
   * delegate to {@link #openStream()} when the stream returned by that method does not benefit
   * from additional buffering (for example, a {@code ByteArrayInputStream}). This method should
   * return a new, independent stream each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned stream is closed.
   *
   * @throws IOException if an I/O error occurs in the process of opening the stream
   * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
   */
  public InputStream openBufferedStream() throws IOException {
    InputStream in = openStream();
    // Avoid double-buffering when the underlying stream is already buffered.
    return (in instanceof BufferedInputStream)
        ? (BufferedInputStream) in
        : new BufferedInputStream(in);
  }

  /**
   * Returns a view of a slice of this byte source that is at most {@code length} bytes long
   * starting at the given {@code offset}.
   *
   * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
   */
  public ByteSource slice(long offset, long length) {
    return new SlicedByteSource(offset, length);
  }

  /**
   * Returns whether the source has zero bytes. The default implementation is to open a stream and
   * check for EOF.
   *
   * @throws IOException if an I/O error occurs
   * @since 15.0
   */
  public boolean isEmpty() throws IOException {
    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return in.read() == -1;
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Returns the size of this source in bytes. For most implementations, this is a heavyweight
   * operation that will open a stream, read (or {@link InputStream#skip(long) skip}, if possible)
   * to the end of the stream and return the total number of bytes that were read.
   *
   * <p>For some sources, such as a file, this method may use a more efficient implementation. Note
   * that in such cases, it is <i>possible</i> that this method will return a different number of
   * bytes than would be returned by reading all of the bytes (for example, some special files may
   * return a size of 0 despite actually having content when read).
   *
   * <p>In either case, if this is a mutable source such as a file, the size it returns may not be
   * the same number of bytes a subsequent read would return.
   *
   * @throws IOException if an I/O error occurs in the process of reading the size of this source
   */
  public long size() throws IOException {
    // First attempt: count by skipping, which avoids copying bytes into a buffer.
    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return countBySkipping(in);
    } catch (IOException ignored) {
      // skip may not be supported... at any rate, try reading
    } finally {
      closer.close();
    }

    // Fallback: open a fresh stream and count by reading it to the end.
    closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return countByReading(in);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Counts the bytes in the given input stream using skip if possible.
   *
   * @throws IOException if an I/O error occurs, or if skipping does not look usable: nothing could
   *     be skipped on the first attempt and {@code available()} still reports zero after a single
   *     byte was read. {@link #size()} reacts to this by counting with {@code countByReading}
   *     instead.
   */
  private long countBySkipping(InputStream in) throws IOException {
    long count = 0;
    while (true) {
      // don't try to skip more than available()
      // things may work really wrong with FileInputStream otherwise
      // (available() is an int, so it can never exceed Integer.MAX_VALUE)
      long skipped = in.skip(in.available());
      if (skipped <= 0) {
        if (in.read() == -1) {
          return count;
        } else if (count == 0 && in.available() == 0) {
          // if available is still zero after reading a single byte, it
          // will probably always be zero, so we should countByReading
          throw new IOException();
        }
        // account for the single byte consumed by read() above
        count++;
      } else {
        count += skipped;
      }
    }
  }

  // Scratch buffer used by countByReading; the bytes written into it are never read back, only
  // the number of bytes read is used.
  private static final byte[] countBuffer = new byte[BUF_SIZE];

  /**
   * Counts the bytes in the given input stream by reading it to the end.
   */
  private long countByReading(InputStream in) throws IOException {
    long count = 0;
    long read;
    while ((read = in.read(countBuffer)) != -1) {
      count += read;
    }
    return count;
  }

  /**
   * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
   * {@code output}.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source or
   *     writing to {@code output}
   */
  public long copyTo(OutputStream output) throws IOException {
    checkNotNull(output);

    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return ByteStreams.copy(in, output);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Copies the contents of this byte source to the given {@code ByteSink}.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source or
   *     writing to {@code sink}
   */
  public long copyTo(ByteSink sink) throws IOException {
    checkNotNull(sink);

    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      OutputStream out = closer.register(sink.openStream());
      return ByteStreams.copy(in, out);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads the full contents of this byte source as a byte array.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source
   */
  public byte[] read() throws IOException {
    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return ByteStreams.toByteArray(in);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads the contents of this byte source using the given {@code processor} to process bytes as
   * they are read. Stops when all bytes have been read or the processor returns {@code false}.
   * Returns the result produced by the processor.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source or if
   *     {@code processor} throws an {@code IOException}
   * @since 16.0
   */
  @Beta
  public <T> T read(ByteProcessor<T> processor) throws IOException {
    checkNotNull(processor);

    Closer closer = Closer.create();
    try {
      InputStream in = closer.register(openStream());
      return ByteStreams.readBytes(in, processor);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Hashes the contents of this byte source using the given hash function.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source
   */
  public HashCode hash(HashFunction hashFunction) throws IOException {
    Hasher hasher = hashFunction.newHasher();
    copyTo(Funnels.asOutputStream(hasher));
    return hasher.hash();
  }

  /**
   * Checks that the contents of this byte source are equal to the contents of the given byte
   * source.
   *
   * @throws IOException if an I/O error occurs in the process of reading from this source or
   *     {@code other}
   */
  public boolean contentEquals(ByteSource other) throws IOException {
    checkNotNull(other);

    byte[] buf1 = new byte[BUF_SIZE];
    byte[] buf2 = new byte[BUF_SIZE];

    Closer closer = Closer.create();
    try {
      InputStream in1 = closer.register(openStream());
      InputStream in2 = closer.register(other.openStream());
      while (true) {
        // NOTE(review): this loop assumes ByteStreams.read only returns fewer than BUF_SIZE bytes
        // once the end of the stream has been reached - confirm against ByteStreams.
        int read1 = ByteStreams.read(in1, buf1, 0, BUF_SIZE);
        int read2 = ByteStreams.read(in2, buf2, 0, BUF_SIZE);
        // Comparing the whole buffers is safe: bytes beyond the read count are either the initial
        // zeros or leftovers of a previous iteration in which both buffers compared equal.
        if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
          return false;
        } else if (read1 != BUF_SIZE) {
          return true;
        }
      }
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code ByteSource} containing the concatenated data
   * @since 15.0
   */
  public static ByteSource concat(Iterable<? extends ByteSource> sources) {
    return new ConcatenatedByteSource(sources);
  }

  /**
   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
   * method is called. This will fail if the iterator is infinite and may cause problems if the
   * iterator eagerly fetches data for each source when iterated (rather than producing sources
   * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
   * overload if possible.
   *
   * @param sources the sources to concatenate
   * @return a {@code ByteSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static ByteSource concat(Iterator<? extends ByteSource> sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code ByteSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static ByteSource concat(ByteSource... sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
   * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
   *
   * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
   */
  public static ByteSource wrap(byte[] b) {
    return new ByteArrayByteSource(b);
  }

  /**
   * Returns an immutable {@link ByteSource} that contains no bytes.
   *
   * @since 15.0
   */
  public static ByteSource empty() {
    return EmptyByteSource.INSTANCE;
  }

  /**
   * A char source that reads bytes from this source and decodes them as characters using a
   * charset.
   */
  private final class AsCharSource extends CharSource {

    private final Charset charset;

    private AsCharSource(Charset charset) {
      this.charset = checkNotNull(charset);
    }

    @Override
    public Reader openStream() throws IOException {
      return new InputStreamReader(ByteSource.this.openStream(), charset);
    }

    @Override
    public String toString() {
      return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
    }
  }

  /**
   * A view of a subsection of the containing byte source.
   */
  private final class SlicedByteSource extends ByteSource {

    private final long offset;
    private final long length;

    private SlicedByteSource(long offset, long length) {
      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
      checkArgument(length >= 0, "length (%s) may not be negative", length);
      this.offset = offset;
      this.length = length;
    }

    @Override
    public InputStream openStream() throws IOException {
      return sliceStream(ByteSource.this.openStream());
    }

    @Override
    public InputStream openBufferedStream() throws IOException {
      return sliceStream(ByteSource.this.openBufferedStream());
    }

    /**
     * Skips {@code offset} bytes of the given stream and limits what remains to {@code length}
     * bytes. If skipping fails, the stream is closed before the failure is propagated.
     */
    private InputStream sliceStream(InputStream in) throws IOException {
      if (offset > 0) {
        try {
          ByteStreams.skipFully(in, offset);
        } catch (Throwable e) {
          // The caller never receives the stream in this case, so close it here.
          Closer closer = Closer.create();
          closer.register(in);
          try {
            throw closer.rethrow(e);
          } finally {
            closer.close();
          }
        }
      }
      return ByteStreams.limit(in, length);
    }

    /**
     * Returns a slice of this slice, expressed directly as a slice of the containing source.
     * The result never extends past the end of this slice; if {@code offset} lies beyond the end
     * of this slice the result is an empty source.
     *
     * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
     */
    @Override
    public ByteSource slice(long offset, long length) {
      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
      checkArgument(length >= 0, "length (%s) may not be negative", length);
      long maxLength = this.length - offset;
      if (maxLength < 0) {
        // The requested offset is past the end of this slice, so there is nothing to view.
        // Passing the negative maxLength on as a length would be rejected with an
        // IllegalArgumentException even though the caller's arguments are valid.
        return ByteSource.empty();
      }
      return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
    }

    @Override
    public boolean isEmpty() throws IOException {
      // A zero-length slice is empty without having to open the underlying stream.
      return length == 0 || super.isEmpty();
    }

    @Override
    public String toString() {
      return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
    }
  }

  /**
   * A byte source backed by a byte array. The array is stored as given, without copying.
   */
  private static class ByteArrayByteSource extends ByteSource {

    protected final byte[] bytes;

    protected ByteArrayByteSource(byte[] bytes) {
      this.bytes = checkNotNull(bytes);
    }

    @Override
    public InputStream openStream() {
      return new ByteArrayInputStream(bytes);
    }

    @Override
    public InputStream openBufferedStream() throws IOException {
      // The data is already in memory, so additional buffering would gain nothing.
      return openStream();
    }

    @Override
    public boolean isEmpty() {
      return bytes.length == 0;
    }

    @Override
    public long size() {
      return bytes.length;
    }

    @Override
    public byte[] read() {
      // Hand out a copy so callers cannot modify the backing array.
      return bytes.clone();
    }

    @Override
    public long copyTo(OutputStream output) throws IOException {
      output.write(bytes);
      return bytes.length;
    }

    @Override
    public <T> T read(ByteProcessor<T> processor) throws IOException {
      processor.processBytes(bytes, 0, bytes.length);
      return processor.getResult();
    }

    @Override
    public HashCode hash(HashFunction hashFunction) throws IOException {
      return hashFunction.hashBytes(bytes);
    }

    // TODO(user): Possibly override slice()

    @Override
    public String toString() {
      return "ByteSource.wrap("
          + Ascii.truncate(BaseEncoding.base16().encode(bytes), 30, "...") + ")";
    }
  }

  /**
   * The singleton byte source containing no bytes, returned by {@link ByteSource#empty()}.
   */
  private static final class EmptyByteSource extends ByteArrayByteSource {

    private static final EmptyByteSource INSTANCE = new EmptyByteSource();

    private EmptyByteSource() {
      super(new byte[0]);
    }

    @Override
    public CharSource asCharSource(Charset charset) {
      checkNotNull(charset);
      return CharSource.empty();
    }

    @Override
    public byte[] read() {
      return bytes; // length is 0, no need to clone
    }

    @Override
    public String toString() {
      return "ByteSource.empty()";
    }
  }

  /**
   * A byte source whose content is the concatenation of the given sources, in iteration order.
   */
  private static final class ConcatenatedByteSource extends ByteSource {

    private final Iterable<? extends ByteSource> sources;

    ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
      this.sources = checkNotNull(sources);
    }

    @Override
    public InputStream openStream() throws IOException {
      return new MultiInputStream(sources.iterator());
    }

    @Override
    public boolean isEmpty() throws IOException {
      // Empty only if every underlying source is empty; stop at the first non-empty one.
      for (ByteSource source : sources) {
        if (!source.isEmpty()) {
          return false;
        }
      }
      return true;
    }

    @Override
    public long size() throws IOException {
      long result = 0L;
      for (ByteSource source : sources) {
        result += source.size();
      }
      return result;
    }

    @Override
    public String toString() {
      return "ByteSource.concat(" + sources + ")";
    }
  }
}