001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.io.ByteStreams.createBuffer;
020import static com.google.common.io.ByteStreams.skipUpTo;
021
022import com.google.common.annotations.Beta;
023import com.google.common.annotations.GwtIncompatible;
024import com.google.common.base.Ascii;
025import com.google.common.base.Optional;
026import com.google.common.collect.ImmutableList;
027import com.google.common.hash.Funnels;
028import com.google.common.hash.HashCode;
029import com.google.common.hash.HashFunction;
030import com.google.common.hash.Hasher;
031import com.google.errorprone.annotations.CanIgnoreReturnValue;
032import java.io.BufferedInputStream;
033import java.io.ByteArrayInputStream;
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.InputStreamReader;
037import java.io.OutputStream;
038import java.io.Reader;
039import java.nio.charset.Charset;
040import java.util.Arrays;
041import java.util.Collection;
042import java.util.Iterator;
043
044/**
045 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a {@code ByteSource}
046 * is not an open, stateful stream for input that can be read and closed. Instead, it is an
047 * immutable <i>supplier</i> of {@code InputStream} instances.
048 *
049 * <p>{@code ByteSource} provides two kinds of methods:
050 *
051 * <ul>
052 *   <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
053 *       instance each time they are called. The caller is responsible for ensuring that the
054 *       returned stream is closed.
055 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
056 *       typically implemented by opening a stream using one of the methods in the first category,
057 *       doing something and finally closing the stream that was opened.
058 * </ul>
059 *
060 * @since 14.0
061 * @author Colin Decker
062 */
063@GwtIncompatible
064public abstract class ByteSource {
065
  /**
   * Constructor for use by subclasses. It takes no arguments and its body is empty, so a subclass
   * has nothing to pass up when it is instantiated.
   */
  protected ByteSource() {}
068
069  /**
070   * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
071   * as characters using the given {@link Charset}.
072   *
073   * <p>If {@link CharSource#asByteSource} is called on the returned source with the same charset,
074   * the default implementation of this method will ensure that the original {@code ByteSource} is
075   * returned, rather than round-trip encoding. Subclasses that override this method should behave
076   * the same way.
077   */
078  public CharSource asCharSource(Charset charset) {
079    return new AsCharSource(charset);
080  }
081
  /**
   * Opens a new {@link InputStream} for reading from this source. This method returns a new,
   * independent stream each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned stream is closed.
   *
   * <p>This is the one abstract method of this class: the default implementations of the
   * convenience methods (for example {@link #read()}, {@link #copyTo(OutputStream)} and {@link
   * #size()}) obtain their data by calling it.
   *
   * @return a new stream over the bytes of this source
   * @throws IOException if an I/O error occurs while opening the stream
   */
  public abstract InputStream openStream() throws IOException;
091
092  /**
093   * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
094   * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
095   * delegate to {@link #openStream()} when the stream returned by that method does not benefit from
096   * additional buffering (for example, a {@code ByteArrayInputStream}). This method returns a new,
097   * independent stream each time it is called.
098   *
099   * <p>The caller is responsible for ensuring that the returned stream is closed.
100   *
101   * @throws IOException if an I/O error occurs while opening the stream
102   * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
103   */
104  public InputStream openBufferedStream() throws IOException {
105    InputStream in = openStream();
106    return (in instanceof BufferedInputStream)
107        ? (BufferedInputStream) in
108        : new BufferedInputStream(in);
109  }
110
111  /**
112   * Returns a view of a slice of this byte source that is at most {@code length} bytes long
113   * starting at the given {@code offset}. If {@code offset} is greater than the size of this
114   * source, the returned source will be empty. If {@code offset + length} is greater than the size
115   * of this source, the returned source will contain the slice starting at {@code offset} and
116   * ending at the end of this source.
117   *
118   * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
119   */
120  public ByteSource slice(long offset, long length) {
121    return new SlicedByteSource(offset, length);
122  }
123
124  /**
125   * Returns whether the source has zero bytes. The default implementation first checks {@link
126   * #sizeIfKnown}, returning true if it's known to be zero and false if it's known to be non-zero.
127   * If the size is not known, it falls back to opening a stream and checking for EOF.
128   *
129   * <p>Note that, in cases where {@code sizeIfKnown} returns zero, it is <i>possible</i> that bytes
130   * are actually available for reading. (For example, some special files may return a size of 0
131   * despite actually having content when read.) This means that a source may return {@code true}
132   * from {@code isEmpty()} despite having readable content.
133   *
134   * @throws IOException if an I/O error occurs
135   * @since 15.0
136   */
137  public boolean isEmpty() throws IOException {
138    Optional<Long> sizeIfKnown = sizeIfKnown();
139    if (sizeIfKnown.isPresent()) {
140      return sizeIfKnown.get() == 0L;
141    }
142    Closer closer = Closer.create();
143    try {
144      InputStream in = closer.register(openStream());
145      return in.read() == -1;
146    } catch (Throwable e) {
147      throw closer.rethrow(e);
148    } finally {
149      closer.close();
150    }
151  }
152
153  /**
154   * Returns the size of this source in bytes, if the size can be easily determined without actually
155   * opening the data stream.
156   *
157   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a file,
158   * may return a non-absent value. Note that in such cases, it is <i>possible</i> that this method
159   * will return a different number of bytes than would be returned by reading all of the bytes (for
160   * example, some special files may return a size of 0 despite actually having content when read).
161   *
162   * <p>Additionally, for mutable sources such as files, a subsequent read may return a different
163   * number of bytes if the contents are changed.
164   *
165   * @since 19.0
166   */
167  @Beta
168  public Optional<Long> sizeIfKnown() {
169    return Optional.absent();
170  }
171
172  /**
173   * Returns the size of this source in bytes, even if doing so requires opening and traversing an
174   * entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}.
175   *
176   * <p>The default implementation calls {@link #sizeIfKnown} and returns the value if present. If
177   * absent, it will fall back to a heavyweight operation that will open a stream, read (or {@link
178   * InputStream#skip(long) skip}, if possible) to the end of the stream and return the total number
179   * of bytes that were read.
180   *
181   * <p>Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient
182   * implementation, it is <i>possible</i> that this method will return a different number of bytes
183   * than would be returned by reading all of the bytes (for example, some special files may return
184   * a size of 0 despite actually having content when read).
185   *
186   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
187   * number of bytes if the contents are changed.
188   *
189   * @throws IOException if an I/O error occurs while reading the size of this source
190   */
191  public long size() throws IOException {
192    Optional<Long> sizeIfKnown = sizeIfKnown();
193    if (sizeIfKnown.isPresent()) {
194      return sizeIfKnown.get();
195    }
196
197    Closer closer = Closer.create();
198    try {
199      InputStream in = closer.register(openStream());
200      return countBySkipping(in);
201    } catch (IOException e) {
202      // skip may not be supported... at any rate, try reading
203    } finally {
204      closer.close();
205    }
206
207    closer = Closer.create();
208    try {
209      InputStream in = closer.register(openStream());
210      return ByteStreams.exhaust(in);
211    } catch (Throwable e) {
212      throw closer.rethrow(e);
213    } finally {
214      closer.close();
215    }
216  }
217
218  /** Counts the bytes in the given input stream using skip if possible. */
219  private long countBySkipping(InputStream in) throws IOException {
220    long count = 0;
221    long skipped;
222    while ((skipped = skipUpTo(in, Integer.MAX_VALUE)) > 0) {
223      count += skipped;
224    }
225    return count;
226  }
227
228  /**
229   * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
230   * {@code output}.
231   *
232   * @return the number of bytes copied
233   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
234   *     output}
235   */
236  @CanIgnoreReturnValue
237  public long copyTo(OutputStream output) throws IOException {
238    checkNotNull(output);
239
240    Closer closer = Closer.create();
241    try {
242      InputStream in = closer.register(openStream());
243      return ByteStreams.copy(in, output);
244    } catch (Throwable e) {
245      throw closer.rethrow(e);
246    } finally {
247      closer.close();
248    }
249  }
250
251  /**
252   * Copies the contents of this byte source to the given {@code ByteSink}.
253   *
254   * @return the number of bytes copied
255   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
256   *     sink}
257   */
258  @CanIgnoreReturnValue
259  public long copyTo(ByteSink sink) throws IOException {
260    checkNotNull(sink);
261
262    Closer closer = Closer.create();
263    try {
264      InputStream in = closer.register(openStream());
265      OutputStream out = closer.register(sink.openStream());
266      return ByteStreams.copy(in, out);
267    } catch (Throwable e) {
268      throw closer.rethrow(e);
269    } finally {
270      closer.close();
271    }
272  }
273
274  /**
275   * Reads the full contents of this byte source as a byte array.
276   *
277   * @throws IOException if an I/O error occurs while reading from this source
278   */
279  public byte[] read() throws IOException {
280    Closer closer = Closer.create();
281    try {
282      InputStream in = closer.register(openStream());
283      Optional<Long> size = sizeIfKnown();
284      return size.isPresent()
285          ? ByteStreams.toByteArray(in, size.get())
286          : ByteStreams.toByteArray(in);
287    } catch (Throwable e) {
288      throw closer.rethrow(e);
289    } finally {
290      closer.close();
291    }
292  }
293
294  /**
295   * Reads the contents of this byte source using the given {@code processor} to process bytes as
296   * they are read. Stops when all bytes have been read or the consumer returns {@code false}.
297   * Returns the result produced by the processor.
298   *
299   * @throws IOException if an I/O error occurs while reading from this source or if {@code
300   *     processor} throws an {@code IOException}
301   * @since 16.0
302   */
303  @Beta
304  @CanIgnoreReturnValue // some processors won't return a useful result
305  public <T> T read(ByteProcessor<T> processor) throws IOException {
306    checkNotNull(processor);
307
308    Closer closer = Closer.create();
309    try {
310      InputStream in = closer.register(openStream());
311      return ByteStreams.readBytes(in, processor);
312    } catch (Throwable e) {
313      throw closer.rethrow(e);
314    } finally {
315      closer.close();
316    }
317  }
318
319  /**
320   * Hashes the contents of this byte source using the given hash function.
321   *
322   * @throws IOException if an I/O error occurs while reading from this source
323   */
324  public HashCode hash(HashFunction hashFunction) throws IOException {
325    Hasher hasher = hashFunction.newHasher();
326    copyTo(Funnels.asOutputStream(hasher));
327    return hasher.hash();
328  }
329
330  /**
331   * Checks that the contents of this byte source are equal to the contents of the given byte
332   * source.
333   *
334   * @throws IOException if an I/O error occurs while reading from this source or {@code other}
335   */
336  public boolean contentEquals(ByteSource other) throws IOException {
337    checkNotNull(other);
338
339    byte[] buf1 = createBuffer();
340    byte[] buf2 = createBuffer();
341
342    Closer closer = Closer.create();
343    try {
344      InputStream in1 = closer.register(openStream());
345      InputStream in2 = closer.register(other.openStream());
346      while (true) {
347        int read1 = ByteStreams.read(in1, buf1, 0, buf1.length);
348        int read2 = ByteStreams.read(in2, buf2, 0, buf2.length);
349        if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
350          return false;
351        } else if (read1 != buf1.length) {
352          return true;
353        }
354      }
355    } catch (Throwable e) {
356      throw closer.rethrow(e);
357    } finally {
358      closer.close();
359    }
360  }
361
362  /**
363   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
364   * the source will contain the concatenated data from the streams of the underlying sources.
365   *
366   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
367   * close the open underlying stream.
368   *
369   * @param sources the sources to concatenate
370   * @return a {@code ByteSource} containing the concatenated data
371   * @since 15.0
372   */
373  public static ByteSource concat(Iterable<? extends ByteSource> sources) {
374    return new ConcatenatedByteSource(sources);
375  }
376
377  /**
378   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
379   * the source will contain the concatenated data from the streams of the underlying sources.
380   *
381   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
382   * close the open underlying stream.
383   *
384   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
385   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
386   * eagerly fetches data for each source when iterated (rather than producing sources that only
387   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
388   * possible.
389   *
390   * @param sources the sources to concatenate
391   * @return a {@code ByteSource} containing the concatenated data
392   * @throws NullPointerException if any of {@code sources} is {@code null}
393   * @since 15.0
394   */
395  public static ByteSource concat(Iterator<? extends ByteSource> sources) {
396    return concat(ImmutableList.copyOf(sources));
397  }
398
399  /**
400   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
401   * the source will contain the concatenated data from the streams of the underlying sources.
402   *
403   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
404   * close the open underlying stream.
405   *
406   * @param sources the sources to concatenate
407   * @return a {@code ByteSource} containing the concatenated data
408   * @throws NullPointerException if any of {@code sources} is {@code null}
409   * @since 15.0
410   */
411  public static ByteSource concat(ByteSource... sources) {
412    return concat(ImmutableList.copyOf(sources));
413  }
414
415  /**
416   * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
417   * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
418   *
419   * <p>Note that the given byte array may be be passed directly to methods on, for example, {@code
420   * OutputStream} (when {@code copyTo(OutputStream)} is called on the resulting {@code
421   * ByteSource}). This could allow a malicious {@code OutputStream} implementation to modify the
422   * contents of the array, but provides better performance in the normal case.
423   *
424   * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
425   */
426  public static ByteSource wrap(byte[] b) {
427    return new ByteArrayByteSource(b);
428  }
429
430  /**
431   * Returns an immutable {@link ByteSource} that contains no bytes.
432   *
433   * @since 15.0
434   */
435  public static ByteSource empty() {
436    return EmptyByteSource.INSTANCE;
437  }
438
439  /**
440   * A char source that reads bytes from this source and decodes them as characters using a charset.
441   */
442  class AsCharSource extends CharSource {
443
444    final Charset charset;
445
446    AsCharSource(Charset charset) {
447      this.charset = checkNotNull(charset);
448    }
449
450    @Override
451    public ByteSource asByteSource(Charset charset) {
452      if (charset.equals(this.charset)) {
453        return ByteSource.this;
454      }
455      return super.asByteSource(charset);
456    }
457
458    @Override
459    public Reader openStream() throws IOException {
460      return new InputStreamReader(ByteSource.this.openStream(), charset);
461    }
462
463    @Override
464    public String read() throws IOException {
465      // Reading all the data as a byte array is more efficient than the default read()
466      // implementation because:
467      // 1. the string constructor can avoid an extra copy most of the time by correctly sizing the
468      //    internal char array (hard to avoid using StringBuilder)
469      // 2. we avoid extra copies into temporary buffers altogether
470      // The downside is that this will cause us to store the file bytes in memory twice for a short
471      // amount of time.
472      return new String(ByteSource.this.read(), charset);
473    }
474
475    @Override
476    public String toString() {
477      return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
478    }
479  }
480
481  /** A view of a subsection of the containing byte source. */
482  private final class SlicedByteSource extends ByteSource {
483
484    final long offset;
485    final long length;
486
487    SlicedByteSource(long offset, long length) {
488      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
489      checkArgument(length >= 0, "length (%s) may not be negative", length);
490      this.offset = offset;
491      this.length = length;
492    }
493
494    @Override
495    public InputStream openStream() throws IOException {
496      return sliceStream(ByteSource.this.openStream());
497    }
498
499    @Override
500    public InputStream openBufferedStream() throws IOException {
501      return sliceStream(ByteSource.this.openBufferedStream());
502    }
503
504    private InputStream sliceStream(InputStream in) throws IOException {
505      if (offset > 0) {
506        long skipped;
507        try {
508          skipped = ByteStreams.skipUpTo(in, offset);
509        } catch (Throwable e) {
510          Closer closer = Closer.create();
511          closer.register(in);
512          try {
513            throw closer.rethrow(e);
514          } finally {
515            closer.close();
516          }
517        }
518
519        if (skipped < offset) {
520          // offset was beyond EOF
521          in.close();
522          return new ByteArrayInputStream(new byte[0]);
523        }
524      }
525      return ByteStreams.limit(in, length);
526    }
527
528    @Override
529    public ByteSource slice(long offset, long length) {
530      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
531      checkArgument(length >= 0, "length (%s) may not be negative", length);
532      long maxLength = this.length - offset;
533      return maxLength <= 0
534          ? ByteSource.empty()
535          : ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
536    }
537
538    @Override
539    public boolean isEmpty() throws IOException {
540      return length == 0 || super.isEmpty();
541    }
542
543    @Override
544    public Optional<Long> sizeIfKnown() {
545      Optional<Long> optionalUnslicedSize = ByteSource.this.sizeIfKnown();
546      if (optionalUnslicedSize.isPresent()) {
547        long unslicedSize = optionalUnslicedSize.get();
548        long off = Math.min(offset, unslicedSize);
549        return Optional.of(Math.min(length, unslicedSize - off));
550      }
551      return Optional.absent();
552    }
553
554    @Override
555    public String toString() {
556      return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
557    }
558  }
559
560  private static class ByteArrayByteSource extends ByteSource {
561
562    final byte[] bytes;
563    final int offset;
564    final int length;
565
566    ByteArrayByteSource(byte[] bytes) {
567      this(bytes, 0, bytes.length);
568    }
569
570    // NOTE: Preconditions are enforced by slice, the only non-trivial caller.
571    ByteArrayByteSource(byte[] bytes, int offset, int length) {
572      this.bytes = bytes;
573      this.offset = offset;
574      this.length = length;
575    }
576
577    @Override
578    public InputStream openStream() {
579      return new ByteArrayInputStream(bytes, offset, length);
580    }
581
582    @Override
583    public InputStream openBufferedStream() throws IOException {
584      return openStream();
585    }
586
587    @Override
588    public boolean isEmpty() {
589      return length == 0;
590    }
591
592    @Override
593    public long size() {
594      return length;
595    }
596
597    @Override
598    public Optional<Long> sizeIfKnown() {
599      return Optional.of((long) length);
600    }
601
602    @Override
603    public byte[] read() {
604      return Arrays.copyOfRange(bytes, offset, offset + length);
605    }
606
607    @SuppressWarnings("CheckReturnValue") // it doesn't matter what processBytes returns here
608    @Override
609    public <T> T read(ByteProcessor<T> processor) throws IOException {
610      processor.processBytes(bytes, offset, length);
611      return processor.getResult();
612    }
613
614    @Override
615    public long copyTo(OutputStream output) throws IOException {
616      output.write(bytes, offset, length);
617      return length;
618    }
619
620    @Override
621    public HashCode hash(HashFunction hashFunction) throws IOException {
622      return hashFunction.hashBytes(bytes, offset, length);
623    }
624
625    @Override
626    public ByteSource slice(long offset, long length) {
627      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
628      checkArgument(length >= 0, "length (%s) may not be negative", length);
629
630      offset = Math.min(offset, this.length);
631      length = Math.min(length, this.length - offset);
632      int newOffset = this.offset + (int) offset;
633      return new ByteArrayByteSource(bytes, newOffset, (int) length);
634    }
635
636    @Override
637    public String toString() {
638      return "ByteSource.wrap("
639          + Ascii.truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...")
640          + ")";
641    }
642  }
643
644  private static final class EmptyByteSource extends ByteArrayByteSource {
645
646    static final EmptyByteSource INSTANCE = new EmptyByteSource();
647
648    EmptyByteSource() {
649      super(new byte[0]);
650    }
651
652    @Override
653    public CharSource asCharSource(Charset charset) {
654      checkNotNull(charset);
655      return CharSource.empty();
656    }
657
658    @Override
659    public byte[] read() {
660      return bytes; // length is 0, no need to clone
661    }
662
663    @Override
664    public String toString() {
665      return "ByteSource.empty()";
666    }
667  }
668
669  private static final class ConcatenatedByteSource extends ByteSource {
670
671    final Iterable<? extends ByteSource> sources;
672
673    ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
674      this.sources = checkNotNull(sources);
675    }
676
677    @Override
678    public InputStream openStream() throws IOException {
679      return new MultiInputStream(sources.iterator());
680    }
681
682    @Override
683    public boolean isEmpty() throws IOException {
684      for (ByteSource source : sources) {
685        if (!source.isEmpty()) {
686          return false;
687        }
688      }
689      return true;
690    }
691
692    @Override
693    public Optional<Long> sizeIfKnown() {
694      if (!(sources instanceof Collection)) {
695        // Infinite Iterables can cause problems here. Of course, it's true that most of the other
696        // methods on this class also have potential problems with infinite  Iterables. But unlike
697        // those, this method can cause issues even if the user is dealing with a (finite) slice()
698        // of this source, since the slice's sizeIfKnown() method needs to know the size of the
699        // underlying source to know what its size actually is.
700        return Optional.absent();
701      }
702      long result = 0L;
703      for (ByteSource source : sources) {
704        Optional<Long> sizeIfKnown = source.sizeIfKnown();
705        if (!sizeIfKnown.isPresent()) {
706          return Optional.absent();
707        }
708        result += sizeIfKnown.get();
709        if (result < 0) {
710          // Overflow (or one or more sources that returned a negative size, but all bets are off in
711          // that case)
712          // Can't represent anything higher, and realistically there probably isn't anything that
713          // can actually be done anyway with the supposed 8+ exbibytes of data the source is
714          // claiming to have if we get here, so just stop.
715          return Optional.of(Long.MAX_VALUE);
716        }
717      }
718      return Optional.of(result);
719    }
720
721    @Override
722    public long size() throws IOException {
723      long result = 0L;
724      for (ByteSource source : sources) {
725        result += source.size();
726        if (result < 0) {
727          // Overflow (or one or more sources that returned a negative size, but all bets are off in
728          // that case)
729          // Can't represent anything higher, and realistically there probably isn't anything that
730          // can actually be done anyway with the supposed 8+ exbibytes of data the source is
731          // claiming to have if we get here, so just stop.
732          return Long.MAX_VALUE;
733        }
734      }
735      return result;
736    }
737
738    @Override
739    public String toString() {
740      return "ByteSource.concat(" + sources + ")";
741    }
742  }
743}