001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.io.ByteStreams.createBuffer;
020import static com.google.common.io.ByteStreams.skipUpTo;
021
022import com.google.common.annotations.Beta;
023import com.google.common.annotations.GwtIncompatible;
024import com.google.common.base.Ascii;
025import com.google.common.base.Optional;
026import com.google.common.collect.ImmutableList;
027import com.google.common.hash.Funnels;
028import com.google.common.hash.HashCode;
029import com.google.common.hash.HashFunction;
030import com.google.common.hash.Hasher;
031import com.google.errorprone.annotations.CanIgnoreReturnValue;
032import java.io.BufferedInputStream;
033import java.io.ByteArrayInputStream;
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.InputStreamReader;
037import java.io.OutputStream;
038import java.io.Reader;
039import java.nio.charset.Charset;
040import java.util.Arrays;
041import java.util.Iterator;
042
043/**
044 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a {@code ByteSource}
045 * is not an open, stateful stream for input that can be read and closed. Instead, it is an
046 * immutable <i>supplier</i> of {@code InputStream} instances.
047 *
048 * <p>{@code ByteSource} provides two kinds of methods:
049 *
050 * <ul>
051 *   <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
052 *       instance each time they are called. The caller is responsible for ensuring that the
053 *       returned stream is closed.
054 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
055 *       typically implemented by opening a stream using one of the methods in the first category,
056 *       doing something and finally closing the stream that was opened.
057 * </ul>
058 *
059 * @since 14.0
060 * @author Colin Decker
061 */
062@GwtIncompatible
063public abstract class ByteSource {
064
065  /** Constructor for use by subclasses. */
066  protected ByteSource() {}
067
068  /**
069   * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
070   * as characters using the given {@link Charset}.
071   *
072   * <p>If {@link CharSource#asByteSource} is called on the returned source with the same charset,
073   * the default implementation of this method will ensure that the original {@code ByteSource} is
074   * returned, rather than round-trip encoding. Subclasses that override this method should behave
075   * the same way.
076   */
077  public CharSource asCharSource(Charset charset) {
078    return new AsCharSource(charset);
079  }
080
  /**
   * Opens a new {@link InputStream} for reading from this source. This method returns a new,
   * independent stream each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned stream is closed.
   *
   * @return a newly opened stream over the bytes of this source
   * @throws IOException if an I/O error occurs while opening the stream
   */
  public abstract InputStream openStream() throws IOException;
090
091  /**
092   * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
093   * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
094   * delegate to {@link #openStream()} when the stream returned by that method does not benefit from
095   * additional buffering (for example, a {@code ByteArrayInputStream}). This method returns a new,
096   * independent stream each time it is called.
097   *
098   * <p>The caller is responsible for ensuring that the returned stream is closed.
099   *
100   * @throws IOException if an I/O error occurs while opening the stream
101   * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
102   */
103  public InputStream openBufferedStream() throws IOException {
104    InputStream in = openStream();
105    return (in instanceof BufferedInputStream)
106        ? (BufferedInputStream) in
107        : new BufferedInputStream(in);
108  }
109
110  /**
111   * Returns a view of a slice of this byte source that is at most {@code length} bytes long
112   * starting at the given {@code offset}. If {@code offset} is greater than the size of this
113   * source, the returned source will be empty. If {@code offset + length} is greater than the size
114   * of this source, the returned source will contain the slice starting at {@code offset} and
115   * ending at the end of this source.
116   *
117   * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
118   */
119  public ByteSource slice(long offset, long length) {
120    return new SlicedByteSource(offset, length);
121  }
122
123  /**
124   * Returns whether the source has zero bytes. The default implementation first checks {@link
125   * #sizeIfKnown}, returning true if it's known to be zero and false if it's known to be non-zero.
126   * If the size is not known, it falls back to opening a stream and checking for EOF.
127   *
128   * <p>Note that, in cases where {@code sizeIfKnown} returns zero, it is <i>possible</i> that bytes
129   * are actually available for reading. (For example, some special files may return a size of 0
130   * despite actually having content when read.) This means that a source may return {@code true}
131   * from {@code isEmpty()} despite having readable content.
132   *
133   * @throws IOException if an I/O error occurs
134   * @since 15.0
135   */
136  public boolean isEmpty() throws IOException {
137    Optional<Long> sizeIfKnown = sizeIfKnown();
138    if (sizeIfKnown.isPresent()) {
139      return sizeIfKnown.get() == 0L;
140    }
141    Closer closer = Closer.create();
142    try {
143      InputStream in = closer.register(openStream());
144      return in.read() == -1;
145    } catch (Throwable e) {
146      throw closer.rethrow(e);
147    } finally {
148      closer.close();
149    }
150  }
151
152  /**
153   * Returns the size of this source in bytes, if the size can be easily determined without actually
154   * opening the data stream.
155   *
156   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a file,
157   * may return a non-absent value. Note that in such cases, it is <i>possible</i> that this method
158   * will return a different number of bytes than would be returned by reading all of the bytes (for
159   * example, some special files may return a size of 0 despite actually having content when read).
160   *
161   * <p>Additionally, for mutable sources such as files, a subsequent read may return a different
162   * number of bytes if the contents are changed.
163   *
164   * @since 19.0
165   */
166  @Beta
167  public Optional<Long> sizeIfKnown() {
168    return Optional.absent();
169  }
170
171  /**
172   * Returns the size of this source in bytes, even if doing so requires opening and traversing an
173   * entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}.
174   *
175   * <p>The default implementation calls {@link #sizeIfKnown} and returns the value if present. If
176   * absent, it will fall back to a heavyweight operation that will open a stream, read (or {@link
177   * InputStream#skip(long) skip}, if possible) to the end of the stream and return the total number
178   * of bytes that were read.
179   *
180   * <p>Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient
181   * implementation, it is <i>possible</i> that this method will return a different number of bytes
182   * than would be returned by reading all of the bytes (for example, some special files may return
183   * a size of 0 despite actually having content when read).
184   *
185   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
186   * number of bytes if the contents are changed.
187   *
188   * @throws IOException if an I/O error occurs while reading the size of this source
189   */
190  public long size() throws IOException {
191    Optional<Long> sizeIfKnown = sizeIfKnown();
192    if (sizeIfKnown.isPresent()) {
193      return sizeIfKnown.get();
194    }
195
196    Closer closer = Closer.create();
197    try {
198      InputStream in = closer.register(openStream());
199      return countBySkipping(in);
200    } catch (IOException e) {
201      // skip may not be supported... at any rate, try reading
202    } finally {
203      closer.close();
204    }
205
206    closer = Closer.create();
207    try {
208      InputStream in = closer.register(openStream());
209      return ByteStreams.exhaust(in);
210    } catch (Throwable e) {
211      throw closer.rethrow(e);
212    } finally {
213      closer.close();
214    }
215  }
216
217  /**
218   * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the
219   * first call to skip threw, in which case skip may just not be supported.
220   */
221  private long countBySkipping(InputStream in) throws IOException {
222    long count = 0;
223    long skipped;
224    while ((skipped = skipUpTo(in, Integer.MAX_VALUE)) > 0) {
225      count += skipped;
226    }
227    return count;
228  }
229
230  /**
231   * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
232   * {@code output}.
233   *
234   * @return the number of bytes copied
235   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
236   *     output}
237   */
238  @CanIgnoreReturnValue
239  public long copyTo(OutputStream output) throws IOException {
240    checkNotNull(output);
241
242    Closer closer = Closer.create();
243    try {
244      InputStream in = closer.register(openStream());
245      return ByteStreams.copy(in, output);
246    } catch (Throwable e) {
247      throw closer.rethrow(e);
248    } finally {
249      closer.close();
250    }
251  }
252
253  /**
254   * Copies the contents of this byte source to the given {@code ByteSink}.
255   *
256   * @return the number of bytes copied
257   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
258   *     sink}
259   */
260  @CanIgnoreReturnValue
261  public long copyTo(ByteSink sink) throws IOException {
262    checkNotNull(sink);
263
264    Closer closer = Closer.create();
265    try {
266      InputStream in = closer.register(openStream());
267      OutputStream out = closer.register(sink.openStream());
268      return ByteStreams.copy(in, out);
269    } catch (Throwable e) {
270      throw closer.rethrow(e);
271    } finally {
272      closer.close();
273    }
274  }
275
276  /**
277   * Reads the full contents of this byte source as a byte array.
278   *
279   * @throws IOException if an I/O error occurs while reading from this source
280   */
281  public byte[] read() throws IOException {
282    Closer closer = Closer.create();
283    try {
284      InputStream in = closer.register(openStream());
285      return ByteStreams.toByteArray(in);
286    } catch (Throwable e) {
287      throw closer.rethrow(e);
288    } finally {
289      closer.close();
290    }
291  }
292
293  /**
294   * Reads the contents of this byte source using the given {@code processor} to process bytes as
295   * they are read. Stops when all bytes have been read or the consumer returns {@code false}.
296   * Returns the result produced by the processor.
297   *
298   * @throws IOException if an I/O error occurs while reading from this source or if {@code
299   *     processor} throws an {@code IOException}
300   * @since 16.0
301   */
302  @Beta
303  @CanIgnoreReturnValue // some processors won't return a useful result
304  public <T> T read(ByteProcessor<T> processor) throws IOException {
305    checkNotNull(processor);
306
307    Closer closer = Closer.create();
308    try {
309      InputStream in = closer.register(openStream());
310      return ByteStreams.readBytes(in, processor);
311    } catch (Throwable e) {
312      throw closer.rethrow(e);
313    } finally {
314      closer.close();
315    }
316  }
317
318  /**
319   * Hashes the contents of this byte source using the given hash function.
320   *
321   * @throws IOException if an I/O error occurs while reading from this source
322   */
323  public HashCode hash(HashFunction hashFunction) throws IOException {
324    Hasher hasher = hashFunction.newHasher();
325    copyTo(Funnels.asOutputStream(hasher));
326    return hasher.hash();
327  }
328
329  /**
330   * Checks that the contents of this byte source are equal to the contents of the given byte
331   * source.
332   *
333   * @throws IOException if an I/O error occurs while reading from this source or {@code other}
334   */
335  public boolean contentEquals(ByteSource other) throws IOException {
336    checkNotNull(other);
337
338    byte[] buf1 = createBuffer();
339    byte[] buf2 = createBuffer();
340
341    Closer closer = Closer.create();
342    try {
343      InputStream in1 = closer.register(openStream());
344      InputStream in2 = closer.register(other.openStream());
345      while (true) {
346        int read1 = ByteStreams.read(in1, buf1, 0, buf1.length);
347        int read2 = ByteStreams.read(in2, buf2, 0, buf2.length);
348        if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
349          return false;
350        } else if (read1 != buf1.length) {
351          return true;
352        }
353      }
354    } catch (Throwable e) {
355      throw closer.rethrow(e);
356    } finally {
357      closer.close();
358    }
359  }
360
361  /**
362   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
363   * the source will contain the concatenated data from the streams of the underlying sources.
364   *
365   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
366   * close the open underlying stream.
367   *
368   * @param sources the sources to concatenate
369   * @return a {@code ByteSource} containing the concatenated data
370   * @since 15.0
371   */
372  public static ByteSource concat(Iterable<? extends ByteSource> sources) {
373    return new ConcatenatedByteSource(sources);
374  }
375
376  /**
377   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
378   * the source will contain the concatenated data from the streams of the underlying sources.
379   *
380   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
381   * close the open underlying stream.
382   *
383   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
384   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
385   * eagerly fetches data for each source when iterated (rather than producing sources that only
386   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
387   * possible.
388   *
389   * @param sources the sources to concatenate
390   * @return a {@code ByteSource} containing the concatenated data
391   * @throws NullPointerException if any of {@code sources} is {@code null}
392   * @since 15.0
393   */
394  public static ByteSource concat(Iterator<? extends ByteSource> sources) {
395    return concat(ImmutableList.copyOf(sources));
396  }
397
398  /**
399   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
400   * the source will contain the concatenated data from the streams of the underlying sources.
401   *
402   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
403   * close the open underlying stream.
404   *
405   * @param sources the sources to concatenate
406   * @return a {@code ByteSource} containing the concatenated data
407   * @throws NullPointerException if any of {@code sources} is {@code null}
408   * @since 15.0
409   */
410  public static ByteSource concat(ByteSource... sources) {
411    return concat(ImmutableList.copyOf(sources));
412  }
413
414  /**
415   * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
416   * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
417   *
418   * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
419   */
420  public static ByteSource wrap(byte[] b) {
421    return new ByteArrayByteSource(b);
422  }
423
424  /**
425   * Returns an immutable {@link ByteSource} that contains no bytes.
426   *
427   * @since 15.0
428   */
429  public static ByteSource empty() {
430    return EmptyByteSource.INSTANCE;
431  }
432
433  /**
434   * A char source that reads bytes from this source and decodes them as characters using a charset.
435   */
436  class AsCharSource extends CharSource {
437
438    final Charset charset;
439
440    AsCharSource(Charset charset) {
441      this.charset = checkNotNull(charset);
442    }
443
444    @Override
445    public ByteSource asByteSource(Charset charset) {
446      if (charset.equals(this.charset)) {
447        return ByteSource.this;
448      }
449      return super.asByteSource(charset);
450    }
451
452    @Override
453    public Reader openStream() throws IOException {
454      return new InputStreamReader(ByteSource.this.openStream(), charset);
455    }
456
457    @Override
458    public String read() throws IOException {
459      // Reading all the data as a byte array is more efficient than the default read()
460      // implementation because:
461      // 1. the string constructor can avoid an extra copy most of the time by correctly sizing the
462      //    internal char array (hard to avoid using StringBuilder)
463      // 2. we avoid extra copies into temporary buffers altogether
464      // The downside is that this will cause us to store the file bytes in memory twice for a short
465      // amount of time.
466      return new String(ByteSource.this.read(), charset);
467    }
468
469    @Override
470    public String toString() {
471      return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
472    }
473  }
474
475  /** A view of a subsection of the containing byte source. */
476  private final class SlicedByteSource extends ByteSource {
477
478    final long offset;
479    final long length;
480
481    SlicedByteSource(long offset, long length) {
482      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
483      checkArgument(length >= 0, "length (%s) may not be negative", length);
484      this.offset = offset;
485      this.length = length;
486    }
487
488    @Override
489    public InputStream openStream() throws IOException {
490      return sliceStream(ByteSource.this.openStream());
491    }
492
493    @Override
494    public InputStream openBufferedStream() throws IOException {
495      return sliceStream(ByteSource.this.openBufferedStream());
496    }
497
498    private InputStream sliceStream(InputStream in) throws IOException {
499      if (offset > 0) {
500        long skipped;
501        try {
502          skipped = ByteStreams.skipUpTo(in, offset);
503        } catch (Throwable e) {
504          Closer closer = Closer.create();
505          closer.register(in);
506          try {
507            throw closer.rethrow(e);
508          } finally {
509            closer.close();
510          }
511        }
512
513        if (skipped < offset) {
514          // offset was beyond EOF
515          in.close();
516          return new ByteArrayInputStream(new byte[0]);
517        }
518      }
519      return ByteStreams.limit(in, length);
520    }
521
522    @Override
523    public ByteSource slice(long offset, long length) {
524      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
525      checkArgument(length >= 0, "length (%s) may not be negative", length);
526      long maxLength = this.length - offset;
527      return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
528    }
529
530    @Override
531    public boolean isEmpty() throws IOException {
532      return length == 0 || super.isEmpty();
533    }
534
535    @Override
536    public Optional<Long> sizeIfKnown() {
537      Optional<Long> optionalUnslicedSize = ByteSource.this.sizeIfKnown();
538      if (optionalUnslicedSize.isPresent()) {
539        long unslicedSize = optionalUnslicedSize.get();
540        long off = Math.min(offset, unslicedSize);
541        return Optional.of(Math.min(length, unslicedSize - off));
542      }
543      return Optional.absent();
544    }
545
546    @Override
547    public String toString() {
548      return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
549    }
550  }
551
552  private static class ByteArrayByteSource extends ByteSource {
553
554    final byte[] bytes;
555    final int offset;
556    final int length;
557
558    ByteArrayByteSource(byte[] bytes) {
559      this(bytes, 0, bytes.length);
560    }
561
562    // NOTE: Preconditions are enforced by slice, the only non-trivial caller.
563    ByteArrayByteSource(byte[] bytes, int offset, int length) {
564      this.bytes = bytes;
565      this.offset = offset;
566      this.length = length;
567    }
568
569    @Override
570    public InputStream openStream() {
571      return new ByteArrayInputStream(bytes, offset, length);
572    }
573
574    @Override
575    public InputStream openBufferedStream() throws IOException {
576      return openStream();
577    }
578
579    @Override
580    public boolean isEmpty() {
581      return length == 0;
582    }
583
584    @Override
585    public long size() {
586      return length;
587    }
588
589    @Override
590    public Optional<Long> sizeIfKnown() {
591      return Optional.of((long) length);
592    }
593
594    @Override
595    public byte[] read() {
596      return Arrays.copyOfRange(bytes, offset, offset + length);
597    }
598
599    @Override
600    public long copyTo(OutputStream output) throws IOException {
601      output.write(bytes, offset, length);
602      return length;
603    }
604
605    @SuppressWarnings("CheckReturnValue") // it doesn't matter what processBytes returns here
606    @Override
607    public <T> T read(ByteProcessor<T> processor) throws IOException {
608      processor.processBytes(bytes, offset, length);
609      return processor.getResult();
610    }
611
612    @Override
613    public HashCode hash(HashFunction hashFunction) throws IOException {
614      return hashFunction.hashBytes(bytes, offset, length);
615    }
616
617    @Override
618    public ByteSource slice(long offset, long length) {
619      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
620      checkArgument(length >= 0, "length (%s) may not be negative", length);
621
622      offset = Math.min(offset, this.length);
623      length = Math.min(length, this.length - offset);
624      int newOffset = this.offset + (int) offset;
625      return new ByteArrayByteSource(bytes, newOffset, (int) length);
626    }
627
628    @Override
629    public String toString() {
630      return "ByteSource.wrap("
631          + Ascii.truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...")
632          + ")";
633    }
634  }
635
636  private static final class EmptyByteSource extends ByteArrayByteSource {
637
638    static final EmptyByteSource INSTANCE = new EmptyByteSource();
639
640    EmptyByteSource() {
641      super(new byte[0]);
642    }
643
644    @Override
645    public CharSource asCharSource(Charset charset) {
646      checkNotNull(charset);
647      return CharSource.empty();
648    }
649
650    @Override
651    public byte[] read() {
652      return bytes; // length is 0, no need to clone
653    }
654
655    @Override
656    public String toString() {
657      return "ByteSource.empty()";
658    }
659  }
660
661  private static final class ConcatenatedByteSource extends ByteSource {
662
663    final Iterable<? extends ByteSource> sources;
664
665    ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
666      this.sources = checkNotNull(sources);
667    }
668
669    @Override
670    public InputStream openStream() throws IOException {
671      return new MultiInputStream(sources.iterator());
672    }
673
674    @Override
675    public boolean isEmpty() throws IOException {
676      for (ByteSource source : sources) {
677        if (!source.isEmpty()) {
678          return false;
679        }
680      }
681      return true;
682    }
683
684    @Override
685    public Optional<Long> sizeIfKnown() {
686      long result = 0L;
687      for (ByteSource source : sources) {
688        Optional<Long> sizeIfKnown = source.sizeIfKnown();
689        if (!sizeIfKnown.isPresent()) {
690          return Optional.absent();
691        }
692        result += sizeIfKnown.get();
693      }
694      return Optional.of(result);
695    }
696
697    @Override
698    public long size() throws IOException {
699      long result = 0L;
700      for (ByteSource source : sources) {
701        result += source.size();
702      }
703      return result;
704    }
705
706    @Override
707    public String toString() {
708      return "ByteSource.concat(" + sources + ")";
709    }
710  }
711}