001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.io;
018
019import static com.google.common.base.Preconditions.checkArgument;
020import static com.google.common.base.Preconditions.checkNotNull;
021import static com.google.common.io.ByteStreams.BUF_SIZE;
022import static com.google.common.io.ByteStreams.skipUpTo;
023
024import com.google.common.annotations.Beta;
025import com.google.common.base.Ascii;
026import com.google.common.base.Optional;
027import com.google.common.collect.ImmutableList;
028import com.google.common.hash.Funnels;
029import com.google.common.hash.HashCode;
030import com.google.common.hash.HashFunction;
031import com.google.common.hash.Hasher;
032
033import java.io.BufferedInputStream;
034import java.io.ByteArrayInputStream;
035import java.io.IOException;
036import java.io.InputStream;
037import java.io.InputStreamReader;
038import java.io.OutputStream;
039import java.io.Reader;
040import java.nio.charset.Charset;
041import java.util.Arrays;
042import java.util.Iterator;
043
044/**
045 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a
046 * {@code ByteSource} is not an open, stateful stream for input that can be read and closed.
047 * Instead, it is an immutable <i>supplier</i> of {@code InputStream} instances.
048 *
049 * <p>{@code ByteSource} provides two kinds of methods:
050 * <ul>
051 *   <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
052 *   instance each time they are called. The caller is responsible for ensuring that the returned
053 *   stream is closed.
054 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
055 *   typically implemented by opening a stream using one of the methods in the first category, doing
056 *   something and finally closing the stream that was opened.
057 * </ul>
058 *
059 * @since 14.0
060 * @author Colin Decker
061 */
062public abstract class ByteSource {
063
  /**
   * Constructor for use by subclasses. It performs no initialization of its own.
   */
  protected ByteSource() {}
068
069  /**
070   * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
071   * as characters using the given {@link Charset}.
072   */
073  public CharSource asCharSource(Charset charset) {
074    return new AsCharSource(charset);
075  }
076
  /**
   * Opens a new {@link InputStream} for reading from this source. This method should return a new,
   * independent stream each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned stream is closed.
   *
   * @return a newly opened stream over this source
   * @throws IOException if an I/O error occurs in the process of opening the stream
   */
  public abstract InputStream openStream() throws IOException;
086
087  /**
088   * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
089   * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
090   * delegate to {@link #openStream()} when the stream returned by that method does not benefit
091   * from additional buffering (for example, a {@code ByteArrayInputStream}). This method should
092   * return a new, independent stream each time it is called.
093   *
094   * <p>The caller is responsible for ensuring that the returned stream is closed.
095   *
096   * @throws IOException if an I/O error occurs in the process of opening the stream
097   * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
098   */
099  public InputStream openBufferedStream() throws IOException {
100    InputStream in = openStream();
101    return (in instanceof BufferedInputStream)
102        ? (BufferedInputStream) in
103        : new BufferedInputStream(in);
104  }
105
106  /**
107   * Returns a view of a slice of this byte source that is at most {@code length} bytes long
108   * starting at the given {@code offset}. If {@code offset} is greater than the size of this
109   * source, the returned source will be empty. If {@code offset + length} is greater than the size
110   * of this source, the returned source will contain the slice starting at {@code offset} and
111   * ending at the end of this source.
112   *
113   * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
114   */
115  public ByteSource slice(long offset, long length) {
116    return new SlicedByteSource(offset, length);
117  }
118
119  /**
120   * Returns whether the source has zero bytes. The default implementation returns true if
121   * {@link #sizeIfKnown} returns zero, falling back to opening a stream and checking for
122   * EOF if the size is not known.
123   *
124   * <p>Note that, in cases where {@code sizeIfKnown} returns zero, it is <i>possible</i> that bytes
125   * are actually available for reading. (For example, some special files may return a size of 0
126   * despite actually having content when read.) This means that a source may return {@code true}
127   * from {@code isEmpty()} despite having readable content.
128   *
129   * @throws IOException if an I/O error occurs
130   * @since 15.0
131   */
132  public boolean isEmpty() throws IOException {
133    Optional<Long> sizeIfKnown = sizeIfKnown();
134    if (sizeIfKnown.isPresent() && sizeIfKnown.get() == 0L) {
135      return true;
136    }
137    Closer closer = Closer.create();
138    try {
139      InputStream in = closer.register(openStream());
140      return in.read() == -1;
141    } catch (Throwable e) {
142      throw closer.rethrow(e);
143    } finally {
144      closer.close();
145    }
146  }
147
148  /**
149   * Returns the size of this source in bytes, if the size can be easily determined without
150   * actually opening the data stream.
151   *
152   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a file,
153   * may return a non-absent value. Note that in such cases, it is <i>possible</i> that this method
154   * will return a different number of bytes than would be returned by reading all of the bytes (for
155   * example, some special files may return a size of 0 despite actually having content when read).
156   *
157   * <p>Additionally, for mutable sources such as files, a subsequent read may return a different
158   * number of bytes if the contents are changed.
159   *
160   * @since 19.0
161   */
162  @Beta
163  public Optional<Long> sizeIfKnown() {
164    return Optional.absent();
165  }
166
167  /**
168   * Returns the size of this source in bytes, even if doing so requires opening and traversing
169   * an entire stream. To avoid a potentially expensive operation, see {@link #sizeIfKnown}.
170   *
171   * <p>The default implementation calls {@link #sizeIfKnown} and returns the value if present.
172   * If absent, it will fall back to a heavyweight operation that will open a stream, read (or
173   * {@link InputStream#skip(long) skip}, if possible) to the end of the stream and return the total
174   * number of bytes that were read.
175   *
176   * <p>Note that for some sources that implement {@link #sizeIfKnown} to provide a more efficient
177   * implementation, it is <i>possible</i> that this method will return a different number of bytes
178   * than would be returned by reading all of the bytes (for example, some special files may return
179   * a size of 0 despite actually having content when read).
180   *
181   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
182   * number of bytes if the contents are changed.
183   *
184   * @throws IOException if an I/O error occurs in the process of reading the size of this source
185   */
186  public long size() throws IOException {
187    Optional<Long> sizeIfKnown = sizeIfKnown();
188    if (sizeIfKnown.isPresent()) {
189      return sizeIfKnown.get();
190    }
191
192    Closer closer = Closer.create();
193    try {
194      InputStream in = closer.register(openStream());
195      return countBySkipping(in);
196    } catch (IOException e) {
197      // skip may not be supported... at any rate, try reading
198    } finally {
199      closer.close();
200    }
201
202    closer = Closer.create();
203    try {
204      InputStream in = closer.register(openStream());
205      return countByReading(in);
206    } catch (Throwable e) {
207      throw closer.rethrow(e);
208    } finally {
209      closer.close();
210    }
211  }
212
213  /**
214   * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the
215   * first call to skip threw, in which case skip may just not be supported.
216   */
217  private long countBySkipping(InputStream in) throws IOException {
218    long count = 0;
219    long skipped;
220    while ((skipped = skipUpTo(in, Integer.MAX_VALUE)) > 0) {
221      count += skipped;
222    }
223    return count;
224  }
225
226  private long countByReading(InputStream in) throws IOException {
227    long count = 0;
228    long read;
229    while ((read = in.read(ByteStreams.skipBuffer)) != -1) {
230      count += read;
231    }
232    return count;
233  }
234
235  /**
236   * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
237   * {@code output}.
238   *
239   * @throws IOException if an I/O error occurs in the process of reading from this source or
240   *     writing to {@code output}
241   */
242  public long copyTo(OutputStream output) throws IOException {
243    checkNotNull(output);
244
245    Closer closer = Closer.create();
246    try {
247      InputStream in = closer.register(openStream());
248      return ByteStreams.copy(in, output);
249    } catch (Throwable e) {
250      throw closer.rethrow(e);
251    } finally {
252      closer.close();
253    }
254  }
255
256  /**
257   * Copies the contents of this byte source to the given {@code ByteSink}.
258   *
259   * @throws IOException if an I/O error occurs in the process of reading from this source or
260   *     writing to {@code sink}
261   */
262  public long copyTo(ByteSink sink) throws IOException {
263    checkNotNull(sink);
264
265    Closer closer = Closer.create();
266    try {
267      InputStream in = closer.register(openStream());
268      OutputStream out = closer.register(sink.openStream());
269      return ByteStreams.copy(in, out);
270    } catch (Throwable e) {
271      throw closer.rethrow(e);
272    } finally {
273      closer.close();
274    }
275  }
276
277  /**
278   * Reads the full contents of this byte source as a byte array.
279   *
280   * @throws IOException if an I/O error occurs in the process of reading from this source
281   */
282  public byte[] read() throws IOException {
283    Closer closer = Closer.create();
284    try {
285      InputStream in = closer.register(openStream());
286      return ByteStreams.toByteArray(in);
287    } catch (Throwable e) {
288      throw closer.rethrow(e);
289    } finally {
290      closer.close();
291    }
292  }
293
294  /**
295   * Reads the contents of this byte source using the given {@code processor} to process bytes as
296   * they are read. Stops when all bytes have been read or the consumer returns {@code false}.
297   * Returns the result produced by the processor.
298   *
299   * @throws IOException if an I/O error occurs in the process of reading from this source or if
300   *     {@code processor} throws an {@code IOException}
301   * @since 16.0
302   */
303  @Beta
304  public <T> T read(ByteProcessor<T> processor) throws IOException {
305    checkNotNull(processor);
306
307    Closer closer = Closer.create();
308    try {
309      InputStream in = closer.register(openStream());
310      return ByteStreams.readBytes(in, processor);
311    } catch (Throwable e) {
312      throw closer.rethrow(e);
313    } finally {
314      closer.close();
315    }
316  }
317
318  /**
319   * Hashes the contents of this byte source using the given hash function.
320   *
321   * @throws IOException if an I/O error occurs in the process of reading from this source
322   */
323  public HashCode hash(HashFunction hashFunction) throws IOException {
324    Hasher hasher = hashFunction.newHasher();
325    copyTo(Funnels.asOutputStream(hasher));
326    return hasher.hash();
327  }
328
329  /**
330   * Checks that the contents of this byte source are equal to the contents of the given byte
331   * source.
332   *
333   * @throws IOException if an I/O error occurs in the process of reading from this source or
334   *     {@code other}
335   */
336  public boolean contentEquals(ByteSource other) throws IOException {
337    checkNotNull(other);
338
339    byte[] buf1 = new byte[BUF_SIZE];
340    byte[] buf2 = new byte[BUF_SIZE];
341
342    Closer closer = Closer.create();
343    try {
344      InputStream in1 = closer.register(openStream());
345      InputStream in2 = closer.register(other.openStream());
346      while (true) {
347        int read1 = ByteStreams.read(in1, buf1, 0, BUF_SIZE);
348        int read2 = ByteStreams.read(in2, buf2, 0, BUF_SIZE);
349        if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
350          return false;
351        } else if (read1 != BUF_SIZE) {
352          return true;
353        }
354      }
355    } catch (Throwable e) {
356      throw closer.rethrow(e);
357    } finally {
358      closer.close();
359    }
360  }
361
362  /**
363   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
364   * the source will contain the concatenated data from the streams of the underlying sources.
365   *
366   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
367   * close the open underlying stream.
368   *
369   * @param sources the sources to concatenate
370   * @return a {@code ByteSource} containing the concatenated data
371   * @since 15.0
372   */
373  public static ByteSource concat(Iterable<? extends ByteSource> sources) {
374    return new ConcatenatedByteSource(sources);
375  }
376
377  /**
378   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
379   * the source will contain the concatenated data from the streams of the underlying sources.
380   *
381   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
382   * close the open underlying stream.
383   *
384   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
385   * method is called. This will fail if the iterator is infinite and may cause problems if the
386   * iterator eagerly fetches data for each source when iterated (rather than producing sources
387   * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
388   * overload if possible.
389   *
390   * @param sources the sources to concatenate
391   * @return a {@code ByteSource} containing the concatenated data
392   * @throws NullPointerException if any of {@code sources} is {@code null}
393   * @since 15.0
394   */
395  public static ByteSource concat(Iterator<? extends ByteSource> sources) {
396    return concat(ImmutableList.copyOf(sources));
397  }
398
399  /**
400   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
401   * the source will contain the concatenated data from the streams of the underlying sources.
402   *
403   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
404   * close the open underlying stream.
405   *
406   * @param sources the sources to concatenate
407   * @return a {@code ByteSource} containing the concatenated data
408   * @throws NullPointerException if any of {@code sources} is {@code null}
409   * @since 15.0
410   */
411  public static ByteSource concat(ByteSource... sources) {
412    return concat(ImmutableList.copyOf(sources));
413  }
414
415  /**
416   * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
417   * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
418   *
419   * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
420   */
421  public static ByteSource wrap(byte[] b) {
422    return new ByteArrayByteSource(b);
423  }
424
425  /**
426   * Returns an immutable {@link ByteSource} that contains no bytes.
427   *
428   * @since 15.0
429   */
430  public static ByteSource empty() {
431    return EmptyByteSource.INSTANCE;
432  }
433
434  /**
435   * A char source that reads bytes from this source and decodes them as characters using a
436   * charset.
437   */
438  private final class AsCharSource extends CharSource {
439
440    private final Charset charset;
441
442    private AsCharSource(Charset charset) {
443      this.charset = checkNotNull(charset);
444    }
445
446    @Override
447    public Reader openStream() throws IOException {
448      return new InputStreamReader(ByteSource.this.openStream(), charset);
449    }
450
451    @Override
452    public String toString() {
453      return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
454    }
455  }
456
457  /**
458   * A view of a subsection of the containing byte source.
459   */
460  private final class SlicedByteSource extends ByteSource {
461
462    final long offset;
463    final long length;
464
465    SlicedByteSource(long offset, long length) {
466      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
467      checkArgument(length >= 0, "length (%s) may not be negative", length);
468      this.offset = offset;
469      this.length = length;
470    }
471
472    @Override
473    public InputStream openStream() throws IOException {
474      return sliceStream(ByteSource.this.openStream());
475    }
476
477    @Override
478    public InputStream openBufferedStream() throws IOException {
479      return sliceStream(ByteSource.this.openBufferedStream());
480    }
481
482    private InputStream sliceStream(InputStream in) throws IOException {
483      if (offset > 0) {
484        long skipped;
485        try {
486          skipped = ByteStreams.skipUpTo(in, offset);
487        } catch (Throwable e) {
488          Closer closer = Closer.create();
489          closer.register(in);
490          try {
491            throw closer.rethrow(e);
492          } finally {
493            closer.close();
494          }
495        }
496
497        if (skipped < offset) {
498          // offset was beyond EOF
499          in.close();
500          return new ByteArrayInputStream(new byte[0]);
501        }
502      }
503      return ByteStreams.limit(in, length);
504    }
505
506    @Override
507    public ByteSource slice(long offset, long length) {
508      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
509      checkArgument(length >= 0, "length (%s) may not be negative", length);
510      long maxLength = this.length - offset;
511      return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
512    }
513
514    @Override
515    public boolean isEmpty() throws IOException {
516      return length == 0 || super.isEmpty();
517    }
518
519    @Override
520    public Optional<Long> sizeIfKnown() {
521      Optional<Long> optionalUnslicedSize = ByteSource.this.sizeIfKnown();
522      if (optionalUnslicedSize.isPresent()) {
523        long unslicedSize = optionalUnslicedSize.get();
524        long off = Math.min(offset, unslicedSize);
525        return Optional.of(Math.min(length, unslicedSize - off));
526      }
527      return Optional.absent();
528    }
529
530    @Override
531    public String toString() {
532      return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
533    }
534  }
535
536  private static class ByteArrayByteSource extends ByteSource {
537
538    final byte[] bytes;
539    final int offset;
540    final int length;
541
542    ByteArrayByteSource(byte[] bytes) {
543      this(bytes, 0, bytes.length);
544    }
545
546    // NOTE: Preconditions are enforced by slice, the only non-trivial caller.
547    ByteArrayByteSource(byte[] bytes, int offset, int length) {
548      this.bytes = bytes;
549      this.offset = offset;
550      this.length = length;
551    }
552
553    @Override
554    public InputStream openStream() {
555      return new ByteArrayInputStream(bytes, offset, length);
556    }
557
558    @Override
559    public InputStream openBufferedStream() throws IOException {
560      return openStream();
561    }
562
563    @Override
564    public boolean isEmpty() {
565      return length == 0;
566    }
567
568    @Override
569    public long size() {
570      return length;
571    }
572
573    @Override
574    public Optional<Long> sizeIfKnown() {
575      return Optional.of((long) length);
576    }
577
578    @Override
579    public byte[] read() {
580      return Arrays.copyOfRange(bytes, offset, offset + length);
581    }
582
583    @Override
584    public long copyTo(OutputStream output) throws IOException {
585      output.write(bytes, offset, length);
586      return length;
587    }
588
589    @Override
590    public <T> T read(ByteProcessor<T> processor) throws IOException {
591      processor.processBytes(bytes, offset, length);
592      return processor.getResult();
593    }
594
595    @Override
596    public HashCode hash(HashFunction hashFunction) throws IOException {
597      return hashFunction.hashBytes(bytes, offset, length);
598    }
599
600    @Override
601    public ByteSource slice(long offset, long length) {
602      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
603      checkArgument(length >= 0, "length (%s) may not be negative", length);
604
605      offset = Math.min(offset, this.length);
606      length = Math.min(length, this.length - offset);
607      int newOffset = this.offset + (int) offset;
608      return new ByteArrayByteSource(bytes, newOffset, (int) length);
609    }
610
611    @Override
612    public String toString() {
613      return "ByteSource.wrap("
614          + Ascii.truncate(BaseEncoding.base16().encode(bytes, offset, length), 30, "...") + ")";
615    }
616  }
617
618  private static final class EmptyByteSource extends ByteArrayByteSource {
619
620    static final EmptyByteSource INSTANCE = new EmptyByteSource();
621
622    EmptyByteSource() {
623      super(new byte[0]);
624    }
625
626    @Override
627    public CharSource asCharSource(Charset charset) {
628      checkNotNull(charset);
629      return CharSource.empty();
630    }
631
632    @Override
633    public byte[] read() {
634      return bytes; // length is 0, no need to clone
635    }
636
637    @Override
638    public String toString() {
639      return "ByteSource.empty()";
640    }
641  }
642
643  private static final class ConcatenatedByteSource extends ByteSource {
644
645    final Iterable<? extends ByteSource> sources;
646
647    ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
648      this.sources = checkNotNull(sources);
649    }
650
651    @Override
652    public InputStream openStream() throws IOException {
653      return new MultiInputStream(sources.iterator());
654    }
655
656    @Override
657    public boolean isEmpty() throws IOException {
658      for (ByteSource source : sources) {
659        if (!source.isEmpty()) {
660          return false;
661        }
662      }
663      return true;
664    }
665
666    @Override
667    public Optional<Long> sizeIfKnown() {
668      long result = 0L;
669      for (ByteSource source : sources) {
670        Optional<Long> sizeIfKnown = source.sizeIfKnown();
671        if (!sizeIfKnown.isPresent()) {
672          return Optional.absent();
673        }
674        result += sizeIfKnown.get();
675      }
676      return Optional.of(result);
677    }
678
679    @Override
680    public long size() throws IOException {
681      long result = 0L;
682      for (ByteSource source : sources) {
683        result += source.size();
684      }
685      return result;
686    }
687
688    @Override
689    public String toString() {
690      return "ByteSource.concat(" + sources + ")";
691    }
692  }
693}