001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.io;
018
019import static com.google.common.base.Preconditions.checkArgument;
020import static com.google.common.base.Preconditions.checkNotNull;
021
022import com.google.common.annotations.Beta;
023import com.google.common.collect.ImmutableList;
024import com.google.common.hash.Funnels;
025import com.google.common.hash.HashCode;
026import com.google.common.hash.HashFunction;
027import com.google.common.hash.Hasher;
028
029import java.io.BufferedInputStream;
030import java.io.ByteArrayInputStream;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.InputStreamReader;
034import java.io.OutputStream;
035import java.io.Reader;
036import java.nio.charset.Charset;
037import java.util.Arrays;
038import java.util.Iterator;
039
040/**
041 * A readable source of bytes, such as a file. Unlike an {@link InputStream}, a
042 * {@code ByteSource} is not an open, stateful stream for input that can be read and closed.
043 * Instead, it is an immutable <i>supplier</i> of {@code InputStream} instances.
044 *
045 * <p>{@code ByteSource} provides two kinds of methods:
046 * <ul>
047 *   <li><b>Methods that return a stream:</b> These methods should return a <i>new</i>, independent
048 *   instance each time they are called. The caller is responsible for ensuring that the returned
049 *   stream is closed.
050 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
051 *   typically implemented by opening a stream using one of the methods in the first category, doing
052 *   something and finally closing the stream that was opened.
053 * </ul>
054 *
055 * @since 14.0
056 * @author Colin Decker
057 */
058public abstract class ByteSource implements InputSupplier<InputStream> {
059
060  private static final int BUF_SIZE = 0x1000; // 4K
061
062  /**
063   * Constructor for use by subclasses.
064   */
065  protected ByteSource() {}
066
067  /**
068   * Returns a {@link CharSource} view of this byte source that decodes bytes read from this source
069   * as characters using the given {@link Charset}.
070   */
071  public CharSource asCharSource(Charset charset) {
072    return new AsCharSource(charset);
073  }
074
075  /**
076   * Opens a new {@link InputStream} for reading from this source. This method should return a new,
077   * independent stream each time it is called.
078   *
079   * <p>The caller is responsible for ensuring that the returned stream is closed.
080   *
081   * @throws IOException if an I/O error occurs in the process of opening the stream
082   */
083  public abstract InputStream openStream() throws IOException;
084
085  /**
086   * This method is a temporary method provided for easing migration from suppliers to sources and
087   * sinks.
088   *
089   * @since 15.0
090   * @deprecated This method is only provided for temporary compatibility with the
091   *     {@link InputSupplier} interface and should not be called directly. Use {@link #openStream}
092   *     instead. This method is scheduled for removal in Guava 18.0.
093   */
094  @Override
095  @Deprecated
096  public final InputStream getInput() throws IOException {
097    return openStream();
098  }
099
100  /**
101   * Opens a new buffered {@link InputStream} for reading from this source. The returned stream is
102   * not required to be a {@link BufferedInputStream} in order to allow implementations to simply
103   * delegate to {@link #openStream()} when the stream returned by that method does not benefit
104   * from additional buffering (for example, a {@code ByteArrayInputStream}). This method should
105   * return a new, independent stream each time it is called.
106   *
107   * <p>The caller is responsible for ensuring that the returned stream is closed.
108   *
109   * @throws IOException if an I/O error occurs in the process of opening the stream
110   * @since 15.0 (in 14.0 with return type {@link BufferedInputStream})
111   */
112  public InputStream openBufferedStream() throws IOException {
113    InputStream in = openStream();
114    return (in instanceof BufferedInputStream)
115        ? (BufferedInputStream) in
116        : new BufferedInputStream(in);
117  }
118
119  /**
120   * Returns a view of a slice of this byte source that is at most {@code length} bytes long
121   * starting at the given {@code offset}.
122   *
123   * @throws IllegalArgumentException if {@code offset} or {@code length} is negative
124   */
125  public ByteSource slice(long offset, long length) {
126    return new SlicedByteSource(offset, length);
127  }
128
129  /**
130   * Returns whether the source has zero bytes. The default implementation is to open a stream and
131   * check for EOF.
132   *
133   * @throws IOException if an I/O error occurs
134   * @since 15.0
135   */
136  public boolean isEmpty() throws IOException {
137    Closer closer = Closer.create();
138    try {
139      InputStream in = closer.register(openStream());
140      return in.read() == -1;
141    } catch (Throwable e) {
142      throw closer.rethrow(e);
143    } finally {
144      closer.close();
145    }
146  }
147
148  /**
149   * Returns the size of this source in bytes. For most implementations, this is a heavyweight
150   * operation that will open a stream, read (or {@link InputStream#skip(long) skip}, if possible)
151   * to the end of the stream and return the total number of bytes that were read.
152   *
153   * <p>For some sources, such as a file, this method may use a more efficient implementation. Note
154   * that in such cases, it is <i>possible</i> that this method will return a different number of
155   * bytes than would be returned by reading all of the bytes (for example, some special files may
156   * return a size of 0 despite actually having content when read).
157   *
158   * <p>In either case, if this is a mutable source such as a file, the size it returns may not be
159   * the same number of bytes a subsequent read would return.
160   *
161   * @throws IOException if an I/O error occurs in the process of reading the size of this source
162   */
163  public long size() throws IOException {
164    Closer closer = Closer.create();
165    try {
166      InputStream in = closer.register(openStream());
167      return countBySkipping(in);
168    } catch (IOException e) {
169      // skip may not be supported... at any rate, try reading
170    } finally {
171      closer.close();
172    }
173
174    closer = Closer.create();
175    try {
176      InputStream in = closer.register(openStream());
177      return countByReading(in);
178    } catch (Throwable e) {
179      throw closer.rethrow(e);
180    } finally {
181      closer.close();
182    }
183  }
184
185  /**
186   * Counts the bytes in the given input stream using skip if possible. Returns SKIP_FAILED if the
187   * first call to skip threw, in which case skip may just not be supported.
188   */
189  private long countBySkipping(InputStream in) throws IOException {
190    long count = 0;
191    while (true) {
192      // don't try to skip more than available()
193      // things may work really wrong with FileInputStream otherwise
194      long skipped = in.skip(Math.min(in.available(), Integer.MAX_VALUE));
195      if (skipped <= 0) {
196        if (in.read() == -1) {
197          return count;
198        } else if (count == 0 && in.available() == 0) {
199          // if available is still zero after reading a single byte, it
200          // will probably always be zero, so we should countByReading
201          throw new IOException();
202        }
203        count++;
204      } else {
205        count += skipped;
206      }
207    }
208  }
209
210  private static final byte[] countBuffer = new byte[BUF_SIZE];
211
212  private long countByReading(InputStream in) throws IOException {
213    long count = 0;
214    long read;
215    while ((read = in.read(countBuffer)) != -1) {
216      count += read;
217    }
218    return count;
219  }
220
221  /**
222   * Copies the contents of this byte source to the given {@code OutputStream}. Does not close
223   * {@code output}.
224   *
225   * @throws IOException if an I/O error occurs in the process of reading from this source or
226   *     writing to {@code output}
227   */
228  public long copyTo(OutputStream output) throws IOException {
229    checkNotNull(output);
230
231    Closer closer = Closer.create();
232    try {
233      InputStream in = closer.register(openStream());
234      return ByteStreams.copy(in, output);
235    } catch (Throwable e) {
236      throw closer.rethrow(e);
237    } finally {
238      closer.close();
239    }
240  }
241
242  /**
243   * Copies the contents of this byte source to the given {@code ByteSink}.
244   *
245   * @throws IOException if an I/O error occurs in the process of reading from this source or
246   *     writing to {@code sink}
247   */
248  public long copyTo(ByteSink sink) throws IOException {
249    checkNotNull(sink);
250
251    Closer closer = Closer.create();
252    try {
253      InputStream in = closer.register(openStream());
254      OutputStream out = closer.register(sink.openStream());
255      return ByteStreams.copy(in, out);
256    } catch (Throwable e) {
257      throw closer.rethrow(e);
258    } finally {
259      closer.close();
260    }
261  }
262
263  /**
264   * Reads the full contents of this byte source as a byte array.
265   *
266   * @throws IOException if an I/O error occurs in the process of reading from this source
267   */
268  public byte[] read() throws IOException {
269    Closer closer = Closer.create();
270    try {
271      InputStream in = closer.register(openStream());
272      return ByteStreams.toByteArray(in);
273    } catch (Throwable e) {
274      throw closer.rethrow(e);
275    } finally {
276      closer.close();
277    }
278  }
279
280  /**
281   * Reads the contents of this byte source using the given {@code processor} to process bytes as
282   * they are read. Stops when all bytes have been read or the consumer returns {@code false}.
283   * Returns the result produced by the processor.
284   *
285   * @throws IOException if an I/O error occurs in the process of reading from this source or if
286   *     {@code processor} throws an {@code IOException}
287   * @since 16.0
288   */
289  @Beta
290  public <T> T read(ByteProcessor<T> processor) throws IOException {
291    checkNotNull(processor);
292
293    Closer closer = Closer.create();
294    try {
295      InputStream in = closer.register(openStream());
296      return ByteStreams.readBytes(in, processor);
297    } catch (Throwable e) {
298      throw closer.rethrow(e);
299    } finally {
300      closer.close();
301    }
302  }
303
304  /**
305   * Hashes the contents of this byte source using the given hash function.
306   *
307   * @throws IOException if an I/O error occurs in the process of reading from this source
308   */
309  public HashCode hash(HashFunction hashFunction) throws IOException {
310    Hasher hasher = hashFunction.newHasher();
311    copyTo(Funnels.asOutputStream(hasher));
312    return hasher.hash();
313  }
314
315  /**
316   * Checks that the contents of this byte source are equal to the contents of the given byte
317   * source.
318   *
319   * @throws IOException if an I/O error occurs in the process of reading from this source or
320   *     {@code other}
321   */
322  public boolean contentEquals(ByteSource other) throws IOException {
323    checkNotNull(other);
324
325    byte[] buf1 = new byte[BUF_SIZE];
326    byte[] buf2 = new byte[BUF_SIZE];
327
328    Closer closer = Closer.create();
329    try {
330      InputStream in1 = closer.register(openStream());
331      InputStream in2 = closer.register(other.openStream());
332      while (true) {
333        int read1 = ByteStreams.read(in1, buf1, 0, BUF_SIZE);
334        int read2 = ByteStreams.read(in2, buf2, 0, BUF_SIZE);
335        if (read1 != read2 || !Arrays.equals(buf1, buf2)) {
336          return false;
337        } else if (read1 != BUF_SIZE) {
338          return true;
339        }
340      }
341    } catch (Throwable e) {
342      throw closer.rethrow(e);
343    } finally {
344      closer.close();
345    }
346  }
347
348  /**
349   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
350   * the source will contain the concatenated data from the streams of the underlying sources.
351   *
352   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
353   * close the open underlying stream.
354   *
355   * @param sources the sources to concatenate
356   * @return a {@code ByteSource} containing the concatenated data
357   * @since 15.0
358   */
359  public static ByteSource concat(Iterable<? extends ByteSource> sources) {
360    return new ConcatenatedByteSource(sources);
361  }
362
363  /**
364   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
365   * the source will contain the concatenated data from the streams of the underlying sources.
366   *
367   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
368   * close the open underlying stream.
369   *
370   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
371   * method is called. This will fail if the iterator is infinite and may cause problems if the
372   * iterator eagerly fetches data for each source when iterated (rather than producing sources
373   * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
374   * overload if possible.
375   *
376   * @param sources the sources to concatenate
377   * @return a {@code ByteSource} containing the concatenated data
378   * @throws NullPointerException if any of {@code sources} is {@code null}
379   * @since 15.0
380   */
381  public static ByteSource concat(Iterator<? extends ByteSource> sources) {
382    return concat(ImmutableList.copyOf(sources));
383  }
384
385  /**
386   * Concatenates multiple {@link ByteSource} instances into a single source. Streams returned from
387   * the source will contain the concatenated data from the streams of the underlying sources.
388   *
389   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
390   * close the open underlying stream.
391   *
392   * @param sources the sources to concatenate
393   * @return a {@code ByteSource} containing the concatenated data
394   * @throws NullPointerException if any of {@code sources} is {@code null}
395   * @since 15.0
396   */
397  public static ByteSource concat(ByteSource... sources) {
398    return concat(ImmutableList.copyOf(sources));
399  }
400
401  /**
402   * Returns a view of the given byte array as a {@link ByteSource}. To view only a specific range
403   * in the array, use {@code ByteSource.wrap(b).slice(offset, length)}.
404   *
405   * @since 15.0 (since 14.0 as {@code ByteStreams.asByteSource(byte[])}).
406   */
407  public static ByteSource wrap(byte[] b) {
408    return new ByteArrayByteSource(b);
409  }
410
411  /**
412   * Returns an immutable {@link ByteSource} that contains no bytes.
413   *
414   * @since 15.0
415   */
416  public static ByteSource empty() {
417    return EmptyByteSource.INSTANCE;
418  }
419
420  /**
421   * A char source that reads bytes from this source and decodes them as characters using a
422   * charset.
423   */
424  private final class AsCharSource extends CharSource {
425
426    private final Charset charset;
427
428    private AsCharSource(Charset charset) {
429      this.charset = checkNotNull(charset);
430    }
431
432    @Override
433    public Reader openStream() throws IOException {
434      return new InputStreamReader(ByteSource.this.openStream(), charset);
435    }
436
437    @Override
438    public String toString() {
439      return ByteSource.this.toString() + ".asCharSource(" + charset + ")";
440    }
441  }
442
443  /**
444   * A view of a subsection of the containing byte source.
445   */
446  private final class SlicedByteSource extends ByteSource {
447
448    private final long offset;
449    private final long length;
450
451    private SlicedByteSource(long offset, long length) {
452      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
453      checkArgument(length >= 0, "length (%s) may not be negative", length);
454      this.offset = offset;
455      this.length = length;
456    }
457
458    @Override
459    public InputStream openStream() throws IOException {
460      return sliceStream(ByteSource.this.openStream());
461    }
462
463    @Override
464    public InputStream openBufferedStream() throws IOException {
465      return sliceStream(ByteSource.this.openBufferedStream());
466    }
467
468    private InputStream sliceStream(InputStream in) throws IOException {
469      if (offset > 0) {
470        try {
471          ByteStreams.skipFully(in, offset);
472        } catch (Throwable e) {
473          Closer closer = Closer.create();
474          closer.register(in);
475          try {
476            throw closer.rethrow(e);
477          } finally {
478            closer.close();
479          }
480        }
481      }
482      return ByteStreams.limit(in, length);
483    }
484
485    @Override
486    public ByteSource slice(long offset, long length) {
487      checkArgument(offset >= 0, "offset (%s) may not be negative", offset);
488      checkArgument(length >= 0, "length (%s) may not be negative", length);
489      long maxLength = this.length - offset;
490      return ByteSource.this.slice(this.offset + offset, Math.min(length, maxLength));
491    }
492
493    @Override
494    public boolean isEmpty() throws IOException {
495      return length == 0 || super.isEmpty();
496    }
497
498    @Override
499    public String toString() {
500      return ByteSource.this.toString() + ".slice(" + offset + ", " + length + ")";
501    }
502  }
503
504  private static class ByteArrayByteSource extends ByteSource {
505
506    protected final byte[] bytes;
507
508    protected ByteArrayByteSource(byte[] bytes) {
509      this.bytes = checkNotNull(bytes);
510    }
511
512    @Override
513    public InputStream openStream() {
514      return new ByteArrayInputStream(bytes);
515    }
516
517    @Override
518    public InputStream openBufferedStream() throws IOException {
519      return openStream();
520    }
521
522    @Override
523    public boolean isEmpty() {
524      return bytes.length == 0;
525    }
526
527    @Override
528    public long size() {
529      return bytes.length;
530    }
531
532    @Override
533    public byte[] read() {
534      return bytes.clone();
535    }
536
537    @Override
538    public long copyTo(OutputStream output) throws IOException {
539      output.write(bytes);
540      return bytes.length;
541    }
542
543    @Override
544    public <T> T read(ByteProcessor<T> processor) throws IOException {
545      processor.processBytes(bytes, 0, bytes.length);
546      return processor.getResult();
547    }
548
549    @Override
550    public HashCode hash(HashFunction hashFunction) throws IOException {
551      return hashFunction.hashBytes(bytes);
552    }
553
554    // TODO(user): Possibly override slice()
555
556    @Override
557    public String toString() {
558      return "ByteSource.wrap(" + BaseEncoding.base16().encode(bytes) + ")";
559    }
560  }
561
562  private static final class EmptyByteSource extends ByteArrayByteSource {
563
564    private static final EmptyByteSource INSTANCE = new EmptyByteSource();
565
566    private EmptyByteSource() {
567      super(new byte[0]);
568    }
569
570    @Override
571    public CharSource asCharSource(Charset charset) {
572      checkNotNull(charset);
573      return CharSource.empty();
574    }
575
576    @Override
577    public byte[] read() {
578      return bytes; // length is 0, no need to clone
579    }
580
581    @Override
582    public String toString() {
583      return "ByteSource.empty()";
584    }
585  }
586
587  private static final class ConcatenatedByteSource extends ByteSource {
588
589    private final Iterable<? extends ByteSource> sources;
590
591    ConcatenatedByteSource(Iterable<? extends ByteSource> sources) {
592      this.sources = checkNotNull(sources);
593    }
594
595    @Override
596    public InputStream openStream() throws IOException {
597      return new MultiInputStream(sources.iterator());
598    }
599
600    @Override
601    public boolean isEmpty() throws IOException {
602      for (ByteSource source : sources) {
603        if (!source.isEmpty()) {
604          return false;
605        }
606      }
607      return true;
608    }
609
610    @Override
611    public long size() throws IOException {
612      long result = 0L;
613      for (ByteSource source : sources) {
614        result += source.size();
615      }
616      return result;
617    }
618
619    @Override
620    public String toString() {
621      return "ByteSource.concat(" + sources + ")";
622    }
623  }
624}