001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.Beta;
020import com.google.common.annotations.GwtIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.common.collect.Streams;
028import com.google.errorprone.annotations.CanIgnoreReturnValue;
029import com.google.errorprone.annotations.MustBeClosed;
030import java.io.BufferedReader;
031import java.io.IOException;
032import java.io.InputStream;
033import java.io.Reader;
034import java.io.StringReader;
035import java.io.UncheckedIOException;
036import java.io.Writer;
037import java.nio.charset.Charset;
038import java.util.Iterator;
039import java.util.List;
040import java.util.function.Consumer;
041import java.util.stream.Stream;
042import org.checkerframework.checker.nullness.qual.Nullable;
043
044/**
045 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
046 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
047 * it is an immutable <i>supplier</i> of {@code Reader} instances.
048 *
049 * <p>{@code CharSource} provides two kinds of methods:
050 *
051 * <ul>
052 *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
053 *       instance each time they are called. The caller is responsible for ensuring that the
054 *       returned reader is closed.
055 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
056 *       typically implemented by opening a reader using one of the methods in the first category,
057 *       doing something and finally closing the reader that was opened.
058 * </ul>
059 *
060 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
061 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code
062 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to
063 * be an empty line at the end if the contents are terminated with a line separator.
064 *
065 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
066 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
067 *
068 * @since 14.0
069 * @author Colin Decker
070 */
071@GwtIncompatible
072public abstract class CharSource {
073
  /** Constructor for use by subclasses; it performs no initialization of its own. */
  protected CharSource() {}
076
077  /**
078   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
079   * as bytes using the given {@link Charset}.
080   *
081   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
082   * the default implementation of this method will ensure that the original {@code CharSource} is
083   * returned, rather than round-trip encoding. Subclasses that override this method should behave
084   * the same way.
085   *
086   * @since 20.0
087   */
088  @Beta
089  public ByteSource asByteSource(Charset charset) {
090    return new AsByteSource(charset);
091  }
092
  /**
   * Opens a new {@link Reader} for reading from this source. Every call returns a new,
   * independent reader.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @return a newly opened reader over the contents of this source
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public abstract Reader openStream() throws IOException;
102
103  /**
104   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
105   * independent reader each time it is called.
106   *
107   * <p>The caller is responsible for ensuring that the returned reader is closed.
108   *
109   * @throws IOException if an I/O error occurs while of opening the reader
110   */
111  public BufferedReader openBufferedStream() throws IOException {
112    Reader reader = openStream();
113    return (reader instanceof BufferedReader)
114        ? (BufferedReader) reader
115        : new BufferedReader(reader);
116  }
117
118  /**
119   * Opens a new {@link Stream} for reading text one line at a time from this source. This method
120   * returns a new, independent stream each time it is called.
121   *
122   * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an
123   * I/O error occurs while the stream is reading from the source or when the stream is closed, an
124   * {@link UncheckedIOException} is thrown.
125   *
126   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
127   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
128   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
129   * it does.
130   *
131   * <p>The caller is responsible for ensuring that the returned stream is closed. For example:
132   *
133   * <pre>{@code
134   * try (Stream<String> lines = source.lines()) {
135   *   lines.map(...)
136   *      .filter(...)
137   *      .forEach(...);
138   * }
139   * }</pre>
140   *
141   * @throws IOException if an I/O error occurs while opening the stream
142   * @since 22.0
143   */
144  @Beta
145  @MustBeClosed
146  public Stream<String> lines() throws IOException {
147    BufferedReader reader = openBufferedStream();
148    return reader
149        .lines()
150        .onClose(
151            () -> {
152              try {
153                reader.close();
154              } catch (IOException e) {
155                throw new UncheckedIOException(e);
156              }
157            });
158  }
159
160  /**
161   * Returns the size of this source in chars, if the size can be easily determined without actually
162   * opening the data stream.
163   *
164   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code
165   * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i>
166   * that this method will return a different number of chars than would be returned by reading all
167   * of the chars.
168   *
169   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
170   * return a different number of chars if the contents are changed.
171   *
172   * @since 19.0
173   */
174  @Beta
175  public Optional<Long> lengthIfKnown() {
176    return Optional.absent();
177  }
178
179  /**
180   * Returns the length of this source in chars, even if doing so requires opening and traversing an
181   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
182   *
183   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
184   * absent, it will fall back to a heavyweight operation that will open a stream, {@link
185   * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that
186   * were skipped.
187   *
188   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
189   * implementation, it is <i>possible</i> that this method will return a different number of chars
190   * than would be returned by reading all of the chars.
191   *
192   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
193   * number of chars if the contents are changed.
194   *
195   * @throws IOException if an I/O error occurs while reading the length of this source
196   * @since 19.0
197   */
198  @Beta
199  public long length() throws IOException {
200    Optional<Long> lengthIfKnown = lengthIfKnown();
201    if (lengthIfKnown.isPresent()) {
202      return lengthIfKnown.get();
203    }
204
205    Closer closer = Closer.create();
206    try {
207      Reader reader = closer.register(openStream());
208      return countBySkipping(reader);
209    } catch (Throwable e) {
210      throw closer.rethrow(e);
211    } finally {
212      closer.close();
213    }
214  }
215
216  private long countBySkipping(Reader reader) throws IOException {
217    long count = 0;
218    long read;
219    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
220      count += read;
221    }
222    return count;
223  }
224
225  /**
226   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
227   * Does not close {@code appendable} if it is {@code Closeable}.
228   *
229   * @return the number of characters copied
230   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
231   *     appendable}
232   */
233  @CanIgnoreReturnValue
234  public long copyTo(Appendable appendable) throws IOException {
235    checkNotNull(appendable);
236
237    Closer closer = Closer.create();
238    try {
239      Reader reader = closer.register(openStream());
240      return CharStreams.copy(reader, appendable);
241    } catch (Throwable e) {
242      throw closer.rethrow(e);
243    } finally {
244      closer.close();
245    }
246  }
247
248  /**
249   * Copies the contents of this source to the given sink.
250   *
251   * @return the number of characters copied
252   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
253   *     sink}
254   */
255  @CanIgnoreReturnValue
256  public long copyTo(CharSink sink) throws IOException {
257    checkNotNull(sink);
258
259    Closer closer = Closer.create();
260    try {
261      Reader reader = closer.register(openStream());
262      Writer writer = closer.register(sink.openStream());
263      return CharStreams.copy(reader, writer);
264    } catch (Throwable e) {
265      throw closer.rethrow(e);
266    } finally {
267      closer.close();
268    }
269  }
270
271  /**
272   * Reads the contents of this source as a string.
273   *
274   * @throws IOException if an I/O error occurs while reading from this source
275   */
276  public String read() throws IOException {
277    Closer closer = Closer.create();
278    try {
279      Reader reader = closer.register(openStream());
280      return CharStreams.toString(reader);
281    } catch (Throwable e) {
282      throw closer.rethrow(e);
283    } finally {
284      closer.close();
285    }
286  }
287
288  /**
289   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
290   *
291   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
292   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
293   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
294   * it does.
295   *
296   * @throws IOException if an I/O error occurs while reading from this source
297   */
298  public @Nullable String readFirstLine() throws IOException {
299    Closer closer = Closer.create();
300    try {
301      BufferedReader reader = closer.register(openBufferedStream());
302      return reader.readLine();
303    } catch (Throwable e) {
304      throw closer.rethrow(e);
305    } finally {
306      closer.close();
307    }
308  }
309
310  /**
311   * Reads all the lines of this source as a list of strings. The returned list will be empty if
312   * this source is empty.
313   *
314   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
315   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
316   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
317   * it does.
318   *
319   * @throws IOException if an I/O error occurs while reading from this source
320   */
321  public ImmutableList<String> readLines() throws IOException {
322    Closer closer = Closer.create();
323    try {
324      BufferedReader reader = closer.register(openBufferedStream());
325      List<String> result = Lists.newArrayList();
326      String line;
327      while ((line = reader.readLine()) != null) {
328        result.add(line);
329      }
330      return ImmutableList.copyOf(result);
331    } catch (Throwable e) {
332      throw closer.rethrow(e);
333    } finally {
334      closer.close();
335    }
336  }
337
338  /**
339   * Reads lines of text from this source, processing each line as it is read using the given {@link
340   * LineProcessor processor}. Stops when all lines have been processed or the processor returns
341   * {@code false} and returns the result produced by the processor.
342   *
343   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
344   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
345   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
346   * it does.
347   *
348   * @throws IOException if an I/O error occurs while reading from this source or if {@code
349   *     processor} throws an {@code IOException}
350   * @since 16.0
351   */
352  @Beta
353  @CanIgnoreReturnValue // some processors won't return a useful result
354  public <T> T readLines(LineProcessor<T> processor) throws IOException {
355    checkNotNull(processor);
356
357    Closer closer = Closer.create();
358    try {
359      Reader reader = closer.register(openStream());
360      return CharStreams.readLines(reader, processor);
361    } catch (Throwable e) {
362      throw closer.rethrow(e);
363    } finally {
364      closer.close();
365    }
366  }
367
368  /**
369   * Reads all lines of text from this source, running the given {@code action} for each line as it
370   * is read.
371   *
372   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
373   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
374   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
375   * it does.
376   *
377   * @throws IOException if an I/O error occurs while reading from this source or if {@code action}
378   *     throws an {@code UncheckedIOException}
379   * @since 22.0
380   */
381  @Beta
382  public void forEachLine(Consumer<? super String> action) throws IOException {
383    try (Stream<String> lines = lines()) {
384      // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure
385      lines.forEachOrdered(action);
386    } catch (UncheckedIOException e) {
387      throw e.getCause();
388    }
389  }
390
391  /**
392   * Returns whether the source has zero chars. The default implementation first checks {@link
393   * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
394   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
395   *
396   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
397   * chars are actually available for reading. This means that a source may return {@code true} from
398   * {@code isEmpty()} despite having readable content.
399   *
400   * @throws IOException if an I/O error occurs
401   * @since 15.0
402   */
403  public boolean isEmpty() throws IOException {
404    Optional<Long> lengthIfKnown = lengthIfKnown();
405    if (lengthIfKnown.isPresent()) {
406      return lengthIfKnown.get() == 0L;
407    }
408    Closer closer = Closer.create();
409    try {
410      Reader reader = closer.register(openStream());
411      return reader.read() == -1;
412    } catch (Throwable e) {
413      throw closer.rethrow(e);
414    } finally {
415      closer.close();
416    }
417  }
418
419  /**
420   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
421   * the source will contain the concatenated data from the streams of the underlying sources.
422   *
423   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
424   * close the open underlying stream.
425   *
426   * @param sources the sources to concatenate
427   * @return a {@code CharSource} containing the concatenated data
428   * @since 15.0
429   */
430  public static CharSource concat(Iterable<? extends CharSource> sources) {
431    return new ConcatenatedCharSource(sources);
432  }
433
434  /**
435   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
436   * the source will contain the concatenated data from the streams of the underlying sources.
437   *
438   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
439   * close the open underlying stream.
440   *
441   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
442   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
443   * eagerly fetches data for each source when iterated (rather than producing sources that only
444   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
445   * possible.
446   *
447   * @param sources the sources to concatenate
448   * @return a {@code CharSource} containing the concatenated data
449   * @throws NullPointerException if any of {@code sources} is {@code null}
450   * @since 15.0
451   */
452  public static CharSource concat(Iterator<? extends CharSource> sources) {
453    return concat(ImmutableList.copyOf(sources));
454  }
455
456  /**
457   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
458   * the source will contain the concatenated data from the streams of the underlying sources.
459   *
460   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
461   * close the open underlying stream.
462   *
463   * @param sources the sources to concatenate
464   * @return a {@code CharSource} containing the concatenated data
465   * @throws NullPointerException if any of {@code sources} is {@code null}
466   * @since 15.0
467   */
468  public static CharSource concat(CharSource... sources) {
469    return concat(ImmutableList.copyOf(sources));
470  }
471
472  /**
473   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
474   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
475   * the {@code charSequence} is mutated while it is being read, so don't do that.
476   *
477   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
478   */
479  public static CharSource wrap(CharSequence charSequence) {
480    return charSequence instanceof String
481        ? new StringCharSource((String) charSequence)
482        : new CharSequenceCharSource(charSequence);
483  }
484
485  /**
486   * Returns an immutable {@link CharSource} that contains no characters.
487   *
488   * @since 15.0
489   */
490  public static CharSource empty() {
491    return EmptyCharSource.INSTANCE;
492  }
493
494  /** A byte source that reads chars from this source and encodes them as bytes using a charset. */
495  private final class AsByteSource extends ByteSource {
496
497    final Charset charset;
498
499    AsByteSource(Charset charset) {
500      this.charset = checkNotNull(charset);
501    }
502
503    @Override
504    public CharSource asCharSource(Charset charset) {
505      if (charset.equals(this.charset)) {
506        return CharSource.this;
507      }
508      return super.asCharSource(charset);
509    }
510
511    @Override
512    public InputStream openStream() throws IOException {
513      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
514    }
515
516    @Override
517    public String toString() {
518      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
519    }
520  }
521
522  private static class CharSequenceCharSource extends CharSource {
523
524    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
525
526    protected final CharSequence seq;
527
528    protected CharSequenceCharSource(CharSequence seq) {
529      this.seq = checkNotNull(seq);
530    }
531
532    @Override
533    public Reader openStream() {
534      return new CharSequenceReader(seq);
535    }
536
537    @Override
538    public String read() {
539      return seq.toString();
540    }
541
542    @Override
543    public boolean isEmpty() {
544      return seq.length() == 0;
545    }
546
547    @Override
548    public long length() {
549      return seq.length();
550    }
551
552    @Override
553    public Optional<Long> lengthIfKnown() {
554      return Optional.of((long) seq.length());
555    }
556
557    /**
558     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
559     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
560     */
561    private Iterator<String> linesIterator() {
562      return new AbstractIterator<String>() {
563        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
564
565        @Override
566        protected String computeNext() {
567          if (lines.hasNext()) {
568            String next = lines.next();
569            // skip last line if it's empty
570            if (lines.hasNext() || !next.isEmpty()) {
571              return next;
572            }
573          }
574          return endOfData();
575        }
576      };
577    }
578
579    @Override
580    public Stream<String> lines() {
581      return Streams.stream(linesIterator());
582    }
583
584    @Override
585    public String readFirstLine() {
586      Iterator<String> lines = linesIterator();
587      return lines.hasNext() ? lines.next() : null;
588    }
589
590    @Override
591    public ImmutableList<String> readLines() {
592      return ImmutableList.copyOf(linesIterator());
593    }
594
595    @Override
596    public <T> T readLines(LineProcessor<T> processor) throws IOException {
597      Iterator<String> lines = linesIterator();
598      while (lines.hasNext()) {
599        if (!processor.processLine(lines.next())) {
600          break;
601        }
602      }
603      return processor.getResult();
604    }
605
606    @Override
607    public String toString() {
608      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
609    }
610  }
611
612  /**
613   * Subclass specialized for string instances.
614   *
615   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
616   *
617   * <ul>
618   *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
619   *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
620   *       one with {@link CharSequence#charAt(int)}.
621   *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
622   *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
623   *       can't change, and it is faster because many writers and appendables are optimized for
624   *       appending string instances.
625   * </ul>
626   */
627  private static class StringCharSource extends CharSequenceCharSource {
628    protected StringCharSource(String seq) {
629      super(seq);
630    }
631
632    @Override
633    public Reader openStream() {
634      return new StringReader((String) seq);
635    }
636
637    @Override
638    public long copyTo(Appendable appendable) throws IOException {
639      appendable.append(seq);
640      return seq.length();
641    }
642
643    @Override
644    public long copyTo(CharSink sink) throws IOException {
645      checkNotNull(sink);
646      Closer closer = Closer.create();
647      try {
648        Writer writer = closer.register(sink.openStream());
649        writer.write((String) seq);
650        return seq.length();
651      } catch (Throwable e) {
652        throw closer.rethrow(e);
653      } finally {
654        closer.close();
655      }
656    }
657  }
658
  /** Singleton {@link CharSource} with no characters, backed by the empty string. */
  private static final class EmptyCharSource extends StringCharSource {

    /** The single shared instance; this is what {@code CharSource.empty()} returns. */
    private static final EmptyCharSource INSTANCE = new EmptyCharSource();

    private EmptyCharSource() {
      super("");
    }

    @Override
    public String toString() {
      return "CharSource.empty()";
    }
  }
672
673  private static final class ConcatenatedCharSource extends CharSource {
674
675    private final Iterable<? extends CharSource> sources;
676
677    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
678      this.sources = checkNotNull(sources);
679    }
680
681    @Override
682    public Reader openStream() throws IOException {
683      return new MultiReader(sources.iterator());
684    }
685
686    @Override
687    public boolean isEmpty() throws IOException {
688      for (CharSource source : sources) {
689        if (!source.isEmpty()) {
690          return false;
691        }
692      }
693      return true;
694    }
695
696    @Override
697    public Optional<Long> lengthIfKnown() {
698      long result = 0L;
699      for (CharSource source : sources) {
700        Optional<Long> lengthIfKnown = source.lengthIfKnown();
701        if (!lengthIfKnown.isPresent()) {
702          return Optional.absent();
703        }
704        result += lengthIfKnown.get();
705      }
706      return Optional.of(result);
707    }
708
709    @Override
710    public long length() throws IOException {
711      long result = 0L;
712      for (CharSource source : sources) {
713        result += source.length();
714      }
715      return result;
716    }
717
718    @Override
719    public String toString() {
720      return "CharSource.concat(" + sources + ")";
721    }
722  }
723}