001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.Beta;
020import com.google.common.annotations.GwtIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.errorprone.annotations.CanIgnoreReturnValue;
028import java.io.BufferedReader;
029import java.io.IOException;
030import java.io.InputStream;
031import java.io.Reader;
032import java.io.StringReader;
033import java.io.Writer;
034import java.nio.charset.Charset;
035import java.util.Iterator;
036import java.util.List;
037import javax.annotation.CheckForNull;
038import org.checkerframework.checker.nullness.qual.Nullable;
039
040/**
041 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
042 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
043 * it is an immutable <i>supplier</i> of {@code Reader} instances.
044 *
045 * <p>{@code CharSource} provides two kinds of methods:
046 *
047 * <ul>
048 *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
049 *       instance each time they are called. The caller is responsible for ensuring that the
050 *       returned reader is closed.
051 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
052 *       typically implemented by opening a reader using one of the methods in the first category,
053 *       doing something and finally closing the reader that was opened.
054 * </ul>
055 *
056 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
057 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code
058 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to
059 * be an empty line at the end if the contents are terminated with a line separator.
060 *
061 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
062 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
063 *
064 * <p><b>Note:</b> In general, {@code CharSource} is intended to be used for "file-like" sources
065 * that provide readers that are:
066 *
067 * <ul>
068 *   <li><b>Finite:</b> Many operations, such as {@link #length()} and {@link #read()}, will either
069 *       block indefinitely or fail if the source creates an infinite reader.
070 *   <li><b>Non-destructive:</b> A <i>destructive</i> reader will consume or otherwise alter the
071 *       source as they are read from it. A source that provides such readers will not be reusable,
072 *       and operations that read from the stream (including {@link #length()}, in some
073 *       implementations) will prevent further operations from completing as expected.
074 * </ul>
075 *
076 * @since 14.0
077 * @author Colin Decker
078 */
079@GwtIncompatible
080@ElementTypesAreNonnullByDefault
081public abstract class CharSource {
082
083  /** Constructor for use by subclasses. */
084  protected CharSource() {}
085
086  /**
087   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
088   * as bytes using the given {@link Charset}.
089   *
090   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
091   * the default implementation of this method will ensure that the original {@code CharSource} is
092   * returned, rather than round-trip encoding. Subclasses that override this method should behave
093   * the same way.
094   *
095   * @since 20.0
096   */
097  @Beta
098  public ByteSource asByteSource(Charset charset) {
099    return new AsByteSource(charset);
100  }
101
102  /**
103   * Opens a new {@link Reader} for reading from this source. This method returns a new, independent
104   * reader each time it is called.
105   *
106   * <p>The caller is responsible for ensuring that the returned reader is closed.
107   *
108   * @throws IOException if an I/O error occurs while opening the reader
109   */
110  public abstract Reader openStream() throws IOException;
111
112  /**
113   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
114   * independent reader each time it is called.
115   *
116   * <p>The caller is responsible for ensuring that the returned reader is closed.
117   *
118   * @throws IOException if an I/O error occurs while of opening the reader
119   */
120  public BufferedReader openBufferedStream() throws IOException {
121    Reader reader = openStream();
122    return (reader instanceof BufferedReader)
123        ? (BufferedReader) reader
124        : new BufferedReader(reader);
125  }
126
127  /**
128   * Returns the size of this source in chars, if the size can be easily determined without actually
129   * opening the data stream.
130   *
131   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code
132   * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i>
133   * that this method will return a different number of chars than would be returned by reading all
134   * of the chars.
135   *
136   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
137   * return a different number of chars if the contents are changed.
138   *
139   * @since 19.0
140   */
141  @Beta
142  public Optional<Long> lengthIfKnown() {
143    return Optional.absent();
144  }
145
146  /**
147   * Returns the length of this source in chars, even if doing so requires opening and traversing an
148   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
149   *
150   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
151   * absent, it will fall back to a heavyweight operation that will open a stream, {@link
152   * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that
153   * were skipped.
154   *
155   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
156   * implementation, it is <i>possible</i> that this method will return a different number of chars
157   * than would be returned by reading all of the chars.
158   *
159   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
160   * number of chars if the contents are changed.
161   *
162   * @throws IOException if an I/O error occurs while reading the length of this source
163   * @since 19.0
164   */
165  @Beta
166  public long length() throws IOException {
167    Optional<Long> lengthIfKnown = lengthIfKnown();
168    if (lengthIfKnown.isPresent()) {
169      return lengthIfKnown.get();
170    }
171
172    Closer closer = Closer.create();
173    try {
174      Reader reader = closer.register(openStream());
175      return countBySkipping(reader);
176    } catch (Throwable e) {
177      throw closer.rethrow(e);
178    } finally {
179      closer.close();
180    }
181  }
182
183  private long countBySkipping(Reader reader) throws IOException {
184    long count = 0;
185    long read;
186    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
187      count += read;
188    }
189    return count;
190  }
191
192  /**
193   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
194   * Does not close {@code appendable} if it is {@code Closeable}.
195   *
196   * @return the number of characters copied
197   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
198   *     appendable}
199   */
200  @CanIgnoreReturnValue
201  public long copyTo(Appendable appendable) throws IOException {
202    checkNotNull(appendable);
203
204    Closer closer = Closer.create();
205    try {
206      Reader reader = closer.register(openStream());
207      return CharStreams.copy(reader, appendable);
208    } catch (Throwable e) {
209      throw closer.rethrow(e);
210    } finally {
211      closer.close();
212    }
213  }
214
215  /**
216   * Copies the contents of this source to the given sink.
217   *
218   * @return the number of characters copied
219   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
220   *     sink}
221   */
222  @CanIgnoreReturnValue
223  public long copyTo(CharSink sink) throws IOException {
224    checkNotNull(sink);
225
226    Closer closer = Closer.create();
227    try {
228      Reader reader = closer.register(openStream());
229      Writer writer = closer.register(sink.openStream());
230      return CharStreams.copy(reader, writer);
231    } catch (Throwable e) {
232      throw closer.rethrow(e);
233    } finally {
234      closer.close();
235    }
236  }
237
238  /**
239   * Reads the contents of this source as a string.
240   *
241   * @throws IOException if an I/O error occurs while reading from this source
242   */
243  public String read() throws IOException {
244    Closer closer = Closer.create();
245    try {
246      Reader reader = closer.register(openStream());
247      return CharStreams.toString(reader);
248    } catch (Throwable e) {
249      throw closer.rethrow(e);
250    } finally {
251      closer.close();
252    }
253  }
254
255  /**
256   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
257   *
258   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
259   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
260   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
261   * it does.
262   *
263   * @throws IOException if an I/O error occurs while reading from this source
264   */
265  @CheckForNull
266  public String readFirstLine() throws IOException {
267    Closer closer = Closer.create();
268    try {
269      BufferedReader reader = closer.register(openBufferedStream());
270      return reader.readLine();
271    } catch (Throwable e) {
272      throw closer.rethrow(e);
273    } finally {
274      closer.close();
275    }
276  }
277
278  /**
279   * Reads all the lines of this source as a list of strings. The returned list will be empty if
280   * this source is empty.
281   *
282   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
283   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
284   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
285   * it does.
286   *
287   * @throws IOException if an I/O error occurs while reading from this source
288   */
289  public ImmutableList<String> readLines() throws IOException {
290    Closer closer = Closer.create();
291    try {
292      BufferedReader reader = closer.register(openBufferedStream());
293      List<String> result = Lists.newArrayList();
294      String line;
295      while ((line = reader.readLine()) != null) {
296        result.add(line);
297      }
298      return ImmutableList.copyOf(result);
299    } catch (Throwable e) {
300      throw closer.rethrow(e);
301    } finally {
302      closer.close();
303    }
304  }
305
306  /**
307   * Reads lines of text from this source, processing each line as it is read using the given {@link
308   * LineProcessor processor}. Stops when all lines have been processed or the processor returns
309   * {@code false} and returns the result produced by the processor.
310   *
311   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
312   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
313   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
314   * it does.
315   *
316   * @throws IOException if an I/O error occurs while reading from this source or if {@code
317   *     processor} throws an {@code IOException}
318   * @since 16.0
319   */
320  @Beta
321  @CanIgnoreReturnValue // some processors won't return a useful result
322  @ParametricNullness
323  public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException {
324    checkNotNull(processor);
325
326    Closer closer = Closer.create();
327    try {
328      Reader reader = closer.register(openStream());
329      return CharStreams.readLines(reader, processor);
330    } catch (Throwable e) {
331      throw closer.rethrow(e);
332    } finally {
333      closer.close();
334    }
335  }
336
337  /**
338   * Returns whether the source has zero chars. The default implementation first checks {@link
339   * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
340   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
341   *
342   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
343   * chars are actually available for reading. This means that a source may return {@code true} from
344   * {@code isEmpty()} despite having readable content.
345   *
346   * @throws IOException if an I/O error occurs
347   * @since 15.0
348   */
349  public boolean isEmpty() throws IOException {
350    Optional<Long> lengthIfKnown = lengthIfKnown();
351    if (lengthIfKnown.isPresent()) {
352      return lengthIfKnown.get() == 0L;
353    }
354    Closer closer = Closer.create();
355    try {
356      Reader reader = closer.register(openStream());
357      return reader.read() == -1;
358    } catch (Throwable e) {
359      throw closer.rethrow(e);
360    } finally {
361      closer.close();
362    }
363  }
364
365  /**
366   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
367   * the source will contain the concatenated data from the streams of the underlying sources.
368   *
369   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
370   * close the open underlying stream.
371   *
372   * @param sources the sources to concatenate
373   * @return a {@code CharSource} containing the concatenated data
374   * @since 15.0
375   */
376  public static CharSource concat(Iterable<? extends CharSource> sources) {
377    return new ConcatenatedCharSource(sources);
378  }
379
380  /**
381   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
382   * the source will contain the concatenated data from the streams of the underlying sources.
383   *
384   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
385   * close the open underlying stream.
386   *
387   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
388   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
389   * eagerly fetches data for each source when iterated (rather than producing sources that only
390   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
391   * possible.
392   *
393   * @param sources the sources to concatenate
394   * @return a {@code CharSource} containing the concatenated data
395   * @throws NullPointerException if any of {@code sources} is {@code null}
396   * @since 15.0
397   */
398  public static CharSource concat(Iterator<? extends CharSource> sources) {
399    return concat(ImmutableList.copyOf(sources));
400  }
401
402  /**
403   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
404   * the source will contain the concatenated data from the streams of the underlying sources.
405   *
406   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
407   * close the open underlying stream.
408   *
409   * @param sources the sources to concatenate
410   * @return a {@code CharSource} containing the concatenated data
411   * @throws NullPointerException if any of {@code sources} is {@code null}
412   * @since 15.0
413   */
414  public static CharSource concat(CharSource... sources) {
415    return concat(ImmutableList.copyOf(sources));
416  }
417
418  /**
419   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
420   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
421   * the {@code charSequence} is mutated while it is being read, so don't do that.
422   *
423   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
424   */
425  public static CharSource wrap(CharSequence charSequence) {
426    return charSequence instanceof String
427        ? new StringCharSource((String) charSequence)
428        : new CharSequenceCharSource(charSequence);
429  }
430
431  /**
432   * Returns an immutable {@link CharSource} that contains no characters.
433   *
434   * @since 15.0
435   */
436  public static CharSource empty() {
437    return EmptyCharSource.INSTANCE;
438  }
439
440  /** A byte source that reads chars from this source and encodes them as bytes using a charset. */
441  private final class AsByteSource extends ByteSource {
442
443    final Charset charset;
444
445    AsByteSource(Charset charset) {
446      this.charset = checkNotNull(charset);
447    }
448
449    @Override
450    public CharSource asCharSource(Charset charset) {
451      if (charset.equals(this.charset)) {
452        return CharSource.this;
453      }
454      return super.asCharSource(charset);
455    }
456
457    @Override
458    public InputStream openStream() throws IOException {
459      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
460    }
461
462    @Override
463    public String toString() {
464      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
465    }
466  }
467
468  private static class CharSequenceCharSource extends CharSource {
469
470    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
471
472    protected final CharSequence seq;
473
474    protected CharSequenceCharSource(CharSequence seq) {
475      this.seq = checkNotNull(seq);
476    }
477
478    @Override
479    public Reader openStream() {
480      return new CharSequenceReader(seq);
481    }
482
483    @Override
484    public String read() {
485      return seq.toString();
486    }
487
488    @Override
489    public boolean isEmpty() {
490      return seq.length() == 0;
491    }
492
493    @Override
494    public long length() {
495      return seq.length();
496    }
497
498    @Override
499    public Optional<Long> lengthIfKnown() {
500      return Optional.of((long) seq.length());
501    }
502
503    /**
504     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
505     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
506     */
507    private Iterator<String> linesIterator() {
508      return new AbstractIterator<String>() {
509        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
510
511        @Override
512        @CheckForNull
513        protected String computeNext() {
514          if (lines.hasNext()) {
515            String next = lines.next();
516            // skip last line if it's empty
517            if (lines.hasNext() || !next.isEmpty()) {
518              return next;
519            }
520          }
521          return endOfData();
522        }
523      };
524    }
525
526    @Override
527    @CheckForNull
528    public String readFirstLine() {
529      Iterator<String> lines = linesIterator();
530      return lines.hasNext() ? lines.next() : null;
531    }
532
533    @Override
534    public ImmutableList<String> readLines() {
535      return ImmutableList.copyOf(linesIterator());
536    }
537
538    @Override
539    @ParametricNullness
540    public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException {
541      Iterator<String> lines = linesIterator();
542      while (lines.hasNext()) {
543        if (!processor.processLine(lines.next())) {
544          break;
545        }
546      }
547      return processor.getResult();
548    }
549
550    @Override
551    public String toString() {
552      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
553    }
554  }
555
556  /**
557   * Subclass specialized for string instances.
558   *
559   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
560   *
561   * <ul>
562   *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
563   *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
564   *       one with {@link CharSequence#charAt(int)}.
565   *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
566   *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
567   *       can't change, and it is faster because many writers and appendables are optimized for
568   *       appending string instances.
569   * </ul>
570   */
571  private static class StringCharSource extends CharSequenceCharSource {
572    protected StringCharSource(String seq) {
573      super(seq);
574    }
575
576    @Override
577    public Reader openStream() {
578      return new StringReader((String) seq);
579    }
580
581    @Override
582    public long copyTo(Appendable appendable) throws IOException {
583      appendable.append(seq);
584      return seq.length();
585    }
586
587    @Override
588    public long copyTo(CharSink sink) throws IOException {
589      checkNotNull(sink);
590      Closer closer = Closer.create();
591      try {
592        Writer writer = closer.register(sink.openStream());
593        writer.write((String) seq);
594        return seq.length();
595      } catch (Throwable e) {
596        throw closer.rethrow(e);
597      } finally {
598        closer.close();
599      }
600    }
601  }
602
603  private static final class EmptyCharSource extends StringCharSource {
604
605    private static final EmptyCharSource INSTANCE = new EmptyCharSource();
606
607    private EmptyCharSource() {
608      super("");
609    }
610
611    @Override
612    public String toString() {
613      return "CharSource.empty()";
614    }
615  }
616
617  private static final class ConcatenatedCharSource extends CharSource {
618
619    private final Iterable<? extends CharSource> sources;
620
621    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
622      this.sources = checkNotNull(sources);
623    }
624
625    @Override
626    public Reader openStream() throws IOException {
627      return new MultiReader(sources.iterator());
628    }
629
630    @Override
631    public boolean isEmpty() throws IOException {
632      for (CharSource source : sources) {
633        if (!source.isEmpty()) {
634          return false;
635        }
636      }
637      return true;
638    }
639
640    @Override
641    public Optional<Long> lengthIfKnown() {
642      long result = 0L;
643      for (CharSource source : sources) {
644        Optional<Long> lengthIfKnown = source.lengthIfKnown();
645        if (!lengthIfKnown.isPresent()) {
646          return Optional.absent();
647        }
648        result += lengthIfKnown.get();
649      }
650      return Optional.of(result);
651    }
652
653    @Override
654    public long length() throws IOException {
655      long result = 0L;
656      for (CharSource source : sources) {
657        result += source.length();
658      }
659      return result;
660    }
661
662    @Override
663    public String toString() {
664      return "CharSource.concat(" + sources + ")";
665    }
666  }
667}