001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.GwtIncompatible;
020import com.google.common.annotations.J2ktIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.errorprone.annotations.CanIgnoreReturnValue;
028import java.io.BufferedReader;
029import java.io.IOException;
030import java.io.InputStream;
031import java.io.Reader;
032import java.io.StringReader;
033import java.io.Writer;
034import java.nio.charset.Charset;
035import java.util.Iterator;
036import java.util.List;
037import javax.annotation.CheckForNull;
038import org.checkerframework.checker.nullness.qual.Nullable;
039
040/**
041 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
042 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
043 * it is an immutable <i>supplier</i> of {@code Reader} instances.
044 *
045 * <p>{@code CharSource} provides two kinds of methods:
046 *
047 * <ul>
048 *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
049 *       instance each time they are called. The caller is responsible for ensuring that the
050 *       returned reader is closed.
051 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
052 *       typically implemented by opening a reader using one of the methods in the first category,
053 *       doing something and finally closing the reader that was opened.
054 * </ul>
055 *
056 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
057 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code
058 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to
059 * be an empty line at the end if the contents are terminated with a line separator.
060 *
061 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
062 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
063 *
064 * <p><b>Note:</b> In general, {@code CharSource} is intended to be used for "file-like" sources
065 * that provide readers that are:
066 *
067 * <ul>
068 *   <li><b>Finite:</b> Many operations, such as {@link #length()} and {@link #read()}, will either
069 *       block indefinitely or fail if the source creates an infinite reader.
 *   <li><b>Non-destructive:</b> A <i>destructive</i> reader will consume or otherwise alter the
 *       source as it is read from. A source that provides such readers will not be reusable,
072 *       and operations that read from the stream (including {@link #length()}, in some
073 *       implementations) will prevent further operations from completing as expected.
074 * </ul>
075 *
076 * @since 14.0
077 * @author Colin Decker
078 */
079@J2ktIncompatible
080@GwtIncompatible
081@ElementTypesAreNonnullByDefault
082public abstract class CharSource {
083
  /** Constructor for use by subclasses. Takes no arguments and has an empty body. */
  protected CharSource() {}
086
087  /**
088   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
089   * as bytes using the given {@link Charset}.
090   *
091   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
092   * the default implementation of this method will ensure that the original {@code CharSource} is
093   * returned, rather than round-trip encoding. Subclasses that override this method should behave
094   * the same way.
095   *
096   * @since 20.0
097   */
098  public ByteSource asByteSource(Charset charset) {
099    return new AsByteSource(charset);
100  }
101
  /**
   * Opens a new {@link Reader} for reading from this source. This method returns a new, independent
   * reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @return a new reader over the contents of this source
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public abstract Reader openStream() throws IOException;
111
112  /**
113   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
114   * independent reader each time it is called.
115   *
116   * <p>The caller is responsible for ensuring that the returned reader is closed.
117   *
118   * @throws IOException if an I/O error occurs while of opening the reader
119   */
120  public BufferedReader openBufferedStream() throws IOException {
121    Reader reader = openStream();
122    return (reader instanceof BufferedReader)
123        ? (BufferedReader) reader
124        : new BufferedReader(reader);
125  }
126
127  /**
128   * Returns the size of this source in chars, if the size can be easily determined without actually
129   * opening the data stream.
130   *
131   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code
132   * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i>
133   * that this method will return a different number of chars than would be returned by reading all
134   * of the chars.
135   *
136   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
137   * return a different number of chars if the contents are changed.
138   *
139   * @since 19.0
140   */
141  public Optional<Long> lengthIfKnown() {
142    return Optional.absent();
143  }
144
145  /**
146   * Returns the length of this source in chars, even if doing so requires opening and traversing an
147   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
148   *
149   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
150   * absent, it will fall back to a heavyweight operation that will open a stream, {@link
151   * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that
152   * were skipped.
153   *
154   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
155   * implementation, it is <i>possible</i> that this method will return a different number of chars
156   * than would be returned by reading all of the chars.
157   *
158   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
159   * number of chars if the contents are changed.
160   *
161   * @throws IOException if an I/O error occurs while reading the length of this source
162   * @since 19.0
163   */
164  public long length() throws IOException {
165    Optional<Long> lengthIfKnown = lengthIfKnown();
166    if (lengthIfKnown.isPresent()) {
167      return lengthIfKnown.get();
168    }
169
170    Closer closer = Closer.create();
171    try {
172      Reader reader = closer.register(openStream());
173      return countBySkipping(reader);
174    } catch (Throwable e) {
175      throw closer.rethrow(e);
176    } finally {
177      closer.close();
178    }
179  }
180
181  private long countBySkipping(Reader reader) throws IOException {
182    long count = 0;
183    long read;
184    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
185      count += read;
186    }
187    return count;
188  }
189
190  /**
191   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
192   * Does not close {@code appendable} if it is {@code Closeable}.
193   *
194   * @return the number of characters copied
195   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
196   *     appendable}
197   */
198  @CanIgnoreReturnValue
199  public long copyTo(Appendable appendable) throws IOException {
200    checkNotNull(appendable);
201
202    Closer closer = Closer.create();
203    try {
204      Reader reader = closer.register(openStream());
205      return CharStreams.copy(reader, appendable);
206    } catch (Throwable e) {
207      throw closer.rethrow(e);
208    } finally {
209      closer.close();
210    }
211  }
212
213  /**
214   * Copies the contents of this source to the given sink.
215   *
216   * @return the number of characters copied
217   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
218   *     sink}
219   */
220  @CanIgnoreReturnValue
221  public long copyTo(CharSink sink) throws IOException {
222    checkNotNull(sink);
223
224    Closer closer = Closer.create();
225    try {
226      Reader reader = closer.register(openStream());
227      Writer writer = closer.register(sink.openStream());
228      return CharStreams.copy(reader, writer);
229    } catch (Throwable e) {
230      throw closer.rethrow(e);
231    } finally {
232      closer.close();
233    }
234  }
235
236  /**
237   * Reads the contents of this source as a string.
238   *
239   * @throws IOException if an I/O error occurs while reading from this source
240   */
241  public String read() throws IOException {
242    Closer closer = Closer.create();
243    try {
244      Reader reader = closer.register(openStream());
245      return CharStreams.toString(reader);
246    } catch (Throwable e) {
247      throw closer.rethrow(e);
248    } finally {
249      closer.close();
250    }
251  }
252
253  /**
254   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
255   *
256   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
257   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
258   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
259   * it does.
260   *
261   * @throws IOException if an I/O error occurs while reading from this source
262   */
263  @CheckForNull
264  public String readFirstLine() throws IOException {
265    Closer closer = Closer.create();
266    try {
267      BufferedReader reader = closer.register(openBufferedStream());
268      return reader.readLine();
269    } catch (Throwable e) {
270      throw closer.rethrow(e);
271    } finally {
272      closer.close();
273    }
274  }
275
276  /**
277   * Reads all the lines of this source as a list of strings. The returned list will be empty if
278   * this source is empty.
279   *
280   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
281   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
282   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
283   * it does.
284   *
285   * @throws IOException if an I/O error occurs while reading from this source
286   */
287  public ImmutableList<String> readLines() throws IOException {
288    Closer closer = Closer.create();
289    try {
290      BufferedReader reader = closer.register(openBufferedStream());
291      List<String> result = Lists.newArrayList();
292      String line;
293      while ((line = reader.readLine()) != null) {
294        result.add(line);
295      }
296      return ImmutableList.copyOf(result);
297    } catch (Throwable e) {
298      throw closer.rethrow(e);
299    } finally {
300      closer.close();
301    }
302  }
303
304  /**
305   * Reads lines of text from this source, processing each line as it is read using the given {@link
306   * LineProcessor processor}. Stops when all lines have been processed or the processor returns
307   * {@code false} and returns the result produced by the processor.
308   *
309   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
310   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
311   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
312   * it does.
313   *
314   * @throws IOException if an I/O error occurs while reading from this source or if {@code
315   *     processor} throws an {@code IOException}
316   * @since 16.0
317   */
318  @CanIgnoreReturnValue // some processors won't return a useful result
319  @ParametricNullness
320  public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException {
321    checkNotNull(processor);
322
323    Closer closer = Closer.create();
324    try {
325      Reader reader = closer.register(openStream());
326      return CharStreams.readLines(reader, processor);
327    } catch (Throwable e) {
328      throw closer.rethrow(e);
329    } finally {
330      closer.close();
331    }
332  }
333
334  /**
335   * Returns whether the source has zero chars. The default implementation first checks {@link
336   * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
337   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
338   *
339   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
340   * chars are actually available for reading. This means that a source may return {@code true} from
341   * {@code isEmpty()} despite having readable content.
342   *
343   * @throws IOException if an I/O error occurs
344   * @since 15.0
345   */
346  public boolean isEmpty() throws IOException {
347    Optional<Long> lengthIfKnown = lengthIfKnown();
348    if (lengthIfKnown.isPresent()) {
349      return lengthIfKnown.get() == 0L;
350    }
351    Closer closer = Closer.create();
352    try {
353      Reader reader = closer.register(openStream());
354      return reader.read() == -1;
355    } catch (Throwable e) {
356      throw closer.rethrow(e);
357    } finally {
358      closer.close();
359    }
360  }
361
362  /**
363   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
364   * the source will contain the concatenated data from the streams of the underlying sources.
365   *
366   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
367   * close the open underlying stream.
368   *
369   * @param sources the sources to concatenate
370   * @return a {@code CharSource} containing the concatenated data
371   * @since 15.0
372   */
373  public static CharSource concat(Iterable<? extends CharSource> sources) {
374    return new ConcatenatedCharSource(sources);
375  }
376
377  /**
378   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
379   * the source will contain the concatenated data from the streams of the underlying sources.
380   *
381   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
382   * close the open underlying stream.
383   *
384   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
385   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
386   * eagerly fetches data for each source when iterated (rather than producing sources that only
387   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
388   * possible.
389   *
390   * @param sources the sources to concatenate
391   * @return a {@code CharSource} containing the concatenated data
392   * @throws NullPointerException if any of {@code sources} is {@code null}
393   * @since 15.0
394   */
395  public static CharSource concat(Iterator<? extends CharSource> sources) {
396    return concat(ImmutableList.copyOf(sources));
397  }
398
399  /**
400   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
401   * the source will contain the concatenated data from the streams of the underlying sources.
402   *
403   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
404   * close the open underlying stream.
405   *
406   * @param sources the sources to concatenate
407   * @return a {@code CharSource} containing the concatenated data
408   * @throws NullPointerException if any of {@code sources} is {@code null}
409   * @since 15.0
410   */
411  public static CharSource concat(CharSource... sources) {
412    return concat(ImmutableList.copyOf(sources));
413  }
414
415  /**
416   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
417   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
418   * the {@code charSequence} is mutated while it is being read, so don't do that.
419   *
420   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
421   */
422  public static CharSource wrap(CharSequence charSequence) {
423    return charSequence instanceof String
424        ? new StringCharSource((String) charSequence)
425        : new CharSequenceCharSource(charSequence);
426  }
427
  /**
   * Returns an immutable {@link CharSource} that contains no characters. Every call returns the
   * same shared instance.
   *
   * @return the shared empty source
   * @since 15.0
   */
  public static CharSource empty() {
    return EmptyCharSource.INSTANCE;
  }
436
437  /** A byte source that reads chars from this source and encodes them as bytes using a charset. */
438  private final class AsByteSource extends ByteSource {
439
440    final Charset charset;
441
442    AsByteSource(Charset charset) {
443      this.charset = checkNotNull(charset);
444    }
445
446    @Override
447    public CharSource asCharSource(Charset charset) {
448      if (charset.equals(this.charset)) {
449        return CharSource.this;
450      }
451      return super.asCharSource(charset);
452    }
453
454    @Override
455    public InputStream openStream() throws IOException {
456      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
457    }
458
459    @Override
460    public String toString() {
461      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
462    }
463  }
464
465  private static class CharSequenceCharSource extends CharSource {
466
467    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
468
469    protected final CharSequence seq;
470
471    protected CharSequenceCharSource(CharSequence seq) {
472      this.seq = checkNotNull(seq);
473    }
474
475    @Override
476    public Reader openStream() {
477      return new CharSequenceReader(seq);
478    }
479
480    @Override
481    public String read() {
482      return seq.toString();
483    }
484
485    @Override
486    public boolean isEmpty() {
487      return seq.length() == 0;
488    }
489
490    @Override
491    public long length() {
492      return seq.length();
493    }
494
495    @Override
496    public Optional<Long> lengthIfKnown() {
497      return Optional.of((long) seq.length());
498    }
499
500    /**
501     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
502     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
503     */
504    private Iterator<String> linesIterator() {
505      return new AbstractIterator<String>() {
506        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
507
508        @Override
509        @CheckForNull
510        protected String computeNext() {
511          if (lines.hasNext()) {
512            String next = lines.next();
513            // skip last line if it's empty
514            if (lines.hasNext() || !next.isEmpty()) {
515              return next;
516            }
517          }
518          return endOfData();
519        }
520      };
521    }
522
523    @Override
524    @CheckForNull
525    public String readFirstLine() {
526      Iterator<String> lines = linesIterator();
527      return lines.hasNext() ? lines.next() : null;
528    }
529
530    @Override
531    public ImmutableList<String> readLines() {
532      return ImmutableList.copyOf(linesIterator());
533    }
534
535    @Override
536    @ParametricNullness
537    public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException {
538      Iterator<String> lines = linesIterator();
539      while (lines.hasNext()) {
540        if (!processor.processLine(lines.next())) {
541          break;
542        }
543      }
544      return processor.getResult();
545    }
546
547    @Override
548    public String toString() {
549      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
550    }
551  }
552
553  /**
554   * Subclass specialized for string instances.
555   *
556   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
557   *
558   * <ul>
559   *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
560   *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
561   *       one with {@link CharSequence#charAt(int)}.
562   *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
563   *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
564   *       can't change, and it is faster because many writers and appendables are optimized for
565   *       appending string instances.
566   * </ul>
567   */
568  private static class StringCharSource extends CharSequenceCharSource {
569    protected StringCharSource(String seq) {
570      super(seq);
571    }
572
573    @Override
574    public Reader openStream() {
575      return new StringReader((String) seq);
576    }
577
578    @Override
579    public long copyTo(Appendable appendable) throws IOException {
580      appendable.append(seq);
581      return seq.length();
582    }
583
584    @Override
585    public long copyTo(CharSink sink) throws IOException {
586      checkNotNull(sink);
587      Closer closer = Closer.create();
588      try {
589        Writer writer = closer.register(sink.openStream());
590        writer.write((String) seq);
591        return seq.length();
592      } catch (Throwable e) {
593        throw closer.rethrow(e);
594      } finally {
595        closer.close();
596      }
597    }
598  }
599
  /** A {@link StringCharSource} over the empty string, exposed through a single shared instance. */
  private static final class EmptyCharSource extends StringCharSource {

    /** The only instance; the constructor is private so no other can be created. */
    private static final EmptyCharSource INSTANCE = new EmptyCharSource();

    private EmptyCharSource() {
      super("");
    }

    @Override
    public String toString() {
      return "CharSource.empty()";
    }
  }
613
614  private static final class ConcatenatedCharSource extends CharSource {
615
616    private final Iterable<? extends CharSource> sources;
617
618    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
619      this.sources = checkNotNull(sources);
620    }
621
622    @Override
623    public Reader openStream() throws IOException {
624      return new MultiReader(sources.iterator());
625    }
626
627    @Override
628    public boolean isEmpty() throws IOException {
629      for (CharSource source : sources) {
630        if (!source.isEmpty()) {
631          return false;
632        }
633      }
634      return true;
635    }
636
637    @Override
638    public Optional<Long> lengthIfKnown() {
639      long result = 0L;
640      for (CharSource source : sources) {
641        Optional<Long> lengthIfKnown = source.lengthIfKnown();
642        if (!lengthIfKnown.isPresent()) {
643          return Optional.absent();
644        }
645        result += lengthIfKnown.get();
646      }
647      return Optional.of(result);
648    }
649
650    @Override
651    public long length() throws IOException {
652      long result = 0L;
653      for (CharSource source : sources) {
654        result += source.length();
655      }
656      return result;
657    }
658
659    @Override
660    public String toString() {
661      return "CharSource.concat(" + sources + ")";
662    }
663  }
664}