001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.Beta;
020import com.google.common.annotations.GwtIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.errorprone.annotations.CanIgnoreReturnValue;
028import java.io.BufferedReader;
029import java.io.IOException;
030import java.io.InputStream;
031import java.io.Reader;
032import java.io.StringReader;
033import java.io.Writer;
034import java.nio.charset.Charset;
035import java.util.Iterator;
036import java.util.List;
037import org.checkerframework.checker.nullness.compatqual.NullableDecl;
038
039/**
040 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code
041 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead,
042 * it is an immutable <i>supplier</i> of {@code Reader} instances.
043 *
044 * <p>{@code CharSource} provides two kinds of methods:
045 *
046 * <ul>
047 *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
048 *       instance each time they are called. The caller is responsible for ensuring that the
049 *       returned reader is closed.
050 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
051 *       typically implemented by opening a reader using one of the methods in the first category,
052 *       doing something and finally closing the reader that was opened.
053 * </ul>
054 *
055 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
056 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code
057 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to
058 * be an empty line at the end if the contents are terminated with a line separator.
059 *
060 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
061 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
062 *
063 * @since 14.0
064 * @author Colin Decker
065 */
066@GwtIncompatible
067public abstract class CharSource {
068
069  /** Constructor for use by subclasses. */
070  protected CharSource() {}
071
072  /**
073   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
074   * as bytes using the given {@link Charset}.
075   *
076   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
077   * the default implementation of this method will ensure that the original {@code CharSource} is
078   * returned, rather than round-trip encoding. Subclasses that override this method should behave
079   * the same way.
080   *
081   * @since 20.0
082   */
083  @Beta
084  public ByteSource asByteSource(Charset charset) {
085    return new AsByteSource(charset);
086  }
087
088  /**
089   * Opens a new {@link Reader} for reading from this source. This method returns a new, independent
090   * reader each time it is called.
091   *
092   * <p>The caller is responsible for ensuring that the returned reader is closed.
093   *
094   * @throws IOException if an I/O error occurs while opening the reader
095   */
096  public abstract Reader openStream() throws IOException;
097
098  /**
099   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
100   * independent reader each time it is called.
101   *
102   * <p>The caller is responsible for ensuring that the returned reader is closed.
103   *
104   * @throws IOException if an I/O error occurs while of opening the reader
105   */
106  public BufferedReader openBufferedStream() throws IOException {
107    Reader reader = openStream();
108    return (reader instanceof BufferedReader)
109        ? (BufferedReader) reader
110        : new BufferedReader(reader);
111  }
112
113  /**
114   * Returns the size of this source in chars, if the size can be easily determined without actually
115   * opening the data stream.
116   *
117   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code
118   * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i>
119   * that this method will return a different number of chars than would be returned by reading all
120   * of the chars.
121   *
122   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
123   * return a different number of chars if the contents are changed.
124   *
125   * @since 19.0
126   */
127  @Beta
128  public Optional<Long> lengthIfKnown() {
129    return Optional.absent();
130  }
131
132  /**
133   * Returns the length of this source in chars, even if doing so requires opening and traversing an
134   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
135   *
136   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
137   * absent, it will fall back to a heavyweight operation that will open a stream, {@link
138   * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that
139   * were skipped.
140   *
141   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
142   * implementation, it is <i>possible</i> that this method will return a different number of chars
143   * than would be returned by reading all of the chars.
144   *
145   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
146   * number of chars if the contents are changed.
147   *
148   * @throws IOException if an I/O error occurs while reading the length of this source
149   * @since 19.0
150   */
151  @Beta
152  public long length() throws IOException {
153    Optional<Long> lengthIfKnown = lengthIfKnown();
154    if (lengthIfKnown.isPresent()) {
155      return lengthIfKnown.get();
156    }
157
158    Closer closer = Closer.create();
159    try {
160      Reader reader = closer.register(openStream());
161      return countBySkipping(reader);
162    } catch (Throwable e) {
163      throw closer.rethrow(e);
164    } finally {
165      closer.close();
166    }
167  }
168
169  private long countBySkipping(Reader reader) throws IOException {
170    long count = 0;
171    long read;
172    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
173      count += read;
174    }
175    return count;
176  }
177
178  /**
179   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
180   * Does not close {@code appendable} if it is {@code Closeable}.
181   *
182   * @return the number of characters copied
183   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
184   *     appendable}
185   */
186  @CanIgnoreReturnValue
187  public long copyTo(Appendable appendable) throws IOException {
188    checkNotNull(appendable);
189
190    Closer closer = Closer.create();
191    try {
192      Reader reader = closer.register(openStream());
193      return CharStreams.copy(reader, appendable);
194    } catch (Throwable e) {
195      throw closer.rethrow(e);
196    } finally {
197      closer.close();
198    }
199  }
200
201  /**
202   * Copies the contents of this source to the given sink.
203   *
204   * @return the number of characters copied
205   * @throws IOException if an I/O error occurs while reading from this source or writing to {@code
206   *     sink}
207   */
208  @CanIgnoreReturnValue
209  public long copyTo(CharSink sink) throws IOException {
210    checkNotNull(sink);
211
212    Closer closer = Closer.create();
213    try {
214      Reader reader = closer.register(openStream());
215      Writer writer = closer.register(sink.openStream());
216      return CharStreams.copy(reader, writer);
217    } catch (Throwable e) {
218      throw closer.rethrow(e);
219    } finally {
220      closer.close();
221    }
222  }
223
224  /**
225   * Reads the contents of this source as a string.
226   *
227   * @throws IOException if an I/O error occurs while reading from this source
228   */
229  public String read() throws IOException {
230    Closer closer = Closer.create();
231    try {
232      Reader reader = closer.register(openStream());
233      return CharStreams.toString(reader);
234    } catch (Throwable e) {
235      throw closer.rethrow(e);
236    } finally {
237      closer.close();
238    }
239  }
240
241  /**
242   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
243   *
244   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
245   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
246   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
247   * it does.
248   *
249   * @throws IOException if an I/O error occurs while reading from this source
250   */
251  @NullableDecl
252  public String readFirstLine() throws IOException {
253    Closer closer = Closer.create();
254    try {
255      BufferedReader reader = closer.register(openBufferedStream());
256      return reader.readLine();
257    } catch (Throwable e) {
258      throw closer.rethrow(e);
259    } finally {
260      closer.close();
261    }
262  }
263
264  /**
265   * Reads all the lines of this source as a list of strings. The returned list will be empty if
266   * this source is empty.
267   *
268   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
269   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
270   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
271   * it does.
272   *
273   * @throws IOException if an I/O error occurs while reading from this source
274   */
275  public ImmutableList<String> readLines() throws IOException {
276    Closer closer = Closer.create();
277    try {
278      BufferedReader reader = closer.register(openBufferedStream());
279      List<String> result = Lists.newArrayList();
280      String line;
281      while ((line = reader.readLine()) != null) {
282        result.add(line);
283      }
284      return ImmutableList.copyOf(result);
285    } catch (Throwable e) {
286      throw closer.rethrow(e);
287    } finally {
288      closer.close();
289    }
290  }
291
292  /**
293   * Reads lines of text from this source, processing each line as it is read using the given {@link
294   * LineProcessor processor}. Stops when all lines have been processed or the processor returns
295   * {@code false} and returns the result produced by the processor.
296   *
297   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
298   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code
299   * \n}. If the source's content does not end in a line termination sequence, it is treated as if
300   * it does.
301   *
302   * @throws IOException if an I/O error occurs while reading from this source or if {@code
303   *     processor} throws an {@code IOException}
304   * @since 16.0
305   */
306  @Beta
307  @CanIgnoreReturnValue // some processors won't return a useful result
308  public <T> T readLines(LineProcessor<T> processor) throws IOException {
309    checkNotNull(processor);
310
311    Closer closer = Closer.create();
312    try {
313      Reader reader = closer.register(openStream());
314      return CharStreams.readLines(reader, processor);
315    } catch (Throwable e) {
316      throw closer.rethrow(e);
317    } finally {
318      closer.close();
319    }
320  }
321
322  /**
323   * Returns whether the source has zero chars. The default implementation first checks {@link
324   * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
325   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
326   *
327   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
328   * chars are actually available for reading. This means that a source may return {@code true} from
329   * {@code isEmpty()} despite having readable content.
330   *
331   * @throws IOException if an I/O error occurs
332   * @since 15.0
333   */
334  public boolean isEmpty() throws IOException {
335    Optional<Long> lengthIfKnown = lengthIfKnown();
336    if (lengthIfKnown.isPresent()) {
337      return lengthIfKnown.get() == 0L;
338    }
339    Closer closer = Closer.create();
340    try {
341      Reader reader = closer.register(openStream());
342      return reader.read() == -1;
343    } catch (Throwable e) {
344      throw closer.rethrow(e);
345    } finally {
346      closer.close();
347    }
348  }
349
350  /**
351   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
352   * the source will contain the concatenated data from the streams of the underlying sources.
353   *
354   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
355   * close the open underlying stream.
356   *
357   * @param sources the sources to concatenate
358   * @return a {@code CharSource} containing the concatenated data
359   * @since 15.0
360   */
361  public static CharSource concat(Iterable<? extends CharSource> sources) {
362    return new ConcatenatedCharSource(sources);
363  }
364
365  /**
366   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
367   * the source will contain the concatenated data from the streams of the underlying sources.
368   *
369   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
370   * close the open underlying stream.
371   *
372   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
373   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
374   * eagerly fetches data for each source when iterated (rather than producing sources that only
375   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
376   * possible.
377   *
378   * @param sources the sources to concatenate
379   * @return a {@code CharSource} containing the concatenated data
380   * @throws NullPointerException if any of {@code sources} is {@code null}
381   * @since 15.0
382   */
383  public static CharSource concat(Iterator<? extends CharSource> sources) {
384    return concat(ImmutableList.copyOf(sources));
385  }
386
387  /**
388   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
389   * the source will contain the concatenated data from the streams of the underlying sources.
390   *
391   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
392   * close the open underlying stream.
393   *
394   * @param sources the sources to concatenate
395   * @return a {@code CharSource} containing the concatenated data
396   * @throws NullPointerException if any of {@code sources} is {@code null}
397   * @since 15.0
398   */
399  public static CharSource concat(CharSource... sources) {
400    return concat(ImmutableList.copyOf(sources));
401  }
402
403  /**
404   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
405   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
406   * the {@code charSequence} is mutated while it is being read, so don't do that.
407   *
408   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
409   */
410  public static CharSource wrap(CharSequence charSequence) {
411    return charSequence instanceof String
412        ? new StringCharSource((String) charSequence)
413        : new CharSequenceCharSource(charSequence);
414  }
415
416  /**
417   * Returns an immutable {@link CharSource} that contains no characters.
418   *
419   * @since 15.0
420   */
421  public static CharSource empty() {
422    return EmptyCharSource.INSTANCE;
423  }
424
425  /** A byte source that reads chars from this source and encodes them as bytes using a charset. */
426  private final class AsByteSource extends ByteSource {
427
428    final Charset charset;
429
430    AsByteSource(Charset charset) {
431      this.charset = checkNotNull(charset);
432    }
433
434    @Override
435    public CharSource asCharSource(Charset charset) {
436      if (charset.equals(this.charset)) {
437        return CharSource.this;
438      }
439      return super.asCharSource(charset);
440    }
441
442    @Override
443    public InputStream openStream() throws IOException {
444      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
445    }
446
447    @Override
448    public String toString() {
449      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
450    }
451  }
452
453  private static class CharSequenceCharSource extends CharSource {
454
455    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
456
457    protected final CharSequence seq;
458
459    protected CharSequenceCharSource(CharSequence seq) {
460      this.seq = checkNotNull(seq);
461    }
462
463    @Override
464    public Reader openStream() {
465      return new CharSequenceReader(seq);
466    }
467
468    @Override
469    public String read() {
470      return seq.toString();
471    }
472
473    @Override
474    public boolean isEmpty() {
475      return seq.length() == 0;
476    }
477
478    @Override
479    public long length() {
480      return seq.length();
481    }
482
483    @Override
484    public Optional<Long> lengthIfKnown() {
485      return Optional.of((long) seq.length());
486    }
487
488    /**
489     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
490     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
491     */
492    private Iterator<String> linesIterator() {
493      return new AbstractIterator<String>() {
494        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
495
496        @Override
497        protected String computeNext() {
498          if (lines.hasNext()) {
499            String next = lines.next();
500            // skip last line if it's empty
501            if (lines.hasNext() || !next.isEmpty()) {
502              return next;
503            }
504          }
505          return endOfData();
506        }
507      };
508    }
509
510    @Override
511    public String readFirstLine() {
512      Iterator<String> lines = linesIterator();
513      return lines.hasNext() ? lines.next() : null;
514    }
515
516    @Override
517    public ImmutableList<String> readLines() {
518      return ImmutableList.copyOf(linesIterator());
519    }
520
521    @Override
522    public <T> T readLines(LineProcessor<T> processor) throws IOException {
523      Iterator<String> lines = linesIterator();
524      while (lines.hasNext()) {
525        if (!processor.processLine(lines.next())) {
526          break;
527        }
528      }
529      return processor.getResult();
530    }
531
532    @Override
533    public String toString() {
534      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
535    }
536  }
537
538  /**
539   * Subclass specialized for string instances.
540   *
541   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
542   *
543   * <ul>
544   *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
545   *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
546   *       one with {@link CharSequence#charAt(int)}.
547   *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
548   *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
549   *       can't change, and it is faster because many writers and appendables are optimized for
550   *       appending string instances.
551   * </ul>
552   */
553  private static class StringCharSource extends CharSequenceCharSource {
554    protected StringCharSource(String seq) {
555      super(seq);
556    }
557
558    @Override
559    public Reader openStream() {
560      return new StringReader((String) seq);
561    }
562
563    @Override
564    public long copyTo(Appendable appendable) throws IOException {
565      appendable.append(seq);
566      return seq.length();
567    }
568
569    @Override
570    public long copyTo(CharSink sink) throws IOException {
571      checkNotNull(sink);
572      Closer closer = Closer.create();
573      try {
574        Writer writer = closer.register(sink.openStream());
575        writer.write((String) seq);
576        return seq.length();
577      } catch (Throwable e) {
578        throw closer.rethrow(e);
579      } finally {
580        closer.close();
581      }
582    }
583  }
584
585  private static final class EmptyCharSource extends StringCharSource {
586
587    private static final EmptyCharSource INSTANCE = new EmptyCharSource();
588
589    private EmptyCharSource() {
590      super("");
591    }
592
593    @Override
594    public String toString() {
595      return "CharSource.empty()";
596    }
597  }
598
599  private static final class ConcatenatedCharSource extends CharSource {
600
601    private final Iterable<? extends CharSource> sources;
602
603    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
604      this.sources = checkNotNull(sources);
605    }
606
607    @Override
608    public Reader openStream() throws IOException {
609      return new MultiReader(sources.iterator());
610    }
611
612    @Override
613    public boolean isEmpty() throws IOException {
614      for (CharSource source : sources) {
615        if (!source.isEmpty()) {
616          return false;
617        }
618      }
619      return true;
620    }
621
622    @Override
623    public Optional<Long> lengthIfKnown() {
624      long result = 0L;
625      for (CharSource source : sources) {
626        Optional<Long> lengthIfKnown = source.lengthIfKnown();
627        if (!lengthIfKnown.isPresent()) {
628          return Optional.absent();
629        }
630        result += lengthIfKnown.get();
631      }
632      return Optional.of(result);
633    }
634
635    @Override
636    public long length() throws IOException {
637      long result = 0L;
638      for (CharSource source : sources) {
639        result += source.length();
640      }
641      return result;
642    }
643
644    @Override
645    public String toString() {
646      return "CharSource.concat(" + sources + ")";
647    }
648  }
649}