001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.Beta;
020import com.google.common.annotations.GwtIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.errorprone.annotations.CanIgnoreReturnValue;
028import java.io.BufferedReader;
029import java.io.IOException;
030import java.io.InputStream;
031import java.io.Reader;
032import java.io.StringReader;
033import java.io.Writer;
034import java.nio.charset.Charset;
035import java.util.Iterator;
036import java.util.List;
037import javax.annotation.Nullable;
038
039/**
040 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
041 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
042 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
043 *
044 * <p>{@code CharSource} provides two kinds of methods:
045 * <ul>
046 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
047 *     instance each time they are called. The caller is responsible for ensuring that the returned
048 *     reader is closed.
049 * <li><b>Convenience methods:</b> These are implementations of common operations that are typically
050 *     implemented by opening a reader using one of the methods in the first category, doing
051 *     something and finally closing the reader that was opened.
052 * </ul>
053 *
054 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
055 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
056 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
057 * there to be an empty line at the end if the contents are terminated with a line separator.
058 *
059 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
060 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
061 *
062 * @since 14.0
063 * @author Colin Decker
064 */
065@GwtIncompatible
066public abstract class CharSource {
067
068  /**
069   * Constructor for use by subclasses.
070   */
071  protected CharSource() {}
072
073  /**
074   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
075   * as bytes using the given {@link Charset}.
076   *
077   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
078   * the default implementation of this method will ensure that the original {@code CharSource} is
079   * returned, rather than round-trip encoding. Subclasses that override this method should behave
080   * the same way.
081   *
082   * @since 20.0
083   */
084  @Beta
085  public ByteSource asByteSource(Charset charset) {
086    return new AsByteSource(charset);
087  }
088
  /**
   * Opens a new {@link Reader} for reading from this source. Every call is expected to produce a
   * new reader that is independent of any reader returned by an earlier call.
   *
   * <p>Closing the returned reader is the caller's responsibility.
   *
   * @return a newly opened reader over the contents of this source
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public abstract Reader openStream() throws IOException;
098
099  /**
100   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
101   * independent reader each time it is called.
102   *
103   * <p>The caller is responsible for ensuring that the returned reader is closed.
104   *
105   * @throws IOException if an I/O error occurs while of opening the reader
106   */
107  public BufferedReader openBufferedStream() throws IOException {
108    Reader reader = openStream();
109    return (reader instanceof BufferedReader)
110        ? (BufferedReader) reader
111        : new BufferedReader(reader);
112  }
113
114  /**
115   * Returns the size of this source in chars, if the size can be easily determined without actually
116   * opening the data stream.
117   *
118   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a
119   * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
120   * <i>possible</i> that this method will return a different number of chars than would be returned
121   * by reading all of the chars.
122   *
123   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
124   * return a different number of chars if the contents are changed.
125   *
126   * @since 19.0
127   */
128  @Beta
129  public Optional<Long> lengthIfKnown() {
130    return Optional.absent();
131  }
132
133  /**
134   * Returns the length of this source in chars, even if doing so requires opening and traversing an
135   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
136   *
137   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
138   * absent, it will fall back to a heavyweight operation that will open a stream,
139   * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
140   * that were skipped.
141   *
142   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
143   * implementation, it is <i>possible</i> that this method will return a different number of chars
144   * than would be returned by reading all of the chars.
145   *
146   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
147   * number of chars if the contents are changed.
148   *
149   * @throws IOException if an I/O error occurs while reading the length of this source
150   * @since 19.0
151   */
152  @Beta
153  public long length() throws IOException {
154    Optional<Long> lengthIfKnown = lengthIfKnown();
155    if (lengthIfKnown.isPresent()) {
156      return lengthIfKnown.get();
157    }
158
159    Closer closer = Closer.create();
160    try {
161      Reader reader = closer.register(openStream());
162      return countBySkipping(reader);
163    } catch (Throwable e) {
164      throw closer.rethrow(e);
165    } finally {
166      closer.close();
167    }
168  }
169
170  private long countBySkipping(Reader reader) throws IOException {
171    long count = 0;
172    long read;
173    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
174      count += read;
175    }
176    return count;
177  }
178
179  /**
180   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
181   * Does not close {@code appendable} if it is {@code Closeable}.
182   *
183   * @return the number of characters copied
184   * @throws IOException if an I/O error occurs while reading from this source or writing to
185   *     {@code appendable}
186   */
187  @CanIgnoreReturnValue
188  public long copyTo(Appendable appendable) throws IOException {
189    checkNotNull(appendable);
190
191    Closer closer = Closer.create();
192    try {
193      Reader reader = closer.register(openStream());
194      return CharStreams.copy(reader, appendable);
195    } catch (Throwable e) {
196      throw closer.rethrow(e);
197    } finally {
198      closer.close();
199    }
200  }
201
202  /**
203   * Copies the contents of this source to the given sink.
204   *
205   * @return the number of characters copied
206   * @throws IOException if an I/O error occurs while reading from this source or writing to
207   *     {@code sink}
208   */
209  @CanIgnoreReturnValue
210  public long copyTo(CharSink sink) throws IOException {
211    checkNotNull(sink);
212
213    Closer closer = Closer.create();
214    try {
215      Reader reader = closer.register(openStream());
216      Writer writer = closer.register(sink.openStream());
217      return CharStreams.copy(reader, writer);
218    } catch (Throwable e) {
219      throw closer.rethrow(e);
220    } finally {
221      closer.close();
222    }
223  }
224
225  /**
226   * Reads the contents of this source as a string.
227   *
228   * @throws IOException if an I/O error occurs while reading from this source
229   */
230  public String read() throws IOException {
231    Closer closer = Closer.create();
232    try {
233      Reader reader = closer.register(openStream());
234      return CharStreams.toString(reader);
235    } catch (Throwable e) {
236      throw closer.rethrow(e);
237    } finally {
238      closer.close();
239    }
240  }
241
242  /**
243   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
244   *
245   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
246   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
247   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
248   * as if it does.
249   *
250   * @throws IOException if an I/O error occurs while reading from this source
251   */
252  @Nullable
253  public String readFirstLine() throws IOException {
254    Closer closer = Closer.create();
255    try {
256      BufferedReader reader = closer.register(openBufferedStream());
257      return reader.readLine();
258    } catch (Throwable e) {
259      throw closer.rethrow(e);
260    } finally {
261      closer.close();
262    }
263  }
264
265  /**
266   * Reads all the lines of this source as a list of strings. The returned list will be empty if
267   * this source is empty.
268   *
269   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
270   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
271   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
272   * as if it does.
273   *
274   * @throws IOException if an I/O error occurs while reading from this source
275   */
276  public ImmutableList<String> readLines() throws IOException {
277    Closer closer = Closer.create();
278    try {
279      BufferedReader reader = closer.register(openBufferedStream());
280      List<String> result = Lists.newArrayList();
281      String line;
282      while ((line = reader.readLine()) != null) {
283        result.add(line);
284      }
285      return ImmutableList.copyOf(result);
286    } catch (Throwable e) {
287      throw closer.rethrow(e);
288    } finally {
289      closer.close();
290    }
291  }
292
293  /**
294   * Reads lines of text from this source, processing each line as it is read using the given
295   * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
296   * returns {@code false} and returns the result produced by the processor.
297   *
298   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
299   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
300   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
301   * as if it does.
302   *
303   * @throws IOException if an I/O error occurs while reading from this source or if
304   *     {@code processor} throws an {@code IOException}
305   * @since 16.0
306   */
307  @Beta
308  @CanIgnoreReturnValue // some processors won't return a useful result
309  public <T> T readLines(LineProcessor<T> processor) throws IOException {
310    checkNotNull(processor);
311
312    Closer closer = Closer.create();
313    try {
314      Reader reader = closer.register(openStream());
315      return CharStreams.readLines(reader, processor);
316    } catch (Throwable e) {
317      throw closer.rethrow(e);
318    } finally {
319      closer.close();
320    }
321  }
322
323  /**
324   * Returns whether the source has zero chars. The default implementation first checks
325   * {@link #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
326   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
327   *
328   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
329   * chars are actually available for reading. This means that a source may return {@code true} from
330   * {@code isEmpty()} despite having readable content.
331   *
332   * @throws IOException if an I/O error occurs
333   * @since 15.0
334   */
335  public boolean isEmpty() throws IOException {
336    Optional<Long> lengthIfKnown = lengthIfKnown();
337    if (lengthIfKnown.isPresent()) {
338      return lengthIfKnown.get() == 0L;
339    }
340    Closer closer = Closer.create();
341    try {
342      Reader reader = closer.register(openStream());
343      return reader.read() == -1;
344    } catch (Throwable e) {
345      throw closer.rethrow(e);
346    } finally {
347      closer.close();
348    }
349  }
350
351  /**
352   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
353   * the source will contain the concatenated data from the streams of the underlying sources.
354   *
355   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
356   * close the open underlying stream.
357   *
358   * @param sources the sources to concatenate
359   * @return a {@code CharSource} containing the concatenated data
360   * @since 15.0
361   */
362  public static CharSource concat(Iterable<? extends CharSource> sources) {
363    return new ConcatenatedCharSource(sources);
364  }
365
366  /**
367   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
368   * the source will contain the concatenated data from the streams of the underlying sources.
369   *
370   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
371   * close the open underlying stream.
372   *
373   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
374   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
375   * eagerly fetches data for each source when iterated (rather than producing sources that only
376   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
377   * possible.
378   *
379   * @param sources the sources to concatenate
380   * @return a {@code CharSource} containing the concatenated data
381   * @throws NullPointerException if any of {@code sources} is {@code null}
382   * @since 15.0
383   */
384  public static CharSource concat(Iterator<? extends CharSource> sources) {
385    return concat(ImmutableList.copyOf(sources));
386  }
387
388  /**
389   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
390   * the source will contain the concatenated data from the streams of the underlying sources.
391   *
392   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
393   * close the open underlying stream.
394   *
395   * @param sources the sources to concatenate
396   * @return a {@code CharSource} containing the concatenated data
397   * @throws NullPointerException if any of {@code sources} is {@code null}
398   * @since 15.0
399   */
400  public static CharSource concat(CharSource... sources) {
401    return concat(ImmutableList.copyOf(sources));
402  }
403
404  /**
405   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
406   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
407   * the {@code charSequence} is mutated while it is being read, so don't do that.
408   *
409   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
410   */
411  public static CharSource wrap(CharSequence charSequence) {
412    return charSequence instanceof String
413        ? new StringCharSource((String) charSequence)
414        : new CharSequenceCharSource(charSequence);
415  }
416
417  /**
418   * Returns an immutable {@link CharSource} that contains no characters.
419   *
420   * @since 15.0
421   */
422  public static CharSource empty() {
423    return EmptyCharSource.INSTANCE;
424  }
425
426  /**
427   * A byte source that reads chars from this source and encodes them as bytes using a charset.
428   */
429  private final class AsByteSource extends ByteSource {
430
431    final Charset charset;
432
433    AsByteSource(Charset charset) {
434      this.charset = checkNotNull(charset);
435    }
436
437    @Override
438    public CharSource asCharSource(Charset charset) {
439      if (charset.equals(this.charset)) {
440        return CharSource.this;
441      }
442      return super.asCharSource(charset);
443    }
444
445    @Override
446    public InputStream openStream() throws IOException {
447      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
448    }
449
450    @Override
451    public String toString() {
452      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
453    }
454  }
455
456  private static class CharSequenceCharSource extends CharSource {
457
458    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
459
460    protected final CharSequence seq;
461
462    protected CharSequenceCharSource(CharSequence seq) {
463      this.seq = checkNotNull(seq);
464    }
465
466    @Override
467    public Reader openStream() {
468      return new CharSequenceReader(seq);
469    }
470
471    @Override
472    public String read() {
473      return seq.toString();
474    }
475
476    @Override
477    public boolean isEmpty() {
478      return seq.length() == 0;
479    }
480
481    @Override
482    public long length() {
483      return seq.length();
484    }
485
486    @Override
487    public Optional<Long> lengthIfKnown() {
488      return Optional.of((long) seq.length());
489    }
490
491    /**
492     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
493     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
494     */
495    private Iterator<String> linesIterator() {
496      return new AbstractIterator<String>() {
497        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
498
499        @Override
500        protected String computeNext() {
501          if (lines.hasNext()) {
502            String next = lines.next();
503            // skip last line if it's empty
504            if (lines.hasNext() || !next.isEmpty()) {
505              return next;
506            }
507          }
508          return endOfData();
509        }
510      };
511    }
512
513    @Override
514    public String readFirstLine() {
515      Iterator<String> lines = linesIterator();
516      return lines.hasNext() ? lines.next() : null;
517    }
518
519    @Override
520    public ImmutableList<String> readLines() {
521      return ImmutableList.copyOf(linesIterator());
522    }
523
524    @Override
525    public <T> T readLines(LineProcessor<T> processor) throws IOException {
526      Iterator<String> lines = linesIterator();
527      while (lines.hasNext()) {
528        if (!processor.processLine(lines.next())) {
529          break;
530        }
531      }
532      return processor.getResult();
533    }
534
535    @Override
536    public String toString() {
537      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
538    }
539  }
540
541  /**
542   * Subclass specialized for string instances.
543   *
544   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
545   *
546   * <ul>
547   *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
548   *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
549   *       one with {@link CharSequence#charAt(int)}.
550   *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
551   *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
552   *       can't change, and it is faster because many writers and appendables are optimized for
553   *       appending string instances.
554   * </ul>
555   */
556  private static class StringCharSource extends CharSequenceCharSource {
557    protected StringCharSource(String seq) {
558      super(seq);
559    }
560
561    @Override
562    public Reader openStream() {
563      return new StringReader((String) seq);
564    }
565
566    @Override
567    public long copyTo(Appendable appendable) throws IOException {
568      appendable.append(seq);
569      return seq.length();
570    }
571
572    @Override
573    public long copyTo(CharSink sink) throws IOException {
574      checkNotNull(sink);
575      Closer closer = Closer.create();
576      try {
577        Writer writer = closer.register(sink.openStream());
578        writer.write((String) seq);
579        return seq.length();
580      } catch (Throwable e) {
581        throw closer.rethrow(e);
582      } finally {
583        closer.close();
584      }
585    }
586  }
587
588  private static final class EmptyCharSource extends StringCharSource {
589
590    private static final EmptyCharSource INSTANCE = new EmptyCharSource();
591
592    private EmptyCharSource() {
593      super("");
594    }
595
596    @Override
597    public String toString() {
598      return "CharSource.empty()";
599    }
600  }
601
602  private static final class ConcatenatedCharSource extends CharSource {
603
604    private final Iterable<? extends CharSource> sources;
605
606    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
607      this.sources = checkNotNull(sources);
608    }
609
610    @Override
611    public Reader openStream() throws IOException {
612      return new MultiReader(sources.iterator());
613    }
614
615    @Override
616    public boolean isEmpty() throws IOException {
617      for (CharSource source : sources) {
618        if (!source.isEmpty()) {
619          return false;
620        }
621      }
622      return true;
623    }
624
625    @Override
626    public Optional<Long> lengthIfKnown() {
627      long result = 0L;
628      for (CharSource source : sources) {
629        Optional<Long> lengthIfKnown = source.lengthIfKnown();
630        if (!lengthIfKnown.isPresent()) {
631          return Optional.absent();
632        }
633        result += lengthIfKnown.get();
634      }
635      return Optional.of(result);
636    }
637
638    @Override
639    public long length() throws IOException {
640      long result = 0L;
641      for (CharSource source : sources) {
642        result += source.length();
643      }
644      return result;
645    }
646
647    @Override
648    public String toString() {
649      return "CharSource.concat(" + sources + ")";
650    }
651  }
652}