/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
014
015package com.google.common.io;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.Beta;
020import com.google.common.annotations.GwtIncompatible;
021import com.google.common.base.Ascii;
022import com.google.common.base.Optional;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027import com.google.errorprone.annotations.CanIgnoreReturnValue;
028import java.io.BufferedReader;
029import java.io.IOException;
030import java.io.InputStream;
031import java.io.Reader;
032import java.io.Writer;
033import java.nio.charset.Charset;
034import java.util.Iterator;
035import java.util.List;
036import javax.annotation.Nullable;
037
038/**
039 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
040 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
041 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
042 *
043 * <p>{@code CharSource} provides two kinds of methods:
044 * <ul>
045 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
046 *     instance each time they are called. The caller is responsible for ensuring that the returned
047 *     reader is closed.
048 * <li><b>Convenience methods:</b> These are implementations of common operations that are typically
049 *     implemented by opening a reader using one of the methods in the first category, doing
050 *     something and finally closing the reader that was opened.
051 * </ul>
052 *
053 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
054 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
055 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
056 * there to be an empty line at the end if the contents are terminated with a line separator.
057 *
058 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
059 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
060 *
061 * @since 14.0
062 * @author Colin Decker
063 */
064@GwtIncompatible
065public abstract class CharSource {
066
067  /**
068   * Constructor for use by subclasses.
069   */
070  protected CharSource() {}
071
072  /**
073   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
074   * as bytes using the given {@link Charset}.
075   *
076   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
077   * the default implementation of this method will ensure that the original {@code CharSource} is
078   * returned, rather than round-trip encoding. Subclasses that override this method should behave
079   * the same way.
080   *
081   * @since 20.0
082   */
083  @Beta
084  public ByteSource asByteSource(Charset charset) {
085    return new AsByteSource(charset);
086  }
087
088  /**
089   * Opens a new {@link Reader} for reading from this source. This method should return a new,
090   * independent reader each time it is called.
091   *
092   * <p>The caller is responsible for ensuring that the returned reader is closed.
093   *
094   * @throws IOException if an I/O error occurs in the process of opening the reader
095   */
096  public abstract Reader openStream() throws IOException;
097
098  /**
099   * Opens a new {@link BufferedReader} for reading from this source. This method should return a
100   * new, independent reader each time it is called.
101   *
102   * <p>The caller is responsible for ensuring that the returned reader is closed.
103   *
104   * @throws IOException if an I/O error occurs in the process of opening the reader
105   */
106  public BufferedReader openBufferedStream() throws IOException {
107    Reader reader = openStream();
108    return (reader instanceof BufferedReader)
109        ? (BufferedReader) reader
110        : new BufferedReader(reader);
111  }
112
113  /**
114   * Returns the size of this source in chars, if the size can be easily determined without actually
115   * opening the data stream.
116   *
117   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a
118   * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
119   * <i>possible</i> that this method will return a different number of chars than would be returned
120   * by reading all of the chars.
121   *
122   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
123   * return a different number of chars if the contents are changed.
124   *
125   * @since 19.0
126   */
127  @Beta
128  public Optional<Long> lengthIfKnown() {
129    return Optional.absent();
130  }
131
132  /**
133   * Returns the length of this source in chars, even if doing so requires opening and traversing an
134   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
135   *
136   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
137   * absent, it will fall back to a heavyweight operation that will open a stream,
138   * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
139   * that were skipped.
140   *
141   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
142   * implementation, it is <i>possible</i> that this method will return a different number of chars
143   * than would be returned by reading all of the chars.
144   *
145   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
146   * number of chars if the contents are changed.
147   *
148   * @throws IOException if an I/O error occurs in the process of reading the length of this source
149   * @since 19.0
150   */
151  @Beta
152  public long length() throws IOException {
153    Optional<Long> lengthIfKnown = lengthIfKnown();
154    if (lengthIfKnown.isPresent()) {
155      return lengthIfKnown.get();
156    }
157
158    Closer closer = Closer.create();
159    try {
160      Reader reader = closer.register(openStream());
161      return countBySkipping(reader);
162    } catch (Throwable e) {
163      throw closer.rethrow(e);
164    } finally {
165      closer.close();
166    }
167  }
168
169  private long countBySkipping(Reader reader) throws IOException {
170    long count = 0;
171    long read;
172    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
173      count += read;
174    }
175    return count;
176  }
177
178  /**
179   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
180   * Does not close {@code appendable} if it is {@code Closeable}.
181   *
182   * @return the number of characters copied
183   * @throws IOException if an I/O error occurs in the process of reading from this source or
184   *     writing to {@code appendable}
185   */
186  @CanIgnoreReturnValue
187  public long copyTo(Appendable appendable) throws IOException {
188    checkNotNull(appendable);
189
190    Closer closer = Closer.create();
191    try {
192      Reader reader = closer.register(openStream());
193      return CharStreams.copy(reader, appendable);
194    } catch (Throwable e) {
195      throw closer.rethrow(e);
196    } finally {
197      closer.close();
198    }
199  }
200
201  /**
202   * Copies the contents of this source to the given sink.
203   *
204   * @return the number of characters copied
205   * @throws IOException if an I/O error occurs in the process of reading from this source or
206   *     writing to {@code sink}
207   */
208  @CanIgnoreReturnValue
209  public long copyTo(CharSink sink) throws IOException {
210    checkNotNull(sink);
211
212    Closer closer = Closer.create();
213    try {
214      Reader reader = closer.register(openStream());
215      Writer writer = closer.register(sink.openStream());
216      return CharStreams.copy(reader, writer);
217    } catch (Throwable e) {
218      throw closer.rethrow(e);
219    } finally {
220      closer.close();
221    }
222  }
223
224  /**
225   * Reads the contents of this source as a string.
226   *
227   * @throws IOException if an I/O error occurs in the process of reading from this source
228   */
229  public String read() throws IOException {
230    Closer closer = Closer.create();
231    try {
232      Reader reader = closer.register(openStream());
233      return CharStreams.toString(reader);
234    } catch (Throwable e) {
235      throw closer.rethrow(e);
236    } finally {
237      closer.close();
238    }
239  }
240
241  /**
242   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
243   *
244   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
245   * {@code \r\n}, does not include the line separator in the returned line and does not consider
246   * there to be an extra empty line at the end if the content is terminated with a line separator.
247   *
248   * @throws IOException if an I/O error occurs in the process of reading from this source
249   */
250  @Nullable
251  public String readFirstLine() throws IOException {
252    Closer closer = Closer.create();
253    try {
254      BufferedReader reader = closer.register(openBufferedStream());
255      return reader.readLine();
256    } catch (Throwable e) {
257      throw closer.rethrow(e);
258    } finally {
259      closer.close();
260    }
261  }
262
263  /**
264   * Reads all the lines of this source as a list of strings. The returned list will be empty if
265   * this source is empty.
266   *
267   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
268   * {@code \r\n}, does not include the line separator in the returned lines and does not consider
269   * there to be an extra empty line at the end if the content is terminated with a line separator.
270   *
271   * @throws IOException if an I/O error occurs in the process of reading from this source
272   */
273  public ImmutableList<String> readLines() throws IOException {
274    Closer closer = Closer.create();
275    try {
276      BufferedReader reader = closer.register(openBufferedStream());
277      List<String> result = Lists.newArrayList();
278      String line;
279      while ((line = reader.readLine()) != null) {
280        result.add(line);
281      }
282      return ImmutableList.copyOf(result);
283    } catch (Throwable e) {
284      throw closer.rethrow(e);
285    } finally {
286      closer.close();
287    }
288  }
289
290  /**
291   * Reads lines of text from this source, processing each line as it is read using the given
292   * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
293   * returns {@code false} and returns the result produced by the processor.
294   *
295   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
296   * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor}
297   * and does not consider there to be an extra empty line at the end if the content is terminated
298   * with a line separator.
299   *
300   * @throws IOException if an I/O error occurs in the process of reading from this source or if
301   *     {@code processor} throws an {@code IOException}
302   * @since 16.0
303   */
304  @Beta
305  @CanIgnoreReturnValue // some processors won't return a useful result
306  public <T> T readLines(LineProcessor<T> processor) throws IOException {
307    checkNotNull(processor);
308
309    Closer closer = Closer.create();
310    try {
311      Reader reader = closer.register(openStream());
312      return CharStreams.readLines(reader, processor);
313    } catch (Throwable e) {
314      throw closer.rethrow(e);
315    } finally {
316      closer.close();
317    }
318  }
319
320  /**
321   * Returns whether the source has zero chars. The default implementation returns true if
322   * {@link #lengthIfKnown} returns zero, falling back to opening a stream and checking for EOF if
323   * the length is not known.
324   *
325   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
326   * chars are actually available for reading. This means that a source may return {@code true} from
327   * {@code isEmpty()} despite having readable content.
328   *
329   * @throws IOException if an I/O error occurs
330   * @since 15.0
331   */
332  public boolean isEmpty() throws IOException {
333    Optional<Long> lengthIfKnown = lengthIfKnown();
334    if (lengthIfKnown.isPresent() && lengthIfKnown.get() == 0L) {
335      return true;
336    }
337    Closer closer = Closer.create();
338    try {
339      Reader reader = closer.register(openStream());
340      return reader.read() == -1;
341    } catch (Throwable e) {
342      throw closer.rethrow(e);
343    } finally {
344      closer.close();
345    }
346  }
347
348  /**
349   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
350   * the source will contain the concatenated data from the streams of the underlying sources.
351   *
352   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
353   * close the open underlying stream.
354   *
355   * @param sources the sources to concatenate
356   * @return a {@code CharSource} containing the concatenated data
357   * @since 15.0
358   */
359  public static CharSource concat(Iterable<? extends CharSource> sources) {
360    return new ConcatenatedCharSource(sources);
361  }
362
363  /**
364   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
365   * the source will contain the concatenated data from the streams of the underlying sources.
366   *
367   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
368   * close the open underlying stream.
369   *
370   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
371   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
372   * eagerly fetches data for each source when iterated (rather than producing sources that only
373   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
374   * possible.
375   *
376   * @param sources the sources to concatenate
377   * @return a {@code CharSource} containing the concatenated data
378   * @throws NullPointerException if any of {@code sources} is {@code null}
379   * @since 15.0
380   */
381  public static CharSource concat(Iterator<? extends CharSource> sources) {
382    return concat(ImmutableList.copyOf(sources));
383  }
384
385  /**
386   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
387   * the source will contain the concatenated data from the streams of the underlying sources.
388   *
389   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
390   * close the open underlying stream.
391   *
392   * @param sources the sources to concatenate
393   * @return a {@code CharSource} containing the concatenated data
394   * @throws NullPointerException if any of {@code sources} is {@code null}
395   * @since 15.0
396   */
397  public static CharSource concat(CharSource... sources) {
398    return concat(ImmutableList.copyOf(sources));
399  }
400
401  /**
402   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
403   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
404   * the {@code charSequence} is mutated while it is being read, so don't do that.
405   *
406   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
407   */
408  public static CharSource wrap(CharSequence charSequence) {
409    return new CharSequenceCharSource(charSequence);
410  }
411
412  /**
413   * Returns an immutable {@link CharSource} that contains no characters.
414   *
415   * @since 15.0
416   */
417  public static CharSource empty() {
418    return EmptyCharSource.INSTANCE;
419  }
420
421  /**
422   * A byte source that reads chars from this source and encodes them as bytes using a charset.
423   */
424  private final class AsByteSource extends ByteSource {
425
426    final Charset charset;
427
428    AsByteSource(Charset charset) {
429      this.charset = checkNotNull(charset);
430    }
431
432    @Override
433    public CharSource asCharSource(Charset charset) {
434      if (charset.equals(this.charset)) {
435        return CharSource.this;
436      }
437      return super.asCharSource(charset);
438    }
439
440    @Override
441    public InputStream openStream() throws IOException {
442      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
443    }
444
445    @Override
446    public String toString() {
447      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
448    }
449  }
450
451  private static class CharSequenceCharSource extends CharSource {
452
453    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");
454
455    private final CharSequence seq;
456
457    protected CharSequenceCharSource(CharSequence seq) {
458      this.seq = checkNotNull(seq);
459    }
460
461    @Override
462    public Reader openStream() {
463      return new CharSequenceReader(seq);
464    }
465
466    @Override
467    public String read() {
468      return seq.toString();
469    }
470
471    @Override
472    public boolean isEmpty() {
473      return seq.length() == 0;
474    }
475
476    @Override
477    public long length() {
478      return seq.length();
479    }
480
481    @Override
482    public Optional<Long> lengthIfKnown() {
483      return Optional.of((long) seq.length());
484    }
485
486    /**
487     * Returns an iterable over the lines in the string. If the string ends in a newline, a final
488     * empty string is not included to match the behavior of BufferedReader/LineReader.readLine().
489     */
490    private Iterable<String> lines() {
491      return new Iterable<String>() {
492        @Override
493        public Iterator<String> iterator() {
494          return new AbstractIterator<String>() {
495            Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
496
497            @Override
498            protected String computeNext() {
499              if (lines.hasNext()) {
500                String next = lines.next();
501                // skip last line if it's empty
502                if (lines.hasNext() || !next.isEmpty()) {
503                  return next;
504                }
505              }
506              return endOfData();
507            }
508          };
509        }
510      };
511    }
512
513    @Override
514    public String readFirstLine() {
515      Iterator<String> lines = lines().iterator();
516      return lines.hasNext() ? lines.next() : null;
517    }
518
519    @Override
520    public ImmutableList<String> readLines() {
521      return ImmutableList.copyOf(lines());
522    }
523
524    @Override
525    public <T> T readLines(LineProcessor<T> processor) throws IOException {
526      for (String line : lines()) {
527        if (!processor.processLine(line)) {
528          break;
529        }
530      }
531      return processor.getResult();
532    }
533
534    @Override
535    public String toString() {
536      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
537    }
538  }
539
540  private static final class EmptyCharSource extends CharSequenceCharSource {
541
542    private static final EmptyCharSource INSTANCE = new EmptyCharSource();
543
544    private EmptyCharSource() {
545      super("");
546    }
547
548    @Override
549    public String toString() {
550      return "CharSource.empty()";
551    }
552  }
553
554  private static final class ConcatenatedCharSource extends CharSource {
555
556    private final Iterable<? extends CharSource> sources;
557
558    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
559      this.sources = checkNotNull(sources);
560    }
561
562    @Override
563    public Reader openStream() throws IOException {
564      return new MultiReader(sources.iterator());
565    }
566
567    @Override
568    public boolean isEmpty() throws IOException {
569      for (CharSource source : sources) {
570        if (!source.isEmpty()) {
571          return false;
572        }
573      }
574      return true;
575    }
576
577    @Override
578    public Optional<Long> lengthIfKnown() {
579      long result = 0L;
580      for (CharSource source : sources) {
581        Optional<Long> lengthIfKnown = source.lengthIfKnown();
582        if (!lengthIfKnown.isPresent()) {
583          return Optional.absent();
584        }
585        result += lengthIfKnown.get();
586      }
587      return Optional.of(result);
588    }
589
590    @Override
591    public long length() throws IOException {
592      long result = 0L;
593      for (CharSource source : sources) {
594        result += source.length();
595      }
596      return result;
597    }
598
599    @Override
600    public String toString() {
601      return "CharSource.concat(" + sources + ")";
602    }
603  }
604}