001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.io;
018
019import static com.google.common.base.Preconditions.checkNotNull;
020
021import com.google.common.annotations.Beta;
022import com.google.common.base.Ascii;
023import com.google.common.base.Optional;
024import com.google.common.base.Splitter;
025import com.google.common.collect.AbstractIterator;
026import com.google.common.collect.ImmutableList;
027import com.google.common.collect.Lists;
028
029import java.io.BufferedReader;
030import java.io.IOException;
031import java.io.Reader;
032import java.io.Writer;
033import java.nio.charset.Charset;
034import java.util.Iterator;
035import java.util.List;
036import java.util.regex.Pattern;
037
038import javax.annotation.Nullable;
039
040/**
041 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
042 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
043 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
044 *
045 * <p>{@code CharSource} provides two kinds of methods:
046 * <ul>
047 *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
048 *   instance each time they are called. The caller is responsible for ensuring that the returned
049 *   reader is closed.
050 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
051 *   typically implemented by opening a reader using one of the methods in the first category,
052 *   doing something and finally closing the reader that was opened.
053 * </ul>
054 *
055 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the
056 * source into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
057 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
058 * there to be an empty line at the end if the contents are terminated with a line separator.
059 *
060 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
061 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
062 *
063 * @since 14.0
064 * @author Colin Decker
065 */
066public abstract class CharSource {
067
068  /**
069   * Constructor for use by subclasses.
070   */
071  protected CharSource() {}
072
073  /**
074   * Opens a new {@link Reader} for reading from this source. This method should return a new,
075   * independent reader each time it is called.
076   *
077   * <p>The caller is responsible for ensuring that the returned reader is closed.
078   *
079   * @throws IOException if an I/O error occurs in the process of opening the reader
080   */
081  public abstract Reader openStream() throws IOException;
082
083  /**
084   * Opens a new {@link BufferedReader} for reading from this source. This method should return a
085   * new, independent reader each time it is called.
086   *
087   * <p>The caller is responsible for ensuring that the returned reader is closed.
088   *
089   * @throws IOException if an I/O error occurs in the process of opening the reader
090   */
091  public BufferedReader openBufferedStream() throws IOException {
092    Reader reader = openStream();
093    return (reader instanceof BufferedReader)
094        ? (BufferedReader) reader
095        : new BufferedReader(reader);
096  }
097
098  /**
099   * Returns the size of this source in chars, if the size can be easily determined without
100   * actually opening the data stream.
101   *
102   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a
103   * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
104   * <i>possible</i> that this method will return a different number of chars than would be
105   * returned by reading all of the chars.
106   *
107   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read
108   * may return a different number of chars if the contents are changed.
109   *
110   * @since 19.0
111   */
112  @Beta
113  public Optional<Long> lengthIfKnown() {
114    return Optional.absent();
115  }
116
117  /**
118   * Returns the length of this source in chars, even if doing so requires opening and traversing
119   * an entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
120   *
121   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present.
122   * If absent, it will fall back to a heavyweight operation that will open a stream,
123   * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
124   * that were skipped.
125   *
126   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
127   * implementation, it is <i>possible</i> that this method will return a different number of chars
128   * than would be returned by reading all of the chars.
129   *
130   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
131   * number of chars if the contents are changed.
132   *
133   * @throws IOException if an I/O error occurs in the process of reading the length of this source
134   * @since 19.0
135   */
136  @Beta
137  public long length() throws IOException {
138    Optional<Long> lengthIfKnown = lengthIfKnown();
139    if (lengthIfKnown.isPresent()) {
140      return lengthIfKnown.get();
141    }
142
143    Closer closer = Closer.create();
144    try {
145      Reader reader = closer.register(openStream());
146      return countBySkipping(reader);
147    } catch (Throwable e) {
148      throw closer.rethrow(e);
149    } finally {
150      closer.close();
151    }
152  }
153
154  private long countBySkipping(Reader reader) throws IOException {
155    long count = 0;
156    long read;
157    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
158      count += read;
159    }
160    return count;
161  }
162
163  /**
164   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
165   * Does not close {@code appendable} if it is {@code Closeable}.
166   *
167   * @throws IOException if an I/O error occurs in the process of reading from this source or
168   *     writing to {@code appendable}
169   */
170  public long copyTo(Appendable appendable) throws IOException {
171    checkNotNull(appendable);
172
173    Closer closer = Closer.create();
174    try {
175      Reader reader = closer.register(openStream());
176      return CharStreams.copy(reader, appendable);
177    } catch (Throwable e) {
178      throw closer.rethrow(e);
179    } finally {
180      closer.close();
181    }
182  }
183
184  /**
185   * Copies the contents of this source to the given sink.
186   *
187   * @throws IOException if an I/O error occurs in the process of reading from this source or
188   *     writing to {@code sink}
189   */
190  public long copyTo(CharSink sink) throws IOException {
191    checkNotNull(sink);
192
193    Closer closer = Closer.create();
194    try {
195      Reader reader = closer.register(openStream());
196      Writer writer = closer.register(sink.openStream());
197      return CharStreams.copy(reader, writer);
198    } catch (Throwable e) {
199      throw closer.rethrow(e);
200    } finally {
201      closer.close();
202    }
203  }
204
205  /**
206   * Reads the contents of this source as a string.
207   *
208   * @throws IOException if an I/O error occurs in the process of reading from this source
209   */
210  public String read() throws IOException {
211    Closer closer = Closer.create();
212    try {
213      Reader reader = closer.register(openStream());
214      return CharStreams.toString(reader);
215    } catch (Throwable e) {
216      throw closer.rethrow(e);
217    } finally {
218      closer.close();
219    }
220  }
221
222  /**
223   * Reads the first link of this source as a string. Returns {@code null} if this source is empty.
224   *
225   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
226   * {@code \r\n}, does not include the line separator in the returned line and does not consider
227   * there to be an extra empty line at the end if the content is terminated with a line separator.
228   *
229   * @throws IOException if an I/O error occurs in the process of reading from this source
230   */
231  @Nullable public String readFirstLine() throws IOException {
232    Closer closer = Closer.create();
233    try {
234      BufferedReader reader = closer.register(openBufferedStream());
235      return reader.readLine();
236    } catch (Throwable e) {
237      throw closer.rethrow(e);
238    } finally {
239      closer.close();
240    }
241  }
242
243  /**
244   * Reads all the lines of this source as a list of strings. The returned list will be empty if
245   * this source is empty.
246   *
247   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
248   * {@code \r\n}, does not include the line separator in the returned lines and does not consider
249   * there to be an extra empty line at the end if the content is terminated with a line separator.
250   *
251   * @throws IOException if an I/O error occurs in the process of reading from this source
252   */
253  public ImmutableList<String> readLines() throws IOException {
254    Closer closer = Closer.create();
255    try {
256      BufferedReader reader = closer.register(openBufferedStream());
257      List<String> result = Lists.newArrayList();
258      String line;
259      while ((line = reader.readLine()) != null) {
260        result.add(line);
261      }
262      return ImmutableList.copyOf(result);
263    } catch (Throwable e) {
264      throw closer.rethrow(e);
265    } finally {
266      closer.close();
267    }
268  }
269
270  /**
271   * Reads lines of text from this source, processing each line as it is read using the given
272   * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
273   * returns {@code false} and returns the result produced by the processor.
274   *
275   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
276   * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor}
277   * and does not consider there to be an extra empty line at the end if the content is terminated
278   * with a line separator.
279   *
280   * @throws IOException if an I/O error occurs in the process of reading from this source or if
281   *     {@code processor} throws an {@code IOException}
282   * @since 16.0
283   */
284  @Beta
285  public <T> T readLines(LineProcessor<T> processor) throws IOException {
286    checkNotNull(processor);
287
288    Closer closer = Closer.create();
289    try {
290      Reader reader = closer.register(openStream());
291      return CharStreams.readLines(reader, processor);
292    } catch (Throwable e) {
293      throw closer.rethrow(e);
294    } finally {
295      closer.close();
296    }
297  }
298
299  /**
300   * Returns whether the source has zero chars. The default implementation returns true if
301   * {@link #lengthIfKnown} returns zero, falling back to opening a stream and checking
302   * for EOF if the length is not known.
303   *
304   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
305   * chars are actually available for reading. This means that a source may return {@code true} from
306   * {@code isEmpty()} despite having readable content.
307   *
308   * @throws IOException if an I/O error occurs
309   * @since 15.0
310   */
311  public boolean isEmpty() throws IOException {
312    Optional<Long> lengthIfKnown = lengthIfKnown();
313    if (lengthIfKnown.isPresent() && lengthIfKnown.get() == 0L) {
314      return true;
315    }
316    Closer closer = Closer.create();
317    try {
318      Reader reader = closer.register(openStream());
319      return reader.read() == -1;
320    } catch (Throwable e) {
321      throw closer.rethrow(e);
322    } finally {
323      closer.close();
324    }
325  }
326
327  /**
328   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
329   * the source will contain the concatenated data from the streams of the underlying sources.
330   *
331   * <p>Only one underlying stream will be open at a time. Closing the  concatenated stream will
332   * close the open underlying stream.
333   *
334   * @param sources the sources to concatenate
335   * @return a {@code CharSource} containing the concatenated data
336   * @since 15.0
337   */
338  public static CharSource concat(Iterable<? extends CharSource> sources) {
339    return new ConcatenatedCharSource(sources);
340  }
341
342  /**
343   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
344   * the source will contain the concatenated data from the streams of the underlying sources.
345   *
346   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
347   * close the open underlying stream.
348   *
349   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
350   * method is called. This will fail if the iterator is infinite and may cause problems if the
351   * iterator eagerly fetches data for each source when iterated (rather than producing sources
352   * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
353   * overload if possible.
354   *
355   * @param sources the sources to concatenate
356   * @return a {@code CharSource} containing the concatenated data
357   * @throws NullPointerException if any of {@code sources} is {@code null}
358   * @since 15.0
359   */
360  public static CharSource concat(Iterator<? extends CharSource> sources) {
361    return concat(ImmutableList.copyOf(sources));
362  }
363
364  /**
365   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
366   * the source will contain the concatenated data from the streams of the underlying sources.
367   *
368   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
369   * close the open underlying stream.
370   *
371   * @param sources the sources to concatenate
372   * @return a {@code CharSource} containing the concatenated data
373   * @throws NullPointerException if any of {@code sources} is {@code null}
374   * @since 15.0
375   */
376  public static CharSource concat(CharSource... sources) {
377    return concat(ImmutableList.copyOf(sources));
378  }
379
380  /**
381   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
382   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
383   * the {@code charSequence} is mutated while it is being read, so don't do that.
384   *
385   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
386   */
387  public static CharSource wrap(CharSequence charSequence) {
388    return new CharSequenceCharSource(charSequence);
389  }
390
391  /**
392   * Returns an immutable {@link CharSource} that contains no characters.
393   *
394   * @since 15.0
395   */
396  public static CharSource empty() {
397    return EmptyCharSource.INSTANCE;
398  }
399
400  private static class CharSequenceCharSource extends CharSource {
401
402    private static final Splitter LINE_SPLITTER
403        = Splitter.on(Pattern.compile("\r\n|\n|\r"));
404
405    private final CharSequence seq;
406
407    protected CharSequenceCharSource(CharSequence seq) {
408      this.seq = checkNotNull(seq);
409    }
410
411    @Override
412    public Reader openStream() {
413      return new CharSequenceReader(seq);
414    }
415
416    @Override
417    public String read() {
418      return seq.toString();
419    }
420
421    @Override
422    public boolean isEmpty() {
423      return seq.length() == 0;
424    }
425
426    @Override
427    public long length() {
428      return seq.length();
429    }
430
431    @Override
432    public Optional<Long> lengthIfKnown() {
433      return Optional.of((long) seq.length());
434    }
435
436    /**
437     * Returns an iterable over the lines in the string. If the string ends in
438     * a newline, a final empty string is not included to match the behavior of
439     * BufferedReader/LineReader.readLine().
440     */
441    private Iterable<String> lines() {
442      return new Iterable<String>() {
443        @Override
444        public Iterator<String> iterator() {
445          return new AbstractIterator<String>() {
446            Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
447
448            @Override
449            protected String computeNext() {
450              if (lines.hasNext()) {
451                String next = lines.next();
452                // skip last line if it's empty
453                if (lines.hasNext() || !next.isEmpty()) {
454                  return next;
455                }
456              }
457              return endOfData();
458            }
459          };
460        }
461      };
462    }
463
464    @Override
465    public String readFirstLine() {
466      Iterator<String> lines = lines().iterator();
467      return lines.hasNext() ? lines.next() : null;
468    }
469
470    @Override
471    public ImmutableList<String> readLines() {
472      return ImmutableList.copyOf(lines());
473    }
474
475    @Override
476    public <T> T readLines(LineProcessor<T> processor) throws IOException {
477      for (String line : lines()) {
478        if (!processor.processLine(line)) {
479          break;
480        }
481      }
482      return processor.getResult();
483    }
484
485    @Override
486    public String toString() {
487      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
488    }
489  }
490
491  private static final class EmptyCharSource extends CharSequenceCharSource {
492
493    private static final EmptyCharSource INSTANCE = new EmptyCharSource();
494
495    private EmptyCharSource() {
496      super("");
497    }
498
499    @Override
500    public String toString() {
501      return "CharSource.empty()";
502    }
503  }
504
505  private static final class ConcatenatedCharSource extends CharSource {
506
507    private final Iterable<? extends CharSource> sources;
508
509    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
510      this.sources = checkNotNull(sources);
511    }
512
513    @Override
514    public Reader openStream() throws IOException {
515      return new MultiReader(sources.iterator());
516    }
517
518    @Override
519    public boolean isEmpty() throws IOException {
520      for (CharSource source : sources) {
521        if (!source.isEmpty()) {
522          return false;
523        }
524      }
525      return true;
526    }
527
528    @Override
529    public Optional<Long> lengthIfKnown() {
530      long result = 0L;
531      for (CharSource source : sources) {
532        Optional<Long> lengthIfKnown = source.lengthIfKnown();
533        if (!lengthIfKnown.isPresent()) {
534          return Optional.absent();
535        }
536        result += lengthIfKnown.get();
537      }
538      return Optional.of(result);
539    }
540
541    @Override
542    public long length() throws IOException {
543      long result = 0L;
544      for (CharSource source : sources) {
545        result += source.length();
546      }
547      return result;
548    }
549
550    @Override
551    public String toString() {
552      return "CharSource.concat(" + sources + ")";
553    }
554  }
555}