001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.io;
018
019import static com.google.common.base.Preconditions.checkNotNull;
020
021import com.google.common.annotations.Beta;
022import com.google.common.base.Ascii;
023import com.google.common.base.Splitter;
024import com.google.common.collect.AbstractIterator;
025import com.google.common.collect.ImmutableList;
026import com.google.common.collect.Lists;
027
028import java.io.BufferedReader;
029import java.io.IOException;
030import java.io.Reader;
031import java.io.Writer;
032import java.nio.charset.Charset;
033import java.util.Iterator;
034import java.util.List;
035import java.util.regex.Pattern;
036
037import javax.annotation.Nullable;
038
039/**
040 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
041 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
042 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
043 *
044 * <p>{@code CharSource} provides two kinds of methods:
045 * <ul>
046 *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
047 *   instance each time they are called. The caller is responsible for ensuring that the returned
048 *   reader is closed.
049 *   <li><b>Convenience methods:</b> These are implementations of common operations that are
050 *   typically implemented by opening a reader using one of the methods in the first category,
051 *   doing something and finally closing the reader that was opened.
052 * </ul>
053 *
054 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the
055 * source into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
056 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
057 * there to be an empty line at the end if the contents are terminated with a line separator.
058 *
059 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
060 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
061 *
062 * @since 14.0
063 * @author Colin Decker
064 */
065public abstract class CharSource {
066
067  /**
068   * Constructor for use by subclasses.
069   */
070  protected CharSource() {}
071
072  /**
073   * Opens a new {@link Reader} for reading from this source. This method should return a new,
074   * independent reader each time it is called.
075   *
076   * <p>The caller is responsible for ensuring that the returned reader is closed.
077   *
078   * @throws IOException if an I/O error occurs in the process of opening the reader
079   */
080  public abstract Reader openStream() throws IOException;
081
082  /**
083   * Opens a new {@link BufferedReader} for reading from this source. This method should return a
084   * new, independent reader each time it is called.
085   *
086   * <p>The caller is responsible for ensuring that the returned reader is closed.
087   *
088   * @throws IOException if an I/O error occurs in the process of opening the reader
089   */
090  public BufferedReader openBufferedStream() throws IOException {
091    Reader reader = openStream();
092    return (reader instanceof BufferedReader)
093        ? (BufferedReader) reader
094        : new BufferedReader(reader);
095  }
096
097  /**
098   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
099   * Does not close {@code appendable} if it is {@code Closeable}.
100   *
101   * @throws IOException if an I/O error occurs in the process of reading from this source or
102   *     writing to {@code appendable}
103   */
104  public long copyTo(Appendable appendable) throws IOException {
105    checkNotNull(appendable);
106
107    Closer closer = Closer.create();
108    try {
109      Reader reader = closer.register(openStream());
110      return CharStreams.copy(reader, appendable);
111    } catch (Throwable e) {
112      throw closer.rethrow(e);
113    } finally {
114      closer.close();
115    }
116  }
117
118  /**
119   * Copies the contents of this source to the given sink.
120   *
121   * @throws IOException if an I/O error occurs in the process of reading from this source or
122   *     writing to {@code sink}
123   */
124  public long copyTo(CharSink sink) throws IOException {
125    checkNotNull(sink);
126
127    Closer closer = Closer.create();
128    try {
129      Reader reader = closer.register(openStream());
130      Writer writer = closer.register(sink.openStream());
131      return CharStreams.copy(reader, writer);
132    } catch (Throwable e) {
133      throw closer.rethrow(e);
134    } finally {
135      closer.close();
136    }
137  }
138
139  /**
140   * Reads the contents of this source as a string.
141   *
142   * @throws IOException if an I/O error occurs in the process of reading from this source
143   */
144  public String read() throws IOException {
145    Closer closer = Closer.create();
146    try {
147      Reader reader = closer.register(openStream());
148      return CharStreams.toString(reader);
149    } catch (Throwable e) {
150      throw closer.rethrow(e);
151    } finally {
152      closer.close();
153    }
154  }
155
156  /**
157   * Reads the first link of this source as a string. Returns {@code null} if this source is empty.
158   *
159   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
160   * {@code \r\n}, does not include the line separator in the returned line and does not consider
161   * there to be an extra empty line at the end if the content is terminated with a line separator.
162   *
163   * @throws IOException if an I/O error occurs in the process of reading from this source
164   */
165  public @Nullable String readFirstLine() throws IOException {
166    Closer closer = Closer.create();
167    try {
168      BufferedReader reader = closer.register(openBufferedStream());
169      return reader.readLine();
170    } catch (Throwable e) {
171      throw closer.rethrow(e);
172    } finally {
173      closer.close();
174    }
175  }
176
177  /**
178   * Reads all the lines of this source as a list of strings. The returned list will be empty if
179   * this source is empty.
180   *
181   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
182   * {@code \r\n}, does not include the line separator in the returned lines and does not consider
183   * there to be an extra empty line at the end if the content is terminated with a line separator.
184   *
185   * @throws IOException if an I/O error occurs in the process of reading from this source
186   */
187  public ImmutableList<String> readLines() throws IOException {
188    Closer closer = Closer.create();
189    try {
190      BufferedReader reader = closer.register(openBufferedStream());
191      List<String> result = Lists.newArrayList();
192      String line;
193      while ((line = reader.readLine()) != null) {
194        result.add(line);
195      }
196      return ImmutableList.copyOf(result);
197    } catch (Throwable e) {
198      throw closer.rethrow(e);
199    } finally {
200      closer.close();
201    }
202  }
203
204  /**
205   * Reads lines of text from this source, processing each line as it is read using the given
206   * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
207   * returns {@code false} and returns the result produced by the processor.
208   *
209   * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
210   * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor}
211   * and does not consider there to be an extra empty line at the end if the content is terminated
212   * with a line separator.
213   *
214   * @throws IOException if an I/O error occurs in the process of reading from this source or if
215   *     {@code processor} throws an {@code IOException}
216   * @since 16.0
217   */
218  @Beta
219  public <T> T readLines(LineProcessor<T> processor) throws IOException {
220    checkNotNull(processor);
221
222    Closer closer = Closer.create();
223    try {
224      Reader reader = closer.register(openStream());
225      return CharStreams.readLines(reader, processor);
226    } catch (Throwable e) {
227      throw closer.rethrow(e);
228    } finally {
229      closer.close();
230    }
231  }
232
233  /**
234   * Returns whether the source has zero chars. The default implementation is to open a stream and
235   * check for EOF.
236   *
237   * @throws IOException if an I/O error occurs
238   * @since 15.0
239   */
240  public boolean isEmpty() throws IOException {
241    Closer closer = Closer.create();
242    try {
243      Reader reader = closer.register(openStream());
244      return reader.read() == -1;
245    } catch (Throwable e) {
246      throw closer.rethrow(e);
247    } finally {
248      closer.close();
249    }
250  }
251
252  /**
253   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
254   * the source will contain the concatenated data from the streams of the underlying sources.
255   *
256   * <p>Only one underlying stream will be open at a time. Closing the  concatenated stream will
257   * close the open underlying stream.
258   *
259   * @param sources the sources to concatenate
260   * @return a {@code CharSource} containing the concatenated data
261   * @since 15.0
262   */
263  public static CharSource concat(Iterable<? extends CharSource> sources) {
264    return new ConcatenatedCharSource(sources);
265  }
266
267  /**
268   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
269   * the source will contain the concatenated data from the streams of the underlying sources.
270   *
271   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
272   * close the open underlying stream.
273   *
274   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
275   * method is called. This will fail if the iterator is infinite and may cause problems if the
276   * iterator eagerly fetches data for each source when iterated (rather than producing sources
277   * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
278   * overload if possible.
279   *
280   * @param sources the sources to concatenate
281   * @return a {@code CharSource} containing the concatenated data
282   * @throws NullPointerException if any of {@code sources} is {@code null}
283   * @since 15.0
284   */
285  public static CharSource concat(Iterator<? extends CharSource> sources) {
286    return concat(ImmutableList.copyOf(sources));
287  }
288
289  /**
290   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
291   * the source will contain the concatenated data from the streams of the underlying sources.
292   *
293   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
294   * close the open underlying stream.
295   *
296   * @param sources the sources to concatenate
297   * @return a {@code CharSource} containing the concatenated data
298   * @throws NullPointerException if any of {@code sources} is {@code null}
299   * @since 15.0
300   */
301  public static CharSource concat(CharSource... sources) {
302    return concat(ImmutableList.copyOf(sources));
303  }
304
305  /**
306   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
307   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
308   * the {@code charSequence} is mutated while it is being read, so don't do that.
309   *
310   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
311   */
312  public static CharSource wrap(CharSequence charSequence) {
313    return new CharSequenceCharSource(charSequence);
314  }
315
316  /**
317   * Returns an immutable {@link CharSource} that contains no characters.
318   *
319   * @since 15.0
320   */
321  public static CharSource empty() {
322    return EmptyCharSource.INSTANCE;
323  }
324
325  private static class CharSequenceCharSource extends CharSource {
326
327    private static final Splitter LINE_SPLITTER
328        = Splitter.on(Pattern.compile("\r\n|\n|\r"));
329
330    private final CharSequence seq;
331
332    protected CharSequenceCharSource(CharSequence seq) {
333      this.seq = checkNotNull(seq);
334    }
335
336    @Override
337    public Reader openStream() {
338      return new CharSequenceReader(seq);
339    }
340
341    @Override
342    public String read() {
343      return seq.toString();
344    }
345
346    @Override
347    public boolean isEmpty() {
348      return seq.length() == 0;
349    }
350
351    /**
352     * Returns an iterable over the lines in the string. If the string ends in
353     * a newline, a final empty string is not included to match the behavior of
354     * BufferedReader/LineReader.readLine().
355     */
356    private Iterable<String> lines() {
357      return new Iterable<String>() {
358        @Override
359        public Iterator<String> iterator() {
360          return new AbstractIterator<String>() {
361            Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
362
363            @Override
364            protected String computeNext() {
365              if (lines.hasNext()) {
366                String next = lines.next();
367                // skip last line if it's empty
368                if (lines.hasNext() || !next.isEmpty()) {
369                  return next;
370                }
371              }
372              return endOfData();
373            }
374          };
375        }
376      };
377    }
378
379    @Override
380    public String readFirstLine() {
381      Iterator<String> lines = lines().iterator();
382      return lines.hasNext() ? lines.next() : null;
383    }
384
385    @Override
386    public ImmutableList<String> readLines() {
387      return ImmutableList.copyOf(lines());
388    }
389
390    @Override
391    public <T> T readLines(LineProcessor<T> processor) throws IOException {
392      for (String line : lines()) {
393        if (!processor.processLine(line)) {
394          break;
395        }
396      }
397      return processor.getResult();
398    }
399
400    @Override
401    public String toString() {
402      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
403    }
404  }
405
406  private static final class EmptyCharSource extends CharSequenceCharSource {
407
408    private static final EmptyCharSource INSTANCE = new EmptyCharSource();
409
410    private EmptyCharSource() {
411      super("");
412    }
413
414    @Override
415    public String toString() {
416      return "CharSource.empty()";
417    }
418  }
419
420  private static final class ConcatenatedCharSource extends CharSource {
421
422    private final Iterable<? extends CharSource> sources;
423
424    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
425      this.sources = checkNotNull(sources);
426    }
427
428    @Override
429    public Reader openStream() throws IOException {
430      return new MultiReader(sources.iterator());
431    }
432
433    @Override
434    public boolean isEmpty() throws IOException {
435      for (CharSource source : sources) {
436        if (!source.isEmpty()) {
437          return false;
438        }
439      }
440      return true;
441    }
442
443    @Override
444    public String toString() {
445      return "CharSource.concat(" + sources + ")";
446    }
447  }
448}