001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.GwtIncompatible; 020import com.google.common.annotations.J2ktIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.common.collect.Streams; 028import com.google.errorprone.annotations.CanIgnoreReturnValue; 029import com.google.errorprone.annotations.MustBeClosed; 030import java.io.BufferedReader; 031import java.io.IOException; 032import java.io.InputStream; 033import java.io.Reader; 034import java.io.StringReader; 035import java.io.UncheckedIOException; 036import java.io.Writer; 037import java.nio.charset.Charset; 038import java.util.Iterator; 039import java.util.List; 040import java.util.function.Consumer; 041import java.util.stream.Stream; 042import javax.annotation.CheckForNull; 043import org.checkerframework.checker.nullness.qual.Nullable; 044 045/** 046 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code 047 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead, 048 * it is an immutable <i>supplier</i> of {@code Reader} instances. 049 * 050 * <p>{@code CharSource} provides two kinds of methods: 051 * 052 * <ul> 053 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 054 * instance each time they are called. The caller is responsible for ensuring that the 055 * returned reader is closed. 056 * <li><b>Convenience methods:</b> These are implementations of common operations that are 057 * typically implemented by opening a reader using one of the methods in the first category, 058 * doing something and finally closing the reader that was opened. 059 * </ul> 060 * 061 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 062 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code 063 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to 064 * be an empty line at the end if the contents are terminated with a line separator. 065 * 066 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 067 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 068 * 069 * <p><b>Note:</b> In general, {@code CharSource} is intended to be used for "file-like" sources 070 * that provide readers that are: 071 * 072 * <ul> 073 * <li><b>Finite:</b> Many operations, such as {@link #length()} and {@link #read()}, will either 074 * block indefinitely or fail if the source creates an infinite reader. 075 * <li><b>Non-destructive:</b> A <i>destructive</i> reader will consume or otherwise alter the 076 * source as they are read from it. A source that provides such readers will not be reusable, 077 * and operations that read from the stream (including {@link #length()}, in some 078 * implementations) will prevent further operations from completing as expected. 079 * </ul> 080 * 081 * @since 14.0 082 * @author Colin Decker 083 */ 084@J2ktIncompatible 085@GwtIncompatible 086@ElementTypesAreNonnullByDefault 087public abstract class CharSource { 088 089 /** Constructor for use by subclasses. */ 090 protected CharSource() {} 091 092 /** 093 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 094 * as bytes using the given {@link Charset}. 095 * 096 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 097 * the default implementation of this method will ensure that the original {@code CharSource} is 098 * returned, rather than round-trip encoding. Subclasses that override this method should behave 099 * the same way. 100 * 101 * @since 20.0 102 */ 103 public ByteSource asByteSource(Charset charset) { 104 return new AsByteSource(charset); 105 } 106 107 /** 108 * Opens a new {@link Reader} for reading from this source. This method returns a new, independent 109 * reader each time it is called. 110 * 111 * <p>The caller is responsible for ensuring that the returned reader is closed. 112 * 113 * @throws IOException if an I/O error occurs while opening the reader 114 */ 115 public abstract Reader openStream() throws IOException; 116 117 /** 118 * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, 119 * independent reader each time it is called. 120 * 121 * <p>The caller is responsible for ensuring that the returned reader is closed. 122 * 123 * @throws IOException if an I/O error occurs while of opening the reader 124 */ 125 public BufferedReader openBufferedStream() throws IOException { 126 Reader reader = openStream(); 127 return (reader instanceof BufferedReader) 128 ? (BufferedReader) reader 129 : new BufferedReader(reader); 130 } 131 132 /** 133 * Opens a new {@link Stream} for reading text one line at a time from this source. This method 134 * returns a new, independent stream each time it is called. 135 * 136 * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an 137 * I/O error occurs while the stream is reading from the source or when the stream is closed, an 138 * {@link UncheckedIOException} is thrown. 139 * 140 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 141 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 142 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 143 * it does. 144 * 145 * <p>The caller is responsible for ensuring that the returned stream is closed. For example: 146 * 147 * <pre>{@code 148 * try (Stream<String> lines = source.lines()) { 149 * lines.map(...) 150 * .filter(...) 151 * .forEach(...); 152 * } 153 * }</pre> 154 * 155 * @throws IOException if an I/O error occurs while opening the stream 156 * @since 22.0 157 */ 158 @MustBeClosed 159 public Stream<String> lines() throws IOException { 160 BufferedReader reader = openBufferedStream(); 161 return reader 162 .lines() 163 .onClose( 164 () -> { 165 try { 166 reader.close(); 167 } catch (IOException e) { 168 throw new UncheckedIOException(e); 169 } 170 }); 171 } 172 173 /** 174 * Returns the size of this source in chars, if the size can be easily determined without actually 175 * opening the data stream. 176 * 177 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code 178 * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i> 179 * that this method will return a different number of chars than would be returned by reading all 180 * of the chars. 181 * 182 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 183 * return a different number of chars if the contents are changed. 184 * 185 * @since 19.0 186 */ 187 public Optional<Long> lengthIfKnown() { 188 return Optional.absent(); 189 } 190 191 /** 192 * Returns the length of this source in chars, even if doing so requires opening and traversing an 193 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 194 * 195 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 196 * absent, it will fall back to a heavyweight operation that will open a stream, {@link 197 * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that 198 * were skipped. 199 * 200 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 201 * implementation, it is <i>possible</i> that this method will return a different number of chars 202 * than would be returned by reading all of the chars. 203 * 204 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 205 * number of chars if the contents are changed. 206 * 207 * @throws IOException if an I/O error occurs while reading the length of this source 208 * @since 19.0 209 */ 210 public long length() throws IOException { 211 Optional<Long> lengthIfKnown = lengthIfKnown(); 212 if (lengthIfKnown.isPresent()) { 213 return lengthIfKnown.get(); 214 } 215 216 Closer closer = Closer.create(); 217 try { 218 Reader reader = closer.register(openStream()); 219 return countBySkipping(reader); 220 } catch (Throwable e) { 221 throw closer.rethrow(e); 222 } finally { 223 closer.close(); 224 } 225 } 226 227 private long countBySkipping(Reader reader) throws IOException { 228 long count = 0; 229 long read; 230 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 231 count += read; 232 } 233 return count; 234 } 235 236 /** 237 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 238 * Does not close {@code appendable} if it is {@code Closeable}. 239 * 240 * @return the number of characters copied 241 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 242 * appendable} 243 */ 244 @CanIgnoreReturnValue 245 public long copyTo(Appendable appendable) throws IOException { 246 checkNotNull(appendable); 247 248 Closer closer = Closer.create(); 249 try { 250 Reader reader = closer.register(openStream()); 251 return CharStreams.copy(reader, appendable); 252 } catch (Throwable e) { 253 throw closer.rethrow(e); 254 } finally { 255 closer.close(); 256 } 257 } 258 259 /** 260 * Copies the contents of this source to the given sink. 261 * 262 * @return the number of characters copied 263 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 264 * sink} 265 */ 266 @CanIgnoreReturnValue 267 public long copyTo(CharSink sink) throws IOException { 268 checkNotNull(sink); 269 270 Closer closer = Closer.create(); 271 try { 272 Reader reader = closer.register(openStream()); 273 Writer writer = closer.register(sink.openStream()); 274 return CharStreams.copy(reader, writer); 275 } catch (Throwable e) { 276 throw closer.rethrow(e); 277 } finally { 278 closer.close(); 279 } 280 } 281 282 /** 283 * Reads the contents of this source as a string. 284 * 285 * @throws IOException if an I/O error occurs while reading from this source 286 */ 287 public String read() throws IOException { 288 Closer closer = Closer.create(); 289 try { 290 Reader reader = closer.register(openStream()); 291 return CharStreams.toString(reader); 292 } catch (Throwable e) { 293 throw closer.rethrow(e); 294 } finally { 295 closer.close(); 296 } 297 } 298 299 /** 300 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 301 * 302 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 303 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 304 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 305 * it does. 306 * 307 * @throws IOException if an I/O error occurs while reading from this source 308 */ 309 @CheckForNull 310 public String readFirstLine() throws IOException { 311 Closer closer = Closer.create(); 312 try { 313 BufferedReader reader = closer.register(openBufferedStream()); 314 return reader.readLine(); 315 } catch (Throwable e) { 316 throw closer.rethrow(e); 317 } finally { 318 closer.close(); 319 } 320 } 321 322 /** 323 * Reads all the lines of this source as a list of strings. The returned list will be empty if 324 * this source is empty. 325 * 326 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 327 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 328 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 329 * it does. 330 * 331 * @throws IOException if an I/O error occurs while reading from this source 332 */ 333 public ImmutableList<String> readLines() throws IOException { 334 Closer closer = Closer.create(); 335 try { 336 BufferedReader reader = closer.register(openBufferedStream()); 337 List<String> result = Lists.newArrayList(); 338 String line; 339 while ((line = reader.readLine()) != null) { 340 result.add(line); 341 } 342 return ImmutableList.copyOf(result); 343 } catch (Throwable e) { 344 throw closer.rethrow(e); 345 } finally { 346 closer.close(); 347 } 348 } 349 350 /** 351 * Reads lines of text from this source, processing each line as it is read using the given {@link 352 * LineProcessor processor}. Stops when all lines have been processed or the processor returns 353 * {@code false} and returns the result produced by the processor. 354 * 355 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 356 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 357 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 358 * it does. 359 * 360 * @throws IOException if an I/O error occurs while reading from this source or if {@code 361 * processor} throws an {@code IOException} 362 * @since 16.0 363 */ 364 @CanIgnoreReturnValue // some processors won't return a useful result 365 @ParametricNullness 366 public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException { 367 checkNotNull(processor); 368 369 Closer closer = Closer.create(); 370 try { 371 Reader reader = closer.register(openStream()); 372 return CharStreams.readLines(reader, processor); 373 } catch (Throwable e) { 374 throw closer.rethrow(e); 375 } finally { 376 closer.close(); 377 } 378 } 379 380 /** 381 * Reads all lines of text from this source, running the given {@code action} for each line as it 382 * is read. 383 * 384 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 385 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 386 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 387 * it does. 388 * 389 * @throws IOException if an I/O error occurs while reading from this source or if {@code action} 390 * throws an {@code UncheckedIOException} 391 * @since 22.0 392 */ 393 public void forEachLine(Consumer<? super String> action) throws IOException { 394 try (Stream<String> lines = lines()) { 395 // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure 396 lines.forEachOrdered(action); 397 } catch (UncheckedIOException e) { 398 throw e.getCause(); 399 } 400 } 401 402 /** 403 * Returns whether the source has zero chars. The default implementation first checks {@link 404 * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be 405 * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. 406 * 407 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 408 * chars are actually available for reading. This means that a source may return {@code true} from 409 * {@code isEmpty()} despite having readable content. 410 * 411 * @throws IOException if an I/O error occurs 412 * @since 15.0 413 */ 414 public boolean isEmpty() throws IOException { 415 Optional<Long> lengthIfKnown = lengthIfKnown(); 416 if (lengthIfKnown.isPresent()) { 417 return lengthIfKnown.get() == 0L; 418 } 419 Closer closer = Closer.create(); 420 try { 421 Reader reader = closer.register(openStream()); 422 return reader.read() == -1; 423 } catch (Throwable e) { 424 throw closer.rethrow(e); 425 } finally { 426 closer.close(); 427 } 428 } 429 430 /** 431 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 432 * the source will contain the concatenated data from the streams of the underlying sources. 433 * 434 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 435 * close the open underlying stream. 436 * 437 * @param sources the sources to concatenate 438 * @return a {@code CharSource} containing the concatenated data 439 * @since 15.0 440 */ 441 public static CharSource concat(Iterable<? extends CharSource> sources) { 442 return new ConcatenatedCharSource(sources); 443 } 444 445 /** 446 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 447 * the source will contain the concatenated data from the streams of the underlying sources. 448 * 449 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 450 * close the open underlying stream. 451 * 452 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 453 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 454 * eagerly fetches data for each source when iterated (rather than producing sources that only 455 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 456 * possible. 457 * 458 * @param sources the sources to concatenate 459 * @return a {@code CharSource} containing the concatenated data 460 * @throws NullPointerException if any of {@code sources} is {@code null} 461 * @since 15.0 462 */ 463 public static CharSource concat(Iterator<? extends CharSource> sources) { 464 return concat(ImmutableList.copyOf(sources)); 465 } 466 467 /** 468 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 469 * the source will contain the concatenated data from the streams of the underlying sources. 470 * 471 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 472 * close the open underlying stream. 473 * 474 * @param sources the sources to concatenate 475 * @return a {@code CharSource} containing the concatenated data 476 * @throws NullPointerException if any of {@code sources} is {@code null} 477 * @since 15.0 478 */ 479 public static CharSource concat(CharSource... sources) { 480 return concat(ImmutableList.copyOf(sources)); 481 } 482 483 /** 484 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 485 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 486 * the {@code charSequence} is mutated while it is being read, so don't do that. 487 * 488 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 489 */ 490 public static CharSource wrap(CharSequence charSequence) { 491 return charSequence instanceof String 492 ? new StringCharSource((String) charSequence) 493 : new CharSequenceCharSource(charSequence); 494 } 495 496 /** 497 * Returns an immutable {@link CharSource} that contains no characters. 498 * 499 * @since 15.0 500 */ 501 public static CharSource empty() { 502 return EmptyCharSource.INSTANCE; 503 } 504 505 /** A byte source that reads chars from this source and encodes them as bytes using a charset. */ 506 private final class AsByteSource extends ByteSource { 507 508 final Charset charset; 509 510 AsByteSource(Charset charset) { 511 this.charset = checkNotNull(charset); 512 } 513 514 @Override 515 public CharSource asCharSource(Charset charset) { 516 if (charset.equals(this.charset)) { 517 return CharSource.this; 518 } 519 return super.asCharSource(charset); 520 } 521 522 @Override 523 public InputStream openStream() throws IOException { 524 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 525 } 526 527 @Override 528 public String toString() { 529 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 530 } 531 } 532 533 private static class CharSequenceCharSource extends CharSource { 534 535 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 536 537 protected final CharSequence seq; 538 539 protected CharSequenceCharSource(CharSequence seq) { 540 this.seq = checkNotNull(seq); 541 } 542 543 @Override 544 public Reader openStream() { 545 return new CharSequenceReader(seq); 546 } 547 548 @Override 549 public String read() { 550 return seq.toString(); 551 } 552 553 @Override 554 public boolean isEmpty() { 555 return seq.length() == 0; 556 } 557 558 @Override 559 public long length() { 560 return seq.length(); 561 } 562 563 @Override 564 public Optional<Long> lengthIfKnown() { 565 return Optional.of((long) seq.length()); 566 } 567 568 /** 569 * Returns an iterator over the lines in the string. If the string ends in a newline, a final 570 * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). 571 */ 572 private Iterator<String> linesIterator() { 573 return new AbstractIterator<String>() { 574 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 575 576 @Override 577 @CheckForNull 578 protected String computeNext() { 579 if (lines.hasNext()) { 580 String next = lines.next(); 581 // skip last line if it's empty 582 if (lines.hasNext() || !next.isEmpty()) { 583 return next; 584 } 585 } 586 return endOfData(); 587 } 588 }; 589 } 590 591 @Override 592 public Stream<String> lines() { 593 return Streams.stream(linesIterator()); 594 } 595 596 @Override 597 @CheckForNull 598 public String readFirstLine() { 599 Iterator<String> lines = linesIterator(); 600 return lines.hasNext() ? lines.next() : null; 601 } 602 603 @Override 604 public ImmutableList<String> readLines() { 605 return ImmutableList.copyOf(linesIterator()); 606 } 607 608 @Override 609 @ParametricNullness 610 public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException { 611 Iterator<String> lines = linesIterator(); 612 while (lines.hasNext()) { 613 if (!processor.processLine(lines.next())) { 614 break; 615 } 616 } 617 return processor.getResult(); 618 } 619 620 @Override 621 public String toString() { 622 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 623 } 624 } 625 626 /** 627 * Subclass specialized for string instances. 628 * 629 * <p>Since Strings are immutable and built into the jdk we can optimize some operations 630 * 631 * <ul> 632 * <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can 633 * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by 634 * one with {@link CharSequence#charAt(int)}. 635 * <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link 636 * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length 637 * can't change, and it is faster because many writers and appendables are optimized for 638 * appending string instances. 639 * </ul> 640 */ 641 private static class StringCharSource extends CharSequenceCharSource { 642 protected StringCharSource(String seq) { 643 super(seq); 644 } 645 646 @Override 647 public Reader openStream() { 648 return new StringReader((String) seq); 649 } 650 651 @Override 652 public long copyTo(Appendable appendable) throws IOException { 653 appendable.append(seq); 654 return seq.length(); 655 } 656 657 @Override 658 public long copyTo(CharSink sink) throws IOException { 659 checkNotNull(sink); 660 Closer closer = Closer.create(); 661 try { 662 Writer writer = closer.register(sink.openStream()); 663 writer.write((String) seq); 664 return seq.length(); 665 } catch (Throwable e) { 666 throw closer.rethrow(e); 667 } finally { 668 closer.close(); 669 } 670 } 671 } 672 673 private static final class EmptyCharSource extends StringCharSource { 674 675 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 676 677 private EmptyCharSource() { 678 super(""); 679 } 680 681 @Override 682 public String toString() { 683 return "CharSource.empty()"; 684 } 685 } 686 687 private static final class ConcatenatedCharSource extends CharSource { 688 689 private final Iterable<? extends CharSource> sources; 690 691 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 692 this.sources = checkNotNull(sources); 693 } 694 695 @Override 696 public Reader openStream() throws IOException { 697 return new MultiReader(sources.iterator()); 698 } 699 700 @Override 701 public boolean isEmpty() throws IOException { 702 for (CharSource source : sources) { 703 if (!source.isEmpty()) { 704 return false; 705 } 706 } 707 return true; 708 } 709 710 @Override 711 public Optional<Long> lengthIfKnown() { 712 long result = 0L; 713 for (CharSource source : sources) { 714 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 715 if (!lengthIfKnown.isPresent()) { 716 return Optional.absent(); 717 } 718 result += lengthIfKnown.get(); 719 } 720 return Optional.of(result); 721 } 722 723 @Override 724 public long length() throws IOException { 725 long result = 0L; 726 for (CharSource source : sources) { 727 result += source.length(); 728 } 729 return result; 730 } 731 732 @Override 733 public String toString() { 734 return "CharSource.concat(" + sources + ")"; 735 } 736 } 737}