001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.common.collect.Streams; 028import com.google.errorprone.annotations.CanIgnoreReturnValue; 029import com.google.errorprone.annotations.MustBeClosed; 030import java.io.BufferedReader; 031import java.io.IOException; 032import java.io.InputStream; 033import java.io.Reader; 034import java.io.StringReader; 035import java.io.UncheckedIOException; 036import java.io.Writer; 037import java.nio.charset.Charset; 038import java.util.Iterator; 039import java.util.List; 040import java.util.function.Consumer; 041import java.util.stream.Stream; 042import org.checkerframework.checker.nullness.qual.Nullable; 043 044/** 045 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code 046 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead, 047 * it is an immutable <i>supplier</i> of {@code Reader} instances. 048 * 049 * <p>{@code CharSource} provides two kinds of methods: 050 * 051 * <ul> 052 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 053 * instance each time they are called. The caller is responsible for ensuring that the 054 * returned reader is closed. 055 * <li><b>Convenience methods:</b> These are implementations of common operations that are 056 * typically implemented by opening a reader using one of the methods in the first category, 057 * doing something and finally closing the reader that was opened. 058 * </ul> 059 * 060 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 061 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code 062 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to 063 * be an empty line at the end if the contents are terminated with a line separator. 064 * 065 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 066 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 067 * 068 * @since 14.0 069 * @author Colin Decker 070 */ 071@GwtIncompatible 072public abstract class CharSource { 073 074 /** Constructor for use by subclasses. */ 075 protected CharSource() {} 076 077 /** 078 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 079 * as bytes using the given {@link Charset}. 080 * 081 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 082 * the default implementation of this method will ensure that the original {@code CharSource} is 083 * returned, rather than round-trip encoding. Subclasses that override this method should behave 084 * the same way. 085 * 086 * @since 20.0 087 */ 088 @Beta 089 public ByteSource asByteSource(Charset charset) { 090 return new AsByteSource(charset); 091 } 092 093 /** 094 * Opens a new {@link Reader} for reading from this source. This method returns a new, independent 095 * reader each time it is called. 096 * 097 * <p>The caller is responsible for ensuring that the returned reader is closed. 098 * 099 * @throws IOException if an I/O error occurs while opening the reader 100 */ 101 public abstract Reader openStream() throws IOException; 102 103 /** 104 * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, 105 * independent reader each time it is called. 106 * 107 * <p>The caller is responsible for ensuring that the returned reader is closed. 108 * 109 * @throws IOException if an I/O error occurs while of opening the reader 110 */ 111 public BufferedReader openBufferedStream() throws IOException { 112 Reader reader = openStream(); 113 return (reader instanceof BufferedReader) 114 ? (BufferedReader) reader 115 : new BufferedReader(reader); 116 } 117 118 /** 119 * Opens a new {@link Stream} for reading text one line at a time from this source. This method 120 * returns a new, independent stream each time it is called. 121 * 122 * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an 123 * I/O error occurs while the stream is reading from the source or when the stream is closed, an 124 * {@link UncheckedIOException} is thrown. 125 * 126 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 127 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 128 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 129 * it does. 130 * 131 * <p>The caller is responsible for ensuring that the returned stream is closed. For example: 132 * 133 * <pre>{@code 134 * try (Stream<String> lines = source.lines()) { 135 * lines.map(...) 136 * .filter(...) 137 * .forEach(...); 138 * } 139 * }</pre> 140 * 141 * @throws IOException if an I/O error occurs while opening the stream 142 * @since 22.0 143 */ 144 @Beta 145 @MustBeClosed 146 public Stream<String> lines() throws IOException { 147 BufferedReader reader = openBufferedStream(); 148 return reader 149 .lines() 150 .onClose( 151 () -> { 152 try { 153 reader.close(); 154 } catch (IOException e) { 155 throw new UncheckedIOException(e); 156 } 157 }); 158 } 159 160 /** 161 * Returns the size of this source in chars, if the size can be easily determined without actually 162 * opening the data stream. 163 * 164 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code 165 * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i> 166 * that this method will return a different number of chars than would be returned by reading all 167 * of the chars. 168 * 169 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 170 * return a different number of chars if the contents are changed. 171 * 172 * @since 19.0 173 */ 174 @Beta 175 public Optional<Long> lengthIfKnown() { 176 return Optional.absent(); 177 } 178 179 /** 180 * Returns the length of this source in chars, even if doing so requires opening and traversing an 181 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 182 * 183 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 184 * absent, it will fall back to a heavyweight operation that will open a stream, {@link 185 * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that 186 * were skipped. 187 * 188 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 189 * implementation, it is <i>possible</i> that this method will return a different number of chars 190 * than would be returned by reading all of the chars. 191 * 192 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 193 * number of chars if the contents are changed. 194 * 195 * @throws IOException if an I/O error occurs while reading the length of this source 196 * @since 19.0 197 */ 198 @Beta 199 public long length() throws IOException { 200 Optional<Long> lengthIfKnown = lengthIfKnown(); 201 if (lengthIfKnown.isPresent()) { 202 return lengthIfKnown.get(); 203 } 204 205 Closer closer = Closer.create(); 206 try { 207 Reader reader = closer.register(openStream()); 208 return countBySkipping(reader); 209 } catch (Throwable e) { 210 throw closer.rethrow(e); 211 } finally { 212 closer.close(); 213 } 214 } 215 216 private long countBySkipping(Reader reader) throws IOException { 217 long count = 0; 218 long read; 219 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 220 count += read; 221 } 222 return count; 223 } 224 225 /** 226 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 227 * Does not close {@code appendable} if it is {@code Closeable}. 228 * 229 * @return the number of characters copied 230 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 231 * appendable} 232 */ 233 @CanIgnoreReturnValue 234 public long copyTo(Appendable appendable) throws IOException { 235 checkNotNull(appendable); 236 237 Closer closer = Closer.create(); 238 try { 239 Reader reader = closer.register(openStream()); 240 return CharStreams.copy(reader, appendable); 241 } catch (Throwable e) { 242 throw closer.rethrow(e); 243 } finally { 244 closer.close(); 245 } 246 } 247 248 /** 249 * Copies the contents of this source to the given sink. 250 * 251 * @return the number of characters copied 252 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 253 * sink} 254 */ 255 @CanIgnoreReturnValue 256 public long copyTo(CharSink sink) throws IOException { 257 checkNotNull(sink); 258 259 Closer closer = Closer.create(); 260 try { 261 Reader reader = closer.register(openStream()); 262 Writer writer = closer.register(sink.openStream()); 263 return CharStreams.copy(reader, writer); 264 } catch (Throwable e) { 265 throw closer.rethrow(e); 266 } finally { 267 closer.close(); 268 } 269 } 270 271 /** 272 * Reads the contents of this source as a string. 273 * 274 * @throws IOException if an I/O error occurs while reading from this source 275 */ 276 public String read() throws IOException { 277 Closer closer = Closer.create(); 278 try { 279 Reader reader = closer.register(openStream()); 280 return CharStreams.toString(reader); 281 } catch (Throwable e) { 282 throw closer.rethrow(e); 283 } finally { 284 closer.close(); 285 } 286 } 287 288 /** 289 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 290 * 291 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 292 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 293 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 294 * it does. 295 * 296 * @throws IOException if an I/O error occurs while reading from this source 297 */ 298 public @Nullable String readFirstLine() throws IOException { 299 Closer closer = Closer.create(); 300 try { 301 BufferedReader reader = closer.register(openBufferedStream()); 302 return reader.readLine(); 303 } catch (Throwable e) { 304 throw closer.rethrow(e); 305 } finally { 306 closer.close(); 307 } 308 } 309 310 /** 311 * Reads all the lines of this source as a list of strings. The returned list will be empty if 312 * this source is empty. 313 * 314 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 315 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 316 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 317 * it does. 318 * 319 * @throws IOException if an I/O error occurs while reading from this source 320 */ 321 public ImmutableList<String> readLines() throws IOException { 322 Closer closer = Closer.create(); 323 try { 324 BufferedReader reader = closer.register(openBufferedStream()); 325 List<String> result = Lists.newArrayList(); 326 String line; 327 while ((line = reader.readLine()) != null) { 328 result.add(line); 329 } 330 return ImmutableList.copyOf(result); 331 } catch (Throwable e) { 332 throw closer.rethrow(e); 333 } finally { 334 closer.close(); 335 } 336 } 337 338 /** 339 * Reads lines of text from this source, processing each line as it is read using the given {@link 340 * LineProcessor processor}. Stops when all lines have been processed or the processor returns 341 * {@code false} and returns the result produced by the processor. 342 * 343 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 344 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 345 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 346 * it does. 347 * 348 * @throws IOException if an I/O error occurs while reading from this source or if {@code 349 * processor} throws an {@code IOException} 350 * @since 16.0 351 */ 352 @Beta 353 @CanIgnoreReturnValue // some processors won't return a useful result 354 public <T> T readLines(LineProcessor<T> processor) throws IOException { 355 checkNotNull(processor); 356 357 Closer closer = Closer.create(); 358 try { 359 Reader reader = closer.register(openStream()); 360 return CharStreams.readLines(reader, processor); 361 } catch (Throwable e) { 362 throw closer.rethrow(e); 363 } finally { 364 closer.close(); 365 } 366 } 367 368 /** 369 * Reads all lines of text from this source, running the given {@code action} for each line as it 370 * is read. 371 * 372 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 373 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 374 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 375 * it does. 376 * 377 * @throws IOException if an I/O error occurs while reading from this source or if {@code action} 378 * throws an {@code UncheckedIOException} 379 * @since 22.0 380 */ 381 @Beta 382 public void forEachLine(Consumer<? super String> action) throws IOException { 383 try (Stream<String> lines = lines()) { 384 // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure 385 lines.forEachOrdered(action); 386 } catch (UncheckedIOException e) { 387 throw e.getCause(); 388 } 389 } 390 391 /** 392 * Returns whether the source has zero chars. The default implementation first checks {@link 393 * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be 394 * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. 395 * 396 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 397 * chars are actually available for reading. This means that a source may return {@code true} from 398 * {@code isEmpty()} despite having readable content. 399 * 400 * @throws IOException if an I/O error occurs 401 * @since 15.0 402 */ 403 public boolean isEmpty() throws IOException { 404 Optional<Long> lengthIfKnown = lengthIfKnown(); 405 if (lengthIfKnown.isPresent()) { 406 return lengthIfKnown.get() == 0L; 407 } 408 Closer closer = Closer.create(); 409 try { 410 Reader reader = closer.register(openStream()); 411 return reader.read() == -1; 412 } catch (Throwable e) { 413 throw closer.rethrow(e); 414 } finally { 415 closer.close(); 416 } 417 } 418 419 /** 420 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 421 * the source will contain the concatenated data from the streams of the underlying sources. 422 * 423 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 424 * close the open underlying stream. 425 * 426 * @param sources the sources to concatenate 427 * @return a {@code CharSource} containing the concatenated data 428 * @since 15.0 429 */ 430 public static CharSource concat(Iterable<? extends CharSource> sources) { 431 return new ConcatenatedCharSource(sources); 432 } 433 434 /** 435 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 436 * the source will contain the concatenated data from the streams of the underlying sources. 437 * 438 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 439 * close the open underlying stream. 440 * 441 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 442 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 443 * eagerly fetches data for each source when iterated (rather than producing sources that only 444 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 445 * possible. 446 * 447 * @param sources the sources to concatenate 448 * @return a {@code CharSource} containing the concatenated data 449 * @throws NullPointerException if any of {@code sources} is {@code null} 450 * @since 15.0 451 */ 452 public static CharSource concat(Iterator<? extends CharSource> sources) { 453 return concat(ImmutableList.copyOf(sources)); 454 } 455 456 /** 457 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 458 * the source will contain the concatenated data from the streams of the underlying sources. 459 * 460 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 461 * close the open underlying stream. 462 * 463 * @param sources the sources to concatenate 464 * @return a {@code CharSource} containing the concatenated data 465 * @throws NullPointerException if any of {@code sources} is {@code null} 466 * @since 15.0 467 */ 468 public static CharSource concat(CharSource... sources) { 469 return concat(ImmutableList.copyOf(sources)); 470 } 471 472 /** 473 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 474 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 475 * the {@code charSequence} is mutated while it is being read, so don't do that. 476 * 477 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 478 */ 479 public static CharSource wrap(CharSequence charSequence) { 480 return charSequence instanceof String 481 ? new StringCharSource((String) charSequence) 482 : new CharSequenceCharSource(charSequence); 483 } 484 485 /** 486 * Returns an immutable {@link CharSource} that contains no characters. 487 * 488 * @since 15.0 489 */ 490 public static CharSource empty() { 491 return EmptyCharSource.INSTANCE; 492 } 493 494 /** A byte source that reads chars from this source and encodes them as bytes using a charset. */ 495 private final class AsByteSource extends ByteSource { 496 497 final Charset charset; 498 499 AsByteSource(Charset charset) { 500 this.charset = checkNotNull(charset); 501 } 502 503 @Override 504 public CharSource asCharSource(Charset charset) { 505 if (charset.equals(this.charset)) { 506 return CharSource.this; 507 } 508 return super.asCharSource(charset); 509 } 510 511 @Override 512 public InputStream openStream() throws IOException { 513 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 514 } 515 516 @Override 517 public String toString() { 518 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 519 } 520 } 521 522 private static class CharSequenceCharSource extends CharSource { 523 524 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 525 526 protected final CharSequence seq; 527 528 protected CharSequenceCharSource(CharSequence seq) { 529 this.seq = checkNotNull(seq); 530 } 531 532 @Override 533 public Reader openStream() { 534 return new CharSequenceReader(seq); 535 } 536 537 @Override 538 public String read() { 539 return seq.toString(); 540 } 541 542 @Override 543 public boolean isEmpty() { 544 return seq.length() == 0; 545 } 546 547 @Override 548 public long length() { 549 return seq.length(); 550 } 551 552 @Override 553 public Optional<Long> lengthIfKnown() { 554 return Optional.of((long) seq.length()); 555 } 556 557 /** 558 * Returns an iterator over the lines in the string. If the string ends in a newline, a final 559 * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). 560 */ 561 private Iterator<String> linesIterator() { 562 return new AbstractIterator<String>() { 563 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 564 565 @Override 566 protected String computeNext() { 567 if (lines.hasNext()) { 568 String next = lines.next(); 569 // skip last line if it's empty 570 if (lines.hasNext() || !next.isEmpty()) { 571 return next; 572 } 573 } 574 return endOfData(); 575 } 576 }; 577 } 578 579 @Override 580 public Stream<String> lines() { 581 return Streams.stream(linesIterator()); 582 } 583 584 @Override 585 public String readFirstLine() { 586 Iterator<String> lines = linesIterator(); 587 return lines.hasNext() ? lines.next() : null; 588 } 589 590 @Override 591 public ImmutableList<String> readLines() { 592 return ImmutableList.copyOf(linesIterator()); 593 } 594 595 @Override 596 public <T> T readLines(LineProcessor<T> processor) throws IOException { 597 Iterator<String> lines = linesIterator(); 598 while (lines.hasNext()) { 599 if (!processor.processLine(lines.next())) { 600 break; 601 } 602 } 603 return processor.getResult(); 604 } 605 606 @Override 607 public String toString() { 608 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 609 } 610 } 611 612 /** 613 * Subclass specialized for string instances. 614 * 615 * <p>Since Strings are immutable and built into the jdk we can optimize some operations 616 * 617 * <ul> 618 * <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can 619 * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by 620 * one with {@link CharSequence#charAt(int)}. 621 * <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link 622 * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length 623 * can't change, and it is faster because many writers and appendables are optimized for 624 * appending string instances. 625 * </ul> 626 */ 627 private static class StringCharSource extends CharSequenceCharSource { 628 protected StringCharSource(String seq) { 629 super(seq); 630 } 631 632 @Override 633 public Reader openStream() { 634 return new StringReader((String) seq); 635 } 636 637 @Override 638 public long copyTo(Appendable appendable) throws IOException { 639 appendable.append(seq); 640 return seq.length(); 641 } 642 643 @Override 644 public long copyTo(CharSink sink) throws IOException { 645 checkNotNull(sink); 646 Closer closer = Closer.create(); 647 try { 648 Writer writer = closer.register(sink.openStream()); 649 writer.write((String) seq); 650 return seq.length(); 651 } catch (Throwable e) { 652 throw closer.rethrow(e); 653 } finally { 654 closer.close(); 655 } 656 } 657 } 658 659 private static final class EmptyCharSource extends StringCharSource { 660 661 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 662 663 private EmptyCharSource() { 664 super(""); 665 } 666 667 @Override 668 public String toString() { 669 return "CharSource.empty()"; 670 } 671 } 672 673 private static final class ConcatenatedCharSource extends CharSource { 674 675 private final Iterable<? extends CharSource> sources; 676 677 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 678 this.sources = checkNotNull(sources); 679 } 680 681 @Override 682 public Reader openStream() throws IOException { 683 return new MultiReader(sources.iterator()); 684 } 685 686 @Override 687 public boolean isEmpty() throws IOException { 688 for (CharSource source : sources) { 689 if (!source.isEmpty()) { 690 return false; 691 } 692 } 693 return true; 694 } 695 696 @Override 697 public Optional<Long> lengthIfKnown() { 698 long result = 0L; 699 for (CharSource source : sources) { 700 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 701 if (!lengthIfKnown.isPresent()) { 702 return Optional.absent(); 703 } 704 result += lengthIfKnown.get(); 705 } 706 return Optional.of(result); 707 } 708 709 @Override 710 public long length() throws IOException { 711 long result = 0L; 712 for (CharSource source : sources) { 713 result += source.length(); 714 } 715 return result; 716 } 717 718 @Override 719 public String toString() { 720 return "CharSource.concat(" + sources + ")"; 721 } 722 } 723}