/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Streams;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import com.google.errorprone.annotations.MustBeClosed;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Stream;
import javax.annotation.Nullable;

/**
 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
 *
 * <p>{@code CharSource} provides two kinds of methods:
 * <ul>
 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
 *     instance each time they are called. The caller is responsible for ensuring that the returned
 *     reader is closed.
 * <li><b>Convenience methods:</b> These are implementations of common operations that are typically
 *     implemented by opening a reader using one of the methods in the first category, doing
 *     something and finally closing the reader that was opened.
 * </ul>
 *
 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
 * there to be an empty line at the end if the contents are terminated with a line separator.
 *
 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
 *
 * @since 14.0
 * @author Colin Decker
 */
@GwtIncompatible
public abstract class CharSource {

  /**
   * Constructor for use by subclasses.
   */
  protected CharSource() {}

  /**
   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
   * as bytes using the given {@link Charset}.
   *
   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
   * the default implementation of this method will ensure that the original {@code CharSource} is
   * returned, rather than round-trip encoding. Subclasses that override this method should behave
   * the same way.
   *
   * @since 20.0
   */
  @Beta
  public ByteSource asByteSource(Charset charset) {
    return new AsByteSource(charset);
  }

  /**
   * Opens a new {@link Reader} for reading from this source. This method returns a new, independent
   * reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public abstract Reader openStream() throws IOException;

  /**
   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
   * independent reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public BufferedReader openBufferedStream() throws IOException {
    Reader reader = openStream();
    // Avoid double buffering when the underlying stream is already a BufferedReader.
    return (reader instanceof BufferedReader)
        ? (BufferedReader) reader
        : new BufferedReader(reader);
  }

  /**
   * Opens a new {@link Stream} for reading text one line at a time from this source. This method
   * returns a new, independent stream each time it is called.
   *
   * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an
   * I/O error occurs while the stream is reading from the source or when the stream is closed, an
   * {@link UncheckedIOException} is thrown.
   *
   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
   * as if it does.
   *
   * <p>The caller is responsible for ensuring that the returned stream is closed. For example:
   *
   * <pre>{@code
   * try (Stream<String> lines = source.lines()) {
   *   lines.map(...)
   *      .filter(...)
   *      .forEach(...);
   * }
   * }</pre>
   *
   * @throws IOException if an I/O error occurs while opening the stream
   * @since 22.0
   */
  @Beta
  @MustBeClosed
  public Stream<String> lines() throws IOException {
    BufferedReader reader = openBufferedStream();
    // Closing the stream closes the reader; Runnable cannot throw IOException, so wrap it.
    return reader.lines().onClose(() -> {
      try {
        reader.close();
      } catch (IOException e) {
        throw new UncheckedIOException(e);
      }
    });
  }

  /**
   * Returns the size of this source in chars, if the size can be easily determined without actually
   * opening the data stream.
   *
   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a
   * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
   * <i>possible</i> that this method will return a different number of chars than would be returned
   * by reading all of the chars.
   *
   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
   * return a different number of chars if the contents are changed.
   *
   * @since 19.0
   */
  @Beta
  public Optional<Long> lengthIfKnown() {
    return Optional.absent();
  }

  /**
   * Returns the length of this source in chars, even if doing so requires opening and traversing an
   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
   *
   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
   * absent, it will fall back to a heavyweight operation that will open a stream,
   * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
   * that were skipped.
   *
   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
   * implementation, it is <i>possible</i> that this method will return a different number of chars
   * than would be returned by reading all of the chars.
   *
   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
   * number of chars if the contents are changed.
   *
   * @throws IOException if an I/O error occurs while reading the length of this source
   * @since 19.0
   */
  @Beta
  public long length() throws IOException {
    Optional<Long> lengthIfKnown = lengthIfKnown();
    if (lengthIfKnown.isPresent()) {
      return lengthIfKnown.get();
    }

    try (Reader reader = openStream()) {
      return countBySkipping(reader);
    }
  }

  /**
   * Counts the chars remaining in {@code reader} by repeatedly skipping until a skip call reports
   * that zero chars were skipped.
   *
   * @param reader the reader to exhaust; it is not closed by this method
   * @return the total number of chars skipped
   * @throws IOException if {@code reader.skip} fails
   */
  private long countBySkipping(Reader reader) throws IOException {
    long count = 0;
    long read;
    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
      count += read;
    }
    return count;
  }

  /**
   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
   * Does not close {@code appendable} if it is {@code Closeable}.
   *
   * @return the number of characters copied
   * @throws IOException if an I/O error occurs while reading from this source or writing to
   *     {@code appendable}
   */
  @CanIgnoreReturnValue
  public long copyTo(Appendable appendable) throws IOException {
    checkNotNull(appendable);

    try (Reader reader = openStream()) {
      return CharStreams.copy(reader, appendable);
    }
  }

  /**
   * Copies the contents of this source to the given sink.
   *
   * @return the number of characters copied
   * @throws IOException if an I/O error occurs while reading from this source or writing to
   *     {@code sink}
   */
  @CanIgnoreReturnValue
  public long copyTo(CharSink sink) throws IOException {
    checkNotNull(sink);

    // Resources are closed in reverse order: the writer first, then the reader.
    try (Reader reader = openStream();
        Writer writer = sink.openStream()) {
      return CharStreams.copy(reader, writer);
    }
  }

  /**
   * Reads the contents of this source as a string.
   *
   * @throws IOException if an I/O error occurs while reading from this source
   */
  public String read() throws IOException {
    try (Reader reader = openStream()) {
      return CharStreams.toString(reader);
    }
  }

  /**
   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
   *
   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
   * as if it does.
   *
   * @throws IOException if an I/O error occurs while reading from this source
   */
  @Nullable
  public String readFirstLine() throws IOException {
    try (BufferedReader reader = openBufferedStream()) {
      return reader.readLine();
    }
  }

  /**
   * Reads all the lines of this source as a list of strings. The returned list will be empty if
   * this source is empty.
   *
   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
   * as if it does.
   *
   * @throws IOException if an I/O error occurs while reading from this source
   */
  public ImmutableList<String> readLines() throws IOException {
    try (BufferedReader reader = openBufferedStream()) {
      List<String> result = Lists.newArrayList();
      String line;
      while ((line = reader.readLine()) != null) {
        result.add(line);
      }
      return ImmutableList.copyOf(result);
    }
  }

  /**
   * Reads lines of text from this source, processing each line as it is read using the given
   * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
   * returns {@code false} and returns the result produced by the processor.
   *
   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
   * as if it does.
   *
   * @throws IOException if an I/O error occurs while reading from this source or if
   *     {@code processor} throws an {@code IOException}
   * @since 16.0
   */
  @Beta
  @CanIgnoreReturnValue // some processors won't return a useful result
  public <T> T readLines(LineProcessor<T> processor) throws IOException {
    checkNotNull(processor);

    try (Reader reader = openStream()) {
      return CharStreams.readLines(reader, processor);
    }
  }

  /**
   * Reads all lines of text from this source, running the given {@code action} for each line as
   * it is read.
   *
   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
   * as if it does.
   *
   * @throws IOException if an I/O error occurs while reading from this source or if
   *     {@code action} throws an {@code UncheckedIOException}
   * @since 22.0
   */
  @Beta
  public void forEachLine(Consumer<? super String> action) throws IOException {
    try (Stream<String> lines = lines()) {
      // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure
      lines.forEachOrdered(action);
    } catch (UncheckedIOException e) {
      // Unwrap so that callers see the checked IOException this method declares.
      throw e.getCause();
    }
  }

  /**
   * Returns whether the source has zero chars. The default implementation first checks
   * {@link #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
   *
   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
   * chars are actually available for reading. This means that a source may return {@code true} from
   * {@code isEmpty()} despite having readable content.
   *
   * @throws IOException if an I/O error occurs
   * @since 15.0
   */
  public boolean isEmpty() throws IOException {
    Optional<Long> lengthIfKnown = lengthIfKnown();
    if (lengthIfKnown.isPresent()) {
      return lengthIfKnown.get() == 0L;
    }
    try (Reader reader = openStream()) {
      return reader.read() == -1;
    }
  }

  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @since 15.0
   */
  public static CharSource concat(Iterable<? extends CharSource> sources) {
    return new ConcatenatedCharSource(sources);
  }

  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
   * eagerly fetches data for each source when iterated (rather than producing sources that only
   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
   * possible.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static CharSource concat(Iterator<? extends CharSource> sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static CharSource concat(CharSource... sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
   * the {@code charSequence} is mutated while it is being read, so don't do that.
   *
   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
   */
  public static CharSource wrap(CharSequence charSequence) {
    return charSequence instanceof String
        ? new StringCharSource((String) charSequence)
        : new CharSequenceCharSource(charSequence);
  }

  /**
   * Returns an immutable {@link CharSource} that contains no characters.
   *
   * @since 15.0
   */
  public static CharSource empty() {
    return EmptyCharSource.INSTANCE;
  }

  /**
   * A byte source that reads chars from this source and encodes them as bytes using a charset.
   */
  private final class AsByteSource extends ByteSource {

    final Charset charset;

    AsByteSource(Charset charset) {
      this.charset = checkNotNull(charset);
    }

    /**
     * Returns the enclosing {@code CharSource} itself when asked for the charset this view encodes
     * with, so that no encode/decode round trip takes place; otherwise defers to the superclass.
     */
    @Override
    public CharSource asCharSource(Charset charset) {
      if (charset.equals(this.charset)) {
        return CharSource.this;
      }
      return super.asCharSource(charset);
    }

    @Override
    public InputStream openStream() throws IOException {
      // NOTE(review): 8192 is presumably the encoder buffer size; confirm in ReaderInputStream.
      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
    }

    @Override
    public String toString() {
      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
    }
  }

  /**
   * A {@code CharSource} backed by a {@link CharSequence}. Whole-content and line-oriented
   * operations are answered directly from the sequence instead of going through a {@link Reader}.
   */
  private static class CharSequenceCharSource extends CharSource {

    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");

    protected final CharSequence seq;

    protected CharSequenceCharSource(CharSequence seq) {
      this.seq = checkNotNull(seq);
    }

    @Override
    public Reader openStream() {
      return new CharSequenceReader(seq);
    }

    @Override
    public String read() {
      return seq.toString();
    }

    @Override
    public boolean isEmpty() {
      return seq.length() == 0;
    }

    @Override
    public long length() {
      return seq.length();
    }

    @Override
    public Optional<Long> lengthIfKnown() {
      return Optional.of((long) seq.length());
    }

    /**
     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
     */
    private Iterator<String> linesIterator() {
      return new AbstractIterator<String>() {
        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();

        @Override
        protected String computeNext() {
          if (lines.hasNext()) {
            String next = lines.next();
            // skip last line if it's empty
            if (lines.hasNext() || !next.isEmpty()) {
              return next;
            }
          }
          return endOfData();
        }
      };
    }

    @Override
    public Stream<String> lines() {
      return Streams.stream(linesIterator());
    }

    @Override
    public String readFirstLine() {
      Iterator<String> lines = linesIterator();
      return lines.hasNext() ? lines.next() : null;
    }

    @Override
    public ImmutableList<String> readLines() {
      return ImmutableList.copyOf(linesIterator());
    }

    /**
     * Feeds each line of the sequence to {@code processor} until the lines run out or the
     * processor returns {@code false}, then returns the processor's result.
     *
     * @throws NullPointerException if {@code processor} is {@code null}
     * @throws IOException if {@code processor} throws an {@code IOException}
     */
    @Override
    public <T> T readLines(LineProcessor<T> processor) throws IOException {
      // Fail fast, consistent with the base-class implementation this overrides.
      checkNotNull(processor);
      Iterator<String> lines = linesIterator();
      while (lines.hasNext()) {
        if (!processor.processLine(lines.next())) {
          break;
        }
      }
      return processor.getResult();
    }

    @Override
    public String toString() {
      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
    }
  }

  /**
   * Subclass specialized for string instances.
   *
   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
   *
   * <ul>
   *   <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
   *       use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
   *       one with {@link CharSequence#charAt(int)}.
   *   <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link
   *       #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length
   *       can't change, and it is faster because many writers and appendables are optimized for
   *       appending string instances.
   * </ul>
   */
  private static class StringCharSource extends CharSequenceCharSource {
    protected StringCharSource(String seq) {
      super(seq);
    }

    @Override
    public Reader openStream() {
      return new StringReader((String) seq);
    }

    /**
     * Appends the whole string to {@code appendable} in a single call.
     *
     * @return the length of the string
     * @throws NullPointerException if {@code appendable} is {@code null}
     * @throws IOException if {@code appendable} fails while appending
     */
    @Override
    public long copyTo(Appendable appendable) throws IOException {
      // Fail fast, consistent with the base-class implementation and copyTo(CharSink).
      checkNotNull(appendable);
      appendable.append(seq);
      return seq.length();
    }

    /**
     * Writes the whole string to a writer opened from {@code sink}, then closes that writer.
     *
     * @return the length of the string
     * @throws IOException if opening, writing to, or closing the sink's writer fails
     */
    @Override
    public long copyTo(CharSink sink) throws IOException {
      checkNotNull(sink);
      try (Writer writer = sink.openStream()) {
        writer.write((String) seq);
        return seq.length();
      }
    }
  }

  /** Singleton source wrapping the empty string; returned by {@link CharSource#empty()}. */
  private static final class EmptyCharSource extends StringCharSource {

    private static final EmptyCharSource INSTANCE = new EmptyCharSource();

    private EmptyCharSource() {
      super("");
    }

    @Override
    public String toString() {
      return "CharSource.empty()";
    }
  }

  /**
   * Source returned by the {@code concat} factories. Its reader is a {@code MultiReader} over the
   * iterator of the component sources, and its size queries aggregate those of the components.
   */
  private static final class ConcatenatedCharSource extends CharSource {

    private final Iterable<? extends CharSource> sources;

    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
      this.sources = checkNotNull(sources);
    }

    @Override
    public Reader openStream() throws IOException {
      return new MultiReader(sources.iterator());
    }

    /** Returns {@code true} only if every component source reports that it is empty. */
    @Override
    public boolean isEmpty() throws IOException {
      for (CharSource source : sources) {
        if (!source.isEmpty()) {
          return false;
        }
      }
      return true;
    }

    /**
     * Returns the sum of the component lengths, or absent as soon as any component does not know
     * its length.
     */
    @Override
    public Optional<Long> lengthIfKnown() {
      long result = 0L;
      for (CharSource source : sources) {
        Optional<Long> lengthIfKnown = source.lengthIfKnown();
        if (!lengthIfKnown.isPresent()) {
          return Optional.absent();
        }
        result += lengthIfKnown.get();
      }
      return Optional.of(result);
    }

    /** Returns the sum of {@link CharSource#length()} over all component sources. */
    @Override
    public long length() throws IOException {
      long result = 0L;
      for (CharSource source : sources) {
        result += source.length();
      }
      return result;
    }

    @Override
    public String toString() {
      return "CharSource.concat(" + sources + ")";
    }
  }
}