001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.common.collect.Streams; 028import com.google.errorprone.annotations.CanIgnoreReturnValue; 029import com.google.errorprone.annotations.MustBeClosed; 030import java.io.BufferedReader; 031import java.io.IOException; 032import java.io.InputStream; 033import java.io.Reader; 034import java.io.UncheckedIOException; 035import java.io.Writer; 036import java.nio.charset.Charset; 037import java.util.Iterator; 038import java.util.List; 039import java.util.function.Consumer; 040import java.util.stream.Stream; 041import javax.annotation.Nullable; 042 043/** 044 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a 045 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed. 046 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances. 047 * 048 * <p>{@code CharSource} provides two kinds of methods: 049 * <ul> 050 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 051 * instance each time they are called. The caller is responsible for ensuring that the returned 052 * reader is closed. 053 * <li><b>Convenience methods:</b> These are implementations of common operations that are typically 054 * implemented by opening a reader using one of the methods in the first category, doing 055 * something and finally closing the reader that was opened. 056 * </ul> 057 * 058 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 059 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, 060 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider 061 * there to be an empty line at the end if the contents are terminated with a line separator. 062 * 063 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 064 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 065 * 066 * @since 14.0 067 * @author Colin Decker 068 */ 069@GwtIncompatible 070public abstract class CharSource { 071 072 /** 073 * Constructor for use by subclasses. 074 */ 075 protected CharSource() {} 076 077 /** 078 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 079 * as bytes using the given {@link Charset}. 080 * 081 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 082 * the default implementation of this method will ensure that the original {@code CharSource} is 083 * returned, rather than round-trip encoding. Subclasses that override this method should behave 084 * the same way. 085 * 086 * @since 20.0 087 */ 088 @Beta 089 public ByteSource asByteSource(Charset charset) { 090 return new AsByteSource(charset); 091 } 092 093 /** 094 * Opens a new {@link Reader} for reading from this source. This method returns a new, independent 095 * reader each time it is called. 096 * 097 * <p>The caller is responsible for ensuring that the returned reader is closed. 098 * 099 * @throws IOException if an I/O error occurs while opening the reader 100 */ 101 public abstract Reader openStream() throws IOException; 102 103 /** 104 * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, 105 * independent reader each time it is called. 106 * 107 * <p>The caller is responsible for ensuring that the returned reader is closed. 108 * 109 * @throws IOException if an I/O error occurs while of opening the reader 110 */ 111 public BufferedReader openBufferedStream() throws IOException { 112 Reader reader = openStream(); 113 return (reader instanceof BufferedReader) 114 ? (BufferedReader) reader 115 : new BufferedReader(reader); 116 } 117 118 /** 119 * Opens a new {@link Stream} for reading text one line at a time from this source. This method 120 * returns a new, independent stream each time it is called. 121 * 122 * <p>The returned stream is lazy and only reads from the source in the terminal operation. If an 123 * I/O error occurs while the stream is reading from the source or when the stream is closed, an 124 * {@link UncheckedIOException} is thrown. 125 * 126 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 127 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or 128 * {@code \n}. If the source's content does not end in a line termination sequence, it is treated 129 * as if it does. 130 * 131 * <p>The caller is responsible for ensuring that the returned stream is closed. For example: 132 * 133 * <pre>{@code 134 * try (Stream<String> lines = source.lines()) { 135 * lines.map(...) 136 * .filter(...) 137 * .forEach(...); 138 * } 139 * }</pre> 140 * 141 * @throws IOException if an I/O error occurs while opening the stream 142 * @since 22.0 143 */ 144 @Beta 145 @MustBeClosed 146 public Stream<String> lines() throws IOException { 147 BufferedReader reader = openBufferedStream(); 148 return reader.lines().onClose(() -> { 149 try { 150 reader.close(); 151 } catch (IOException e) { 152 throw new UncheckedIOException(e); 153 } 154 }); 155 } 156 157 /** 158 * Returns the size of this source in chars, if the size can be easily determined without actually 159 * opening the data stream. 160 * 161 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a 162 * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is 163 * <i>possible</i> that this method will return a different number of chars than would be returned 164 * by reading all of the chars. 165 * 166 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 167 * return a different number of chars if the contents are changed. 168 * 169 * @since 19.0 170 */ 171 @Beta 172 public Optional<Long> lengthIfKnown() { 173 return Optional.absent(); 174 } 175 176 /** 177 * Returns the length of this source in chars, even if doing so requires opening and traversing an 178 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 179 * 180 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 181 * absent, it will fall back to a heavyweight operation that will open a stream, 182 * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars 183 * that were skipped. 184 * 185 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 186 * implementation, it is <i>possible</i> that this method will return a different number of chars 187 * than would be returned by reading all of the chars. 188 * 189 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 190 * number of chars if the contents are changed. 191 * 192 * @throws IOException if an I/O error occurs while reading the length of this source 193 * @since 19.0 194 */ 195 @Beta 196 public long length() throws IOException { 197 Optional<Long> lengthIfKnown = lengthIfKnown(); 198 if (lengthIfKnown.isPresent()) { 199 return lengthIfKnown.get(); 200 } 201 202 Closer closer = Closer.create(); 203 try { 204 Reader reader = closer.register(openStream()); 205 return countBySkipping(reader); 206 } catch (Throwable e) { 207 throw closer.rethrow(e); 208 } finally { 209 closer.close(); 210 } 211 } 212 213 private long countBySkipping(Reader reader) throws IOException { 214 long count = 0; 215 long read; 216 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 217 count += read; 218 } 219 return count; 220 } 221 222 /** 223 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 224 * Does not close {@code appendable} if it is {@code Closeable}. 225 * 226 * @return the number of characters copied 227 * @throws IOException if an I/O error occurs while reading from this source or writing to 228 * {@code appendable} 229 */ 230 @CanIgnoreReturnValue 231 public long copyTo(Appendable appendable) throws IOException { 232 checkNotNull(appendable); 233 234 Closer closer = Closer.create(); 235 try { 236 Reader reader = closer.register(openStream()); 237 return CharStreams.copy(reader, appendable); 238 } catch (Throwable e) { 239 throw closer.rethrow(e); 240 } finally { 241 closer.close(); 242 } 243 } 244 245 /** 246 * Copies the contents of this source to the given sink. 247 * 248 * @return the number of characters copied 249 * @throws IOException if an I/O error occurs while reading from this source or writing to 250 * {@code sink} 251 */ 252 @CanIgnoreReturnValue 253 public long copyTo(CharSink sink) throws IOException { 254 checkNotNull(sink); 255 256 Closer closer = Closer.create(); 257 try { 258 Reader reader = closer.register(openStream()); 259 Writer writer = closer.register(sink.openStream()); 260 return CharStreams.copy(reader, writer); 261 } catch (Throwable e) { 262 throw closer.rethrow(e); 263 } finally { 264 closer.close(); 265 } 266 } 267 268 /** 269 * Reads the contents of this source as a string. 270 * 271 * @throws IOException if an I/O error occurs while reading from this source 272 */ 273 public String read() throws IOException { 274 Closer closer = Closer.create(); 275 try { 276 Reader reader = closer.register(openStream()); 277 return CharStreams.toString(reader); 278 } catch (Throwable e) { 279 throw closer.rethrow(e); 280 } finally { 281 closer.close(); 282 } 283 } 284 285 /** 286 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 287 * 288 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 289 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or 290 * {@code \n}. If the source's content does not end in a line termination sequence, it is treated 291 * as if it does. 292 * 293 * @throws IOException if an I/O error occurs while reading from this source 294 */ 295 @Nullable 296 public String readFirstLine() throws IOException { 297 Closer closer = Closer.create(); 298 try { 299 BufferedReader reader = closer.register(openBufferedStream()); 300 return reader.readLine(); 301 } catch (Throwable e) { 302 throw closer.rethrow(e); 303 } finally { 304 closer.close(); 305 } 306 } 307 308 /** 309 * Reads all the lines of this source as a list of strings. The returned list will be empty if 310 * this source is empty. 311 * 312 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 313 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or 314 * {@code \n}. If the source's content does not end in a line termination sequence, it is treated 315 * as if it does. 316 * 317 * @throws IOException if an I/O error occurs while reading from this source 318 */ 319 public ImmutableList<String> readLines() throws IOException { 320 Closer closer = Closer.create(); 321 try { 322 BufferedReader reader = closer.register(openBufferedStream()); 323 List<String> result = Lists.newArrayList(); 324 String line; 325 while ((line = reader.readLine()) != null) { 326 result.add(line); 327 } 328 return ImmutableList.copyOf(result); 329 } catch (Throwable e) { 330 throw closer.rethrow(e); 331 } finally { 332 closer.close(); 333 } 334 } 335 336 /** 337 * Reads lines of text from this source, processing each line as it is read using the given 338 * {@link LineProcessor processor}. Stops when all lines have been processed or the processor 339 * returns {@code false} and returns the result produced by the processor. 340 * 341 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 342 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or 343 * {@code \n}. If the source's content does not end in a line termination sequence, it is treated 344 * as if it does. 345 * 346 * @throws IOException if an I/O error occurs while reading from this source or if 347 * {@code processor} throws an {@code IOException} 348 * @since 16.0 349 */ 350 @Beta 351 @CanIgnoreReturnValue // some processors won't return a useful result 352 public <T> T readLines(LineProcessor<T> processor) throws IOException { 353 checkNotNull(processor); 354 355 Closer closer = Closer.create(); 356 try { 357 Reader reader = closer.register(openStream()); 358 return CharStreams.readLines(reader, processor); 359 } catch (Throwable e) { 360 throw closer.rethrow(e); 361 } finally { 362 closer.close(); 363 } 364 } 365 366 /** 367 * Reads all lines of text from this source, running the given {@code action} for each line as 368 * it is read. 369 * 370 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 371 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or 372 * {@code \n}. If the source's content does not end in a line termination sequence, it is treated 373 * as if it does. 374 * 375 * @throws IOException if an I/O error occurs while reading from this source or if 376 * {@code action} throws an {@code UncheckedIOException} 377 * @since 22.0 378 */ 379 @Beta 380 public void forEachLine(Consumer<? super String> action) throws IOException { 381 try (Stream<String> lines = lines()) { 382 // The lines should be ordered regardless in most cases, but use forEachOrdered to be sure 383 lines.forEachOrdered(action); 384 } catch (UncheckedIOException e) { 385 throw e.getCause(); 386 } 387 } 388 389 /** 390 * Returns whether the source has zero chars. The default implementation returns true if 391 * {@link #lengthIfKnown} returns zero, falling back to opening a stream and checking for EOF if 392 * the length is not known. 393 * 394 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 395 * chars are actually available for reading. This means that a source may return {@code true} from 396 * {@code isEmpty()} despite having readable content. 397 * 398 * @throws IOException if an I/O error occurs 399 * @since 15.0 400 */ 401 public boolean isEmpty() throws IOException { 402 Optional<Long> lengthIfKnown = lengthIfKnown(); 403 if (lengthIfKnown.isPresent() && lengthIfKnown.get() == 0L) { 404 return true; 405 } 406 Closer closer = Closer.create(); 407 try { 408 Reader reader = closer.register(openStream()); 409 return reader.read() == -1; 410 } catch (Throwable e) { 411 throw closer.rethrow(e); 412 } finally { 413 closer.close(); 414 } 415 } 416 417 /** 418 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 419 * the source will contain the concatenated data from the streams of the underlying sources. 420 * 421 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 422 * close the open underlying stream. 423 * 424 * @param sources the sources to concatenate 425 * @return a {@code CharSource} containing the concatenated data 426 * @since 15.0 427 */ 428 public static CharSource concat(Iterable<? extends CharSource> sources) { 429 return new ConcatenatedCharSource(sources); 430 } 431 432 /** 433 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 434 * the source will contain the concatenated data from the streams of the underlying sources. 435 * 436 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 437 * close the open underlying stream. 438 * 439 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 440 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 441 * eagerly fetches data for each source when iterated (rather than producing sources that only 442 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 443 * possible. 444 * 445 * @param sources the sources to concatenate 446 * @return a {@code CharSource} containing the concatenated data 447 * @throws NullPointerException if any of {@code sources} is {@code null} 448 * @since 15.0 449 */ 450 public static CharSource concat(Iterator<? extends CharSource> sources) { 451 return concat(ImmutableList.copyOf(sources)); 452 } 453 454 /** 455 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 456 * the source will contain the concatenated data from the streams of the underlying sources. 457 * 458 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 459 * close the open underlying stream. 460 * 461 * @param sources the sources to concatenate 462 * @return a {@code CharSource} containing the concatenated data 463 * @throws NullPointerException if any of {@code sources} is {@code null} 464 * @since 15.0 465 */ 466 public static CharSource concat(CharSource... sources) { 467 return concat(ImmutableList.copyOf(sources)); 468 } 469 470 /** 471 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 472 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 473 * the {@code charSequence} is mutated while it is being read, so don't do that. 474 * 475 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 476 */ 477 public static CharSource wrap(CharSequence charSequence) { 478 return new CharSequenceCharSource(charSequence); 479 } 480 481 /** 482 * Returns an immutable {@link CharSource} that contains no characters. 483 * 484 * @since 15.0 485 */ 486 public static CharSource empty() { 487 return EmptyCharSource.INSTANCE; 488 } 489 490 /** 491 * A byte source that reads chars from this source and encodes them as bytes using a charset. 492 */ 493 private final class AsByteSource extends ByteSource { 494 495 final Charset charset; 496 497 AsByteSource(Charset charset) { 498 this.charset = checkNotNull(charset); 499 } 500 501 @Override 502 public CharSource asCharSource(Charset charset) { 503 if (charset.equals(this.charset)) { 504 return CharSource.this; 505 } 506 return super.asCharSource(charset); 507 } 508 509 @Override 510 public InputStream openStream() throws IOException { 511 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 512 } 513 514 @Override 515 public String toString() { 516 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 517 } 518 } 519 520 private static class CharSequenceCharSource extends CharSource { 521 522 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 523 524 private final CharSequence seq; 525 526 protected CharSequenceCharSource(CharSequence seq) { 527 this.seq = checkNotNull(seq); 528 } 529 530 @Override 531 public Reader openStream() { 532 return new CharSequenceReader(seq); 533 } 534 535 @Override 536 public String read() { 537 return seq.toString(); 538 } 539 540 @Override 541 public boolean isEmpty() { 542 return seq.length() == 0; 543 } 544 545 @Override 546 public long length() { 547 return seq.length(); 548 } 549 550 @Override 551 public Optional<Long> lengthIfKnown() { 552 return Optional.of((long) seq.length()); 553 } 554 555 /** 556 * Returns an iterator over the lines in the string. If the string ends in a newline, a final 557 * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). 558 */ 559 private Iterator<String> linesIterator() { 560 return new AbstractIterator<String>() { 561 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 562 563 @Override 564 protected String computeNext() { 565 if (lines.hasNext()) { 566 String next = lines.next(); 567 // skip last line if it's empty 568 if (lines.hasNext() || !next.isEmpty()) { 569 return next; 570 } 571 } 572 return endOfData(); 573 } 574 }; 575 } 576 577 @Override 578 public Stream<String> lines() { 579 return Streams.stream(linesIterator()); 580 } 581 582 @Override 583 public String readFirstLine() { 584 Iterator<String> lines = linesIterator(); 585 return lines.hasNext() ? lines.next() : null; 586 } 587 588 @Override 589 public ImmutableList<String> readLines() { 590 return ImmutableList.copyOf(linesIterator()); 591 } 592 593 @Override 594 public <T> T readLines(LineProcessor<T> processor) throws IOException { 595 Iterator<String> lines = linesIterator(); 596 while (lines.hasNext()) { 597 if (!processor.processLine(lines.next())) { 598 break; 599 } 600 } 601 return processor.getResult(); 602 } 603 604 @Override 605 public String toString() { 606 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 607 } 608 } 609 610 private static final class EmptyCharSource extends CharSequenceCharSource { 611 612 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 613 614 private EmptyCharSource() { 615 super(""); 616 } 617 618 @Override 619 public String toString() { 620 return "CharSource.empty()"; 621 } 622 } 623 624 private static final class ConcatenatedCharSource extends CharSource { 625 626 private final Iterable<? extends CharSource> sources; 627 628 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 629 this.sources = checkNotNull(sources); 630 } 631 632 @Override 633 public Reader openStream() throws IOException { 634 return new MultiReader(sources.iterator()); 635 } 636 637 @Override 638 public boolean isEmpty() throws IOException { 639 for (CharSource source : sources) { 640 if (!source.isEmpty()) { 641 return false; 642 } 643 } 644 return true; 645 } 646 647 @Override 648 public Optional<Long> lengthIfKnown() { 649 long result = 0L; 650 for (CharSource source : sources) { 651 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 652 if (!lengthIfKnown.isPresent()) { 653 return Optional.absent(); 654 } 655 result += lengthIfKnown.get(); 656 } 657 return Optional.of(result); 658 } 659 660 @Override 661 public long length() throws IOException { 662 long result = 0L; 663 for (CharSource source : sources) { 664 result += source.length(); 665 } 666 return result; 667 } 668 669 @Override 670 public String toString() { 671 return "CharSource.concat(" + sources + ")"; 672 } 673 } 674}