001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.GwtIncompatible; 020import com.google.common.annotations.J2ktIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.errorprone.annotations.CanIgnoreReturnValue; 028import java.io.BufferedReader; 029import java.io.IOException; 030import java.io.InputStream; 031import java.io.Reader; 032import java.io.StringReader; 033import java.io.Writer; 034import java.nio.charset.Charset; 035import java.util.Iterator; 036import java.util.List; 037import javax.annotation.CheckForNull; 038import org.checkerframework.checker.nullness.qual.Nullable; 039 040/** 041 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code 042 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead, 043 * it is an immutable <i>supplier</i> of {@code Reader} instances. 044 * 045 * <p>{@code CharSource} provides two kinds of methods: 046 * 047 * <ul> 048 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 049 * instance each time they are called. The caller is responsible for ensuring that the 050 * returned reader is closed. 051 * <li><b>Convenience methods:</b> These are implementations of common operations that are 052 * typically implemented by opening a reader using one of the methods in the first category, 053 * doing something and finally closing the reader that was opened. 054 * </ul> 055 * 056 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 057 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code 058 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to 059 * be an empty line at the end if the contents are terminated with a line separator. 060 * 061 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 062 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 063 * 064 * <p><b>Note:</b> In general, {@code CharSource} is intended to be used for "file-like" sources 065 * that provide readers that are: 066 * 067 * <ul> 068 * <li><b>Finite:</b> Many operations, such as {@link #length()} and {@link #read()}, will either 069 * block indefinitely or fail if the source creates an infinite reader. 070 * <li><b>Non-destructive:</b> A <i>destructive</i> reader will consume or otherwise alter the 071 * source as they are read from it. A source that provides such readers will not be reusable, 072 * and operations that read from the stream (including {@link #length()}, in some 073 * implementations) will prevent further operations from completing as expected. 074 * </ul> 075 * 076 * @since 14.0 077 * @author Colin Decker 078 */ 079@J2ktIncompatible 080@GwtIncompatible 081@ElementTypesAreNonnullByDefault 082public abstract class CharSource { 083 084 /** Constructor for use by subclasses. */ 085 protected CharSource() {} 086 087 /** 088 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 089 * as bytes using the given {@link Charset}. 090 * 091 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 092 * the default implementation of this method will ensure that the original {@code CharSource} is 093 * returned, rather than round-trip encoding. Subclasses that override this method should behave 094 * the same way. 095 * 096 * @since 20.0 097 */ 098 public ByteSource asByteSource(Charset charset) { 099 return new AsByteSource(charset); 100 } 101 102 /** 103 * Opens a new {@link Reader} for reading from this source. This method returns a new, independent 104 * reader each time it is called. 105 * 106 * <p>The caller is responsible for ensuring that the returned reader is closed. 107 * 108 * @throws IOException if an I/O error occurs while opening the reader 109 */ 110 public abstract Reader openStream() throws IOException; 111 112 /** 113 * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, 114 * independent reader each time it is called. 115 * 116 * <p>The caller is responsible for ensuring that the returned reader is closed. 117 * 118 * @throws IOException if an I/O error occurs while of opening the reader 119 */ 120 public BufferedReader openBufferedStream() throws IOException { 121 Reader reader = openStream(); 122 return (reader instanceof BufferedReader) 123 ? (BufferedReader) reader 124 : new BufferedReader(reader); 125 } 126 127 /** 128 * Returns the size of this source in chars, if the size can be easily determined without actually 129 * opening the data stream. 130 * 131 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code 132 * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i> 133 * that this method will return a different number of chars than would be returned by reading all 134 * of the chars. 135 * 136 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 137 * return a different number of chars if the contents are changed. 138 * 139 * @since 19.0 140 */ 141 public Optional<Long> lengthIfKnown() { 142 return Optional.absent(); 143 } 144 145 /** 146 * Returns the length of this source in chars, even if doing so requires opening and traversing an 147 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 148 * 149 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 150 * absent, it will fall back to a heavyweight operation that will open a stream, {@link 151 * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that 152 * were skipped. 153 * 154 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 155 * implementation, it is <i>possible</i> that this method will return a different number of chars 156 * than would be returned by reading all of the chars. 157 * 158 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 159 * number of chars if the contents are changed. 160 * 161 * @throws IOException if an I/O error occurs while reading the length of this source 162 * @since 19.0 163 */ 164 public long length() throws IOException { 165 Optional<Long> lengthIfKnown = lengthIfKnown(); 166 if (lengthIfKnown.isPresent()) { 167 return lengthIfKnown.get(); 168 } 169 170 Closer closer = Closer.create(); 171 try { 172 Reader reader = closer.register(openStream()); 173 return countBySkipping(reader); 174 } catch (Throwable e) { 175 throw closer.rethrow(e); 176 } finally { 177 closer.close(); 178 } 179 } 180 181 private long countBySkipping(Reader reader) throws IOException { 182 long count = 0; 183 long read; 184 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 185 count += read; 186 } 187 return count; 188 } 189 190 /** 191 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 192 * Does not close {@code appendable} if it is {@code Closeable}. 193 * 194 * @return the number of characters copied 195 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 196 * appendable} 197 */ 198 @CanIgnoreReturnValue 199 public long copyTo(Appendable appendable) throws IOException { 200 checkNotNull(appendable); 201 202 Closer closer = Closer.create(); 203 try { 204 Reader reader = closer.register(openStream()); 205 return CharStreams.copy(reader, appendable); 206 } catch (Throwable e) { 207 throw closer.rethrow(e); 208 } finally { 209 closer.close(); 210 } 211 } 212 213 /** 214 * Copies the contents of this source to the given sink. 215 * 216 * @return the number of characters copied 217 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 218 * sink} 219 */ 220 @CanIgnoreReturnValue 221 public long copyTo(CharSink sink) throws IOException { 222 checkNotNull(sink); 223 224 Closer closer = Closer.create(); 225 try { 226 Reader reader = closer.register(openStream()); 227 Writer writer = closer.register(sink.openStream()); 228 return CharStreams.copy(reader, writer); 229 } catch (Throwable e) { 230 throw closer.rethrow(e); 231 } finally { 232 closer.close(); 233 } 234 } 235 236 /** 237 * Reads the contents of this source as a string. 238 * 239 * @throws IOException if an I/O error occurs while reading from this source 240 */ 241 public String read() throws IOException { 242 Closer closer = Closer.create(); 243 try { 244 Reader reader = closer.register(openStream()); 245 return CharStreams.toString(reader); 246 } catch (Throwable e) { 247 throw closer.rethrow(e); 248 } finally { 249 closer.close(); 250 } 251 } 252 253 /** 254 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 255 * 256 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 257 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 258 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 259 * it does. 260 * 261 * @throws IOException if an I/O error occurs while reading from this source 262 */ 263 @CheckForNull 264 public String readFirstLine() throws IOException { 265 Closer closer = Closer.create(); 266 try { 267 BufferedReader reader = closer.register(openBufferedStream()); 268 return reader.readLine(); 269 } catch (Throwable e) { 270 throw closer.rethrow(e); 271 } finally { 272 closer.close(); 273 } 274 } 275 276 /** 277 * Reads all the lines of this source as a list of strings. The returned list will be empty if 278 * this source is empty. 279 * 280 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 281 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 282 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 283 * it does. 284 * 285 * @throws IOException if an I/O error occurs while reading from this source 286 */ 287 public ImmutableList<String> readLines() throws IOException { 288 Closer closer = Closer.create(); 289 try { 290 BufferedReader reader = closer.register(openBufferedStream()); 291 List<String> result = Lists.newArrayList(); 292 String line; 293 while ((line = reader.readLine()) != null) { 294 result.add(line); 295 } 296 return ImmutableList.copyOf(result); 297 } catch (Throwable e) { 298 throw closer.rethrow(e); 299 } finally { 300 closer.close(); 301 } 302 } 303 304 /** 305 * Reads lines of text from this source, processing each line as it is read using the given {@link 306 * LineProcessor processor}. Stops when all lines have been processed or the processor returns 307 * {@code false} and returns the result produced by the processor. 308 * 309 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 310 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 311 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 312 * it does. 313 * 314 * @throws IOException if an I/O error occurs while reading from this source or if {@code 315 * processor} throws an {@code IOException} 316 * @since 16.0 317 */ 318 @CanIgnoreReturnValue // some processors won't return a useful result 319 @ParametricNullness 320 public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException { 321 checkNotNull(processor); 322 323 Closer closer = Closer.create(); 324 try { 325 Reader reader = closer.register(openStream()); 326 return CharStreams.readLines(reader, processor); 327 } catch (Throwable e) { 328 throw closer.rethrow(e); 329 } finally { 330 closer.close(); 331 } 332 } 333 334 /** 335 * Returns whether the source has zero chars. The default implementation first checks {@link 336 * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be 337 * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. 338 * 339 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 340 * chars are actually available for reading. This means that a source may return {@code true} from 341 * {@code isEmpty()} despite having readable content. 342 * 343 * @throws IOException if an I/O error occurs 344 * @since 15.0 345 */ 346 public boolean isEmpty() throws IOException { 347 Optional<Long> lengthIfKnown = lengthIfKnown(); 348 if (lengthIfKnown.isPresent()) { 349 return lengthIfKnown.get() == 0L; 350 } 351 Closer closer = Closer.create(); 352 try { 353 Reader reader = closer.register(openStream()); 354 return reader.read() == -1; 355 } catch (Throwable e) { 356 throw closer.rethrow(e); 357 } finally { 358 closer.close(); 359 } 360 } 361 362 /** 363 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 364 * the source will contain the concatenated data from the streams of the underlying sources. 365 * 366 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 367 * close the open underlying stream. 368 * 369 * @param sources the sources to concatenate 370 * @return a {@code CharSource} containing the concatenated data 371 * @since 15.0 372 */ 373 public static CharSource concat(Iterable<? extends CharSource> sources) { 374 return new ConcatenatedCharSource(sources); 375 } 376 377 /** 378 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 379 * the source will contain the concatenated data from the streams of the underlying sources. 380 * 381 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 382 * close the open underlying stream. 383 * 384 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 385 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 386 * eagerly fetches data for each source when iterated (rather than producing sources that only 387 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 388 * possible. 389 * 390 * @param sources the sources to concatenate 391 * @return a {@code CharSource} containing the concatenated data 392 * @throws NullPointerException if any of {@code sources} is {@code null} 393 * @since 15.0 394 */ 395 public static CharSource concat(Iterator<? extends CharSource> sources) { 396 return concat(ImmutableList.copyOf(sources)); 397 } 398 399 /** 400 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 401 * the source will contain the concatenated data from the streams of the underlying sources. 402 * 403 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 404 * close the open underlying stream. 405 * 406 * @param sources the sources to concatenate 407 * @return a {@code CharSource} containing the concatenated data 408 * @throws NullPointerException if any of {@code sources} is {@code null} 409 * @since 15.0 410 */ 411 public static CharSource concat(CharSource... sources) { 412 return concat(ImmutableList.copyOf(sources)); 413 } 414 415 /** 416 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 417 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 418 * the {@code charSequence} is mutated while it is being read, so don't do that. 419 * 420 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 421 */ 422 public static CharSource wrap(CharSequence charSequence) { 423 return charSequence instanceof String 424 ? new StringCharSource((String) charSequence) 425 : new CharSequenceCharSource(charSequence); 426 } 427 428 /** 429 * Returns an immutable {@link CharSource} that contains no characters. 430 * 431 * @since 15.0 432 */ 433 public static CharSource empty() { 434 return EmptyCharSource.INSTANCE; 435 } 436 437 /** A byte source that reads chars from this source and encodes them as bytes using a charset. */ 438 private final class AsByteSource extends ByteSource { 439 440 final Charset charset; 441 442 AsByteSource(Charset charset) { 443 this.charset = checkNotNull(charset); 444 } 445 446 @Override 447 public CharSource asCharSource(Charset charset) { 448 if (charset.equals(this.charset)) { 449 return CharSource.this; 450 } 451 return super.asCharSource(charset); 452 } 453 454 @Override 455 public InputStream openStream() throws IOException { 456 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 457 } 458 459 @Override 460 public String toString() { 461 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 462 } 463 } 464 465 private static class CharSequenceCharSource extends CharSource { 466 467 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 468 469 protected final CharSequence seq; 470 471 protected CharSequenceCharSource(CharSequence seq) { 472 this.seq = checkNotNull(seq); 473 } 474 475 @Override 476 public Reader openStream() { 477 return new CharSequenceReader(seq); 478 } 479 480 @Override 481 public String read() { 482 return seq.toString(); 483 } 484 485 @Override 486 public boolean isEmpty() { 487 return seq.length() == 0; 488 } 489 490 @Override 491 public long length() { 492 return seq.length(); 493 } 494 495 @Override 496 public Optional<Long> lengthIfKnown() { 497 return Optional.of((long) seq.length()); 498 } 499 500 /** 501 * Returns an iterator over the lines in the string. If the string ends in a newline, a final 502 * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). 503 */ 504 private Iterator<String> linesIterator() { 505 return new AbstractIterator<String>() { 506 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 507 508 @Override 509 @CheckForNull 510 protected String computeNext() { 511 if (lines.hasNext()) { 512 String next = lines.next(); 513 // skip last line if it's empty 514 if (lines.hasNext() || !next.isEmpty()) { 515 return next; 516 } 517 } 518 return endOfData(); 519 } 520 }; 521 } 522 523 @Override 524 @CheckForNull 525 public String readFirstLine() { 526 Iterator<String> lines = linesIterator(); 527 return lines.hasNext() ? lines.next() : null; 528 } 529 530 @Override 531 public ImmutableList<String> readLines() { 532 return ImmutableList.copyOf(linesIterator()); 533 } 534 535 @Override 536 @ParametricNullness 537 public <T extends @Nullable Object> T readLines(LineProcessor<T> processor) throws IOException { 538 Iterator<String> lines = linesIterator(); 539 while (lines.hasNext()) { 540 if (!processor.processLine(lines.next())) { 541 break; 542 } 543 } 544 return processor.getResult(); 545 } 546 547 @Override 548 public String toString() { 549 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 550 } 551 } 552 553 /** 554 * Subclass specialized for string instances. 555 * 556 * <p>Since Strings are immutable and built into the jdk we can optimize some operations 557 * 558 * <ul> 559 * <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can 560 * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by 561 * one with {@link CharSequence#charAt(int)}. 562 * <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link 563 * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length 564 * can't change, and it is faster because many writers and appendables are optimized for 565 * appending string instances. 566 * </ul> 567 */ 568 private static class StringCharSource extends CharSequenceCharSource { 569 protected StringCharSource(String seq) { 570 super(seq); 571 } 572 573 @Override 574 public Reader openStream() { 575 return new StringReader((String) seq); 576 } 577 578 @Override 579 public long copyTo(Appendable appendable) throws IOException { 580 appendable.append(seq); 581 return seq.length(); 582 } 583 584 @Override 585 public long copyTo(CharSink sink) throws IOException { 586 checkNotNull(sink); 587 Closer closer = Closer.create(); 588 try { 589 Writer writer = closer.register(sink.openStream()); 590 writer.write((String) seq); 591 return seq.length(); 592 } catch (Throwable e) { 593 throw closer.rethrow(e); 594 } finally { 595 closer.close(); 596 } 597 } 598 } 599 600 private static final class EmptyCharSource extends StringCharSource { 601 602 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 603 604 private EmptyCharSource() { 605 super(""); 606 } 607 608 @Override 609 public String toString() { 610 return "CharSource.empty()"; 611 } 612 } 613 614 private static final class ConcatenatedCharSource extends CharSource { 615 616 private final Iterable<? extends CharSource> sources; 617 618 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 619 this.sources = checkNotNull(sources); 620 } 621 622 @Override 623 public Reader openStream() throws IOException { 624 return new MultiReader(sources.iterator()); 625 } 626 627 @Override 628 public boolean isEmpty() throws IOException { 629 for (CharSource source : sources) { 630 if (!source.isEmpty()) { 631 return false; 632 } 633 } 634 return true; 635 } 636 637 @Override 638 public Optional<Long> lengthIfKnown() { 639 long result = 0L; 640 for (CharSource source : sources) { 641 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 642 if (!lengthIfKnown.isPresent()) { 643 return Optional.absent(); 644 } 645 result += lengthIfKnown.get(); 646 } 647 return Optional.of(result); 648 } 649 650 @Override 651 public long length() throws IOException { 652 long result = 0L; 653 for (CharSource source : sources) { 654 result += source.length(); 655 } 656 return result; 657 } 658 659 @Override 660 public String toString() { 661 return "CharSource.concat(" + sources + ")"; 662 } 663 } 664}