001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.io; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtIncompatible; 021import com.google.common.base.Ascii; 022import com.google.common.base.Optional; 023import com.google.common.base.Splitter; 024import com.google.common.collect.AbstractIterator; 025import com.google.common.collect.ImmutableList; 026import com.google.common.collect.Lists; 027import com.google.errorprone.annotations.CanIgnoreReturnValue; 028import java.io.BufferedReader; 029import java.io.IOException; 030import java.io.InputStream; 031import java.io.Reader; 032import java.io.StringReader; 033import java.io.Writer; 034import java.nio.charset.Charset; 035import java.util.Iterator; 036import java.util.List; 037import org.checkerframework.checker.nullness.compatqual.NullableDecl; 038 039/** 040 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a {@code 041 * CharSource} is not an open, stateful stream of characters that can be read and closed. Instead, 042 * it is an immutable <i>supplier</i> of {@code Reader} instances. 043 * 044 * <p>{@code CharSource} provides two kinds of methods: 045 * 046 * <ul> 047 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent 048 * instance each time they are called. The caller is responsible for ensuring that the 049 * returned reader is closed. 050 * <li><b>Convenience methods:</b> These are implementations of common operations that are 051 * typically implemented by opening a reader using one of the methods in the first category, 052 * doing something and finally closing the reader that was opened. 053 * </ul> 054 * 055 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source 056 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n}, {@code 057 * \r} or {@code \r\n}, do not include the line separator in each line and do not consider there to 058 * be an empty line at the end if the contents are terminated with a line separator. 059 * 060 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character 061 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}. 062 * 063 * @since 14.0 064 * @author Colin Decker 065 */ 066@GwtIncompatible 067public abstract class CharSource { 068 069 /** Constructor for use by subclasses. */ 070 protected CharSource() {} 071 072 /** 073 * Returns a {@link ByteSource} view of this char source that encodes chars read from this source 074 * as bytes using the given {@link Charset}. 075 * 076 * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset, 077 * the default implementation of this method will ensure that the original {@code CharSource} is 078 * returned, rather than round-trip encoding. Subclasses that override this method should behave 079 * the same way. 080 * 081 * @since 20.0 082 */ 083 @Beta 084 public ByteSource asByteSource(Charset charset) { 085 return new AsByteSource(charset); 086 } 087 088 /** 089 * Opens a new {@link Reader} for reading from this source. This method returns a new, independent 090 * reader each time it is called. 091 * 092 * <p>The caller is responsible for ensuring that the returned reader is closed. 093 * 094 * @throws IOException if an I/O error occurs while opening the reader 095 */ 096 public abstract Reader openStream() throws IOException; 097 098 /** 099 * Opens a new {@link BufferedReader} for reading from this source. This method returns a new, 100 * independent reader each time it is called. 101 * 102 * <p>The caller is responsible for ensuring that the returned reader is closed. 103 * 104 * @throws IOException if an I/O error occurs while of opening the reader 105 */ 106 public BufferedReader openBufferedStream() throws IOException { 107 Reader reader = openStream(); 108 return (reader instanceof BufferedReader) 109 ? (BufferedReader) reader 110 : new BufferedReader(reader); 111 } 112 113 /** 114 * Returns the size of this source in chars, if the size can be easily determined without actually 115 * opening the data stream. 116 * 117 * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a {@code 118 * CharSequence}, may return a non-absent value. Note that in such cases, it is <i>possible</i> 119 * that this method will return a different number of chars than would be returned by reading all 120 * of the chars. 121 * 122 * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may 123 * return a different number of chars if the contents are changed. 124 * 125 * @since 19.0 126 */ 127 @Beta 128 public Optional<Long> lengthIfKnown() { 129 return Optional.absent(); 130 } 131 132 /** 133 * Returns the length of this source in chars, even if doing so requires opening and traversing an 134 * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}. 135 * 136 * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If 137 * absent, it will fall back to a heavyweight operation that will open a stream, {@link 138 * Reader#skip(long) skip} to the end of the stream, and return the total number of chars that 139 * were skipped. 140 * 141 * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient 142 * implementation, it is <i>possible</i> that this method will return a different number of chars 143 * than would be returned by reading all of the chars. 144 * 145 * <p>In either case, for mutable sources such as files, a subsequent read may return a different 146 * number of chars if the contents are changed. 147 * 148 * @throws IOException if an I/O error occurs while reading the length of this source 149 * @since 19.0 150 */ 151 @Beta 152 public long length() throws IOException { 153 Optional<Long> lengthIfKnown = lengthIfKnown(); 154 if (lengthIfKnown.isPresent()) { 155 return lengthIfKnown.get(); 156 } 157 158 Closer closer = Closer.create(); 159 try { 160 Reader reader = closer.register(openStream()); 161 return countBySkipping(reader); 162 } catch (Throwable e) { 163 throw closer.rethrow(e); 164 } finally { 165 closer.close(); 166 } 167 } 168 169 private long countBySkipping(Reader reader) throws IOException { 170 long count = 0; 171 long read; 172 while ((read = reader.skip(Long.MAX_VALUE)) != 0) { 173 count += read; 174 } 175 return count; 176 } 177 178 /** 179 * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}). 180 * Does not close {@code appendable} if it is {@code Closeable}. 181 * 182 * @return the number of characters copied 183 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 184 * appendable} 185 */ 186 @CanIgnoreReturnValue 187 public long copyTo(Appendable appendable) throws IOException { 188 checkNotNull(appendable); 189 190 Closer closer = Closer.create(); 191 try { 192 Reader reader = closer.register(openStream()); 193 return CharStreams.copy(reader, appendable); 194 } catch (Throwable e) { 195 throw closer.rethrow(e); 196 } finally { 197 closer.close(); 198 } 199 } 200 201 /** 202 * Copies the contents of this source to the given sink. 203 * 204 * @return the number of characters copied 205 * @throws IOException if an I/O error occurs while reading from this source or writing to {@code 206 * sink} 207 */ 208 @CanIgnoreReturnValue 209 public long copyTo(CharSink sink) throws IOException { 210 checkNotNull(sink); 211 212 Closer closer = Closer.create(); 213 try { 214 Reader reader = closer.register(openStream()); 215 Writer writer = closer.register(sink.openStream()); 216 return CharStreams.copy(reader, writer); 217 } catch (Throwable e) { 218 throw closer.rethrow(e); 219 } finally { 220 closer.close(); 221 } 222 } 223 224 /** 225 * Reads the contents of this source as a string. 226 * 227 * @throws IOException if an I/O error occurs while reading from this source 228 */ 229 public String read() throws IOException { 230 Closer closer = Closer.create(); 231 try { 232 Reader reader = closer.register(openStream()); 233 return CharStreams.toString(reader); 234 } catch (Throwable e) { 235 throw closer.rethrow(e); 236 } finally { 237 closer.close(); 238 } 239 } 240 241 /** 242 * Reads the first line of this source as a string. Returns {@code null} if this source is empty. 243 * 244 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 245 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 246 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 247 * it does. 248 * 249 * @throws IOException if an I/O error occurs while reading from this source 250 */ 251 @NullableDecl 252 public String readFirstLine() throws IOException { 253 Closer closer = Closer.create(); 254 try { 255 BufferedReader reader = closer.register(openBufferedStream()); 256 return reader.readLine(); 257 } catch (Throwable e) { 258 throw closer.rethrow(e); 259 } finally { 260 closer.close(); 261 } 262 } 263 264 /** 265 * Reads all the lines of this source as a list of strings. The returned list will be empty if 266 * this source is empty. 267 * 268 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 269 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 270 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 271 * it does. 272 * 273 * @throws IOException if an I/O error occurs while reading from this source 274 */ 275 public ImmutableList<String> readLines() throws IOException { 276 Closer closer = Closer.create(); 277 try { 278 BufferedReader reader = closer.register(openBufferedStream()); 279 List<String> result = Lists.newArrayList(); 280 String line; 281 while ((line = reader.readLine()) != null) { 282 result.add(line); 283 } 284 return ImmutableList.copyOf(result); 285 } catch (Throwable e) { 286 throw closer.rethrow(e); 287 } finally { 288 closer.close(); 289 } 290 } 291 292 /** 293 * Reads lines of text from this source, processing each line as it is read using the given {@link 294 * LineProcessor processor}. Stops when all lines have been processed or the processor returns 295 * {@code false} and returns the result produced by the processor. 296 * 297 * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of 298 * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or {@code 299 * \n}. If the source's content does not end in a line termination sequence, it is treated as if 300 * it does. 301 * 302 * @throws IOException if an I/O error occurs while reading from this source or if {@code 303 * processor} throws an {@code IOException} 304 * @since 16.0 305 */ 306 @Beta 307 @CanIgnoreReturnValue // some processors won't return a useful result 308 public <T> T readLines(LineProcessor<T> processor) throws IOException { 309 checkNotNull(processor); 310 311 Closer closer = Closer.create(); 312 try { 313 Reader reader = closer.register(openStream()); 314 return CharStreams.readLines(reader, processor); 315 } catch (Throwable e) { 316 throw closer.rethrow(e); 317 } finally { 318 closer.close(); 319 } 320 } 321 322 /** 323 * Returns whether the source has zero chars. The default implementation first checks {@link 324 * #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be 325 * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF. 326 * 327 * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that 328 * chars are actually available for reading. This means that a source may return {@code true} from 329 * {@code isEmpty()} despite having readable content. 330 * 331 * @throws IOException if an I/O error occurs 332 * @since 15.0 333 */ 334 public boolean isEmpty() throws IOException { 335 Optional<Long> lengthIfKnown = lengthIfKnown(); 336 if (lengthIfKnown.isPresent()) { 337 return lengthIfKnown.get() == 0L; 338 } 339 Closer closer = Closer.create(); 340 try { 341 Reader reader = closer.register(openStream()); 342 return reader.read() == -1; 343 } catch (Throwable e) { 344 throw closer.rethrow(e); 345 } finally { 346 closer.close(); 347 } 348 } 349 350 /** 351 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 352 * the source will contain the concatenated data from the streams of the underlying sources. 353 * 354 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 355 * close the open underlying stream. 356 * 357 * @param sources the sources to concatenate 358 * @return a {@code CharSource} containing the concatenated data 359 * @since 15.0 360 */ 361 public static CharSource concat(Iterable<? extends CharSource> sources) { 362 return new ConcatenatedCharSource(sources); 363 } 364 365 /** 366 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 367 * the source will contain the concatenated data from the streams of the underlying sources. 368 * 369 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 370 * close the open underlying stream. 371 * 372 * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method 373 * is called. This will fail if the iterator is infinite and may cause problems if the iterator 374 * eagerly fetches data for each source when iterated (rather than producing sources that only 375 * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if 376 * possible. 377 * 378 * @param sources the sources to concatenate 379 * @return a {@code CharSource} containing the concatenated data 380 * @throws NullPointerException if any of {@code sources} is {@code null} 381 * @since 15.0 382 */ 383 public static CharSource concat(Iterator<? extends CharSource> sources) { 384 return concat(ImmutableList.copyOf(sources)); 385 } 386 387 /** 388 * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from 389 * the source will contain the concatenated data from the streams of the underlying sources. 390 * 391 * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will 392 * close the open underlying stream. 393 * 394 * @param sources the sources to concatenate 395 * @return a {@code CharSource} containing the concatenated data 396 * @throws NullPointerException if any of {@code sources} is {@code null} 397 * @since 15.0 398 */ 399 public static CharSource concat(CharSource... sources) { 400 return concat(ImmutableList.copyOf(sources)); 401 } 402 403 /** 404 * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the 405 * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if 406 * the {@code charSequence} is mutated while it is being read, so don't do that. 407 * 408 * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)}) 409 */ 410 public static CharSource wrap(CharSequence charSequence) { 411 return charSequence instanceof String 412 ? new StringCharSource((String) charSequence) 413 : new CharSequenceCharSource(charSequence); 414 } 415 416 /** 417 * Returns an immutable {@link CharSource} that contains no characters. 418 * 419 * @since 15.0 420 */ 421 public static CharSource empty() { 422 return EmptyCharSource.INSTANCE; 423 } 424 425 /** A byte source that reads chars from this source and encodes them as bytes using a charset. */ 426 private final class AsByteSource extends ByteSource { 427 428 final Charset charset; 429 430 AsByteSource(Charset charset) { 431 this.charset = checkNotNull(charset); 432 } 433 434 @Override 435 public CharSource asCharSource(Charset charset) { 436 if (charset.equals(this.charset)) { 437 return CharSource.this; 438 } 439 return super.asCharSource(charset); 440 } 441 442 @Override 443 public InputStream openStream() throws IOException { 444 return new ReaderInputStream(CharSource.this.openStream(), charset, 8192); 445 } 446 447 @Override 448 public String toString() { 449 return CharSource.this.toString() + ".asByteSource(" + charset + ")"; 450 } 451 } 452 453 private static class CharSequenceCharSource extends CharSource { 454 455 private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r"); 456 457 protected final CharSequence seq; 458 459 protected CharSequenceCharSource(CharSequence seq) { 460 this.seq = checkNotNull(seq); 461 } 462 463 @Override 464 public Reader openStream() { 465 return new CharSequenceReader(seq); 466 } 467 468 @Override 469 public String read() { 470 return seq.toString(); 471 } 472 473 @Override 474 public boolean isEmpty() { 475 return seq.length() == 0; 476 } 477 478 @Override 479 public long length() { 480 return seq.length(); 481 } 482 483 @Override 484 public Optional<Long> lengthIfKnown() { 485 return Optional.of((long) seq.length()); 486 } 487 488 /** 489 * Returns an iterator over the lines in the string. If the string ends in a newline, a final 490 * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine(). 491 */ 492 private Iterator<String> linesIterator() { 493 return new AbstractIterator<String>() { 494 Iterator<String> lines = LINE_SPLITTER.split(seq).iterator(); 495 496 @Override 497 protected String computeNext() { 498 if (lines.hasNext()) { 499 String next = lines.next(); 500 // skip last line if it's empty 501 if (lines.hasNext() || !next.isEmpty()) { 502 return next; 503 } 504 } 505 return endOfData(); 506 } 507 }; 508 } 509 510 @Override 511 public String readFirstLine() { 512 Iterator<String> lines = linesIterator(); 513 return lines.hasNext() ? lines.next() : null; 514 } 515 516 @Override 517 public ImmutableList<String> readLines() { 518 return ImmutableList.copyOf(linesIterator()); 519 } 520 521 @Override 522 public <T> T readLines(LineProcessor<T> processor) throws IOException { 523 Iterator<String> lines = linesIterator(); 524 while (lines.hasNext()) { 525 if (!processor.processLine(lines.next())) { 526 break; 527 } 528 } 529 return processor.getResult(); 530 } 531 532 @Override 533 public String toString() { 534 return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")"; 535 } 536 } 537 538 /** 539 * Subclass specialized for string instances. 540 * 541 * <p>Since Strings are immutable and built into the jdk we can optimize some operations 542 * 543 * <ul> 544 * <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can 545 * use {@link String#getChars(int, int, char[], int)} instead of copying characters one by 546 * one with {@link CharSequence#charAt(int)}. 547 * <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and {@link 548 * #copyTo(CharSink)}. We know this is correct since strings are immutable and so the length 549 * can't change, and it is faster because many writers and appendables are optimized for 550 * appending string instances. 551 * </ul> 552 */ 553 private static class StringCharSource extends CharSequenceCharSource { 554 protected StringCharSource(String seq) { 555 super(seq); 556 } 557 558 @Override 559 public Reader openStream() { 560 return new StringReader((String) seq); 561 } 562 563 @Override 564 public long copyTo(Appendable appendable) throws IOException { 565 appendable.append(seq); 566 return seq.length(); 567 } 568 569 @Override 570 public long copyTo(CharSink sink) throws IOException { 571 checkNotNull(sink); 572 Closer closer = Closer.create(); 573 try { 574 Writer writer = closer.register(sink.openStream()); 575 writer.write((String) seq); 576 return seq.length(); 577 } catch (Throwable e) { 578 throw closer.rethrow(e); 579 } finally { 580 closer.close(); 581 } 582 } 583 } 584 585 private static final class EmptyCharSource extends StringCharSource { 586 587 private static final EmptyCharSource INSTANCE = new EmptyCharSource(); 588 589 private EmptyCharSource() { 590 super(""); 591 } 592 593 @Override 594 public String toString() { 595 return "CharSource.empty()"; 596 } 597 } 598 599 private static final class ConcatenatedCharSource extends CharSource { 600 601 private final Iterable<? extends CharSource> sources; 602 603 ConcatenatedCharSource(Iterable<? extends CharSource> sources) { 604 this.sources = checkNotNull(sources); 605 } 606 607 @Override 608 public Reader openStream() throws IOException { 609 return new MultiReader(sources.iterator()); 610 } 611 612 @Override 613 public boolean isEmpty() throws IOException { 614 for (CharSource source : sources) { 615 if (!source.isEmpty()) { 616 return false; 617 } 618 } 619 return true; 620 } 621 622 @Override 623 public Optional<Long> lengthIfKnown() { 624 long result = 0L; 625 for (CharSource source : sources) { 626 Optional<Long> lengthIfKnown = source.lengthIfKnown(); 627 if (!lengthIfKnown.isPresent()) { 628 return Optional.absent(); 629 } 630 result += lengthIfKnown.get(); 631 } 632 return Optional.of(result); 633 } 634 635 @Override 636 public long length() throws IOException { 637 long result = 0L; 638 for (CharSource source : sources) { 639 result += source.length(); 640 } 641 return result; 642 } 643 644 @Override 645 public String toString() { 646 return "CharSource.concat(" + sources + ")"; 647 } 648 } 649}