/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.io;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.base.Ascii;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.List;
import javax.annotation.Nullable;

/**
 * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
 * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
 * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
 *
 * <p>{@code CharSource} provides two kinds of methods:
 * <ul>
 * <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
 *     instance each time they are called. The caller is responsible for ensuring that the returned
 *     reader is closed.
 * <li><b>Convenience methods:</b> These are implementations of common operations that are typically
 *     implemented by opening a reader using one of the methods in the first category, doing
 *     something and finally closing the reader that was opened.
 * </ul>
 *
 * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the source
 * into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
 * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
 * there to be an empty line at the end if the contents are terminated with a line separator.
 *
 * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
 * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
 *
 * @since 14.0
 * @author Colin Decker
 */
@GwtIncompatible
public abstract class CharSource {

  /**
   * Constructor for use by subclasses.
   */
  protected CharSource() {}

  /**
   * Returns a {@link ByteSource} view of this char source that encodes chars read from this source
   * as bytes using the given {@link Charset}.
   *
   * <p>If {@link ByteSource#asCharSource} is called on the returned source with the same charset,
   * the default implementation of this method will ensure that the original {@code CharSource} is
   * returned, rather than round-trip encoding. Subclasses that override this method should behave
   * the same way.
   *
   * @param charset the charset used to encode the chars of this source
   * @throws NullPointerException if {@code charset} is {@code null}
   * @since 20.0
   */
  @Beta
  public ByteSource asByteSource(Charset charset) {
    return new AsByteSource(charset);
  }

  /**
   * Opens a new {@link Reader} for reading from this source. This method returns a new, independent
   * reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public abstract Reader openStream() throws IOException;

  /**
   * Opens a new {@link BufferedReader} for reading from this source. This method returns a new,
   * independent reader each time it is called.
   *
   * <p>The caller is responsible for ensuring that the returned reader is closed.
   *
   * @throws IOException if an I/O error occurs while opening the reader
   */
  public BufferedReader openBufferedStream() throws IOException {
    Reader reader = openStream();
    // Avoid double buffering when the source already hands out a BufferedReader.
    return (reader instanceof BufferedReader)
        ? (BufferedReader) reader
        : new BufferedReader(reader);
  }

  /**
   * Returns the size of this source in chars, if the size can be easily determined without actually
   * opening the data stream.
   *
   * <p>The default implementation returns {@link Optional#absent}. Some sources, such as a
   * {@code CharSequence}, may return a non-absent value. Note that in such cases, it is
   * <i>possible</i> that this method will return a different number of chars than would be returned
   * by reading all of the chars.
   *
   * <p>Additionally, for mutable sources such as {@code StringBuilder}s, a subsequent read may
   * return a different number of chars if the contents are changed.
   *
   * @since 19.0
   */
  @Beta
  public Optional<Long> lengthIfKnown() {
    return Optional.absent();
  }

  /**
   * Returns the length of this source in chars, even if doing so requires opening and traversing an
   * entire stream. To avoid a potentially expensive operation, see {@link #lengthIfKnown}.
   *
   * <p>The default implementation calls {@link #lengthIfKnown} and returns the value if present. If
   * absent, it will fall back to a heavyweight operation that will open a stream,
   * {@link Reader#skip(long) skip} to the end of the stream, and return the total number of chars
   * that were skipped.
   *
   * <p>Note that for sources that implement {@link #lengthIfKnown} to provide a more efficient
   * implementation, it is <i>possible</i> that this method will return a different number of chars
   * than would be returned by reading all of the chars.
   *
   * <p>In either case, for mutable sources such as files, a subsequent read may return a different
   * number of chars if the contents are changed.
   *
   * @throws IOException if an I/O error occurs while reading the length of this source
   * @since 19.0
   */
  @Beta
  public long length() throws IOException {
    Optional<Long> lengthIfKnown = lengthIfKnown();
    if (lengthIfKnown.isPresent()) {
      return lengthIfKnown.get();
    }

    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      return countBySkipping(reader);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Counts the chars remaining in {@code reader} by repeatedly calling
   * {@link Reader#skip(long) skip(Long.MAX_VALUE)} and summing the amounts skipped, stopping at the
   * first call that reports that nothing was skipped. Does not close {@code reader}.
   */
  private long countBySkipping(Reader reader) throws IOException {
    long count = 0;
    long read;
    while ((read = reader.skip(Long.MAX_VALUE)) != 0) {
      count += read;
    }
    return count;
  }

  /**
   * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
   * Does not close {@code appendable} if it is {@code Closeable}.
   *
   * @return the number of characters copied
   * @throws IOException if an I/O error occurs while reading from this source or writing to
   *     {@code appendable}
   */
  @CanIgnoreReturnValue
  public long copyTo(Appendable appendable) throws IOException {
    checkNotNull(appendable);

    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      return CharStreams.copy(reader, appendable);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Copies the contents of this source to the given sink.
   *
   * @return the number of characters copied
   * @throws IOException if an I/O error occurs while reading from this source or writing to
   *     {@code sink}
   */
  @CanIgnoreReturnValue
  public long copyTo(CharSink sink) throws IOException {
    checkNotNull(sink);

    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      Writer writer = closer.register(sink.openStream());
      return CharStreams.copy(reader, writer);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads the contents of this source as a string.
   *
   * @throws IOException if an I/O error occurs while reading from this source
   */
  public String read() throws IOException {
    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      return CharStreams.toString(reader);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads the first line of this source as a string. Returns {@code null} if this source is empty.
   *
   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
   * as if it does.
   *
   * @throws IOException if an I/O error occurs while reading from this source
   */
  @Nullable
  public String readFirstLine() throws IOException {
    Closer closer = Closer.create();
    try {
      BufferedReader reader = closer.register(openBufferedStream());
      return reader.readLine();
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads all the lines of this source as a list of strings. The returned list will be empty if
   * this source is empty.
   *
   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
   * as if it does.
   *
   * @throws IOException if an I/O error occurs while reading from this source
   */
  public ImmutableList<String> readLines() throws IOException {
    Closer closer = Closer.create();
    try {
      BufferedReader reader = closer.register(openBufferedStream());
      List<String> result = Lists.newArrayList();
      String line;
      while ((line = reader.readLine()) != null) {
        result.add(line);
      }
      return ImmutableList.copyOf(result);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Reads lines of text from this source, processing each line as it is read using the given
   * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
   * returns {@code false} and returns the result produced by the processor.
   *
   * <p>Like {@link BufferedReader#readLine()}, this method considers a line to be a sequence of
   * text that is terminated by (but does not include) one of {@code \r\n}, {@code \r} or
   * {@code \n}. If the source's content does not end in a line termination sequence, it is treated
   * as if it does.
   *
   * @throws IOException if an I/O error occurs while reading from this source or if
   *     {@code processor} throws an {@code IOException}
   * @since 16.0
   */
  @Beta
  @CanIgnoreReturnValue // some processors won't return a useful result
  public <T> T readLines(LineProcessor<T> processor) throws IOException {
    checkNotNull(processor);

    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      return CharStreams.readLines(reader, processor);
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Returns whether the source has zero chars. The default implementation first checks
   * {@link #lengthIfKnown}, returning true if it's known to be zero and false if it's known to be
   * non-zero. If the length is not known, it falls back to opening a stream and checking for EOF.
   *
   * <p>Note that, in cases where {@code lengthIfKnown} returns zero, it is <i>possible</i> that
   * chars are actually available for reading. This means that a source may return {@code true} from
   * {@code isEmpty()} despite having readable content.
   *
   * @throws IOException if an I/O error occurs
   * @since 15.0
   */
  public boolean isEmpty() throws IOException {
    Optional<Long> lengthIfKnown = lengthIfKnown();
    if (lengthIfKnown.isPresent()) {
      return lengthIfKnown.get() == 0L;
    }
    Closer closer = Closer.create();
    try {
      Reader reader = closer.register(openStream());
      // A first read() of -1 means the stream is already at EOF.
      return reader.read() == -1;
    } catch (Throwable e) {
      throw closer.rethrow(e);
    } finally {
      closer.close();
    }
  }

  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @since 15.0
   */
  public static CharSource concat(Iterable<? extends CharSource> sources) {
    return new ConcatenatedCharSource(sources);
  }

  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this method
   * is called. This will fail if the iterator is infinite and may cause problems if the iterator
   * eagerly fetches data for each source when iterated (rather than producing sources that only
   * load data through their streams). Prefer using the {@link #concat(Iterable)} overload if
   * possible.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static CharSource concat(Iterator<? extends CharSource> sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
   * the source will contain the concatenated data from the streams of the underlying sources.
   *
   * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
   * close the open underlying stream.
   *
   * @param sources the sources to concatenate
   * @return a {@code CharSource} containing the concatenated data
   * @throws NullPointerException if any of {@code sources} is {@code null}
   * @since 15.0
   */
  public static CharSource concat(CharSource... sources) {
    return concat(ImmutableList.copyOf(sources));
  }

  /**
   * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
   * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
   * the {@code charSequence} is mutated while it is being read, so don't do that.
   *
   * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
   */
  public static CharSource wrap(CharSequence charSequence) {
    // Strings get a specialized subclass; see StringCharSource for what it optimizes.
    return charSequence instanceof String
        ? new StringCharSource((String) charSequence)
        : new CharSequenceCharSource(charSequence);
  }

  /**
   * Returns an immutable {@link CharSource} that contains no characters.
   *
   * @since 15.0
   */
  public static CharSource empty() {
    return EmptyCharSource.INSTANCE;
  }

  /**
   * A byte source that reads chars from this source and encodes them as bytes using a charset.
   */
  private final class AsByteSource extends ByteSource {

    final Charset charset;

    AsByteSource(Charset charset) {
      this.charset = checkNotNull(charset);
    }

    /**
     * Returns the enclosing {@code CharSource} itself when asked for the charset this view was
     * created with, so that no encode/decode round trip happens; otherwise defers to the
     * superclass implementation.
     */
    @Override
    public CharSource asCharSource(Charset charset) {
      if (charset.equals(this.charset)) {
        return CharSource.this;
      }
      return super.asCharSource(charset);
    }

    @Override
    public InputStream openStream() throws IOException {
      // NOTE(review): 8192 is presumably the ReaderInputStream buffer length -- confirm against
      // that class before changing it.
      return new ReaderInputStream(CharSource.this.openStream(), charset, 8192);
    }

    @Override
    public String toString() {
      return CharSource.this.toString() + ".asByteSource(" + charset + ")";
    }
  }

  /**
   * A {@code CharSource} backed by an in-memory {@link CharSequence}. Length, emptiness, full reads
   * and line-based reads are answered directly from the sequence instead of through a
   * {@code Reader}.
   */
  private static class CharSequenceCharSource extends CharSource {

    private static final Splitter LINE_SPLITTER = Splitter.onPattern("\r\n|\n|\r");

    protected final CharSequence seq;

    protected CharSequenceCharSource(CharSequence seq) {
      this.seq = checkNotNull(seq);
    }

    @Override
    public Reader openStream() {
      return new CharSequenceReader(seq);
    }

    @Override
    public String read() {
      return seq.toString();
    }

    @Override
    public boolean isEmpty() {
      return seq.length() == 0;
    }

    @Override
    public long length() {
      return seq.length();
    }

    @Override
    public Optional<Long> lengthIfKnown() {
      return Optional.of((long) seq.length());
    }

    /**
     * Returns an iterator over the lines in the string. If the string ends in a newline, a final
     * empty string is not included, to match the behavior of BufferedReader/LineReader.readLine().
     */
    private Iterator<String> linesIterator() {
      return new AbstractIterator<String>() {
        Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();

        @Override
        protected String computeNext() {
          if (lines.hasNext()) {
            String next = lines.next();
            // skip last line if it's empty
            if (lines.hasNext() || !next.isEmpty()) {
              return next;
            }
          }
          return endOfData();
        }
      };
    }

    @Override
    public String readFirstLine() {
      Iterator<String> lines = linesIterator();
      return lines.hasNext() ? lines.next() : null;
    }

    @Override
    public ImmutableList<String> readLines() {
      return ImmutableList.copyOf(linesIterator());
    }

    @Override
    public <T> T readLines(LineProcessor<T> processor) throws IOException {
      // Null-check up front, as the base-class implementation does.
      checkNotNull(processor);
      Iterator<String> lines = linesIterator();
      while (lines.hasNext()) {
        if (!processor.processLine(lines.next())) {
          break;
        }
      }
      return processor.getResult();
    }

    @Override
    public String toString() {
      return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
    }
  }

  /**
   * Subclass specialized for string instances.
   *
   * <p>Since Strings are immutable and built into the jdk we can optimize some operations
   *
   * <ul>
   * <li>use {@link StringReader} instead of {@link CharSequenceReader}. It is faster since it can
   *     use {@link String#getChars(int, int, char[], int)} instead of copying characters one by
   *     one with {@link CharSequence#charAt(int)}.
   * <li>use {@link Appendable#append(CharSequence)} in {@link #copyTo(Appendable)} and
   *     {@link Writer#write(String)} in {@link #copyTo(CharSink)}. We know this is correct since
   *     strings are immutable and so the length can't change, and it is faster because many
   *     writers and appendables are optimized for appending string instances.
   * </ul>
   */
  private static class StringCharSource extends CharSequenceCharSource {
    protected StringCharSource(String seq) {
      super(seq);
    }

    @Override
    public Reader openStream() {
      return new StringReader((String) seq);
    }

    @Override
    public long copyTo(Appendable appendable) throws IOException {
      // Null-check up front, as the base-class implementation and copyTo(CharSink) do.
      checkNotNull(appendable);
      appendable.append(seq);
      return seq.length();
    }

    @Override
    public long copyTo(CharSink sink) throws IOException {
      checkNotNull(sink);
      Closer closer = Closer.create();
      try {
        Writer writer = closer.register(sink.openStream());
        writer.write((String) seq);
        return seq.length();
      } catch (Throwable e) {
        throw closer.rethrow(e);
      } finally {
        closer.close();
      }
    }
  }

  /**
   * The singleton source returned by {@link CharSource#empty()}: a string source wrapping
   * {@code ""}.
   */
  private static final class EmptyCharSource extends StringCharSource {

    private static final EmptyCharSource INSTANCE = new EmptyCharSource();

    private EmptyCharSource() {
      super("");
    }

    @Override
    public String toString() {
      return "CharSource.empty()";
    }
  }

  /**
   * The source returned by the {@code concat} methods. It keeps a reference to the given iterable
   * of sources and iterates it again on every call.
   */
  private static final class ConcatenatedCharSource extends CharSource {

    private final Iterable<? extends CharSource> sources;

    ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
      this.sources = checkNotNull(sources);
    }

    @Override
    public Reader openStream() throws IOException {
      return new MultiReader(sources.iterator());
    }

    /** Returns {@code true} only if every underlying source reports that it is empty. */
    @Override
    public boolean isEmpty() throws IOException {
      for (CharSource source : sources) {
        if (!source.isEmpty()) {
          return false;
        }
      }
      return true;
    }

    /**
     * Returns the sum of the known lengths of the underlying sources, or {@link Optional#absent}
     * as soon as one of them does not know its length.
     */
    @Override
    public Optional<Long> lengthIfKnown() {
      long result = 0L;
      for (CharSource source : sources) {
        Optional<Long> lengthIfKnown = source.lengthIfKnown();
        if (!lengthIfKnown.isPresent()) {
          return Optional.absent();
        }
        result += lengthIfKnown.get();
      }
      return Optional.of(result);
    }

    /** Returns the sum of {@link CharSource#length()} over the underlying sources. */
    @Override
    public long length() throws IOException {
      long result = 0L;
      for (CharSource source : sources) {
        result += source.length();
      }
      return result;
    }

    @Override
    public String toString() {
      return "CharSource.concat(" + sources + ")";
    }
  }
}